From: Kim Phillips kim.phillips@amd.com
[ Upstream commit f967140dfb7442e2db0868b03b961f9c59418a1b ]
Enable the sampling check in kernel/events/core.c::perf_event_open(), which returns the more appropriate -EOPNOTSUPP.
BEFORE:
$ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: The sys_perf_event_open() syscall returned with 22 (Invalid argument) for event (l3_request_g1.caching_l3_cache_accesses). /bin/dmesg | grep -i perf may provide additional information.
With nothing relevant in dmesg.
AFTER:
$ sudo perf record -a -e instructions,l3_request_g1.caching_l3_cache_accesses true Error: l3_request_g1.caching_l3_cache_accesses: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'
Fixes: c43ca5091a37 ("perf/x86/amd: Add support for AMD NB and L2I "uncore" counters") Signed-off-by: Kim Phillips kim.phillips@amd.com Signed-off-by: Borislav Petkov bp@suse.de Acked-by: Peter Zijlstra peterz@infradead.org Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20200311191323.13124-1-kim.phillips@amd.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/events/amd/uncore.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index baa7e36..604a855 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -193,20 +193,18 @@ static int amd_uncore_event_init(struct perf_event *event)
/* * NB and Last level cache counters (MSRs) are shared across all cores - * that share the same NB / Last level cache. Interrupts can be directed - * to a single target core, however, event counts generated by processes - * running on other cores cannot be masked out. So we do not support - * sampling and per-thread events. + * that share the same NB / Last level cache. On family 16h and below, + * Interrupts can be directed to a single target core, however, event + * counts generated by processes running on other cores cannot be masked + * out. So we do not support sampling and per-thread events via + * CAP_NO_INTERRUPT, and we do not enable counter overflow interrupts: */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL;
/* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL;
- /* and we do not enable counter overflow interrupts */ hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; hwc->idx = -1;
@@ -314,6 +312,7 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, };
static struct pmu amd_llc_pmu = { @@ -324,6 +323,7 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, .start = amd_uncore_start, .stop = amd_uncore_stop, .read = amd_uncore_read, + .capabilities = PERF_PMU_CAP_NO_INTERRUPT, };
static struct amd_uncore *amd_uncore_alloc(unsigned int cpu)
From: Faiz Abbas faiz_abbas@ti.com
[ Upstream commit 5b0d62108b468b13410533c0ceea3821942bf592 ]
The TRM (SPRUIC2C - January 2017 - Revised May 2018 [1]) forbids assertion of data reset while tuning is happening. Implement a platform specific callback that takes care of this condition.
[1] http://www.ti.com/lit/pdf/spruic2 Section 25.5.1.2.4
Acked-by: Kishon Vijay Abraham I kishon@ti.com Signed-off-by: Faiz Abbas faiz_abbas@ti.com Acked-by: Adrian Hunter adrian.hunter@intel.com Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/sdhci-omap.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index d02f5cf..8a17257 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -115,6 +115,7 @@ struct sdhci_omap_host {
struct pinctrl *pinctrl; struct pinctrl_state **pinctrl_state; + bool is_tuning; };
static void sdhci_omap_start_clock(struct sdhci_omap_host *omap_host); @@ -326,6 +327,8 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) dcrc_was_enabled = true; }
+ omap_host->is_tuning = true; + while (phase_delay <= MAX_PHASE_DELAY) { sdhci_omap_set_dll(omap_host, phase_delay);
@@ -363,9 +366,12 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) phase_delay = max_window + 4 * (max_len >> 1); sdhci_omap_set_dll(omap_host, phase_delay);
+ omap_host->is_tuning = false; + goto ret;
tuning_error: + omap_host->is_tuning = false; dev_err(dev, "Tuning failed\n"); sdhci_omap_disable_tuning(omap_host);
@@ -695,6 +701,18 @@ static void sdhci_omap_set_uhs_signaling(struct sdhci_host *host, sdhci_omap_start_clock(omap_host); }
+void sdhci_omap_reset(struct sdhci_host *host, u8 mask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + + /* Don't reset data lines during tuning operation */ + if (omap_host->is_tuning) + mask &= ~SDHCI_RESET_DATA; + + sdhci_reset(host, mask); +} + static struct sdhci_ops sdhci_omap_ops = { .set_clock = sdhci_omap_set_clock, .set_power = sdhci_omap_set_power, @@ -703,7 +721,7 @@ static void sdhci_omap_set_uhs_signaling(struct sdhci_host *host, .get_min_clock = sdhci_omap_get_min_clock, .set_bus_width = sdhci_omap_set_bus_width, .platform_send_init_74_clocks = sdhci_omap_init_74_clocks, - .reset = sdhci_reset, + .reset = sdhci_omap_reset, .set_uhs_signaling = sdhci_omap_set_uhs_signaling, };
From: Faiz Abbas faiz_abbas@ti.com
[ Upstream commit 961de0a856e3a30c0238d1269c0b17f9b179b6c3 ]
Errata i929 in certain OMAP5/DRA7XX/AM57XX silicon revisions (SPRZ426D - November 2014 - Revised February 2018 [1]) mentions unexpected tuning pattern errors. A small failure band may be present in the tuning range which may be missed by the current algorithm. Furthermore, the failure bands vary with temperature leading to different optimum tuning values for different temperatures.
As suggested in the related Application Report (SPRACA9B - October 2017 - Revised July 2018 [2]), tuning should be done in two stages. In stage 1, assign the optimum ratio in the maximum pass window for the current temperature. In stage 2, if the chosen value is close to the small failure band, move away from it in the appropriate direction.
References: [1] http://www.ti.com/lit/pdf/sprz426 [2] http://www.ti.com/lit/pdf/SPRACA9
Signed-off-by: Faiz Abbas faiz_abbas@ti.com Acked-by: Adrian Hunter adrian.hunter@intel.com Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/Kconfig | 2 + drivers/mmc/host/sdhci-omap.c | 90 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 91 insertions(+), 1 deletion(-)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 694d082..79b8ac9 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -935,6 +935,8 @@ config MMC_SDHCI_XENON config MMC_SDHCI_OMAP tristate "TI SDHCI Controller Support" depends on MMC_SDHCI_PLTFM && OF + select THERMAL + select TI_SOC_THERMAL help This selects the Secure Digital Host Controller Interface (SDHCI) support present in TI's DRA7 SOCs. The controller supports diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 8a17257..5698af2 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -27,6 +27,7 @@ #include <linux/regulator/consumer.h> #include <linux/pinctrl/consumer.h> #include <linux/sys_soc.h> +#include <linux/thermal.h>
#include "sdhci-pltfm.h"
@@ -290,15 +291,19 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) struct sdhci_host *host = mmc_priv(mmc); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + struct thermal_zone_device *thermal_dev; struct device *dev = omap_host->dev; struct mmc_ios *ios = &mmc->ios; u32 start_window = 0, max_window = 0; + bool single_point_failure = false; bool dcrc_was_enabled = false; u8 cur_match, prev_match = 0; u32 length = 0, max_len = 0; u32 phase_delay = 0; + int temperature; int ret = 0; u32 reg; + int i;
pltfm_host = sdhci_priv(host); omap_host = sdhci_pltfm_priv(pltfm_host); @@ -312,6 +317,16 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) if (ios->timing == MMC_TIMING_UHS_SDR50 && !(reg & CAPA2_TSDR50)) return 0;
+ thermal_dev = thermal_zone_get_zone_by_name("cpu_thermal"); + if (IS_ERR(thermal_dev)) { + dev_err(dev, "Unable to get thermal zone for tuning\n"); + return PTR_ERR(thermal_dev); + } + + ret = thermal_zone_get_temp(thermal_dev, &temperature); + if (ret) + return ret; + reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_DLL); reg |= DLL_SWT; sdhci_omap_writel(omap_host, SDHCI_OMAP_DLL, reg); @@ -329,6 +344,11 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode)
omap_host->is_tuning = true;
+ /* + * Stage 1: Search for a maximum pass window ignoring any + * any single point failures. If the tuning value ends up + * near it, move away from it in stage 2 below + */ while (phase_delay <= MAX_PHASE_DELAY) { sdhci_omap_set_dll(omap_host, phase_delay);
@@ -336,10 +356,15 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) if (cur_match) { if (prev_match) { length++; + } else if (single_point_failure) { + /* ignore single point failure */ + length++; } else { start_window = phase_delay; length = 1; } + } else { + single_point_failure = prev_match; }
if (length > max_len) { @@ -357,13 +382,76 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) goto tuning_error; }
+ /* + * Assign tuning value as a ratio of maximum pass window based + * on temperature + */ + if (temperature < -20000) + phase_delay = min(max_window + 4 * max_len - 24, + max_window + + DIV_ROUND_UP(13 * max_len, 16) * 4); + else if (temperature < 20000) + phase_delay = max_window + DIV_ROUND_UP(9 * max_len, 16) * 4; + else if (temperature < 40000) + phase_delay = max_window + DIV_ROUND_UP(8 * max_len, 16) * 4; + else if (temperature < 70000) + phase_delay = max_window + DIV_ROUND_UP(7 * max_len, 16) * 4; + else if (temperature < 90000) + phase_delay = max_window + DIV_ROUND_UP(5 * max_len, 16) * 4; + else if (temperature < 120000) + phase_delay = max_window + DIV_ROUND_UP(4 * max_len, 16) * 4; + else + phase_delay = max_window + DIV_ROUND_UP(3 * max_len, 16) * 4; + + /* + * Stage 2: Search for a single point failure near the chosen tuning + * value in two steps. First in the +3 to +10 range and then in the + * +2 to -10 range. If found, move away from it in the appropriate + * direction by the appropriate amount depending on the temperature. + */ + for (i = 3; i <= 10; i++) { + sdhci_omap_set_dll(omap_host, phase_delay + i); + + if (mmc_send_tuning(mmc, opcode, NULL)) { + if (temperature < 10000) + phase_delay += i + 6; + else if (temperature < 20000) + phase_delay += i - 12; + else if (temperature < 70000) + phase_delay += i - 8; + else + phase_delay += i - 6; + + goto single_failure_found; + } + } + + for (i = 2; i >= -10; i--) { + sdhci_omap_set_dll(omap_host, phase_delay + i); + + if (mmc_send_tuning(mmc, opcode, NULL)) { + if (temperature < 10000) + phase_delay += i + 12; + else if (temperature < 20000) + phase_delay += i + 8; + else if (temperature < 70000) + phase_delay += i + 8; + else if (temperature < 90000) + phase_delay += i + 10; + else + phase_delay += i + 12; + + goto single_failure_found; + } + } + +single_failure_found: reg = sdhci_omap_readl(omap_host, SDHCI_OMAP_AC12); if (!(reg & AC12_SCLK_SEL)) { ret = -EIO; goto tuning_error; }
- phase_delay = max_window + 4 * (max_len >> 1); sdhci_omap_set_dll(omap_host, phase_delay);
omap_host->is_tuning = false;
From: Faiz Abbas faiz_abbas@ti.com
[ Upstream commit 287b1da6a458a30da2e5be745498d31092ebb001 ]
Commit 961de0a856e3 ("mmc: sdhci-omap: Workaround errata regarding SDR104/HS200 tuning failures (i929)") added a select on TI_SOC_THERMAL for the driver to get temperature for tuning.
However, this causes the following warning on keystone_defconfig because keystone does not support TI_SOC_THERMAL:
"WARNING: unmet direct dependencies detected for TI_SOC_THERMAL"
Fix this by changing the select to imply.
Fixes: 961de0a856e3 ("mmc: sdhci-omap: Workaround errata regarding SDR104/HS200 tuning failures (i929)") Signed-off-by: Faiz Abbas faiz_abbas@ti.com Tested-by: Borislav Petkov bp@suse.de Acked-by: Adrian Hunter adrian.hunter@intel.com Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 79b8ac9..b7f809a 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -936,7 +936,7 @@ config MMC_SDHCI_OMAP tristate "TI SDHCI Controller Support" depends on MMC_SDHCI_PLTFM && OF select THERMAL - select TI_SOC_THERMAL + imply TI_SOC_THERMAL help This selects the Secure Digital Host Controller Interface (SDHCI) support present in TI's DRA7 SOCs. The controller supports
From: Jean Delvare jdelvare@suse.de
[ Upstream commit 3f9e12e0df012c4a9a7fd7eb0d3ae69b459d6b2c ]
In case the WDAT interface is broken, give the user an option to ignore it to let a native driver bind to the watchdog device instead.
Signed-off-by: Jean Delvare jdelvare@suse.de Acked-by: Mika Westerberg mika.westerberg@linux.intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ drivers/acpi/acpi_watchdog.c | 12 +++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index b889430..1a55cb4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -136,6 +136,10 @@ dynamic table installation which will install SSDT tables to /sys/firmware/acpi/tables/dynamic.
+ acpi_no_watchdog [HW,ACPI,WDT] + Ignore the ACPI-based watchdog interface (WDAT) and let + a native driver control the watchdog device instead. + acpi_rsdp= [ACPI,EFI,KEXEC] Pass the RSDP address to the kernel, mostly used on machines running EFI runtime service to boot the diff --git a/drivers/acpi/acpi_watchdog.c b/drivers/acpi/acpi_watchdog.c index 23cde3d..0bd1899 100644 --- a/drivers/acpi/acpi_watchdog.c +++ b/drivers/acpi/acpi_watchdog.c @@ -58,12 +58,14 @@ static bool acpi_watchdog_uses_rtc(const struct acpi_table_wdat *wdat) } #endif
+static bool acpi_no_watchdog; + static const struct acpi_table_wdat *acpi_watchdog_get_wdat(void) { const struct acpi_table_wdat *wdat = NULL; acpi_status status;
- if (acpi_disabled) + if (acpi_disabled || acpi_no_watchdog) return NULL;
status = acpi_get_table(ACPI_SIG_WDAT, 0, @@ -91,6 +93,14 @@ bool acpi_has_watchdog(void) } EXPORT_SYMBOL_GPL(acpi_has_watchdog);
+/* ACPI watchdog can be disabled on boot command line */ +static int __init disable_acpi_watchdog(char *str) +{ + acpi_no_watchdog = true; + return 1; +} +__setup("acpi_no_watchdog", disable_acpi_watchdog); + void __init acpi_watchdog_init(void) { const struct acpi_wdat_entry *entries;
From: Mansour Behabadi mansour@oxplot.com
[ Upstream commit e433be929e63265b7412478eb7ff271467aee2d7 ]
Magic Keyboards with more recent firmware (0x0100) report Fn key differently. Without this patch, Fn key may not behave as expected and may not be configurable via hid_apple fnmode module parameter.
Signed-off-by: Mansour Behabadi mansour@oxplot.com Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-apple.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/hid/hid-apple.c b/drivers/hid/hid-apple.c index d0a81a0..8ab8f23 100644 --- a/drivers/hid/hid-apple.c +++ b/drivers/hid/hid-apple.c @@ -343,7 +343,8 @@ static int apple_input_mapping(struct hid_device *hdev, struct hid_input *hi, unsigned long **bit, int *max) { if (usage->hid == (HID_UP_CUSTOM | 0x0003) || - usage->hid == (HID_UP_MSVENDOR | 0x0003)) { + usage->hid == (HID_UP_MSVENDOR | 0x0003) || + usage->hid == (HID_UP_HPVENDOR2 | 0x0003)) { /* The fn key on Apple USB keyboards */ set_bit(EV_REP, hi->input->evbit); hid_map_usage_clear(hi, usage, bit, max, EV_KEY, KEY_FN);
From: Kai-Heng Feng kai.heng.feng@canonical.com
[ Upstream commit be0aba826c4a6ba5929def1962a90d6127871969 ]
The Surfbook E11B uses the SIPODEV SP1064 touchpad, which does not supply descriptors, so it has to be added to the override list.
BugLink: https://bugs.launchpad.net/bugs/1858299 Signed-off-by: Kai-Heng Feng kai.heng.feng@canonical.com Reviewed-by: Hans de Goede hdegoede@redhat.com Signed-off-by: Benjamin Tissoires benjamin.tissoires@redhat.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 8 ++++++++ 1 file changed, 8 insertions(+)
diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c index 10af8585..9505237 100644 --- a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -342,6 +342,14 @@ struct i2c_hid_desc_override { .driver_data = (void *)&sipodev_desc }, { + .ident = "Trekstor SURFBOOK E11B", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SURFBOOK E11B"), + }, + .driver_data = (void *)&sipodev_desc + }, + { .ident = "Direkt-Tek DTLAPY116-2", .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"),
From: Johannes Berg johannes.berg@intel.com
[ Upstream commit a7ee7d44b57c9ae174088e53a668852b7f4f452d ]
We may end up with a NULL reg_rule after the loop in handle_channel_custom() if the bandwidth didn't fit, check if this is the case and bail out if so.
Signed-off-by: Johannes Berg johannes.berg@intel.com Link: https://lore.kernel.org/r/20200221104449.3b558a50201c.I4ad3725c4dacaefd2d18d... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 018c60b..32f5758 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2269,7 +2269,7 @@ static void handle_channel_custom(struct wiphy *wiphy, break; }
- if (IS_ERR(reg_rule)) { + if (IS_ERR_OR_NULL(reg_rule)) { pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n", chan->center_freq); if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
From: Igor Druzhinin igor.druzhinin@citrix.com
[ Upstream commit ff6993bb79b9f99bdac0b5378169052931b65432 ]
fc_disc_gpn_id_resp() should be the last function using it so free it here to avoid memory leak.
Link: https://lore.kernel.org/r/1579013000-14570-2-git-send-email-igor.druzhinin@c... Reviewed-by: Hannes Reinecke hare@suse.de Signed-off-by: Igor Druzhinin igor.druzhinin@citrix.com Signed-off-by: Martin K. Petersen martin.petersen@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/scsi/libfc/fc_disc.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index f969a71..8839f50 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -640,6 +640,8 @@ static void fc_disc_gpn_id_resp(struct fc_seq *sp, struct fc_frame *fp, } out: kref_put(&rdata->kref, fc_rport_destroy); + if (!IS_ERR(fp)) + fc_frame_free(fp); }
/**
From: Daniele Palmas dnlplm@gmail.com
[ Upstream commit eae7172f8141eb98e64e6e81acc9e9d5b2add127 ]
usbnet creates network interfaces with min_mtu = 0 and max_mtu = ETH_MAX_MTU.
These values are not modified by qmi_wwan when the network interface is created initially, allowing, for example, to set mtu greater than 1500.
When a raw_ip switch is done (raw_ip set to 'Y', then set to 'N') the mtu values for the network interface are set through ether_setup, with min_mtu = ETH_MIN_MTU and max_mtu = ETH_DATA_LEN, not allowing anymore to set mtu greater than 1500 (error: mtu greater than device maximum).
The patch restores the original min/max mtu values set by usbnet after a raw_ip switch.
Signed-off-by: Daniele Palmas dnlplm@gmail.com Acked-by: Bjørn Mork bjorn@mork.no Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/usb/qmi_wwan.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index a04f857..1d60ccd 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -274,6 +274,9 @@ static void qmi_wwan_netdev_setup(struct net_device *net) netdev_dbg(net, "mode: raw IP\n"); } else if (!net->header_ops) { /* don't bother if already set */ ether_setup(net); + /* Restoring min/max mtu values set originally by usbnet */ + net->min_mtu = 0; + net->max_mtu = ETH_MAX_MTU; clear_bit(EVENT_NO_IP_ALIGN, &dev->flags); netdev_dbg(net, "mode: Ethernet\n"); }
From: Marek Vasut marex@denx.de
[ Upstream commit 44343418d0f2f623cb9da6f5000df793131cbe3b ]
The KS8851 requires that packet RX and TX are mutually exclusive. Currently, the driver hopes to achieve this by disabling interrupt from the card by writing the card registers and by disabling the interrupt on the interrupt controller. This however is racy on SMP.
Replace this approach by expanding the spinlock used around the ks_start_xmit() TX path to ks_irq() RX path to assure true mutual exclusion and remove the interrupt enabling/disabling, which is now not needed anymore. Furthermore, disable interrupts also in ks_net_stop(), which was missing before.
Note that a massive improvement here would be to re-use the KS8851 driver approach, which is to move the TX path into a worker thread, interrupt handling to threaded interrupt, and synchronize everything with mutexes, but that would be a much bigger rework, for a separate patch.
Signed-off-by: Marek Vasut marex@denx.de Cc: David S. Miller davem@davemloft.net Cc: Lukas Wunner lukas@wunner.de Cc: Petr Stetiar ynezz@true.cz Cc: YueHaibing yuehaibing@huawei.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/micrel/ks8851_mll.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 9de59fa..a5525bf 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -832,14 +832,17 @@ static irqreturn_t ks_irq(int irq, void *pw) { struct net_device *netdev = pw; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags; u16 status;
+ spin_lock_irqsave(&ks->statelock, flags); /*this should be the first in IRQ handler */ ks_save_cmd_reg(ks);
status = ks_rdreg16(ks, KS_ISR); if (unlikely(!status)) { ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_NONE; }
@@ -865,6 +868,7 @@ static irqreturn_t ks_irq(int irq, void *pw) ks->netdev->stats.rx_over_errors++; /* this should be the last in IRQ handler*/ ks_restore_cmd_reg(ks); + spin_unlock_irqrestore(&ks->statelock, flags); return IRQ_HANDLED; }
@@ -934,6 +938,7 @@ static int ks_net_stop(struct net_device *netdev)
/* shutdown RX/TX QMU */ ks_disable_qmu(ks); + ks_disable_int(ks);
/* set powermode to soft power down to save power */ ks_set_powermode(ks, PMECR_PM_SOFTDOWN); @@ -990,10 +995,9 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) { netdev_tx_t retv = NETDEV_TX_OK; struct ks_net *ks = netdev_priv(netdev); + unsigned long flags;
- disable_irq(netdev->irq); - ks_disable_int(ks); - spin_lock(&ks->statelock); + spin_lock_irqsave(&ks->statelock, flags);
/* Extra space are required: * 4 byte for alignment, 4 for status/length, 4 for CRC @@ -1007,9 +1011,7 @@ static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev) dev_kfree_skb(skb); } else retv = NETDEV_TX_BUSY; - spin_unlock(&ks->statelock); - ks_enable_int(ks); - enable_irq(netdev->irq); + spin_unlock_irqrestore(&ks->statelock, flags); return retv; }
From: Madhuparna Bhowmik madhuparnabhowmik10@gmail.com
[ Upstream commit 253216ffb2a002a682c6f68bd3adff5b98b71de8 ]
local->sta_mtx is held in __ieee80211_check_fast_rx_iface(). No need to use list_for_each_entry_rcu() as it also requires a cond argument to avoid false lockdep warnings when not used in RCU read-side section (with CONFIG_PROVE_RCU_LIST). Therefore use list_for_each_entry();
Signed-off-by: Madhuparna Bhowmik madhuparnabhowmik10@gmail.com Link: https://lore.kernel.org/r/20200223143302.15390-1-madhuparnabhowmik10@gmail.c... Signed-off-by: Johannes Berg johannes.berg@intel.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 02d0b22..c7c456c 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -4042,7 +4042,7 @@ void __ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->sta_mtx);
- list_for_each_entry_rcu(sta, &local->sta_list, list) { + list_for_each_entry(sta, &local->sta_list, list) { if (sdata != sta->sdata && (!sta->sdata->bss || sta->sdata->bss != sdata->bss)) continue;
From: Linus Torvalds torvalds@linux-foundation.org
[ Upstream commit fda31c50292a5062332fa0343c084bd9f46604d9 ]
When queueing a signal, we increment both the users count of pending signals (for RLIMIT_SIGPENDING tracking) and we increment the refcount of the user struct itself (because we keep a reference to the user in the signal structure in order to correctly account for it when freeing).
That turns out to be fairly expensive, because both of them are atomic updates, and particularly under extreme signal handling pressure on big machines, you can get a lot of cache contention on the user struct. That can then cause horrid cacheline ping-pong when you do these multiple accesses.
So change the reference counting to only pin the user for the _first_ pending signal, and to unpin it when the last pending signal is dequeued. That means that when a user sees a lot of concurrent signal queuing - which is the only situation when this matters - the only atomic access needed is generally the 'sigpending' count update.
This was noticed because of a particularly odd timing artifact on a dual-socket 96C/192T Cascade Lake platform: when you get into bad contention, on that machine for some reason seems to be much worse when the contention happens in the upper 32-byte half of the cacheline.
As a result, the kernel test robot will-it-scale 'signal1' benchmark had an odd performance regression simply due to random alignment of the 'struct user_struct' (and pointed to a completely unrelated and apparently nonsensical commit for the regression).
Avoiding the double increments (and decrements on the dequeueing side, of course) makes for much less contention and hugely improved performance on that will-it-scale microbenchmark.
Quoting Feng Tang:
"It makes a big difference, that the performance score is tripled! bump from original 17000 to 54000. Also the gap between 5.0-rc6 and 5.0-rc6+Jiri's patch is reduced to around 2%"
[ The "2% gap" is the odd cacheline placement difference on that platform: under the extreme contention case, the effect of which half of the cacheline was hot was 5%, so with the reduced contention the odd timing artifact is reduced too ]
It does help in the non-contended case too, but is not nearly as noticeable.
Reported-and-tested-by: Feng Tang feng.tang@intel.com Cc: Eric W. Biederman ebiederm@xmission.com Cc: Huang, Ying ying.huang@intel.com Cc: Philip Li philip.li@intel.com Cc: Andi Kleen andi.kleen@intel.com Cc: Jiri Olsa jolsa@redhat.com Cc: Peter Zijlstra peterz@infradead.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/signal.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/kernel/signal.c b/kernel/signal.c index 7d98716a..0b6e302 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -407,27 +407,32 @@ void task_join_group_stop(struct task_struct *task) { struct sigqueue *q = NULL; struct user_struct *user; + int sigpending;
/* * Protect access to @t credentials. This can go away when all * callers hold rcu read lock. + * + * NOTE! A pending signal will hold on to the user refcount, + * and we get/put the refcount only when the sigpending count + * changes from/to zero. */ rcu_read_lock(); - user = get_uid(__task_cred(t)->user); - atomic_inc(&user->sigpending); + user = __task_cred(t)->user; + sigpending = atomic_inc_return(&user->sigpending); + if (sigpending == 1) + get_uid(user); rcu_read_unlock();
- if (override_rlimit || - atomic_read(&user->sigpending) <= - task_rlimit(t, RLIMIT_SIGPENDING)) { + if (override_rlimit || likely(sigpending <= task_rlimit(t, RLIMIT_SIGPENDING))) { q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig); }
if (unlikely(q == NULL)) { - atomic_dec(&user->sigpending); - free_uid(user); + if (atomic_dec_and_test(&user->sigpending)) + free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; @@ -441,8 +446,8 @@ static void __sigqueue_free(struct sigqueue *q) { if (q->flags & SIGQUEUE_PREALLOC) return; - atomic_dec(&q->user->sigpending); - free_uid(q->user); + if (atomic_dec_and_test(&q->user->sigpending)) + free_uid(q->user); kmem_cache_free(sigqueue_cachep, q); }
From: yangerkun yangerkun@huawei.com
[ Upstream commit f596c87005f7b1baeb7d62d9a9e25d68c3dfae10 ]
As the description before netdev_run_todo, we cannot call free_netdev before rtnl_unlock, fix it by reorder the code.
Signed-off-by: yangerkun yangerkun@huawei.com Reviewed-by: Oliver Hartkopp socketcan@hartkopp.net Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/slip/slip.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 93f303e..5d864f8 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -863,7 +863,10 @@ static int slip_open(struct tty_struct *tty) tty->disc_data = NULL; clear_bit(SLF_INUSE, &sl->flags); sl_free_netdev(sl->dev); + /* do not call free_netdev before rtnl_unlock */ + rtnl_unlock(); free_netdev(sl->dev); + return err;
err_exit: rtnl_unlock();
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 93b5cbfa9636d385126f211dca9efa7e3f683202 ]
rmnet registers IFLA_LINK interface as a lower interface. But, IFLA_LINK could be NULL. In the current code, rmnet doesn't check IFLA_LINK. So, panic would occur.
Test commands: modprobe rmnet ip link add rmnet0 type rmnet mux_id 1
Splat looks like: [ 36.826109][ T1115] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 36.838817][ T1115] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 36.839908][ T1115] CPU: 1 PID: 1115 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 36.840569][ T1115] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 36.841408][ T1115] RIP: 0010:rmnet_newlink+0x54/0x510 [rmnet] [ 36.841986][ T1115] Code: 83 ec 18 48 c1 e9 03 80 3c 01 00 0f 85 d4 03 00 00 48 8b 6a 28 48 b8 00 00 00 00 00 c [ 36.843923][ T1115] RSP: 0018:ffff8880b7e0f1c0 EFLAGS: 00010247 [ 36.844756][ T1115] RAX: dffffc0000000000 RBX: ffff8880d14cca00 RCX: 1ffff11016fc1e99 [ 36.845859][ T1115] RDX: 0000000000000000 RSI: ffff8880c3d04000 RDI: 0000000000000004 [ 36.846961][ T1115] RBP: 0000000000000000 R08: ffff8880b7e0f8b0 R09: ffff8880b6ac2d90 [ 36.848020][ T1115] R10: ffffffffc0589a40 R11: ffffed1016d585b7 R12: ffffffff88ceaf80 [ 36.848788][ T1115] R13: ffff8880c3d04000 R14: ffff8880b7e0f8b0 R15: ffff8880c3d04000 [ 36.849546][ T1115] FS: 00007f50ab3360c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 36.851784][ T1115] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 36.852422][ T1115] CR2: 000055871afe5ab0 CR3: 00000000ae246001 CR4: 00000000000606e0 [ 36.853181][ T1115] Call Trace: [ 36.853514][ T1115] __rtnl_newlink+0xbdb/0x1270 [ 36.853967][ T1115] ? lock_downgrade+0x6e0/0x6e0 [ 36.854420][ T1115] ? rtnl_link_unregister+0x220/0x220 [ 36.854936][ T1115] ? lock_acquire+0x164/0x3b0 [ 36.855376][ T1115] ? is_bpf_image_address+0xff/0x1d0 [ 36.855884][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.856304][ T1115] ? kernel_text_address+0x111/0x140 [ 36.856857][ T1115] ? __kernel_text_address+0xe/0x30 [ 36.857440][ T1115] ? unwind_get_return_address+0x5f/0xa0 [ 36.858063][ T1115] ? create_prof_cpu_mask+0x20/0x20 [ 36.858644][ T1115] ? arch_stack_walk+0x83/0xb0 [ 36.859171][ T1115] ? stack_trace_save+0x82/0xb0 [ 36.859710][ T1115] ? stack_trace_consume_entry+0x160/0x160 [ 36.860357][ T1115] ? deactivate_slab.isra.78+0x2c5/0x800 [ 36.860928][ T1115] ? kasan_unpoison_shadow+0x30/0x40 [ 36.861520][ T1115] ? kmem_cache_alloc_trace+0x135/0x350 [ 36.862125][ T1115] ? rtnl_newlink+0x4c/0x90 [ 36.864073][ T1115] rtnl_newlink+0x65/0x90 [ ... ]
Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 4c476fa..0fb9375 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -144,6 +144,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, int err = 0; u16 mux_id;
+ if (!tb[IFLA_LINK]) { + NL_SET_ERR_MSG_MOD(extack, "link not specified"); + return -EINVAL; + } + real_dev = __dev_get_by_index(src_net, nla_get_u32(tb[IFLA_LINK])); if (!real_dev || !dev) return -ENODEV;
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 1eb1f43a6e37282348a41e3d68f5e9a6a4359212 ]
In the rmnet_changelink(), it uses IFLA_LINK without checking NULL pointer. tb[IFLA_LINK] could be NULL pointer. So, NULL-ptr-deref could occur.
rmnet already has a lower interface (real_dev). So, after this patch, rmnet_changelink() does not use IFLA_LINK anymore.
Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set rmnet0 type rmnet mux_id 2
Splat looks like: [ 90.578726][ T1131] general protection fault, probably for non-canonical address 0xdffffc0000000000I [ 90.581121][ T1131] KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007] [ 90.582380][ T1131] CPU: 2 PID: 1131 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 90.584285][ T1131] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 90.587506][ T1131] RIP: 0010:rmnet_changelink+0x5a/0x8a0 [rmnet] [ 90.588546][ T1131] Code: 83 ec 20 48 c1 ea 03 80 3c 02 00 0f 85 6f 07 00 00 48 8b 5e 28 48 b8 00 00 00 00 00 0 [ 90.591447][ T1131] RSP: 0018:ffff8880ce78f1b8 EFLAGS: 00010247 [ 90.592329][ T1131] RAX: dffffc0000000000 RBX: 0000000000000000 RCX: ffff8880ce78f8b0 [ 90.593253][ T1131] RDX: 0000000000000000 RSI: ffff8880ce78f4a0 RDI: 0000000000000004 [ 90.594058][ T1131] RBP: ffff8880cf543e00 R08: 0000000000000002 R09: 0000000000000002 [ 90.594859][ T1131] R10: ffffffffc0586a40 R11: 0000000000000000 R12: ffff8880ca47c000 [ 90.595690][ T1131] R13: ffff8880ca47c000 R14: ffff8880cf545000 R15: 0000000000000000 [ 90.596553][ T1131] FS: 00007f21f6c7e0c0(0000) GS:ffff8880da400000(0000) knlGS:0000000000000000 [ 90.597504][ T1131] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 90.599418][ T1131] CR2: 0000556e413db458 CR3: 00000000c917a002 CR4: 00000000000606e0 [ 90.600289][ T1131] Call Trace: [ 90.600631][ T1131] __rtnl_newlink+0x922/0x1270 [ 90.601194][ T1131] ? lock_downgrade+0x6e0/0x6e0 [ 90.601724][ T1131] ? rtnl_link_unregister+0x220/0x220 [ 90.602309][ T1131] ? lock_acquire+0x164/0x3b0 [ 90.602784][ T1131] ? is_bpf_image_address+0xff/0x1d0 [ 90.603331][ T1131] ? rtnl_newlink+0x4c/0x90 [ 90.603810][ T1131] ? kernel_text_address+0x111/0x140 [ 90.604419][ T1131] ? __kernel_text_address+0xe/0x30 [ 90.604981][ T1131] ? unwind_get_return_address+0x5f/0xa0 [ 90.605616][ T1131] ? create_prof_cpu_mask+0x20/0x20 [ 90.606304][ T1131] ? arch_stack_walk+0x83/0xb0 [ 90.606985][ T1131] ? stack_trace_save+0x82/0xb0 [ 90.607656][ T1131] ? stack_trace_consume_entry+0x160/0x160 [ 90.608503][ T1131] ? deactivate_slab.isra.78+0x2c5/0x800 [ 90.609336][ T1131] ? kasan_unpoison_shadow+0x30/0x40 [ 90.610096][ T1131] ? kmem_cache_alloc_trace+0x135/0x350 [ 90.610889][ T1131] ? rtnl_newlink+0x4c/0x90 [ 90.611512][ T1131] rtnl_newlink+0x65/0x90 [ ... ]
Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 0fb9375..ad46f5a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -309,10 +309,8 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[], if (!dev) return -ENODEV;
- real_dev = __dev_get_by_index(dev_net(dev), - nla_get_u32(tb[IFLA_LINK])); - - if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + real_dev = priv->real_dev; + if (!rmnet_is_real_dev_registered(real_dev)) return -ENODEV;
port = rmnet_get_port_rtnl(real_dev);
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 102210f7664442d8c0ce332c006ea90626df745b ]
rmnet_get_port() internally calls rcu_dereference_rtnl(), which checks RTNL. But rmnet_get_port() could be called by packet path. The packet path is not protected by RTNL. So, the suspicious RCU usage problem occurs.
Test commands: modprobe rmnet ip netns add nst ip link add veth0 type veth peer name veth1 ip link set veth1 netns nst ip link add rmnet0 link veth0 type rmnet mux_id 1 ip netns exec nst ip link add rmnet1 link veth1 type rmnet mux_id 1 ip netns exec nst ip link set veth1 up ip netns exec nst ip link set rmnet1 up ip netns exec nst ip a a 192.168.100.2/24 dev rmnet1 ip link set veth0 up ip link set rmnet0 up ip a a 192.168.100.1/24 dev rmnet0 ping 192.168.100.2
Splat looks like: [ 146.630958][ T1174] WARNING: suspicious RCU usage [ 146.631735][ T1174] 5.6.0-rc1+ #447 Not tainted [ 146.632387][ T1174] ----------------------------- [ 146.633151][ T1174] drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c:386 suspicious rcu_dereference_check() ! [ 146.634742][ T1174] [ 146.634742][ T1174] other info that might help us debug this: [ 146.634742][ T1174] [ 146.645992][ T1174] [ 146.645992][ T1174] rcu_scheduler_active = 2, debug_locks = 1 [ 146.646937][ T1174] 5 locks held by ping/1174: [ 146.647609][ T1174] #0: ffff8880c31dea70 (sk_lock-AF_INET){+.+.}, at: raw_sendmsg+0xab8/0x2980 [ 146.662463][ T1174] #1: ffffffff93925660 (rcu_read_lock_bh){....}, at: ip_finish_output2+0x243/0x2150 [ 146.671696][ T1174] #2: ffffffff93925660 (rcu_read_lock_bh){....}, at: __dev_queue_xmit+0x213/0x2940 [ 146.673064][ T1174] #3: ffff8880c19ecd58 (&dev->qdisc_running_key#7){+...}, at: ip_finish_output2+0x714/0x2150 [ 146.690358][ T1174] #4: ffff8880c5796898 (&dev->qdisc_xmit_lock_key#3){+.-.}, at: sch_direct_xmit+0x1e2/0x1020 [ 146.699875][ T1174] [ 146.699875][ T1174] stack backtrace: [ 146.701091][ T1174] CPU: 0 PID: 1174 Comm: ping Not tainted 5.6.0-rc1+ #447 [ 146.705215][ T1174] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 146.706565][ T1174] Call Trace: [ 146.707102][ T1174] dump_stack+0x96/0xdb [ 146.708007][ T1174] rmnet_get_port.part.9+0x76/0x80 [rmnet] [ 146.709233][ T1174] rmnet_egress_handler+0x107/0x420 [rmnet] [ 146.710492][ T1174] ? sch_direct_xmit+0x1e2/0x1020 [ 146.716193][ T1174] rmnet_vnd_start_xmit+0x3d/0xa0 [rmnet] [ 146.717012][ T1174] dev_hard_start_xmit+0x160/0x740 [ 146.717854][ T1174] sch_direct_xmit+0x265/0x1020 [ 146.718577][ T1174] ? register_lock_class+0x14d0/0x14d0 [ 146.719429][ T1174] ? dev_watchdog+0xac0/0xac0 [ 146.723738][ T1174] ? __dev_queue_xmit+0x15fd/0x2940 [ 146.724469][ T1174] ? lock_acquire+0x164/0x3b0 [ 146.725172][ T1174] __dev_queue_xmit+0x20c7/0x2940 [ ... ]
Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 13 ++++++------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 2 +- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index ad46f5a..915165c 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -391,11 +391,10 @@ struct rtnl_link_ops rmnet_link_ops __read_mostly = { .fill_info = rmnet_fill_info, };
-/* Needs either rcu_read_lock() or rtnl lock */ -struct rmnet_port *rmnet_get_port(struct net_device *real_dev) +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev) { if (rmnet_is_real_dev_registered(real_dev)) - return rcu_dereference_rtnl(real_dev->rx_handler_data); + return rcu_dereference_bh(real_dev->rx_handler_data); else return NULL; } @@ -421,7 +420,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, struct rmnet_port *port, *slave_port; int err;
- port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev);
/* If there is more than one rmnet dev attached, its probably being * used for muxing. Skip the briding in that case @@ -436,7 +435,7 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY;
- slave_port = rmnet_get_port(slave_dev); + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev;
@@ -454,11 +453,11 @@ int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *real_dev = priv->real_dev; struct rmnet_port *port, *slave_port;
- port = rmnet_get_port(real_dev); + port = rmnet_get_port_rtnl(real_dev); port->rmnet_mode = RMNET_EPMODE_VND; port->bridge_ep = NULL;
- slave_port = rmnet_get_port(slave_dev); + slave_port = rmnet_get_port_rtnl(slave_dev); rmnet_unregister_real_device(slave_dev, slave_port);
netdev_dbg(slave_dev, "removed from rmnet as slave\n"); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 34ac45a..6a978f4 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -74,7 +74,7 @@ struct rmnet_priv { struct rmnet_priv_stats stats; };
-struct rmnet_port *rmnet_get_port(struct net_device *real_dev); +struct rmnet_port *rmnet_get_port_rcu(struct net_device *real_dev); struct rmnet_endpoint *rmnet_get_endpoint(struct rmnet_port *port, u8 mux_id); int rmnet_add_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev, diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 11167ab..5177d0f 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -193,7 +193,7 @@ rx_handler_result_t rmnet_rx_handler(struct sk_buff **pskb) return RX_HANDLER_PASS;
dev = skb->dev; - port = rmnet_get_port(dev); + port = rmnet_get_port_rcu(dev);
switch (port->rmnet_mode) { case RMNET_EPMODE_VND: @@ -226,7 +226,7 @@ void rmnet_egress_handler(struct sk_buff *skb) skb->dev = priv->real_dev; mux_id = priv->mux_id;
- port = rmnet_get_port(skb->dev); + port = rmnet_get_port_rcu(skb->dev); if (!port) goto drop;
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit c026d970102e9af9958edefb4a015702c6aab636 ]
The notifier_call() of the slave interface removes rmnet interface with unregister_netdevice_queue(). But, before calling unregister_netdevice_queue(), it acquires rcu readlock. In the RCU critical section, sleeping isn't be allowed. But, unregister_netdevice_queue() internally calls synchronize_net(), which would sleep. So, suspicious RCU usage warning occurs.
Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add dummy1 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link set dummy1 master rmnet0 ip link del dummy0
Splat looks like: [ 79.639245][ T1195] ============================= [ 79.640134][ T1195] WARNING: suspicious RCU usage [ 79.640852][ T1195] 5.6.0-rc1+ #447 Not tainted [ 79.641657][ T1195] ----------------------------- [ 79.642472][ T1195] ./include/linux/rcupdate.h:273 Illegal context switch in RCU read-side critical section! [ 79.644043][ T1195] [ 79.644043][ T1195] other info that might help us debug this: [ 79.644043][ T1195] [ 79.645682][ T1195] [ 79.645682][ T1195] rcu_scheduler_active = 2, debug_locks = 1 [ 79.646980][ T1195] 2 locks held by ip/1195: [ 79.647629][ T1195] #0: ffffffffa3cf64f0 (rtnl_mutex){+.+.}, at: rtnetlink_rcv_msg+0x457/0x890 [ 79.649312][ T1195] #1: ffffffffa39256c0 (rcu_read_lock){....}, at: rmnet_config_notify_cb+0xf0/0x590 [rmnet] [ 79.651717][ T1195] [ 79.651717][ T1195] stack backtrace: [ 79.652650][ T1195] CPU: 3 PID: 1195 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 79.653702][ T1195] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 79.655037][ T1195] Call Trace: [ 79.655560][ T1195] dump_stack+0x96/0xdb [ 79.656252][ T1195] ___might_sleep+0x345/0x440 [ 79.656994][ T1195] synchronize_net+0x18/0x30 [ 79.661132][ T1195] netdev_rx_handler_unregister+0x40/0xb0 [ 79.666266][ T1195] rmnet_unregister_real_device+0x42/0xb0 [rmnet] [ 79.667211][ T1195] rmnet_config_notify_cb+0x1f7/0x590 [rmnet] [ 79.668121][ T1195] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 79.669166][ T1195] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 79.670286][ T1195] ? __module_text_address+0x13/0x140 [ 79.671139][ T1195] notifier_call_chain+0x90/0x160 [ 79.671973][ T1195] rollback_registered_many+0x660/0xcf0 [ 79.672893][ T1195] ? netif_set_real_num_tx_queues+0x780/0x780 [ 79.675091][ T1195] ? __lock_acquire+0xdfe/0x3de0 [ 79.675825][ T1195] ? memset+0x1f/0x40 [ 79.676367][ T1195] ? __nla_validate_parse+0x98/0x1ab0 [ 79.677290][ T1195] unregister_netdevice_many.part.133+0x13/0x1b0 [ 79.678163][ T1195] rtnl_delete_link+0xbc/0x100 [ ... ]
Fixes: ceed73a2cf4a ("drivers: net: ethernet: qualcomm: rmnet: Initial implementation") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 915165c..b7acedf 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -239,7 +239,6 @@ static void rmnet_force_unassociate_device(struct net_device *dev)
port = rmnet_get_port_rtnl(dev);
- rcu_read_lock(); rmnet_unregister_bridge(dev, port);
hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { @@ -250,7 +249,6 @@ static void rmnet_force_unassociate_device(struct net_device *dev) kfree(ep); }
- rcu_read_unlock(); unregister_netdevice_many(&list);
rmnet_unregister_real_device(real_dev, port);
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 1dc49e9d164cd7e11c81279c83db84a147e14740 ]
Basically, duplicate mux id isn't be allowed. So, the creation of rmnet will be failed if there is duplicate mux id is existing. But, changelink routine doesn't check duplicate mux id.
Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link add rmnet1 link dummy0 type rmnet mux_id 2 ip link set rmnet1 type rmnet mux_id 1
Fixes: 23790ef12082 ("net: qualcomm: rmnet: Allow to configure flags for existing devices") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 4 ++++ 1 file changed, 4 insertions(+)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index b7acedf..272a08f 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -315,6 +315,10 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
if (data[IFLA_RMNET_MUX_ID]) { mux_id = nla_get_u16(data[IFLA_RMNET_MUX_ID]); + if (rmnet_get_endpoint(port, mux_id)) { + NL_SET_ERR_MSG_MOD(extack, "MUX ID already exists"); + return -EINVAL; + } ep = rmnet_get_endpoint(port, priv->mux_id); if (!ep) return -ENODEV;
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit 037f9cdf72fb8a7ff9ec2b5dd05336ec1492bdf1 ]
netdev_upper_dev_link() is useful to manage lower/upper interfaces. And this function internally validates looping, maximum depth. All or most virtual interfaces that could have a real interface (e.g. macsec, macvlan, ipvlan etc.) use lower/upper infrastructure.
Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet1 link dummy0 type rmnet mux_id 1 for i in {2..100} do let A=$i-1 ip link add rmnet$i link rmnet$A type rmnet mux_id $i done ip link del dummy0
The purpose of the test commands is to make stack overflow.
Splat looks like: [ 52.411438][ T1395] BUG: KASAN: slab-out-of-bounds in find_busiest_group+0x27e/0x2c00 [ 52.413218][ T1395] Write of size 64 at addr ffff8880c774bde0 by task ip/1395 [ 52.414841][ T1395] [ 52.430720][ T1395] CPU: 1 PID: 1395 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 52.496511][ T1395] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 52.513597][ T1395] Call Trace: [ 52.546516][ T1395] [ 52.558773][ T1395] Allocated by task 3171537984: [ 52.588290][ T1395] BUG: unable to handle page fault for address: ffffffffb999e260 [ 52.589311][ T1395] #PF: supervisor read access in kernel mode [ 52.590529][ T1395] #PF: error_code(0x0000) - not-present page [ 52.591374][ T1395] PGD d6818067 P4D d6818067 PUD d6819063 PMD 0 [ 52.592288][ T1395] Thread overran stack, or stack corrupted [ 52.604980][ T1395] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 52.605856][ T1395] CPU: 1 PID: 1395 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 52.611764][ T1395] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 52.621520][ T1395] RIP: 0010:stack_depot_fetch+0x10/0x30 [ 52.622296][ T1395] Code: ff e9 f9 fe ff ff 48 89 df e8 9c 1d 91 ff e9 ca fe ff ff cc cc cc cc cc cc cc 89 f8 0 [ 52.627887][ T1395] RSP: 0018:ffff8880c774bb60 EFLAGS: 00010006 [ 52.628735][ T1395] RAX: 00000000001f8880 RBX: ffff8880c774d140 RCX: 0000000000000000 [ 52.631773][ T1395] RDX: 000000000000001d RSI: ffff8880c774bb68 RDI: 0000000000003ff0 [ 52.649584][ T1395] RBP: ffffea00031dd200 R08: ffffed101b43e403 R09: ffffed101b43e403 [ 52.674857][ T1395] R10: 0000000000000001 R11: ffffed101b43e402 R12: ffff8880d900e5c0 [ 52.678257][ T1395] R13: ffff8880c774c000 R14: 0000000000000000 R15: dffffc0000000000 [ 52.694541][ T1395] FS: 00007fe867f6e0c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 52.764039][ T1395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 52.815008][ T1395] CR2: ffffffffb999e260 CR3: 00000000c26aa005 CR4: 00000000000606e0 [ 52.862312][ T1395] Call Trace: [ 52.887133][ T1395] Modules linked in: dummy rmnet veth openvswitch nsh nf_conncount nf_nat nf_conntrack nf_dex [ 52.936749][ T1395] CR2: ffffffffb999e260 [ 52.965695][ T1395] ---[ end trace 7e32ca99482dbb31 ]--- [ 52.966556][ T1395] RIP: 0010:stack_depot_fetch+0x10/0x30 [ 52.971083][ T1395] Code: ff e9 f9 fe ff ff 48 89 df e8 9c 1d 91 ff e9 ca fe ff ff cc cc cc cc cc cc cc 89 f8 0 [ 53.003650][ T1395] RSP: 0018:ffff8880c774bb60 EFLAGS: 00010006 [ 53.043183][ T1395] RAX: 00000000001f8880 RBX: ffff8880c774d140 RCX: 0000000000000000 [ 53.076480][ T1395] RDX: 000000000000001d RSI: ffff8880c774bb68 RDI: 0000000000003ff0 [ 53.093858][ T1395] RBP: ffffea00031dd200 R08: ffffed101b43e403 R09: ffffed101b43e403 [ 53.112795][ T1395] R10: 0000000000000001 R11: ffffed101b43e402 R12: ffff8880d900e5c0 [ 53.139837][ T1395] R13: ffff8880c774c000 R14: 0000000000000000 R15: dffffc0000000000 [ 53.141500][ T1395] FS: 00007fe867f6e0c0(0000) GS:ffff8880da000000(0000) knlGS:0000000000000000 [ 53.143343][ T1395] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 53.152007][ T1395] CR2: ffffffffb999e260 CR3: 00000000c26aa005 CR4: 00000000000606e0 [ 53.156459][ T1395] Kernel panic - not syncing: Fatal exception [ 54.213570][ T1395] Shutting down cpus with NMI [ 54.354112][ T1395] Kernel Offset: 0x33000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0x) [ 54.355687][ T1395] Rebooting in 5 seconds..
Fixes: b37f78f234bf ("net: qualcomm: rmnet: Fix crash on real dev unregistration") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 35 ++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 272a08f..01a1d3f 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -70,9 +70,6 @@ static int rmnet_unregister_real_device(struct net_device *real_dev,
kfree(port);
- /* release reference on real_dev */ - dev_put(real_dev); - netdev_dbg(real_dev, "Removed from rmnet\n"); return 0; } @@ -98,9 +95,6 @@ static int rmnet_register_real_device(struct net_device *real_dev) return -EBUSY; }
- /* hold on to real dev for MAP data */ - dev_hold(real_dev); - for (entry = 0; entry < RMNET_MAX_LOGICAL_EP; entry++) INIT_HLIST_HEAD(&port->muxed_ep[entry]);
@@ -171,6 +165,10 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, if (err) goto err1;
+ err = netdev_upper_dev_link(real_dev, dev, extack); + if (err < 0) + goto err2; + port->rmnet_mode = mode;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]); @@ -187,6 +185,8 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
return 0;
+err2: + unregister_netdevice(dev); err1: rmnet_unregister_real_device(real_dev, port); err0: @@ -218,33 +218,30 @@ static void rmnet_dellink(struct net_device *dev, struct list_head *head) rmnet_vnd_dellink(mux_id, port, ep); kfree(ep); } + netdev_upper_dev_unlink(real_dev, dev); rmnet_unregister_real_device(real_dev, port);
unregister_netdevice_queue(dev, head); }
-static void rmnet_force_unassociate_device(struct net_device *dev) +static void rmnet_force_unassociate_device(struct net_device *real_dev) { - struct net_device *real_dev = dev; struct hlist_node *tmp_ep; struct rmnet_endpoint *ep; struct rmnet_port *port; unsigned long bkt_ep; LIST_HEAD(list);
- if (!rmnet_is_real_dev_registered(real_dev)) - return; - ASSERT_RTNL();
- port = rmnet_get_port_rtnl(dev); + port = rmnet_get_port_rtnl(real_dev);
- rmnet_unregister_bridge(dev, port); + rmnet_unregister_bridge(real_dev, port);
hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + netdev_upper_dev_unlink(real_dev, ep->egress_dev); unregister_netdevice_queue(ep->egress_dev, &list); rmnet_vnd_dellink(ep->mux_id, port, ep); - hlist_del_init_rcu(&ep->hlnode); kfree(ep); } @@ -257,15 +254,15 @@ static void rmnet_force_unassociate_device(struct net_device *dev) static int rmnet_config_notify_cb(struct notifier_block *nb, unsigned long event, void *data) { - struct net_device *dev = netdev_notifier_info_to_dev(data); + struct net_device *real_dev = netdev_notifier_info_to_dev(data);
- if (!dev) + if (!rmnet_is_real_dev_registered(real_dev)) return NOTIFY_DONE;
switch (event) { case NETDEV_UNREGISTER: - netdev_dbg(dev, "Kernel unregister\n"); - rmnet_force_unassociate_device(dev); + netdev_dbg(real_dev, "Kernel unregister\n"); + rmnet_force_unassociate_device(real_dev); break;
default: @@ -486,8 +483,8 @@ static int __init rmnet_init(void)
static void __exit rmnet_exit(void) { - unregister_netdevice_notifier(&rmnet_dev_notifier); rtnl_link_unregister(&rmnet_link_ops); + unregister_netdevice_notifier(&rmnet_dev_notifier); }
module_init(rmnet_init)
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit d939b6d30bea1a2322bc536b12be0a7c4c2bccd7 ]
In order to attach a bridge interface to the rmnet interface, "master" operation is used. (e.g. ip link set dummy1 master rmnet0) But, in the rmnet_add_bridge(), which is a callback of ->ndo_add_slave() doesn't register lower interface. So, ->ndo_del_slave() doesn't work. There are other problems too. 1. It couldn't detect circular upper/lower interface relationship. 2. It couldn't prevent stack overflow because of too deep depth of upper/lower interface 3. It doesn't check the number of lower interfaces. 4. Panics because of several reasons.
The root problem of these issues is actually the same. So, in this patch, these all problems will be fixed.
Test commands: modprobe rmnet ip link add dummy0 type dummy ip link add rmnet0 link dummy0 type rmnet mux_id 1 ip link add dummy1 master rmnet0 type dummy ip link add dummy2 master rmnet0 type dummy ip link del rmnet0 ip link del dummy2 ip link del dummy1
Splat looks like: [ 41.867595][ T1164] general protection fault, probably for non-canonical address 0xdffffc0000000101I [ 41.869993][ T1164] KASAN: null-ptr-deref in range [0x0000000000000808-0x000000000000080f] [ 41.872950][ T1164] CPU: 0 PID: 1164 Comm: ip Not tainted 5.6.0-rc1+ #447 [ 41.873915][ T1164] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 41.875161][ T1164] RIP: 0010:rmnet_unregister_bridge.isra.6+0x71/0xf0 [rmnet] [ 41.876178][ T1164] Code: 48 89 ef 48 89 c6 5b 5d e9 fc fe ff ff e8 f7 f3 ff ff 48 8d b8 08 08 00 00 48 ba 00 7 [ 41.878925][ T1164] RSP: 0018:ffff8880c4d0f188 EFLAGS: 00010202 [ 41.879774][ T1164] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000101 [ 41.887689][ T1164] RDX: dffffc0000000000 RSI: ffffffffb8cf64f0 RDI: 0000000000000808 [ 41.888727][ T1164] RBP: ffff8880c40e4000 R08: ffffed101b3c0e3c R09: 0000000000000001 [ 41.889749][ T1164] R10: 0000000000000001 R11: ffffed101b3c0e3b R12: 1ffff110189a1e3c [ 41.890783][ T1164] R13: ffff8880c4d0f200 R14: ffffffffb8d56160 R15: ffff8880ccc2c000 [ 41.891794][ T1164] FS: 00007f4300edc0c0(0000) GS:ffff8880d9c00000(0000) knlGS:0000000000000000 [ 41.892953][ T1164] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 41.893800][ T1164] CR2: 00007f43003bc8c0 CR3: 00000000ca53e001 CR4: 00000000000606f0 [ 41.894824][ T1164] Call Trace: [ 41.895274][ T1164] ? rcu_is_watching+0x2c/0x80 [ 41.895895][ T1164] rmnet_config_notify_cb+0x1f7/0x590 [rmnet] [ 41.896687][ T1164] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 41.897611][ T1164] ? rmnet_unregister_bridge.isra.6+0xf0/0xf0 [rmnet] [ 41.898508][ T1164] ? __module_text_address+0x13/0x140 [ 41.899162][ T1164] notifier_call_chain+0x90/0x160 [ 41.899814][ T1164] rollback_registered_many+0x660/0xcf0 [ 41.900544][ T1164] ? netif_set_real_num_tx_queues+0x780/0x780 [ 41.901316][ T1164] ? __lock_acquire+0xdfe/0x3de0 [ 41.901958][ T1164] ? memset+0x1f/0x40 [ 41.902468][ T1164] ? __nla_validate_parse+0x98/0x1ab0 [ 41.903166][ T1164] unregister_netdevice_many.part.133+0x13/0x1b0 [ 41.903988][ T1164] rtnl_delete_link+0xbc/0x100 [ ... ]
Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c | 131 ++++++++++----------- drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h | 1 + drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 8 -- drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h | 1 - 4 files changed, 64 insertions(+), 77 deletions(-)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c index 01a1d3f..37786af 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c @@ -22,25 +22,6 @@ #include "rmnet_vnd.h" #include "rmnet_private.h"
-/* Locking scheme - - * The shared resource which needs to be protected is realdev->rx_handler_data. - * For the writer path, this is using rtnl_lock(). The writer paths are - * rmnet_newlink(), rmnet_dellink() and rmnet_force_unassociate_device(). These - * paths are already called with rtnl_lock() acquired in. There is also an - * ASSERT_RTNL() to ensure that we are calling with rtnl acquired. For - * dereference here, we will need to use rtnl_dereference(). Dev list writing - * needs to happen with rtnl_lock() acquired for netdev_master_upper_dev_link(). - * For the reader path, the real_dev->rx_handler_data is called in the TX / RX - * path. We only need rcu_read_lock() for these scenarios. In these cases, - * the rcu_read_lock() is held in __dev_queue_xmit() and - * netif_receive_skb_internal(), so readers need to use rcu_dereference_rtnl() - * to get the relevant information. For dev list reading, we again acquire - * rcu_read_lock() in rmnet_dellink() for netdev_master_upper_dev_get_rcu(). - * We also use unregister_netdevice_many() to free all rmnet devices in - * rmnet_force_unassociate_device() so we dont lose the rtnl_lock() and free in - * same context. - */ - /* Local Definitions and Declarations */
static const struct nla_policy rmnet_policy[IFLA_RMNET_MAX + 1] = { @@ -60,9 +41,10 @@ static int rmnet_is_real_dev_registered(const struct net_device *real_dev) return rtnl_dereference(real_dev->rx_handler_data); }
-static int rmnet_unregister_real_device(struct net_device *real_dev, - struct rmnet_port *port) +static int rmnet_unregister_real_device(struct net_device *real_dev) { + struct rmnet_port *port = rmnet_get_port_rtnl(real_dev); + if (port->nr_rmnet_devs) return -EINVAL;
@@ -102,28 +84,33 @@ static int rmnet_register_real_device(struct net_device *real_dev) return 0; }
-static void rmnet_unregister_bridge(struct net_device *dev, - struct rmnet_port *port) +static void rmnet_unregister_bridge(struct rmnet_port *port) { - struct rmnet_port *bridge_port; - struct net_device *bridge_dev; + struct net_device *bridge_dev, *real_dev, *rmnet_dev; + struct rmnet_port *real_port;
if (port->rmnet_mode != RMNET_EPMODE_BRIDGE) return;
- /* bridge slave handling */ + rmnet_dev = port->rmnet_dev; if (!port->nr_rmnet_devs) { - bridge_dev = port->bridge_ep; + /* bridge device */ + real_dev = port->bridge_ep; + bridge_dev = port->dev;
- bridge_port = rmnet_get_port_rtnl(bridge_dev); - bridge_port->bridge_ep = NULL; - bridge_port->rmnet_mode = RMNET_EPMODE_VND; + real_port = rmnet_get_port_rtnl(real_dev); + real_port->bridge_ep = NULL; + real_port->rmnet_mode = RMNET_EPMODE_VND; } else { + /* real device */ bridge_dev = port->bridge_ep;
- bridge_port = rmnet_get_port_rtnl(bridge_dev); - rmnet_unregister_real_device(bridge_dev, bridge_port); + port->bridge_ep = NULL; + port->rmnet_mode = RMNET_EPMODE_VND; } + + netdev_upper_dev_unlink(bridge_dev, rmnet_dev); + rmnet_unregister_real_device(bridge_dev); }
static int rmnet_newlink(struct net *src_net, struct net_device *dev, @@ -170,6 +157,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, goto err2;
port->rmnet_mode = mode; + port->rmnet_dev = dev;
hlist_add_head_rcu(&ep->hlnode, &port->muxed_ep[mux_id]);
@@ -187,8 +175,9 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
err2: unregister_netdevice(dev); + rmnet_vnd_dellink(mux_id, port, ep); err1: - rmnet_unregister_real_device(real_dev, port); + rmnet_unregister_real_device(real_dev); err0: kfree(ep); return err; @@ -197,30 +186,32 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev, static void rmnet_dellink(struct net_device *dev, struct list_head *head) { struct rmnet_priv *priv = netdev_priv(dev); - struct net_device *real_dev; + struct net_device *real_dev, *bridge_dev; + struct rmnet_port *real_port, *bridge_port; struct rmnet_endpoint *ep; - struct rmnet_port *port; - u8 mux_id; + u8 mux_id = priv->mux_id;
real_dev = priv->real_dev;
- if (!real_dev || !rmnet_is_real_dev_registered(real_dev)) + if (!rmnet_is_real_dev_registered(real_dev)) return;
- port = rmnet_get_port_rtnl(real_dev); - - mux_id = rmnet_vnd_get_mux(dev); + real_port = rmnet_get_port_rtnl(real_dev); + bridge_dev = real_port->bridge_ep; + if (bridge_dev) { + bridge_port = rmnet_get_port_rtnl(bridge_dev); + rmnet_unregister_bridge(bridge_port); + }
- ep = rmnet_get_endpoint(port, mux_id); + ep = rmnet_get_endpoint(real_port, mux_id); if (ep) { hlist_del_init_rcu(&ep->hlnode); - rmnet_unregister_bridge(dev, port); - rmnet_vnd_dellink(mux_id, port, ep); + rmnet_vnd_dellink(mux_id, real_port, ep); kfree(ep); } - netdev_upper_dev_unlink(real_dev, dev); - rmnet_unregister_real_device(real_dev, port);
+ netdev_upper_dev_unlink(real_dev, dev); + rmnet_unregister_real_device(real_dev); unregister_netdevice_queue(dev, head); }
@@ -232,23 +223,23 @@ static void rmnet_force_unassociate_device(struct net_device *real_dev) unsigned long bkt_ep; LIST_HEAD(list);
- ASSERT_RTNL(); - port = rmnet_get_port_rtnl(real_dev);
- rmnet_unregister_bridge(real_dev, port); - - hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { - netdev_upper_dev_unlink(real_dev, ep->egress_dev); - unregister_netdevice_queue(ep->egress_dev, &list); - rmnet_vnd_dellink(ep->mux_id, port, ep); - hlist_del_init_rcu(&ep->hlnode); - kfree(ep); + if (port->nr_rmnet_devs) { + /* real device */ + rmnet_unregister_bridge(port); + hash_for_each_safe(port->muxed_ep, bkt_ep, tmp_ep, ep, hlnode) { + unregister_netdevice_queue(ep->egress_dev, &list); + netdev_upper_dev_unlink(real_dev, ep->egress_dev); + rmnet_vnd_dellink(ep->mux_id, port, ep); + hlist_del_init_rcu(&ep->hlnode); + kfree(ep); + } + rmnet_unregister_real_device(real_dev); + unregister_netdevice_many(&list); + } else { + rmnet_unregister_bridge(port); } - - unregister_netdevice_many(&list); - - rmnet_unregister_real_device(real_dev, port); }
static int rmnet_config_notify_cb(struct notifier_block *nb, @@ -427,6 +418,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (port->nr_rmnet_devs > 1) return -EINVAL;
+ if (port->rmnet_mode != RMNET_EPMODE_VND) + return -EINVAL; + if (rmnet_is_real_dev_registered(slave_dev)) return -EBUSY;
@@ -434,9 +428,17 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, if (err) return -EBUSY;
+ err = netdev_master_upper_dev_link(slave_dev, rmnet_dev, NULL, NULL, + extack); + if (err) { + rmnet_unregister_real_device(slave_dev); + return err; + } + slave_port = rmnet_get_port_rtnl(slave_dev); slave_port->rmnet_mode = RMNET_EPMODE_BRIDGE; slave_port->bridge_ep = real_dev; + slave_port->rmnet_dev = rmnet_dev;
port->rmnet_mode = RMNET_EPMODE_BRIDGE; port->bridge_ep = slave_dev; @@ -448,16 +450,9 @@ int rmnet_add_bridge(struct net_device *rmnet_dev, int rmnet_del_bridge(struct net_device *rmnet_dev, struct net_device *slave_dev) { - struct rmnet_priv *priv = netdev_priv(rmnet_dev); - struct net_device *real_dev = priv->real_dev; - struct rmnet_port *port, *slave_port; - - port = rmnet_get_port_rtnl(real_dev); - port->rmnet_mode = RMNET_EPMODE_VND; - port->bridge_ep = NULL; + struct rmnet_port *port = rmnet_get_port_rtnl(slave_dev);
- slave_port = rmnet_get_port_rtnl(slave_dev); - rmnet_unregister_real_device(slave_dev, slave_port); + rmnet_unregister_bridge(port);
netdev_dbg(slave_dev, "removed from rmnet as slave\n"); return 0; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h index 6a978f4..7691d1a 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h @@ -37,6 +37,7 @@ struct rmnet_port { u8 rmnet_mode; struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP]; struct net_device *bridge_ep; + struct net_device *rmnet_dev; };
extern struct rtnl_link_ops rmnet_link_ops; diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c index d11c16a..ed02926 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c @@ -276,14 +276,6 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, return 0; }
-u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev) -{ - struct rmnet_priv *priv; - - priv = netdev_priv(rmnet_dev); - return priv->mux_id; -} - int rmnet_vnd_do_flow_control(struct net_device *rmnet_dev, int enable) { netdev_dbg(rmnet_dev, "Setting VND TX queue state to %d\n", enable); diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h index 71e4c32..22cfdfd 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.h @@ -25,6 +25,5 @@ int rmnet_vnd_dellink(u8 id, struct rmnet_port *port, struct rmnet_endpoint *ep); void rmnet_vnd_rx_fixup(struct sk_buff *skb, struct net_device *dev); void rmnet_vnd_tx_fixup(struct sk_buff *skb, struct net_device *dev); -u8 rmnet_vnd_get_mux(struct net_device *rmnet_dev); void rmnet_vnd_setup(struct net_device *dev); #endif /* _RMNET_VND_H_ */
From: Taehee Yoo ap420073@gmail.com
[ Upstream commit ad3cc31b599ea80f06b29ebdc18b3a39878a48d6 ]
Packet forwarding is not working in rmnet bridge mode. Because when a packet is forwarded, skb_push() for an ethernet header is needed. But it doesn't call skb_push(). So, the ethernet header will be lost.
Test commands: modprobe rmnet ip netns add nst ip netns add nst2 ip link add veth0 type veth peer name veth1 ip link add veth2 type veth peer name veth3 ip link set veth1 netns nst ip link set veth3 netns nst2
ip link add rmnet0 link veth0 type rmnet mux_id 1 ip link set veth2 master rmnet0 ip link set veth0 up ip link set veth2 up ip link set rmnet0 up ip a a 192.168.100.1/24 dev rmnet0
ip netns exec nst ip link set veth1 up ip netns exec nst ip a a 192.168.100.2/24 dev veth1 ip netns exec nst2 ip link set veth3 up ip netns exec nst2 ip a a 192.168.100.3/24 dev veth3 ip netns exec nst2 ping 192.168.100.2
Fixes: 60d58f971c10 ("net: qualcomm: rmnet: Implement bridge mode") Signed-off-by: Taehee Yoo ap420073@gmail.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c index 5177d0f..c9d43ba 100644 --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c @@ -168,6 +168,9 @@ static int rmnet_map_egress_handler(struct sk_buff *skb, static void rmnet_bridge_handler(struct sk_buff *skb, struct net_device *bridge_dev) { + if (skb_mac_header_was_set(skb)) + skb_push(skb, skb->mac_len); + if (bridge_dev) { skb->dev = bridge_dev; dev_queue_xmit(skb);
From: "Alex Maftei (amaftei)" amaftei@solarflare.com
[ Upstream commit 23797b98909f34b75fd130369bde86f760db69d0 ]
We can't just use the top bits of the last sync event as they could be off-by-one every 65,536 seconds, giving an error in reconstruction of 65,536 seconds.
This patch uses the difference in the bottom 16 bits (mod 2^16) to calculate an offset that needs to be applied to the last sync event to get to the current time.
Signed-off-by: Alexandru-Mihai Maftei amaftei@solarflare.com Acked-by: Martin Habets mhabets@solarflare.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/net/ethernet/sfc/ptp.c | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index cc8fbf3..d47151d 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -563,13 +563,45 @@ static u32 last_sync_timestamp_major(struct efx_nic *efx) u32 nic_major, u32 nic_minor, s32 correction) { + u32 sync_timestamp; ktime_t kt = { 0 }; + s16 delta;
if (!(nic_major & 0x80000000)) { WARN_ON_ONCE(nic_major >> 16); - /* Use the top bits from the latest sync event. */ - nic_major &= 0xffff; - nic_major |= (last_sync_timestamp_major(efx) & 0xffff0000); + + /* Medford provides 48 bits of timestamp, so we must get the top + * 16 bits from the timesync event state. + * + * We only have the lower 16 bits of the time now, but we do + * have a full resolution timestamp at some point in past. As + * long as the difference between the (real) now and the sync + * is less than 2^15, then we can reconstruct the difference + * between those two numbers using only the lower 16 bits of + * each. + * + * Put another way + * + * a - b = ((a mod k) - b) mod k + * + * when -k/2 < (a-b) < k/2. In our case k is 2^16. We know + * (a mod k) and b, so can calculate the delta, a - b. + * + */ + sync_timestamp = last_sync_timestamp_major(efx); + + /* Because delta is s16 this does an implicit mask down to + * 16 bits which is what we need, assuming + * MEDFORD_TX_SECS_EVENT_BITS is 16. delta is signed so that + * we can deal with the (unlikely) case of sync timestamps + * arriving from the future. + */ + delta = nic_major - sync_timestamp; + + /* Recover the fully specified time now, by applying the offset + * to the (fully specified) sync time. + */ + nic_major = sync_timestamp + delta;
kt = ptp->nic_to_kernel_time(nic_major, nic_minor, correction);
From: Qian Cai cai@lca.pw
[ Upstream commit 6c5d911249290f41f7b50b43344a7520605b1acb ]
journal_head::b_transaction and journal_head::b_next_transaction could be accessed concurrently as noticed by KCSAN,
LTP: starting fsync04 /dev/zero: Can't open blockdev EXT4-fs (loop0): mounting ext3 file system using the ext4 subsystem EXT4-fs (loop0): mounted filesystem with ordered data mode. Opts: (null) ================================================================== BUG: KCSAN: data-race in __jbd2_journal_refile_buffer [jbd2] / jbd2_write_access_granted [jbd2]
write to 0xffff99f9b1bd0e30 of 8 bytes by task 25721 on cpu 70: __jbd2_journal_refile_buffer+0xdd/0x210 [jbd2] __jbd2_journal_refile_buffer at fs/jbd2/transaction.c:2569 jbd2_journal_commit_transaction+0x2d15/0x3f20 [jbd2] (inlined by) jbd2_journal_commit_transaction at fs/jbd2/commit.c:1034 kjournald2+0x13b/0x450 [jbd2] kthread+0x1cd/0x1f0 ret_from_fork+0x27/0x50
read to 0xffff99f9b1bd0e30 of 8 bytes by task 25724 on cpu 68: jbd2_write_access_granted+0x1b2/0x250 [jbd2] jbd2_write_access_granted at fs/jbd2/transaction.c:1155 jbd2_journal_get_write_access+0x2c/0x60 [jbd2] __ext4_journal_get_write_access+0x50/0x90 [ext4] ext4_mb_mark_diskspace_used+0x158/0x620 [ext4] ext4_mb_new_blocks+0x54f/0xca0 [ext4] ext4_ind_map_blocks+0xc79/0x1b40 [ext4] ext4_map_blocks+0x3b4/0x950 [ext4] _ext4_get_block+0xfc/0x270 [ext4] ext4_get_block+0x3b/0x50 [ext4] __block_write_begin_int+0x22e/0xae0 __block_write_begin+0x39/0x50 ext4_write_begin+0x388/0xb50 [ext4] generic_perform_write+0x15d/0x290 ext4_buffered_write_iter+0x11f/0x210 [ext4] ext4_file_write_iter+0xce/0x9e0 [ext4] new_sync_write+0x29c/0x3b0 __vfs_write+0x92/0xa0 vfs_write+0x103/0x260 ksys_write+0x9d/0x130 __x64_sys_write+0x4c/0x60 do_syscall_64+0x91/0xb05 entry_SYSCALL_64_after_hwframe+0x49/0xbe
5 locks held by fsync04/25724: #0: ffff99f9911093f8 (sb_writers#13){.+.+}, at: vfs_write+0x21c/0x260 #1: ffff99f9db4c0348 (&sb->s_type->i_mutex_key#15){+.+.}, at: ext4_buffered_write_iter+0x65/0x210 [ext4] #2: ffff99f5e7dfcf58 (jbd2_handle){++++}, at: start_this_handle+0x1c1/0x9d0 [jbd2] #3: ffff99f9db4c0168 (&ei->i_data_sem){++++}, at: ext4_map_blocks+0x176/0x950 [ext4] #4: ffffffff99086b40 (rcu_read_lock){....}, at: jbd2_write_access_granted+0x4e/0x250 [jbd2] irq event stamp: 1407125 hardirqs last enabled at (1407125): [<ffffffff980da9b7>] __find_get_block+0x107/0x790 hardirqs last disabled at (1407124): [<ffffffff980da8f9>] __find_get_block+0x49/0x790 softirqs last enabled at (1405528): [<ffffffff98a0034c>] __do_softirq+0x34c/0x57c softirqs last disabled at (1405521): [<ffffffff97cc67a2>] irq_exit+0xa2/0xc0
Reported by Kernel Concurrency Sanitizer on: CPU: 68 PID: 25724 Comm: fsync04 Tainted: G L 5.6.0-rc2-next-20200221+ #7 Hardware name: HPE ProLiant DL385 Gen10/ProLiant DL385 Gen10, BIOS A40 07/10/2019
The plain reads are outside of jh->b_state_lock critical section which result in data races. Fix them by adding pairs of READ|WRITE_ONCE().
Reviewed-by: Jan Kara jack@suse.cz Signed-off-by: Qian Cai cai@lca.pw Link: https://lore.kernel.org/r/20200222043111.2227-1-cai@lca.pw Signed-off-by: Theodore Ts'o tytso@mit.edu Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/jbd2/transaction.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 45a6132..c396131 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1116,8 +1116,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, /* For undo access buffer must have data copied */ if (undo && !jh->b_committed_data) goto out; - if (jh->b_transaction != handle->h_transaction && - jh->b_next_transaction != handle->h_transaction) + if (READ_ONCE(jh->b_transaction) != handle->h_transaction && + READ_ONCE(jh->b_next_transaction) != handle->h_transaction) goto out; /* * There are two reasons for the barrier here: @@ -2543,8 +2543,8 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) * our jh reference and thus __jbd2_journal_file_buffer() must not * take a new one. */ - jh->b_transaction = jh->b_next_transaction; - jh->b_next_transaction = NULL; + WRITE_ONCE(jh->b_transaction, jh->b_next_transaction); + WRITE_ONCE(jh->b_next_transaction, NULL); if (buffer_freed(bh)) jlist = BJ_Forget; else if (jh->b_modified)
From: Faiz Abbas faiz_abbas@ti.com
[ Upstream commit 5c41ea6d52003b5bc77c2a82fd5ca7d480237d89 ]
commit 5b0d62108b46 ("mmc: sdhci-omap: Add platform specific reset callback") skips data resets during tuning operation. Because of this, a data error or data finish interrupt might still arrive after a command error has been handled and the mrq ended. This ends up with a "mmc0: Got data interrupt 0x00000002 even though no data operation was in progress" error message.
Fix this by adding a platform specific callback for sdhci_irq. Mark the mrq as a failure but wait for a data interrupt instead of calling finish_mrq().
Fixes: 5b0d62108b46 ("mmc: sdhci-omap: Add platform specific reset callback") Signed-off-by: Faiz Abbas faiz_abbas@ti.com Acked-by: Adrian Hunter adrian.hunter@intel.com Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/sdhci-omap.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+)
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 5698af2..890fcd2 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -801,6 +801,43 @@ void sdhci_omap_reset(struct sdhci_host *host, u8 mask) sdhci_reset(host, mask); }
+#define CMD_ERR_MASK (SDHCI_INT_CRC | SDHCI_INT_END_BIT | SDHCI_INT_INDEX |\ + SDHCI_INT_TIMEOUT) +#define CMD_MASK (CMD_ERR_MASK | SDHCI_INT_RESPONSE) + +static u32 sdhci_omap_irq(struct sdhci_host *host, u32 intmask) +{ + struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); + struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host); + + if (omap_host->is_tuning && host->cmd && !host->data_early && + (intmask & CMD_ERR_MASK)) { + + /* + * Since we are not resetting data lines during tuning + * operation, data error or data complete interrupts + * might still arrive. Mark this request as a failure + * but still wait for the data interrupt + */ + if (intmask & SDHCI_INT_TIMEOUT) + host->cmd->error = -ETIMEDOUT; + else + host->cmd->error = -EILSEQ; + + host->cmd = NULL; + + /* + * Sometimes command error interrupts and command complete + * interrupt will arrive together. Clear all command related + * interrupts here. + */ + sdhci_writel(host, intmask & CMD_MASK, SDHCI_INT_STATUS); + intmask &= ~CMD_MASK; + } + + return intmask; +} + static struct sdhci_ops sdhci_omap_ops = { .set_clock = sdhci_omap_set_clock, .set_power = sdhci_omap_set_power, @@ -811,6 +848,7 @@ void sdhci_omap_reset(struct sdhci_host *host, u8 mask) .platform_send_init_74_clocks = sdhci_omap_init_74_clocks, .reset = sdhci_omap_reset, .set_uhs_signaling = sdhci_omap_set_uhs_signaling, + .irq = sdhci_omap_irq, };
static int sdhci_omap_set_capabilities(struct sdhci_omap_host *omap_host)
From: Faiz Abbas faiz_abbas@ti.com
[ Upstream commit feb40824d78eac5e48f56498dca941754dff33d7 ]
According to the App note[1] detailing the tuning algorithm, for temperatures < -20C, the initial tuning value should be min(largest value in LPW - 24, ceil(13/16 ratio of LPW)). The largest value in LPW is (max_window + 4 * (max_len - 1)) and not (max_window + 4 * max_len) itself. Fix this implementation.
[1] http://www.ti.com/lit/an/spraca9b/spraca9b.pdf
Fixes: 961de0a856e3 ("mmc: sdhci-omap: Workaround errata regarding SDR104/HS200 tuning failures (i929)") Cc: stable@vger.kernel.org Signed-off-by: Faiz Abbas faiz_abbas@ti.com Signed-off-by: Ulf Hansson ulf.hansson@linaro.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/mmc/host/sdhci-omap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index 890fcd2..e9793d8 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -387,7 +387,7 @@ static int sdhci_omap_execute_tuning(struct mmc_host *mmc, u32 opcode) * on temperature */ if (temperature < -20000) - phase_delay = min(max_window + 4 * max_len - 24, + phase_delay = min(max_window + 4 * (max_len - 1) - 24, max_window + DIV_ROUND_UP(13 * max_len, 16) * 4); else if (temperature < 20000)
From: Yong Wu yong.wu@mediatek.com
commit 0fe6f7874d467456da6f6a221dd92499a3ab1780 upstream.
DL_FLAG_AUTOREMOVE_CONSUMER/SUPPLIER means "Remove the link automatically on consumer/supplier driver unbind", that means we should remove whole the device_link when there is no this driver no matter what the ref_count of the link is.
CC: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yong Wu yong.wu@mediatek.com Signed-off-by: Saravana Kannan saravanak@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c index 742bc60..b354fdd 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -546,7 +546,7 @@ static void __device_links_no_driver(struct device *dev) continue;
if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) - kref_put(&link->kref, __device_link_del); + __device_link_del(&link->kref); else if (link->status != DL_STATE_SUPPLIER_UNBIND) WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } @@ -591,7 +591,7 @@ void device_links_driver_cleanup(struct device *dev) */ if (link->status == DL_STATE_SUPPLIER_UNBIND && link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) - kref_put(&link->kref, __device_link_del); + __device_link_del(&link->kref);
WRITE_ONCE(link->status, DL_STATE_DORMANT); }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
commit 15cfb094160385cc0b303c4cda483caa102af654 upstream.
Currently, it is not valid to add a device link from a consumer driver ->probe callback to a supplier that is still probing too, but generally this is a valid use case. For example, if the consumer has just acquired a resource that can only be available if the supplier is functional, adding a device link to that supplier right away should be safe (and even desirable arguably), but device_link_add() doesn't handle that case correctly and the initial state of the link created by it is wrong then.
To address this problem, change the initial state of device links added between a probing supplier and a probing consumer to DL_STATE_CONSUMER_PROBE and update device_links_driver_bound() to skip such links on the supplier side.
With this change, if the supplier probe completes first, device_links_driver_bound() called for it will skip the link state update and when it is called for the consumer, the link state will be updated to "active". In turn, if the consumer probe completes first, device_links_driver_bound() called for it will change the state of the link to "active" and when it is called for the supplier, the link status update will be skipped.
However, in principle the supplier or consumer probe may still fail after the link has been added, so modify device_links_no_driver() to change device links in the "active" or "consumer probe" state to "dormant" on the supplier side and update __device_links_no_driver() to change the link state to "available" only if it is "consumer probe" or "active".
Then, if the supplier probe fails first, the leftover link to the probing consumer will become "dormant" and device_links_no_driver() called for the consumer (when its probe fails) will clean it up. In turn, if the consumer probe fails first, it will either drop the link, or change its state to "available" and, in the latter case, when device_links_no_driver() is called for the supplier, it will update the link state to "dormant". [If the supplier probe fails, but the consumer probe succeeds, which should not happen as long as the consumer driver is correct, the link still will be around, but it will be "dormant" until the supplier is probed again.]
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Saravana Kannan saravanak@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/driver-api/device_link.rst | 10 +++-- drivers/base/core.c | 74 ++++++++++++++++++++++++++++---- 2 files changed, 73 insertions(+), 11 deletions(-)
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst index d676327..5c717818 100644 --- a/Documentation/driver-api/device_link.rst +++ b/Documentation/driver-api/device_link.rst @@ -59,11 +59,15 @@ device ``->probe`` callback or a boot-time PCI quirk.
Another example for an inconsistent state would be a device link that represents a driver presence dependency, yet is added from the consumer's -``->probe`` callback while the supplier hasn't probed yet: Had the driver -core known about the device link earlier, it wouldn't have probed the +``->probe`` callback while the supplier hasn't started to probe yet: Had the +driver core known about the device link earlier, it wouldn't have probed the consumer in the first place. The onus is thus on the consumer to check presence of the supplier after adding the link, and defer probing on -non-presence. +non-presence. [Note that it is valid to create a link from the consumer's +``->probe`` callback while the supplier is still probing, but the consumer must +know that the supplier is functional already at the link creation time (that is +the case, for instance, if the consumer has just acquired some resources that +would not have been available had the supplier not been functional then).]
If a device link is added in the ``->probe`` callback of the supplier or consumer driver, it is typically deleted in its ``->remove`` callback for diff --git a/drivers/base/core.c b/drivers/base/core.c index b354fdd..ae2f85c 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -287,17 +287,26 @@ struct device_link *device_link_add(struct device *consumer, link->status = DL_STATE_NONE; } else { switch (supplier->links.status) { - case DL_DEV_DRIVER_BOUND: + case DL_DEV_PROBING: switch (consumer->links.status) { case DL_DEV_PROBING: /* - * Some callers expect the link creation during - * consumer driver probe to resume the supplier - * even without DL_FLAG_RPM_ACTIVE. + * A consumer driver can create a link to a + * supplier that has not completed its probing + * yet as long as it knows that the supplier is + * already functional (for example, it has just + * acquired some resources from the supplier). */ - if (flags & DL_FLAG_PM_RUNTIME) - pm_runtime_resume(supplier); - + link->status = DL_STATE_CONSUMER_PROBE; + break; + default: + link->status = DL_STATE_DORMANT; + break; + } + break; + case DL_DEV_DRIVER_BOUND: + switch (consumer->links.status) { + case DL_DEV_PROBING: link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: @@ -318,6 +327,14 @@ struct device_link *device_link_add(struct device *consumer, }
/* + * Some callers expect the link creation during consumer driver probe to + * resume the supplier even without DL_FLAG_RPM_ACTIVE. + */ + if (link->status == DL_STATE_CONSUMER_PROBE && + flags & DL_FLAG_PM_RUNTIME) + pm_runtime_resume(supplier); + + /* * Move the consumer and all of the devices depending on it to the end * of dpm_list and the devices_kset list. * @@ -508,6 +525,16 @@ void device_links_driver_bound(struct device *dev) if (link->flags & DL_FLAG_STATELESS) continue;
+ /* + * Links created during consumer probe may be in the "consumer + * probe" state to start with if the supplier is still probing + * when they are created and they may become "active" if the + * consumer probe returns first. Skip them here. + */ + if (link->status == DL_STATE_CONSUMER_PROBE || + link->status == DL_STATE_ACTIVE) + continue; + WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } @@ -547,17 +574,48 @@ static void __device_links_no_driver(struct device *dev)
if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) __device_link_del(&link->kref); - else if (link->status != DL_STATE_SUPPLIER_UNBIND) + else if (link->status == DL_STATE_CONSUMER_PROBE || + link->status == DL_STATE_ACTIVE) WRITE_ONCE(link->status, DL_STATE_AVAILABLE); }
dev->links.status = DL_DEV_NO_DRIVER; }
+/** + * device_links_no_driver - Update links after failing driver probe. + * @dev: Device whose driver has just failed to probe. + * + * Clean up leftover links to consumers for @dev and invoke + * %__device_links_no_driver() to update links to suppliers for it as + * appropriate. + * + * Links with the DL_FLAG_STATELESS flag set are ignored. + */ void device_links_no_driver(struct device *dev) { + struct device_link *link; + device_links_write_lock(); + + list_for_each_entry(link, &dev->links.consumers, s_node) { + if (link->flags & DL_FLAG_STATELESS) + continue; + + /* + * The probe has failed, so if the status of the link is + * "consumer probe" or "active", it must have been added by + * a probing consumer while this device was still probing. + * Change its state to "dormant", as it represents a valid + * relationship, but it is not functionally meaningful. + */ + if (link->status == DL_STATE_CONSUMER_PROBE || + link->status == DL_STATE_ACTIVE) + WRITE_ONCE(link->status, DL_STATE_DORMANT); + } + __device_links_no_driver(dev); + device_links_write_unlock(); }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
commit 72175d4ea4c442d95cf690c3e968eeee90fd43ca upstream.
Even though stateful device links are managed by the driver core in principle, their creators are allowed and sometimes even expected to drop references to them via device_link_del() or device_link_remove(), but that doesn't really play well with the "persistent" link concept.
If "persistent" managed device links are created from driver probe callbacks, device_link_add() called to do that will take a new reference on the link each time the callback runs and those references will never be dropped, which kind of isn't nice.
This issues arises because of the link reference counting carried out by device_link_add() for existing links, but that is only done to avoid deleting device links that may still be necessary, which shouldn't be a concern for managed (stateful) links. These device links are managed by the driver core and whoever creates one of them will need it at least as long as until the consumer driver is detached from its device and deleting it may be left to the driver core just fine.
For this reason, rework device_link_add() to apply the reference counting to stateless links only and make device_link_del() and device_link_remove() drop references to stateless links only too. After this change, if called to add a stateful device link for a consumer-supplier pair for which a stateful device link is present already, device_link_add() will return the existing link without incrementing its reference counter. Accordingly, device_link_del() and device_link_remove() will WARN() and do nothing when called to drop a reference to a stateful link. Thus, effectively, all stateful device links will be owned by the driver core.
In addition, clean up the handling of the link management flags, DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER, so that (a) they are never set at the same time and (b) if device_link_add() is called for a consumer-supplier pair with an existing stateful link between them, the flags of that link will be combined with the flags passed to device_link_add() to ensure that the life time of the link is sufficient for all of the callers of device_link_add() for the same consumer-supplier pair.
Update the device_link_add() kerneldoc comment to reflect the above changes.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Saravana Kannan saravanak@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/driver-api/device_link.rst | 42 +++++++++++-------- drivers/base/core.c | 69 +++++++++++++++++++++++++------- 2 files changed, 79 insertions(+), 32 deletions(-)
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst index 5c717818..e249e07 100644 --- a/Documentation/driver-api/device_link.rst +++ b/Documentation/driver-api/device_link.rst @@ -25,8 +25,8 @@ suspend/resume and shutdown ordering.
Device links allow representation of such dependencies in the driver core.
-In its standard form, a device link combines *both* dependency types: -It guarantees correct suspend/resume and shutdown ordering between a +In its standard or *managed* form, a device link combines *both* dependency +types: It guarantees correct suspend/resume and shutdown ordering between a "supplier" device and its "consumer" devices, and it guarantees driver presence on the supplier. The consumer devices are not probed before the supplier is bound to a driver, and they're unbound before the supplier @@ -69,12 +69,14 @@ know that the supplier is functional already at the link creation time (that is the case, for instance, if the consumer has just acquired some resources that would not have been available had the supplier not been functional then).]
-If a device link is added in the ``->probe`` callback of the supplier or -consumer driver, it is typically deleted in its ``->remove`` callback for -symmetry. That way, if the driver is compiled as a module, the device -link is added on module load and orderly deleted on unload. The same -restrictions that apply to device link addition (e.g. exclusion of a -parallel suspend/resume transition) apply equally to deletion. +If a device link with ``DL_FLAG_STATELESS`` set (i.e. a stateless device link) +is added in the ``->probe`` callback of the supplier or consumer driver, it is +typically deleted in its ``->remove`` callback for symmetry. That way, if the +driver is compiled as a module, the device link is added on module load and +orderly deleted on unload. The same restrictions that apply to device link +addition (e.g. exclusion of a parallel suspend/resume transition) apply equally +to deletion. Device links with ``DL_FLAG_STATELESS`` unset (i.e. managed +device links) are deleted automatically by the driver core.
Several flags may be specified on device link addition, two of which have already been mentioned above: ``DL_FLAG_STATELESS`` to express that no @@ -87,8 +89,6 @@ link is added from the consumer's ``->probe`` callback: ``DL_FLAG_RPM_ACTIVE`` can be specified to runtime resume the supplier upon addition of the device link. ``DL_FLAG_AUTOREMOVE_CONSUMER`` causes the device link to be automatically purged when the consumer fails to probe or later unbinds. -This obviates the need to explicitly delete the link in the ``->remove`` -callback or in the error path of the ``->probe`` callback.
Similarly, when the device link is added from supplier's ``->probe`` callback, ``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically @@ -97,12 +97,20 @@ purged when the supplier fails to probe or later unbinds. Limitations ===========
-Driver authors should be aware that a driver presence dependency (i.e. when -``DL_FLAG_STATELESS`` is not specified on link addition) may cause probing of -the consumer to be deferred indefinitely. This can become a problem if the -consumer is required to probe before a certain initcall level is reached. -Worse, if the supplier driver is blacklisted or missing, the consumer will -never be probed. +Driver authors should be aware that a driver presence dependency for managed +device links (i.e. when ``DL_FLAG_STATELESS`` is not specified on link addition) +may cause probing of the consumer to be deferred indefinitely. This can become +a problem if the consumer is required to probe before a certain initcall level +is reached. Worse, if the supplier driver is blacklisted or missing, the +consumer will never be probed. + +Moreover, managed device links cannot be deleted directly. They are deleted +by the driver core when they are not necessary any more in accordance with the +``DL_FLAG_AUTOREMOVE_CONSUMER`` and ``DL_FLAG_AUTOREMOVE_SUPPLIER`` flags. +However, stateless device links (i.e. device links with ``DL_FLAG_STATELESS`` +set) are expected to be removed by whoever called :c:func:`device_link_add()` +to add them with the help of either :c:func:`device_link_del()` or +:c:func:`device_link_remove()`.
Sometimes drivers depend on optional resources. They are able to operate in a degraded mode (reduced feature set or performance) when those resources @@ -286,4 +294,4 @@ API ===
.. kernel-doc:: drivers/base/core.c - :functions: device_link_add device_link_del + :functions: device_link_add device_link_del device_link_remove diff --git a/drivers/base/core.c b/drivers/base/core.c index ae2f85c..d827379 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -179,10 +179,21 @@ void device_pm_move_to_tail(struct device *dev) * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * - * If the DL_FLAG_AUTOREMOVE_CONSUMER flag is set, the link will be removed - * automatically when the consumer device driver unbinds from it. Analogously, - * if DL_FLAG_AUTOREMOVE_SUPPLIER is set in @flags, the link will be removed - * automatically when the supplier device driver unbinds from it. + * If DL_FLAG_STATELESS is set in @flags, the link is not going to be managed by + * the driver core and, in particular, the caller of this function is expected + * to drop the reference to the link acquired by it directly. + * + * If that flag is not set, however, the caller of this function is handing the + * management of the link over to the driver core entirely and its return value + * can only be used to check whether or not the link is present. In that case, + * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link + * flags can be used to indicate to the driver core when the link can be safely + * deleted. Namely, setting one of them in @flags indicates to the driver core + * that the link is not going to be used (by the given caller of this function) + * after unbinding the consumer or supplier driver, respectively, from its + * device, so the link can be deleted at that point. If none of them is set, + * the link will be maintained until one of the devices pointed to by it (either + * the consumer or the supplier) is unregistered. * * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and @@ -228,6 +239,14 @@ struct device_link *device_link_add(struct device *consumer, goto out; }
+ /* + * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed + * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both + * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. + */ + if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) + flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; + list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer != consumer) continue; @@ -241,12 +260,6 @@ struct device_link *device_link_add(struct device *consumer, goto out; }
- if (flags & DL_FLAG_AUTOREMOVE_CONSUMER) - link->flags |= DL_FLAG_AUTOREMOVE_CONSUMER; - - if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) - link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; - if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); @@ -256,7 +269,25 @@ struct device_link *device_link_add(struct device *consumer, refcount_inc(&link->rpm_active); }
- kref_get(&link->kref); + if (flags & DL_FLAG_STATELESS) { + kref_get(&link->kref); + goto out; + } + + /* + * If the life time of the link following from the new flags is + * longer than indicated by the flags of the existing link, + * update the existing link to stay around longer. + */ + if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { + if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { + link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; + link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; + } + } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { + link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER); + } goto out; }
@@ -406,8 +437,16 @@ static void __device_link_del(struct kref *kref) } #endif /* !CONFIG_SRCU */
+static void device_link_put_kref(struct device_link *link) +{ + if (link->flags & DL_FLAG_STATELESS) + kref_put(&link->kref, __device_link_del); + else + WARN(1, "Unable to drop a managed device link reference\n"); +} + /** - * device_link_del - Delete a link between two devices. + * device_link_del - Delete a stateless link between two devices. * @link: Device link to delete. * * The caller must ensure proper synchronization of this function with runtime @@ -419,14 +458,14 @@ void device_link_del(struct device_link *link) { device_links_write_lock(); device_pm_lock(); - kref_put(&link->kref, __device_link_del); + device_link_put_kref(link); device_pm_unlock(); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del);
/** - * device_link_remove - remove a link between two devices. + * device_link_remove - Delete a stateless link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * @@ -445,7 +484,7 @@ void device_link_remove(void *consumer, struct device *supplier)
list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { - kref_put(&link->kref, __device_link_del); + device_link_put_kref(link); break; } }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
commit e7dd40105aac9ba051e44ad711123bc53a5e4c71 upstream.
Add a new device link flag, DL_FLAG_AUTOPROBE_CONSUMER, to request the driver core to probe for a consumer driver automatically after binding a driver to the supplier device on a persistent managed device link.
As unbinding the supplier driver on a managed device link causes the consumer driver to be detached from its device automatically, this flag provides a complementary mechanism which is needed to address some "composite device" use cases.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Saravana Kannan saravanak@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/driver-api/device_link.rst | 9 +++++++++ drivers/base/core.c | 16 +++++++++++++++- drivers/base/dd.c | 2 +- include/linux/device.h | 3 +++ 4 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst index e249e07..c764755 100644 --- a/Documentation/driver-api/device_link.rst +++ b/Documentation/driver-api/device_link.rst @@ -94,6 +94,15 @@ Similarly, when the device link is added from supplier's ``->probe`` callback, ``DL_FLAG_AUTOREMOVE_SUPPLIER`` causes the device link to be automatically purged when the supplier fails to probe or later unbinds.
+If neither ``DL_FLAG_AUTOREMOVE_CONSUMER`` nor ``DL_FLAG_AUTOREMOVE_SUPPLIER`` +is set, ``DL_FLAG_AUTOPROBE_CONSUMER`` can be used to request the driver core +to probe for a driver for the consumer driver on the link automatically after +a driver has been bound to the supplier device. + +Note, however, that any combinations of ``DL_FLAG_AUTOREMOVE_CONSUMER``, +``DL_FLAG_AUTOREMOVE_SUPPLIER`` or ``DL_FLAG_AUTOPROBE_CONSUMER`` with +``DL_FLAG_STATELESS`` are invalid and cannot be used. + Limitations ===========
diff --git a/drivers/base/core.c b/drivers/base/core.c index d827379..3495da1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -195,6 +195,12 @@ void device_pm_move_to_tail(struct device *dev) * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * + * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and + * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent + * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can + * be used to request the driver core to automaticall probe for a consmer + * driver after successfully binding a driver to the supplier device. + * * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and * will cause NULL to be returned upfront. @@ -215,7 +221,12 @@ struct device_link *device_link_add(struct device *consumer,
if (!consumer || !supplier || (flags & DL_FLAG_STATELESS && - flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) + flags & (DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER | + DL_FLAG_AUTOPROBE_CONSUMER)) || + (flags & DL_FLAG_AUTOPROBE_CONSUMER && + flags & (DL_FLAG_AUTOREMOVE_CONSUMER | + DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL;
if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { @@ -576,6 +587,9 @@ void device_links_driver_bound(struct device *dev)
WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); + + if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) + driver_deferred_probe_add(link->consumer); }
list_for_each_entry(link, &dev->links.suppliers, c_node) { diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 5f6416e..caaeb79 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -116,7 +116,7 @@ static void deferred_probe_work_func(struct work_struct *work) } static DECLARE_WORK(deferred_probe_work, deferred_probe_work_func);
-static void driver_deferred_probe_add(struct device *dev) +void driver_deferred_probe_add(struct device *dev) { mutex_lock(&deferred_probe_mutex); if (list_empty(&dev->p->deferred_probe)) { diff --git a/include/linux/device.h b/include/linux/device.h index 538b90d..837f7c9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -346,6 +346,7 @@ struct device *driver_find_device(struct device_driver *drv, struct device *start, void *data, int (*match)(struct device *dev, void *data));
+void driver_deferred_probe_add(struct device *dev); int driver_deferred_probe_check_state(struct device *dev);
/** @@ -836,12 +837,14 @@ enum device_link_state { * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. + * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) #define DL_FLAG_PM_RUNTIME BIT(2) #define DL_FLAG_RPM_ACTIVE BIT(3) #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) +#define DL_FLAG_AUTOPROBE_CONSUMER BIT(5)
/** * struct device_link - Device link representation.
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
commit 515db266a9dace92b0cbaed9a6044dd5304b8ca9 upstream.
If device_link_add() is called for a consumer/supplier pair with an existing device link between them and the existing link's type is not in agreement with the flags passed to that function by its caller, NULL will be returned. That is seriously inconvenient, because it forces the callers of device_link_add() to worry about what others may or may not do even if that is not relevant to them for any other reasons.
It turns out, however, that this limitation can be made go away relatively easily.
The underlying observation is that if DL_FLAG_STATELESS has been passed to device_link_add() in flags for the given consumer/supplier pair at least once, calling either device_link_del() or device_link_remove() to release the link returned by it should work, but there are no other requirements associated with that flag. In turn, if at least one of the callers of device_link_add() for the given consumer/supplier pair has not passed DL_FLAG_STATELESS to it in flags, the driver core should track the status of the link and act on it as appropriate (ie. the link should be treated as "managed"). This means that DL_FLAG_STATELESS needs to be set for managed device links and it should be valid to call device_link_del() or device_link_remove() to drop references to them in certain sutiations.
To allow that to happen, introduce a new (internal) device link flag called DL_FLAG_MANAGED and make device_link_add() set it automatically whenever DL_FLAG_STATELESS is not passed to it. Also make it take additional references to existing device links that were previously stateless (that is, with DL_FLAG_STATELESS set and DL_FLAG_MANAGED unset) and will need to be managed going forward and initialize their status (which has been DL_STATE_NONE so far).
Accordingly, when a managed device link is dropped automatically by the driver core, make it clear DL_FLAG_MANAGED, reset the link's status back to DL_STATE_NONE and drop the reference to it associated with DL_FLAG_MANAGED instead of just deleting it right away (to allow it to stay around in case it still needs to be released explicitly by someone).
With that, since setting DL_FLAG_STATELESS doesn't mean that the device link in question is not managed any more, replace all of the status-tracking checks against DL_FLAG_STATELESS with analogous checks against DL_FLAG_MANAGED and update the documentation to reflect these changes.
While at it, make device_link_add() reject flags that it does not recognize, including DL_FLAG_MANAGED.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Reviewed-by: Saravana Kannan saravanak@google.com Tested-by: Marek Szyprowski m.szyprowski@samsung.com Review-by: Saravana Kannan saravanak@google.com Link: https://lore.kernel.org/r/2305283.AStDPdUUnE@kreacher Signed-off-by: Saravana Kannan saravanak@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/driver-api/device_link.rst | 4 +- drivers/base/core.c | 176 +++++++++++++++++-------------- drivers/base/power/runtime.c | 4 +- include/linux/device.h | 4 +- 4 files changed, 106 insertions(+), 82 deletions(-)
diff --git a/Documentation/driver-api/device_link.rst b/Documentation/driver-api/device_link.rst index c764755..e8b0a8f 100644 --- a/Documentation/driver-api/device_link.rst +++ b/Documentation/driver-api/device_link.rst @@ -75,8 +75,8 @@ typically deleted in its ``->remove`` callback for symmetry. That way, if the driver is compiled as a module, the device link is added on module load and orderly deleted on unload. The same restrictions that apply to device link addition (e.g. exclusion of a parallel suspend/resume transition) apply equally -to deletion. Device links with ``DL_FLAG_STATELESS`` unset (i.e. managed -device links) are deleted automatically by the driver core. +to deletion. Device links managed by the driver core are deleted automatically +by it.
Several flags may be specified on device link addition, two of which have already been mentioned above: ``DL_FLAG_STATELESS`` to express that no diff --git a/drivers/base/core.c b/drivers/base/core.c index 3495da1..31007af 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -124,6 +124,50 @@ static int device_is_dependent(struct device *dev, void *target) return ret; }
+static void device_link_init_status(struct device_link *link, + struct device *consumer, + struct device *supplier) +{ + switch (supplier->links.status) { + case DL_DEV_PROBING: + switch (consumer->links.status) { + case DL_DEV_PROBING: + /* + * A consumer driver can create a link to a supplier + * that has not completed its probing yet as long as it + * knows that the supplier is already functional (for + * example, it has just acquired some resources from the + * supplier). + */ + link->status = DL_STATE_CONSUMER_PROBE; + break; + default: + link->status = DL_STATE_DORMANT; + break; + } + break; + case DL_DEV_DRIVER_BOUND: + switch (consumer->links.status) { + case DL_DEV_PROBING: + link->status = DL_STATE_CONSUMER_PROBE; + break; + case DL_DEV_DRIVER_BOUND: + link->status = DL_STATE_ACTIVE; + break; + default: + link->status = DL_STATE_AVAILABLE; + break; + } + break; + case DL_DEV_UNBINDING: + link->status = DL_STATE_SUPPLIER_UNBIND; + break; + default: + link->status = DL_STATE_DORMANT; + break; + } +} + static int device_reorder_to_tail(struct device *dev, void *not_used) { struct device_link *link; @@ -165,6 +209,10 @@ void device_pm_move_to_tail(struct device *dev) device_links_read_unlock(idx); }
+#define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ + DL_FLAG_AUTOREMOVE_SUPPLIER | \ + DL_FLAG_AUTOPROBE_CONSUMER) + /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. @@ -179,9 +227,9 @@ void device_pm_move_to_tail(struct device *dev) * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * - * If DL_FLAG_STATELESS is set in @flags, the link is not going to be managed by - * the driver core and, in particular, the caller of this function is expected - * to drop the reference to the link acquired by it directly. + * If DL_FLAG_STATELESS is set in @flags, the caller of this function is + * expected to release the link returned by it directly with the help of either + * device_link_del() or device_link_remove(). * * If that flag is not set, however, the caller of this function is handing the * management of the link over to the driver core entirely and its return value @@ -201,9 +249,16 @@ void device_pm_move_to_tail(struct device *dev) * be used to request the driver core to automaticall probe for a consmer * driver after successfully binding a driver to the supplier device. * - * The combination of DL_FLAG_STATELESS and either DL_FLAG_AUTOREMOVE_CONSUMER - * or DL_FLAG_AUTOREMOVE_SUPPLIER set in @flags at the same time is invalid and - * will cause NULL to be returned upfront. + * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, + * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at + * the same time is invalid and will cause NULL to be returned upfront. + * However, if a device link between the given @consumer and @supplier pair + * exists already when this function is called for them, the existing link will + * be returned regardless of its current type and status (the link's flags may + * be modified then). The caller of this function is then expected to treat + * the link as though it has just been created, so (in particular) if + * DL_FLAG_STATELESS was passed in @flags, the link needs to be released + * explicitly when not needed any more (as stated above). * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending @@ -220,10 +275,8 @@ struct device_link *device_link_add(struct device *consumer, struct device_link *link;
if (!consumer || !supplier || - (flags & DL_FLAG_STATELESS && - flags & (DL_FLAG_AUTOREMOVE_CONSUMER | - DL_FLAG_AUTOREMOVE_SUPPLIER | - DL_FLAG_AUTOPROBE_CONSUMER)) || + (flags & ~(DL_FLAG_STATELESS | DL_MANAGED_LINK_FLAGS)) || + (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) @@ -236,6 +289,9 @@ struct device_link *device_link_add(struct device *consumer, } }
+ if (!(flags & DL_FLAG_STATELESS)) + flags |= DL_FLAG_MANAGED; + device_links_write_lock(); device_pm_lock();
@@ -262,15 +318,6 @@ struct device_link *device_link_add(struct device *consumer, if (link->consumer != consumer) continue;
- /* - * Don't return a stateless link if the caller wants a stateful - * one and vice versa. - */ - if (WARN_ON((flags & DL_FLAG_STATELESS) != (link->flags & DL_FLAG_STATELESS))) { - link = NULL; - goto out; - } - if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); @@ -281,6 +328,7 @@ struct device_link *device_link_add(struct device *consumer, }
if (flags & DL_FLAG_STATELESS) { + link->flags |= DL_FLAG_STATELESS; kref_get(&link->kref); goto out; } @@ -299,6 +347,11 @@ struct device_link *device_link_add(struct device *consumer, link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } + if (!(link->flags & DL_FLAG_MANAGED)) { + kref_get(&link->kref); + link->flags |= DL_FLAG_MANAGED; + device_link_init_status(link, consumer, supplier); + } goto out; }
@@ -325,48 +378,10 @@ struct device_link *device_link_add(struct device *consumer, kref_init(&link->kref);
/* Determine the initial link state. */ - if (flags & DL_FLAG_STATELESS) { + if (flags & DL_FLAG_STATELESS) link->status = DL_STATE_NONE; - } else { - switch (supplier->links.status) { - case DL_DEV_PROBING: - switch (consumer->links.status) { - case DL_DEV_PROBING: - /* - * A consumer driver can create a link to a - * supplier that has not completed its probing - * yet as long as it knows that the supplier is - * already functional (for example, it has just - * acquired some resources from the supplier). - */ - link->status = DL_STATE_CONSUMER_PROBE; - break; - default: - link->status = DL_STATE_DORMANT; - break; - } - break; - case DL_DEV_DRIVER_BOUND: - switch (consumer->links.status) { - case DL_DEV_PROBING: - link->status = DL_STATE_CONSUMER_PROBE; - break; - case DL_DEV_DRIVER_BOUND: - link->status = DL_STATE_ACTIVE; - break; - default: - link->status = DL_STATE_AVAILABLE; - break; - } - break; - case DL_DEV_UNBINDING: - link->status = DL_STATE_SUPPLIER_UNBIND; - break; - default: - link->status = DL_STATE_DORMANT; - break; - } - } + else + device_link_init_status(link, consumer, supplier);
/* * Some callers expect the link creation during consumer driver probe to @@ -528,7 +543,7 @@ static void device_links_missing_supplier(struct device *dev) * mark the link as "consumer probe in progress" to make the supplier removal * wait for us to complete (or bad things may happen). * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ int device_links_check_suppliers(struct device *dev) { @@ -538,7 +553,7 @@ int device_links_check_suppliers(struct device *dev) device_links_write_lock();
list_for_each_entry(link, &dev->links.suppliers, c_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
if (link->status != DL_STATE_AVAILABLE) { @@ -563,7 +578,7 @@ int device_links_check_suppliers(struct device *dev) * * Also change the status of @dev's links to suppliers to "active". * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_bound(struct device *dev) { @@ -572,7 +587,7 @@ void device_links_driver_bound(struct device *dev) device_links_write_lock();
list_for_each_entry(link, &dev->links.consumers, s_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
/* @@ -593,7 +608,7 @@ void device_links_driver_bound(struct device *dev) }
list_for_each_entry(link, &dev->links.suppliers, c_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); @@ -605,6 +620,13 @@ void device_links_driver_bound(struct device *dev) device_links_write_unlock(); }
+static void device_link_drop_managed(struct device_link *link) +{ + link->flags &= ~DL_FLAG_MANAGED; + WRITE_ONCE(link->status, DL_STATE_NONE); + kref_put(&link->kref, __device_link_del); +} + /** * __device_links_no_driver - Update links of a device without a driver. * @dev: Device without a drvier. @@ -615,18 +637,18 @@ void device_links_driver_bound(struct device *dev) * unless they already are in the "supplier unbind in progress" state in which * case they need not be updated. * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ static void __device_links_no_driver(struct device *dev) { struct device_link *link, *ln;
list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) - __device_link_del(&link->kref); + device_link_drop_managed(link); else if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) WRITE_ONCE(link->status, DL_STATE_AVAILABLE); @@ -643,7 +665,7 @@ static void __device_links_no_driver(struct device *dev) * %__device_links_no_driver() to update links to suppliers for it as * appropriate. * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_no_driver(struct device *dev) { @@ -652,7 +674,7 @@ void device_links_no_driver(struct device *dev) device_links_write_lock();
list_for_each_entry(link, &dev->links.consumers, s_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
/* @@ -680,7 +702,7 @@ void device_links_no_driver(struct device *dev) * invoke %__device_links_no_driver() to update links to suppliers for it as * appropriate. * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_cleanup(struct device *dev) { @@ -689,7 +711,7 @@ void device_links_driver_cleanup(struct device *dev) device_links_write_lock();
list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); @@ -702,7 +724,7 @@ void device_links_driver_cleanup(struct device *dev) */ if (link->status == DL_STATE_SUPPLIER_UNBIND && link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) - __device_link_del(&link->kref); + device_link_drop_managed(link);
WRITE_ONCE(link->status, DL_STATE_DORMANT); } @@ -724,7 +746,7 @@ void device_links_driver_cleanup(struct device *dev) * * Return 'false' if there are no probing or active consumers. * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ bool device_links_busy(struct device *dev) { @@ -734,7 +756,7 @@ bool device_links_busy(struct device *dev) device_links_write_lock();
list_for_each_entry(link, &dev->links.consumers, s_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
if (link->status == DL_STATE_CONSUMER_PROBE @@ -764,7 +786,7 @@ bool device_links_busy(struct device *dev) * driver to unbind and start over (the consumer will not re-probe as we have * changed the state of the link already). * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_unbind_consumers(struct device *dev) { @@ -776,7 +798,7 @@ void device_links_unbind_consumers(struct device *dev) list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status;
- if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
status = link->status; diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index 303ce7d..2c99f93 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -1531,7 +1531,7 @@ void pm_runtime_remove(struct device *dev) * runtime PM references to the device, drop the usage counter of the device * (as many times as needed). * - * Links with the DL_FLAG_STATELESS flag set are ignored. + * Links with the DL_FLAG_MANAGED flag unset are ignored. * * Since the device is guaranteed to be runtime-active at the point this is * called, nothing else needs to be done here. @@ -1548,7 +1548,7 @@ void pm_runtime_clean_up_links(struct device *dev) idx = device_links_read_lock();
list_for_each_entry_rcu(link, &dev->links.consumers, s_node) { - if (link->flags & DL_FLAG_STATELESS) + if (!(link->flags & DL_FLAG_MANAGED)) continue;
while (refcount_dec_not_one(&link->rpm_active)) diff --git a/include/linux/device.h b/include/linux/device.h index 837f7c9..ee4ed3a 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -832,12 +832,13 @@ enum device_link_state { /* * Device link flags. * - * STATELESS: The core won't track the presence of supplier/consumer drivers. + * STATELESS: The core will not remove this link automatically. * AUTOREMOVE_CONSUMER: Remove the link automatically on consumer driver unbind. * PM_RUNTIME: If set, the runtime PM framework will use this link. * RPM_ACTIVE: Run pm_runtime_get_sync() on the supplier during link creation. * AUTOREMOVE_SUPPLIER: Remove the link automatically on supplier driver unbind. * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds. + * MANAGED: The core tracks presence of supplier/consumer drivers (internal). */ #define DL_FLAG_STATELESS BIT(0) #define DL_FLAG_AUTOREMOVE_CONSUMER BIT(1) @@ -845,6 +846,7 @@ enum device_link_state { #define DL_FLAG_RPM_ACTIVE BIT(3) #define DL_FLAG_AUTOREMOVE_SUPPLIER BIT(4) #define DL_FLAG_AUTOPROBE_CONSUMER BIT(5) +#define DL_FLAG_MANAGED BIT(6)
/** * struct device_link - Device link representation.
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
commit fb583c8eeeb1fd57e24ef41ed94c9112067aeac9 upstream.
After commit 515db266a9da ("driver core: Remove device link creation limitation"), if PM-runtime flags are passed to device_link_add(), it will fail (returning NULL) due to an overly restrictive flags check introduced by that commit.
Fix this issue by extending the check in question to cover the PM-runtime flags too.
Fixes: 515db266a9da ("driver core: Remove device link creation limitation") Reported-by: Dmitry Osipenko digetx@gmail.com Tested-by: Jon Hunter jonathanh@nvidia.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Tested-by: Dmitry Osipenko digetx@gmail.com Tested-by: Marek Szyprowski m.szyprowski@samsung.com Link: https://lore.kernel.org/r/7674989.cD04D8YV3U@kreacher Signed-off-by: Saravana Kannan saravanak@google.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/base/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/drivers/base/core.c b/drivers/base/core.c index 31007af..928fc15 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -213,6 +213,9 @@ void device_pm_move_to_tail(struct device *dev) DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER)
+#define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ + DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) + /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. @@ -274,8 +277,7 @@ struct device_link *device_link_add(struct device *consumer, { struct device_link *link;
- if (!consumer || !supplier || - (flags & ~(DL_FLAG_STATELESS | DL_MANAGED_LINK_FLAGS)) || + if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
From: Carl Huang cjhuang@codeaurora.org
commit ce57785bf91b1ceaef4f4bffed8a47dc0919c8da upstream.
The len used for skb_put_padto is wrong, it need to add len of hdr.
In qrtr_node_enqueue, local variable size_t len is assign with skb->len, then skb_push(skb, sizeof(*hdr)) will add skb->len with sizeof(*hdr), so local variable size_t len is not same with skb->len after skb_push(skb, sizeof(*hdr)).
Then the purpose of skb_put_padto(skb, ALIGN(len, 4)) is to add add pad to the end of the skb's data if skb->len is not aligned to 4, but unfortunately it use len instead of skb->len, at this line, skb->len is 32 bytes(sizeof(*hdr)) more than len, for example, len is 3 bytes, then skb->len is 35 bytes(3 + 32), and ALIGN(len, 4) is 4 bytes, so __skb_put_padto will do nothing after check size(35) < len(4), the correct value should be 36(sizeof(*hdr) + ALIGN(len, 4) = 32 + 4), then __skb_put_padto will pass check size(35) < len(36) and add 1 byte to the end of skb's data, then logic is correct.
function of skb_push: void *skb_push(struct sk_buff *skb, unsigned int len) { skb->data -= len; skb->len += len; if (unlikely(skb->data < skb->head)) skb_under_panic(skb, len, __builtin_return_address(0)); return skb->data; }
function of skb_put_padto static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); }
function of __skb_put_padto static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, bool free_on_error) { unsigned int size = skb->len;
if (unlikely(size < len)) { len -= size; if (__skb_pad(skb, len, free_on_error)) return -ENOMEM; __skb_put(skb, len); } return 0; }
Signed-off-by: Carl Huang cjhuang@codeaurora.org Signed-off-by: Wen Gong wgong@codeaurora.org Cc: Doug Anderson dianders@chromium.org Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/qrtr/qrtr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 5c75118..82d38f9 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -203,7 +203,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = 0;
- skb_put_padto(skb, ALIGN(len, 4)); + skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
mutex_lock(&node->ep_lock); if (node->ep)
From: Florian Fainelli f.fainelli@gmail.com
commit 45939ce292b4b11159719faaf60aba7d58d5fe33 upstream.
It is possible for a system with an ARMv8 timer to run a 32-bit kernel. When this happens we will unconditionally have the vDSO code remove the __vdso_gettimeofday and __vdso_clock_gettime symbols because cntvct_functional() returns false since it does not match that compatibility string.
Fixes: ecf99a439105 ("ARM: 8331/1: VDSO initialization, mapping, and synchronization") Signed-off-by: Florian Fainelli f.fainelli@gmail.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm/kernel/vdso.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index e8cda5e0..a51b7ff 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -104,6 +104,8 @@ static bool __init cntvct_functional(void) */ np = of_find_compatible_node(NULL, NULL, "arm,armv7-timer"); if (!np) + np = of_find_compatible_node(NULL, NULL, "arm,armv8-timer"); + if (!np) goto out_put;
if (of_property_read_bool(np, "arm,cpu-registers-not-fw-configured"))
From: Kees Cook keescook@chromium.org
commit f87b1c49bc675da30d8e1e8f4b60b800312c7b90 upstream.
When the uaccess .fixup section was renamed to .text.fixup, one case was missed. Under ld.bfd, the orphaned section was moved close to .text (since they share the "ax" bits), so things would work normally on uaccess faults. Under ld.lld, the orphaned section was placed outside the .text section, making it unreachable.
Link: https://github.com/ClangBuiltLinux/linux/issues/282 Link: https://bugs.chromium.org/p/chromium/issues/detail?id=1020633#c44 Link: https://lore.kernel.org/r/nycvar.YSQ.7.76.1912032147340.17114@knanqh.ubzr Link: https://lore.kernel.org/lkml/202002071754.F5F073F1D@keescook/
Fixes: c4a84ae39b4a5 ("ARM: 8322/1: keep .text and .fixup regions closer together") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook keescook@chromium.org Reviewed-by: Ard Biesheuvel ardb@kernel.org Reviewed-by: Nick Desaulniers ndesaulniers@google.com Signed-off-by: Russell King rmk+kernel@armlinux.org.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm/lib/copy_from_user.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 6709a8d..f1e34f1 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -100,7 +100,7 @@ ENTRY(arm_copy_from_user)
ENDPROC(arm_copy_from_user)
- .pushsection .fixup,"ax" + .pushsection .text.fixup,"ax" .align 0 copy_abort_preamble ldmfd sp!, {r1, r2, r3}
From: Jann Horn jannh@google.com
commit fd4d9c7d0c71866ec0c2825189ebd2ce35bd95b8 upstream.
When kmem_cache_alloc_bulk() attempts to allocate N objects from a percpu freelist of length M, and N > M > 0, it will first remove the M elements from the percpu freelist, then call ___slab_alloc() to allocate the next element and repopulate the percpu freelist. ___slab_alloc() can re-enable IRQs via allocate_slab(), so the TID must be bumped before ___slab_alloc() to properly commit the freelist head change.
Fix it by unconditionally bumping c->tid when entering the slowpath.
Cc: stable@vger.kernel.org Fixes: ebe909e0fdb3 ("slub: improve bulk alloc strategy") Signed-off-by: Jann Horn jannh@google.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/slub.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/mm/slub.c b/mm/slub.c index 3c5f572..dbd0661 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3103,6 +3103,15 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
if (unlikely(!object)) { /* + * We may have removed an object from c->freelist using + * the fastpath in the previous iteration; in that case, + * c->tid has not been bumped yet. + * Since ___slab_alloc() may reenable interrupts while + * allocating memory, we should bump c->tid now. + */ + c->tid = next_tid(c->tid); + + /* * Invoking slow path likely have side-effect * of re-populating per CPU c->freelist */
From: Chen-Tsung Hsieh chentsung@chromium.org
commit 58322a1590fc189a8e1e349d309637d4a4942840 upstream.
Add 1 additional hammer-like device.
Signed-off-by: Chen-Tsung Hsieh chentsung@chromium.org Reviewed-by: Nicolas Boichat drinkcat@chromium.org Signed-off-by: Jiri Kosina jkosina@suse.cz Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/hid/hid-google-hammer.c | 2 ++ drivers/hid/hid-ids.h | 1 + 2 files changed, 3 insertions(+)
diff --git a/drivers/hid/hid-google-hammer.c b/drivers/hid/hid-google-hammer.c index 8cb63ea..fab8fd7 100644 --- a/drivers/hid/hid-google-hammer.c +++ b/drivers/hid/hid-google-hammer.c @@ -125,6 +125,8 @@ static int hammer_input_configured(struct hid_device *hdev, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MASTERBALL) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, + USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_MOONBALL) }, + { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) }, { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WAND) }, diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index f491092..b2fff44 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -468,6 +468,7 @@ #define USB_DEVICE_ID_GOOGLE_WHISKERS 0x5030 #define USB_DEVICE_ID_GOOGLE_MASTERBALL 0x503c #define USB_DEVICE_ID_GOOGLE_MAGNEMITE 0x503d +#define USB_DEVICE_ID_GOOGLE_MOONBALL 0x5044
#define USB_VENDOR_ID_GOTOP 0x08f2 #define USB_DEVICE_ID_SUPER_Q2 0x007f
From: Waiman Long longman@redhat.com
commit ef1491e791308317bb9851a0ad380c4a68b58d54 upstream.
The following commit:
9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler")
converted 'efi_rts_work' from an auto variable to a global variable. However, when submitting the work, INIT_WORK_ONSTACK() was still used, causing the following complaint from debugobjects:
ODEBUG: object 00000000ed27b500 is NOT on stack 00000000c7d38760, but annotated.
Change the macro to just INIT_WORK() to eliminate the warning.
Signed-off-by: Waiman Long longman@redhat.com Signed-off-by: Ard Biesheuvel ard.biesheuvel@linaro.org Acked-by: Sai Praneeth Prakhya sai.praneeth.prakhya@intel.com Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: linux-efi@vger.kernel.org Fixes: 9dbbedaa6171 ("efi: Make efi_rts_work accessible to efi page fault handler") Link: http://lkml.kernel.org/r/20181114175544.12860-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar mingo@kernel.org Cc: Chris Wilson chris@chris-wilson.co.uk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/firmware/efi/runtime-wrappers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c index e6353ee..0428871 100644 --- a/drivers/firmware/efi/runtime-wrappers.c +++ b/drivers/firmware/efi/runtime-wrappers.c @@ -62,7 +62,7 @@ efi_rts_work.status = EFI_ABORTED; \ \ init_completion(&efi_rts_work.efi_rts_comp); \ - INIT_WORK_ONSTACK(&efi_rts_work.work, efi_call_rts); \ + INIT_WORK(&efi_rts_work.work, efi_call_rts); \ efi_rts_work.arg1 = _arg1; \ efi_rts_work.arg2 = _arg2; \ efi_rts_work.arg3 = _arg3; \
From: Matteo Croce mcroce@redhat.com
commit 3e72dfdf8227b052393f71d820ec7599909dddc2 upstream.
Similarly to commit c543cb4a5f07 ("ipv4: ensure rcu_read_lock() in ipv4_link_failure()"), __ip_options_compile() must be called under rcu protection.
Fixes: 3da1ed7ac398 ("net: avoid use IPCB in cipso_v4_error") Suggested-by: Guillaume Nault gnault@redhat.com Signed-off-by: Matteo Croce mcroce@redhat.com Acked-by: Paul Moore paul@paul-moore.com Signed-off-by: David S. Miller davem@davemloft.net Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/ipv4/cipso_ipv4.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index f0165c5..1c21dc5 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1738,6 +1738,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; + int res;
if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES) return; @@ -1749,7 +1750,11 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
memset(opt, 0, sizeof(struct ip_options)); opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr); - if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL)) + rcu_read_lock(); + res = __ip_options_compile(dev_net(skb->dev), opt, skb, NULL); + rcu_read_unlock(); + + if (res) return;
if (gateway)
From: Greg Kroah-Hartman gregkh@linuxfoundation.org
Merge 40 patches from 4.19.112 stable branch (44 total) beside 4 already merged patches: e78ae00 hinic: fix a irq affinity bug 6bd688f hinic: fix a bug of setting hw_ioctxt dd5a146 wimax: i2400: fix memory leak dfb8270 wimax: i2400: Fix memory leak in i2400m_op_rfkill_sw_toggle
Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Makefile b/Makefile index fed04bd..bd57e08 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 111 +SUBLEVEL = 112 EXTRAVERSION = NAME = "People's Front"