From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.5-rc1 commit 34debdca68efd5625a2fcea7df1a215591a01f80 category: bugfix bugzilla: 95382 CVE: NA
-------------------------------------------------
Forcefully unbinding the Arm SMMU drivers is a pretty dangerous operation, since it will likely lead to catastrophic failure for any DMA devices mastering through the SMMU being unbound. When the driver then attempts to "handle" the fatal faults, it's very easy to trip over dead data structures, leading to use-after-free.
On John's machine, he reports that the machine was "unusable" due to loss of the storage controller following a forced unbind of the SMMUv3 driver:
| # cd ./bus/platform/drivers/arm-smmu-v3 | # echo arm-smmu-v3.0.auto > unbind | hisi_sas_v2_hw HISI0162:01: CQE_AXI_W_ERR (0x800) found! | platform arm-smmu-v3.0.auto: CMD_SYNC timeout at 0x00000146 | [hwprod 0x00000146, hwcons 0x00000000]
Prevent this forced unbinding of the drivers by setting "suppress_bind_attrs" to true.
Link: https://lore.kernel.org/lkml/06dfd385-1af0-3106-4cc5-6a5b8e864759@huawei.com Reported-by: John Garry john.garry@huawei.com Signed-off-by: Will Deacon will@kernel.org Tested-by: John Garry john.garry@huawei.com # smmu v3 Reviewed-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Nanyong Sun sunnanyong@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/arm-smmu-v3.c | 5 +++-- drivers/iommu/arm-smmu.c | 7 ++++--- 2 files changed, 7 insertions(+), 5 deletions(-)
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index c9ff437cb3283..49408e73716e7 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -4532,8 +4532,9 @@ MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
static struct platform_driver arm_smmu_driver = { .driver = { - .name = "arm-smmu-v3", - .of_match_table = of_match_ptr(arm_smmu_of_match), + .name = "arm-smmu-v3", + .of_match_table = of_match_ptr(arm_smmu_of_match), + .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f948de8af412c..2c2bdeac758e0 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2287,9 +2287,10 @@ static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);
static struct platform_driver arm_smmu_driver = { .driver = { - .name = "arm-smmu", - .of_match_table = of_match_ptr(arm_smmu_of_match), - .pm = &arm_smmu_pm_ops, + .name = "arm-smmu", + .of_match_table = of_match_ptr(arm_smmu_of_match), + .pm = &arm_smmu_pm_ops, + .suppress_bind_attrs = true, }, .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove,
From: Jon Derrick jonathan.derrick@intel.com
mainline inclusion from mainline-v5.5-rc6 commit f78947c409204138a4bc0609f98e07ef9d01ac0a category: bugfix bugzilla: 96934 CVE: NA
-------------------------------------------------
If the device fails to be added to the group, make sure to unlink the reference before returning.
Signed-off-by: Jon Derrick jonathan.derrick@intel.com Fixes: 39ab9555c2411 ("iommu: Add sysfs bindings for struct iommu_device") Acked-by: Lu Baolu baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel jroedel@suse.de
Conflict: drivers/iommu/intel-iommu.c Signed-off-by: Nanyong Sun sunnanyong@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/intel-iommu.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 249e81685fb54..754e667380cd7 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5248,6 +5248,7 @@ static int intel_iommu_add_device(struct device *dev) struct intel_iommu *iommu; struct iommu_group *group; u8 bus, devfn; + int ret;
iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) @@ -5257,11 +5258,17 @@ static int intel_iommu_add_device(struct device *dev)
group = iommu_group_get_for_dev(dev);
- if (IS_ERR(group)) - return PTR_ERR(group); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto unlink; + }
iommu_group_put(group); return 0; + +unlink: + iommu_device_unlink(&iommu->iommu, dev); + return ret; }
static void intel_iommu_remove_device(struct device *dev)
From: Mel Gorman mgorman@techsingularity.net
mainline inclusion from mainline-v5.3-rc3 commit 670105a25608affe01cb0ccdc2a1f4bd2327172b category: bugfix bugzilla: 115931 CVE: NA
-----------------------------------------------
"howaboutsynergy" reported via kernel bugzilla number 204165 that compact_zone_order was consuming 100% CPU during a stress test for prolonged periods of time. Specifically the following command, which should exit in 10 seconds, was taking an excessive time to finish while the CPU was pegged at 100%.
stress -m 220 --vm-bytes 1000000000 --timeout 10
Tracing indicated a pattern as follows
stress-3923 [007] 519.106208: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106212: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106216: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106219: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106223: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106227: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106231: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106235: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106238: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0 stress-3923 [007] 519.106242: mm_compaction_isolate_migratepages: range=(0x70bb80 ~ 0x70bb80) nr_scanned=0 nr_taken=0
Note that compaction is entered in rapid succession while scanning and isolating nothing. The problem is that when a task that is compacting receives a fatal signal, it retries indefinitely instead of exiting while making no progress as a fatal signal is pending.
It's not easy to trigger this condition although enabling zswap helps on the basis that the timing is altered. A very small window has to be hit for the problem to occur (signal delivered while compacting and isolating a PFN for migration that is not aligned to SWAP_CLUSTER_MAX).
This was reproduced locally (16G single socket system, 8G swap, 30% zswap configured, vm-bytes 22000000000 using Colin King's stress-ng implementation from github running in a loop until the problem hits). Tracing recorded the problem occurring almost 200K times in a short window. With this patch, the problem hit 4 times but the task exited normally instead of consuming CPU.
This problem has existed for some time but it was made worse by commit cf66f0700c8f ("mm, compaction: do not consider a need to reschedule as contention"). Before that commit, if the same condition was hit then locks would be quickly contended and compaction would exit that way.
Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=204165 Link: http://lkml.kernel.org/r/20190718085708.GE24383@techsingularity.net Fixes: cf66f0700c8f ("mm, compaction: do not consider a need to reschedule as contention") Signed-off-by: Mel Gorman mgorman@techsingularity.net Reviewed-by: Vlastimil Babka vbabka@suse.cz Cc: stable@vger.kernel.org [5.1+] Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Conflict: mm/compaction.c
Signed-off-by: Tong Tiangen tongtiangen@huawei.com Reviewed-by: Chen Wandun chenwandun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/compaction.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/mm/compaction.c b/mm/compaction.c index 5079ddbec8f9e..1d991e443322a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -754,12 +754,15 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, /* * Periodically drop the lock (if held) regardless of its * contention, to give chance to IRQs. Abort async compaction - * if contended. + * contention, to give chance to IRQs. Abort completely if + * a fatal signal is pending. */ if (!(low_pfn % SWAP_CLUSTER_MAX) && compact_unlock_should_abort(zone_lru_lock(zone), flags, - &locked, cc)) - break; + &locked, cc)) { + low_pfn = 0; + goto fatal_pending; + }
if (!pfn_valid_within(low_pfn)) goto isolate_fail; @@ -951,6 +954,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, trace_mm_compaction_isolate_migratepages(start_pfn, low_pfn, nr_scanned, nr_isolated);
+fatal_pending: cc->total_migrate_scanned += nr_scanned; if (nr_isolated) count_compact_events(COMPACTISOLATED, nr_isolated);
From: "Suthikulpanit, Suravee" Suravee.Suthikulpanit@amd.com
mainline inclusion from mainline-v5.5-rc1 commit ec21f17a9437e11bb29e5fa375aa31b472793c15 category: bugfix bugzilla: 100228 CVE: NA
-------------------------------------------------
IOMMU Event Log encodes 20-bit PASID for events: ILLEGAL_DEV_TABLE_ENTRY IO_PAGE_FAULT PAGE_TAB_HARDWARE_ERROR INVALID_DEVICE_REQUEST as: PASID[15:0] = bit 47:32 PASID[19:16] = bit 19:16
Note that INVALID_PPR_REQUEST event has different encoding from the rest of the events as the following: PASID[15:0] = bit 31:16 PASID[19:16] = bit 45:42
So, fix the decoding logic.
Fixes: d64c0486ed50 ("iommu/amd: Update the PASID information printed to the system log") Cc: Joerg Roedel jroedel@suse.de Cc: Gary R Hook gary.hook@amd.com Signed-off-by: Suravee Suthikulpanit suravee.suthikulpanit@amd.com Signed-off-by: Joerg Roedel jroedel@suse.de Signed-off-by: Nanyong Sun sunnanyong@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/iommu/amd_iommu.c | 5 +++-- drivers/iommu/amd_iommu_types.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index d7a288dd15667..3763e3b825907 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -572,7 +572,8 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) retry: type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - pasid = PPR_PASID(*(u64 *)&event[0]); + pasid = (event[0] & EVENT_DOMID_MASK_HI) | + (event[1] & EVENT_DOMID_MASK_LO); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2];
@@ -605,7 +606,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "Event logged [PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x pasid=0x%04x address=0x%016llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 0948c425d6528..1551253402d11 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -136,8 +136,8 @@ #define EVENT_TYPE_INV_PPR_REQ 0x9 #define EVENT_DEVID_MASK 0xffff #define EVENT_DEVID_SHIFT 0 -#define EVENT_DOMID_MASK 0xffff -#define EVENT_DOMID_SHIFT 0 +#define EVENT_DOMID_MASK_LO 0xffff +#define EVENT_DOMID_MASK_HI 0xf0000 #define EVENT_FLAGS_MASK 0xfff #define EVENT_FLAGS_SHIFT 0x10
On 10/28/21 10:37 AM, Yang Yingliang wrote:
From: Will Deacon will@kernel.org
mainline inclusion from mainline-v5.5-rc1 commit 34debdca68efd5625a2fcea7df1a215591a01f80 category: bugfix bugzilla: 95382
Better to put the whole URL above instead of just the number, since it could be from bugzilla.openeuler.org or gitee.com if my understanding is correct.
Thanks, Guoqing