From: Li Ming <ming4.li(a)intel.com>
mainline inclusion
from mainline-v6.10-rc7
commit 84ec985944ef34a34a1605b93ce401aa8737af96
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAGEOB
CVE: CVE-2024-41085
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?…
--------------------------------
When CXL subsystem is auto-assembling a pmem region during cxl
endpoint port probing, always hit below calltrace.
BUG: kernel NULL pointer dereference, address: 0000000000000078
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
RIP: 0010:cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem]
Call Trace:
<TASK>
? __die+0x24/0x70
? page_fault_oops+0x82/0x160
? do_user_addr_fault+0x65/0x6b0
? exc_page_fault+0x7d/0x170
? asm_exc_page_fault+0x26/0x30
? cxl_pmem_region_probe+0x22e/0x360 [cxl_pmem]
? cxl_pmem_region_probe+0x1ac/0x360 [cxl_pmem]
cxl_bus_probe+0x1b/0x60 [cxl_core]
really_probe+0x173/0x410
? __pfx___device_attach_driver+0x10/0x10
__driver_probe_device+0x80/0x170
driver_probe_device+0x1e/0x90
__device_attach_driver+0x90/0x120
bus_for_each_drv+0x84/0xe0
__device_attach+0xbc/0x1f0
bus_probe_device+0x90/0xa0
device_add+0x51c/0x710
devm_cxl_add_pmem_region+0x1b5/0x380 [cxl_core]
cxl_bus_probe+0x1b/0x60 [cxl_core]
The cxl_nvd of the memdev needs to be available during the pmem region
probe. Currently the cxl_nvd is registered after the endpoint port probe.
The endpoint probe, in the case of autoassembly of regions, can cause a
pmem region probe requiring the not yet available cxl_nvd. Adjust the
sequence so this dependency is met.
This requires adding a port parameter to cxl_find_nvdimm_bridge() that
can be used to query the ancestor root port. The endpoint port is not
yet available, but will share a common ancestor with its parent, so
start the query from there instead.
Fixes: f17b558d6663 ("cxl/pmem: Refactor nvdimm device registration, delete the workqueue")
Co-developed-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Dan Williams <dan.j.williams(a)intel.com>
Signed-off-by: Li Ming <ming4.li(a)intel.com>
Tested-by: Alison Schofield <alison.schofield(a)intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron(a)huawei.com>
Reviewed-by: Alison Schofield <alison.schofield(a)intel.com>
Link: https://patch.msgid.link/20240612064423.2567625-1-ming4.li@intel.com
Signed-off-by: Dave Jiang <dave.jiang(a)intel.com>
Conflicts:
drivers/cxl/core/pmem.c
drivers/cxl/core/region.c
[wangxiongfeng: fix conflicts in cxl_find_nvdimm_bridge() and
cxl_pmem_region_alloc().]
Signed-off-by: Xiongfeng Wang <wangxiongfeng2(a)huawei.com>
---
drivers/cxl/core/pmem.c | 15 +++++++++++----
drivers/cxl/core/region.c | 2 +-
drivers/cxl/cxl.h | 4 ++--
drivers/cxl/mem.c | 17 +++++++++--------
4 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index fc94f5240327..fbaa1cbeac40 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -62,9 +62,14 @@ static int match_nvdimm_bridge(struct device *dev, void *data)
return is_cxl_nvdimm_bridge(dev);
}
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd)
+/**
+ * cxl_find_nvdimm_bridge() - find a bridge device relative to a port
+ * @port: any descendant port of an nvdimm-bridge associated
+ * root-cxl-port
+ */
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port)
{
- struct cxl_port *port = find_cxl_root(cxlmd->endpoint);
+ struct cxl_port *port = find_cxl_root(port);
struct device *dev;
if (!port)
@@ -242,18 +247,20 @@ static void cxlmd_release_nvdimm(void *_cxlmd)
/**
* devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
+ * @parent_port: parent port for the (to be added) @cxlmd endpoint port
* @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
*
* Return: 0 on success negative error code on failure.
*/
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd)
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port,
+ struct cxl_memdev *cxlmd)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct cxl_nvdimm *cxl_nvd;
struct device *dev;
int rc;
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+ cxl_nvb = cxl_find_nvdimm_bridge(parent_port);
if (!cxl_nvb)
return -ENODEV;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 6f06255012f0..94e0af80d0b4 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2547,7 +2547,7 @@ static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
* bridge for one device is the same for all.
*/
if (i == 0) {
- cxl_nvb = cxl_find_nvdimm_bridge(cxlmd);
+ cxl_nvb = cxl_find_nvdimm_bridge(cxlmd->endpoint);
if (!cxl_nvb) {
cxlr_pmem = ERR_PTR(-ENODEV);
goto out;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 47fc7f692ae9..3b0b694a95b1 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -805,8 +805,8 @@ struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm(struct device *dev);
bool is_cxl_nvdimm_bridge(struct device *dev);
-int devm_cxl_add_nvdimm(struct cxl_memdev *cxlmd);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_memdev *cxlmd);
+int devm_cxl_add_nvdimm(struct cxl_port *parent_port, struct cxl_memdev *cxlmd);
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_port *port);
#ifdef CONFIG_CXL_REGION
bool is_cxl_pmem_region(struct device *dev);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index e087febf9af0..22b260d261e4 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,6 +152,15 @@ static int cxl_mem_probe(struct device *dev)
return -ENXIO;
}
+ if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+ rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
+ if (rc) {
+ if (rc == -ENODEV)
+ dev_info(dev, "PMEM disabled by platform\n");
+ return rc;
+ }
+ }
+
if (dport->rch)
endpoint_parent = parent_port->uport_dev;
else
@@ -174,14 +183,6 @@ static int cxl_mem_probe(struct device *dev)
if (rc)
return rc;
- if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
- rc = devm_cxl_add_nvdimm(cxlmd);
- if (rc == -ENODEV)
- dev_info(dev, "PMEM disabled by platform\n");
- else
- return rc;
- }
-
/*
* The kernel may be operating out of CXL memory on this device,
* there is no spec defined way to determine whether this device
--
2.20.1
Dan Carpenter (1):
PCI: dwc: Fix a 64bit bug in dw_pcie_ep_raise_msix_irq()
Song Liu (1):
Revert "Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d""
--
2.39.2
From: Yu Kuai <yukuai3(a)huawei.com>
stable inclusion
from stable-v5.10.219
commit aa64464c8f4d2ab92f6d0b959a1e0767b829d787
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAB04V
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id…
--------------------------------
commit 151f66bb618d1fd0eeb84acb61b4a9fa5d8bb0fa upstream.
Xiao reported that lvm2 test lvconvert-raid-takeover.sh can hang with
small possibility, the root cause is exactly the same as commit
bed9e27baf52 ("Revert "md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d"")
However, Dan reported another hang after that, and junxiao investigated
the problem and found out that this is caused by plugged bio can't issue
from raid5d().
Current implementation in raid5d() has a weird dependence:
1) md_check_recovery() from raid5d() must hold 'reconfig_mutex' to clear
MD_SB_CHANGE_PENDING;
2) raid5d() handles IO in a deadloop, until all IO are issued;
3) IO from raid5d() must wait for MD_SB_CHANGE_PENDING to be cleared;
This behaviour is introduce before v2.6, and for consequence, if other
context hold 'reconfig_mutex', and md_check_recovery() can't update
super_block, then raid5d() will waste one cpu 100% by the deadloop, until
'reconfig_mutex' is released.
Refer to the implementation from raid1 and raid10, fix this problem by
skipping issue IO if MD_SB_CHANGE_PENDING is still set after
md_check_recovery(), daemon thread will be woken up when 'reconfig_mutex'
is released. Meanwhile, the hang problem will be fixed as well.
Fixes: 5e2cf333b7bd ("md/raid5: Wait for MD_SB_CHANGE_PENDING in raid5d")
Cc: stable(a)vger.kernel.org # v5.19+
Reported-and-tested-by: Dan Moulding <dan(a)danm.net>
Closes: https://lore.kernel.org/all/20240123005700.9302-1-dan@danm.net/
Investigated-by: Junxiao Bi <junxiao.bi(a)oracle.com>
Signed-off-by: Yu Kuai <yukuai3(a)huawei.com>
Link: https://lore.kernel.org/r/20240322081005.1112401-1-yukuai1@huaweicloud.com
Signed-off-by: Song Liu <song(a)kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh(a)linuxfoundation.org>
Signed-off-by: Wang Hai <wanghai38(a)huawei.com>
---
drivers/md/raid5.c | 15 +++------------
1 file changed, 3 insertions(+), 12 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c4938b1a587e..3cb90d7e88d9 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -36,7 +36,6 @@
*/
#include <linux/blkdev.h>
-#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/async_tx.h>
@@ -6476,6 +6475,9 @@ static void raid5d(struct md_thread *thread)
int batch_size, released;
unsigned int offset;
+ if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags))
+ break;
+
released = release_stripe_list(conf, conf->temp_inactive_list);
if (released)
clear_bit(R5_DID_ALLOC, &conf->cache_state);
@@ -6512,18 +6514,7 @@ static void raid5d(struct md_thread *thread)
spin_unlock_irq(&conf->device_lock);
md_check_recovery(mddev);
spin_lock_irq(&conf->device_lock);
-
- /*
- * Waiting on MD_SB_CHANGE_PENDING below may deadlock
- * seeing md_check_recovery() is needed to clear
- * the flag when using mdmon.
- */
- continue;
}
-
- wait_event_lock_irq(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags),
- conf->device_lock);
}
pr_debug("%d stripes handled\n", handled);
--
2.17.1