From: Keith Busch <kbusch@kernel.org>
mainline inclusion from mainline-v6.12-rc4 commit 1f021341eef41e77a633186e9be5223de2ce5d48 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB67YI CVE: CVE-2024-53093
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1f021341eef41e77a633186e9be5223de2ce5d48
--------------------------------
We need to suppress the partition scan from occurring within the controller's scan_work context. If a path error occurs here, the IO will wait until a path becomes available or all paths are torn down, but that action also occurs within scan_work, so it would deadlock. Defer the partition scan to a different context that does not block scan_work.
Reported-by: Hannes Reinecke <hare@suse.de> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Keith Busch <kbusch@kernel.org>
Conflicts: drivers/nvme/host/multipath.c [Context inconsistency.] fs/block_dev.c [Need to export bdget_part.] Signed-off-by: Zheng Qixing zhengqixing@huawei.com --- drivers/nvme/host/multipath.c | 51 +++++++++++++++++++++++++++++------ drivers/nvme/host/nvme.h | 1 + fs/block_dev.c | 1 + 3 files changed, 45 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index def9270642fa..eee881f970f7 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -335,6 +335,26 @@ blk_qc_t nvme_ns_head_submit_bio(struct bio *bio) return ret; }
+static void nvme_partition_scan_work(struct work_struct *work) +{ + struct nvme_ns_head *head = + container_of(work, struct nvme_ns_head, partition_scan_work); + struct block_device *bdev; + + if (WARN_ON_ONCE(!test_and_clear_bit(GENHD_FL_NO_PART_SCAN, + &head->disk->state))) + return; + + bdev = bdget_part(&head->disk->part0); + if (!bdev) + return; + + mutex_lock(&bdev->bd_mutex); + bdev_disk_changed(bdev, false); + mutex_unlock(&bdev->bd_mutex); + bdput(bdev); +} + static void nvme_requeue_work(struct work_struct *work) { struct nvme_ns_head *head = @@ -367,6 +387,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) bio_list_init(&head->requeue_list); spin_lock_init(&head->requeue_lock); INIT_WORK(&head->requeue_work, nvme_requeue_work); + INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);
/* * Add a multipath node if the subsystems supports multiple controllers. @@ -396,6 +417,16 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) head->disk->private_data = head; head->disk->queue = q; head->disk->flags = GENHD_FL_EXT_DEVT; + + /* + * We need to suppress the partition scan from occuring within the + * controller's scan_work context. If a path error occurs here, the IO + * will wait until a path becomes available or all paths are torn down, + * but that action also occurs within scan_work, so it would deadlock. + * Defer the partion scan to a different context that does not block + * scan_work. + */ + set_bit(GENHD_FL_NO_PART_SCAN, &head->disk->state); sprintf(head->disk->disk_name, "nvme%dn%d", ctrl->subsys->instance, head->instance); return 0; @@ -413,9 +444,11 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) if (!head->disk) return;
- if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) + if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { device_add_disk(&head->subsys->dev, head->disk, nvme_ns_id_attr_groups); + kblockd_schedule_work(&head->partition_scan_work); + }
mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { @@ -716,14 +749,15 @@ void nvme_mpath_shutdown_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) + if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + /* + * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared + * to allow multipath to fail all I/O. + */ + synchronize_srcu(&head->srcu); + kblockd_schedule_work(&head->requeue_work); del_gendisk(head->disk); - /* - * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared - * to allow multipath to fail all I/O. - */ - synchronize_srcu(&head->srcu); - kblockd_schedule_work(&head->requeue_work); + } }
void nvme_mpath_remove_disk(struct nvme_ns_head *head) @@ -734,6 +768,7 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); flush_work(&head->requeue_work); + flush_work(&head->partition_scan_work); blk_cleanup_queue(head->disk->queue); if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { /* diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 3cf5e3ddee94..951e1d0f0885 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -422,6 +422,7 @@ struct nvme_ns_head { struct bio_list requeue_list; spinlock_t requeue_lock; struct work_struct requeue_work; + struct work_struct partition_scan_work; struct mutex lock; unsigned long flags; #define NVME_NSHEAD_DISK_LIVE 0 diff --git a/fs/block_dev.c b/fs/block_dev.c index a0e4d3ec300e..77fc1c0b5460 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -976,6 +976,7 @@ struct block_device *bdget_part(struct hd_struct *part) { return bdget(part_devt(part)); } +EXPORT_SYMBOL_GPL(bdget_part);
long nr_blockdev_pages(void) {