This series backports three mainline nvme-pci patches to fix CVE-2024-50135.
Christoph Hellwig (2):
  nvme-pci: split nvme_dev_add
  nvme-pci: refactor the tagset handling in nvme_reset_work

Maurizio Lombardi (1):
  nvme-pci: fix race condition between reset and nvme_dev_disable()

 drivers/nvme/host/pci.c | 119 ++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 46 deletions(-)
From: Christoph Hellwig <hch@lst.de>

mainline inclusion
from mainline-v6.0-rc1
commit 2455a4b77835c2c9d1c0310d50f69e6fbc1b173f
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB2BXE
CVE: CVE-2024-50135
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------
Split nvme_dev_add into a helper that actually allocates the tag set, and one that just updates the number of queues. Add a local variable for the tag_set to clean up the code a bit.
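In outline, the call site in nvme_reset_work() then picks the right helper based on whether a tagset already exists. A condensed sketch of the resulting flow (illustration only; the diff below is the actual change):

	if (!dev->ctrl.tagset)
		nvme_pci_alloc_tag_set(dev);	/* first probe: allocate and register the tagset */
	else
		nvme_pci_update_nr_queues(dev);	/* reset: only the queue count may change */
	nvme_dbbuf_set(dev);			/* hoisted out of the old nvme_dev_add() */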
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>

Conflicts:
	drivers/nvme/host/pci.c
[Context conflict.]
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
---
 drivers/nvme/host/pci.c | 72 +++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 35 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 456e09c6bfc6..93d5d49c9290 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2317,47 +2317,45 @@ static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
 	return true;
 }
 
-static void nvme_dev_add(struct nvme_dev *dev)
+static void nvme_pci_alloc_tag_set(struct nvme_dev *dev)
 {
+	struct blk_mq_tag_set * set = &dev->tagset;
 	int ret;
 
-	if (!dev->ctrl.tagset) {
-		dev->tagset.ops = &nvme_mq_ops;
-		dev->tagset.nr_hw_queues = dev->online_queues - 1;
-		dev->tagset.nr_maps = 2; /* default + read */
-		if (dev->io_queues[HCTX_TYPE_POLL])
-			dev->tagset.nr_maps++;
-		dev->tagset.timeout = NVME_IO_TIMEOUT;
-		dev->tagset.numa_node = dev->ctrl.numa_node;
-		dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
-						BLK_MQ_MAX_DEPTH) - 1;
-		dev->tagset.cmd_size = sizeof(struct nvme_iod);
-		dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
-		dev->tagset.driver_data = dev;
+	set->ops = &nvme_mq_ops;
+	set->nr_hw_queues = dev->online_queues - 1;
+	set->nr_maps = 2; /* default + read */
+	if (dev->io_queues[HCTX_TYPE_POLL])
+		set->nr_maps++;
+	set->timeout = NVME_IO_TIMEOUT;
+	set->numa_node = dev->ctrl.numa_node;
+	set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
+	set->cmd_size = sizeof(struct nvme_iod);
+	set->flags = BLK_MQ_F_SHOULD_MERGE;
+	set->driver_data = dev;
 
-		/*
-		 * Some Apple controllers requires tags to be unique
-		 * across admin and IO queue, so reserve the first 32
-		 * tags of the IO queue.
-		 */
-		if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
-			dev->tagset.reserved_tags = NVME_AQ_DEPTH;
-
-		ret = blk_mq_alloc_tag_set(&dev->tagset);
-		if (ret) {
-			dev_warn(dev->ctrl.device,
-				"IO queues tagset allocation failed %d\n", ret);
-			return;
-		}
-		dev->ctrl.tagset = &dev->tagset;
-	} else {
-		blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
+	/*
+	 * Some Apple controllers requires tags to be unique
+	 * across admin and IO queue, so reserve the first 32
+	 * tags of the IO queue.
+	 */
+	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
+		set->reserved_tags = NVME_AQ_DEPTH;
 
-		/* Free previously allocated queues that are no longer usable */
-		nvme_free_queues(dev, dev->online_queues);
+	ret = blk_mq_alloc_tag_set(set);
+	if (ret) {
+		dev_warn(dev->ctrl.device,
+			"IO queues tagset allocation failed %d\n", ret);
+		return;
 	}
+	dev->ctrl.tagset = set;
+}
 
-	nvme_dbbuf_set(dev);
+static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
+{
+	blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
+	/* free previously allocated queues that are no longer usable */
+	nvme_free_queues(dev, dev->online_queues);
 }
 
 static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2716,7 +2714,11 @@ static void nvme_reset_work(struct work_struct *work)
 	} else {
 		nvme_start_queues(&dev->ctrl);
 		nvme_wait_freeze(&dev->ctrl);
-		nvme_dev_add(dev);
+		if (!dev->ctrl.tagset)
+			nvme_pci_alloc_tag_set(dev);
+		else
+			nvme_pci_update_nr_queues(dev);
+		nvme_dbbuf_set(dev);
 		nvme_unfreeze(&dev->ctrl);
 	}
 
From: Christoph Hellwig <hch@lst.de>

mainline inclusion
from mainline-v6.2-rc1
commit 0ffc7e98bfaa45380b800deeb9b65ce0371c652d
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB2BXE
CVE: CVE-2024-50135
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
The code to create, update or delete a tagset and namespaces in nvme_reset_work is a bit convoluted. Refactor it with two high-level conditionals, one for first probe vs. reset and one for I/O queues vs. no I/O queues, to make the code flow clearer.
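Condensed, the refactored nvme_reset_work() reads as follows (sketch only, bodies elided; the diff below is authoritative):

	if (dev->ctrl.tagset) {
		/* controller reset: a tagset already exists */
		if (dev->online_queues > 1) {
			/* freeze, update the number of I/O queues, unfreeze */
		} else {
			/* no usable I/O queues: remove namespaces, free the tagset */
		}
	} else {
		/* first probe */
		if (dev->online_queues > 1) {
			/* allocate the tagset */
		} else {
			/* let the controller show up without namespaces */
		}
	}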
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Link: https://lore.kernel.org/r/20221101150050.3510-3-hch@lst.de
[axboe: fix whitespace issue]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
---
 drivers/nvme/host/pci.c | 44 ++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 16 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 93d5d49c9290..f9a872e7d16d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2702,24 +2702,36 @@ static void nvme_reset_work(struct work_struct *work)
 	if (result)
 		goto out;
 
-	/*
-	 * Keep the controller around but remove all namespaces if we don't have
-	 * any working I/O queue.
-	 */
-	if (dev->online_queues < 2) {
-		dev_warn(dev->ctrl.device, "IO queues not created\n");
-		nvme_kill_queues(&dev->ctrl);
-		nvme_remove_namespaces(&dev->ctrl);
-		nvme_free_tagset(dev);
+	if (dev->ctrl.tagset) {
+		/*
+		 * This is a controller reset and we already have a tagset.
+		 * Freeze and update the number of I/O queues as those might have
+		 * changed.  If there are no I/O queues left after this reset,
+		 * keep the controller around but remove all namespaces.
+		 */
+		if (dev->online_queues > 1) {
+			nvme_start_queues(&dev->ctrl);
+			nvme_wait_freeze(&dev->ctrl);
+			nvme_pci_update_nr_queues(dev);
+			nvme_dbbuf_set(dev);
+			nvme_unfreeze(&dev->ctrl);
+		} else {
+			dev_warn(dev->ctrl.device, "IO queues lost\n");
+			nvme_kill_queues(&dev->ctrl);
+			nvme_remove_namespaces(&dev->ctrl);
+			nvme_free_tagset(dev);
+		}
 	} else {
-		nvme_start_queues(&dev->ctrl);
-		nvme_wait_freeze(&dev->ctrl);
-		if (!dev->ctrl.tagset)
+		/*
+		 * First probe.  Still allow the controller to show up even if
+		 * there are no namespaces.
+		 */
+		if (dev->online_queues > 1) {
 			nvme_pci_alloc_tag_set(dev);
-		else
-			nvme_pci_update_nr_queues(dev);
-		nvme_dbbuf_set(dev);
-		nvme_unfreeze(&dev->ctrl);
+			nvme_dbbuf_set(dev);
+		} else {
+			dev_warn(dev->ctrl.device, "IO queues not created\n");
+		}
 	}
 
 	/*
From: Maurizio Lombardi <mlombard@redhat.com>

mainline inclusion
from mainline-v6.12-rc4
commit 26bc0a81f64ce00fc4342c38eeb2eddaad084dd2
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IB2BXE
CVE: CVE-2024-50135
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
nvme_dev_disable() modifies the dev->online_queues field; therefore nvme_pci_update_nr_queues() should avoid racing against it, otherwise we could end up passing invalid values to blk_mq_update_nr_hw_queues().
WARNING: CPU: 39 PID: 61303 at drivers/pci/msi/api.c:347 pci_irq_get_affinity+0x187/0x210
Workqueue: nvme-reset-wq nvme_reset_work [nvme]
RIP: 0010:pci_irq_get_affinity+0x187/0x210
Call Trace:
 <TASK>
 ? blk_mq_pci_map_queues+0x87/0x3c0
 ? pci_irq_get_affinity+0x187/0x210
 blk_mq_pci_map_queues+0x87/0x3c0
 nvme_pci_map_queues+0x189/0x460 [nvme]
 blk_mq_update_nr_hw_queues+0x2a/0x40
 nvme_reset_work+0x1be/0x2a0 [nvme]
Fix the bug by locking the shutdown_lock mutex before using dev->online_queues. Give up if nvme_dev_disable() is running or if it has been executed already.
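In outline, the guarded update looks like this (condensed sketch; the diff below is the actual change). nvme_dev_disable() runs under shutdown_lock and takes the queues offline, so failing either check means the update must be abandoned:

	static bool nvme_pci_update_nr_queues(struct nvme_dev *dev)
	{
		/* nvme_dev_disable() is running concurrently: give up */
		if (!mutex_trylock(&dev->shutdown_lock))
			return false;

		/* nvme_dev_disable() has already run: nothing left to update */
		if (!dev->online_queues) {
			mutex_unlock(&dev->shutdown_lock);
			return false;
		}

		blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
		nvme_free_queues(dev, dev->online_queues);
		mutex_unlock(&dev->shutdown_lock);
		return true;
	}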
Fixes: 949928c1c731 ("NVMe: Fix possible queue use after freed")
Tested-by: Yi Zhang <yi.zhang@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>

Conflicts:
	drivers/nvme/host/pci.c
[Context conflict.]
Signed-off-by: Zheng Qixing <zhengqixing@huawei.com>
---
 drivers/nvme/host/pci.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f9a872e7d16d..da624c4f2a5f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2351,11 +2351,23 @@ static void nvme_pci_alloc_tag_set(struct nvme_dev *dev)
 	dev->ctrl.tagset = set;
 }
 
-static void nvme_pci_update_nr_queues(struct nvme_dev *dev)
+static bool nvme_pci_update_nr_queues(struct nvme_dev *dev)
 {
+	/* Give up if we are racing with nvme_dev_disable() */
+	if (!mutex_trylock(&dev->shutdown_lock))
+		return false;
+
+	/* Check if nvme_dev_disable() has been executed already */
+	if (!dev->online_queues) {
+		mutex_unlock(&dev->shutdown_lock);
+		return false;
+	}
+
 	blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);
 	/* free previously allocated queues that are no longer usable */
 	nvme_free_queues(dev, dev->online_queues);
+	mutex_unlock(&dev->shutdown_lock);
+	return true;
 }
 
 static int nvme_pci_enable(struct nvme_dev *dev)
@@ -2712,7 +2724,8 @@ static void nvme_reset_work(struct work_struct *work)
 		if (dev->online_queues > 1) {
 			nvme_start_queues(&dev->ctrl);
 			nvme_wait_freeze(&dev->ctrl);
-			nvme_pci_update_nr_queues(dev);
+			if (!nvme_pci_update_nr_queues(dev))
+				goto out;
 			nvme_dbbuf_set(dev);
 			nvme_unfreeze(&dev->ctrl);
 		} else {
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/13337 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/R...