From: Chao Leng <lengchao@huawei.com>
mainline inclusion
from mainline-v5.11-rc5
commit 7674073b2ed35ac951a49c425dec6b39d5a57140
category: bugfix
bugzilla: NA
CVE: NA
Link: https://gitee.com/openeuler/kernel/issues/I1WGZE
-------------------------------------------------
A crash happens when injected request completion takes a long time (nearly 30s). Each namespace has its own request queue, so when completion is delayed that long, multiple request queues may hold timed-out requests at the same time and nvme_rdma_timeout executes concurrently. Requests from different request queues may be queued on the same rdma queue, so the concurrent nvme_rdma_timeout handlers may call nvme_rdma_stop_queue on that rdma queue at the same time. The first nvme_rdma_timeout clears NVME_RDMA_Q_LIVE and proceeds to stop the rdma queue (drain the qp), but the others see NVME_RDMA_Q_LIVE already cleared and complete the requests directly. Completing a request before the qp is fully drained may lead to a use-after-free.
Add a mutex lock to serialize nvme_rdma_stop_queue().
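For illustration only, below is a minimal userspace model of the serialization pattern this patch introduces. It is a sketch, not driver code: pthreads stand in for concurrent nvme_rdma_timeout callers, an atomic flag stands in for NVME_RDMA_Q_LIVE, and sleep() stands in for the qp drain.

/* Hypothetical userspace model of the fix; not driver code.
 * Build with: gcc -pthread model.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int q_live = 1;	/* stands in for NVME_RDMA_Q_LIVE */
static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;

static void __stop_queue(void)
{
	sleep(1);		/* stands in for rdma_disconnect() + qp drain */
	puts("qp drained");
}

static void stop_queue(void)
{
	/* Without the mutex, a second caller would see q_live already
	 * cleared and return while the first caller is still draining.
	 * With it, the second caller blocks until the drain finishes. */
	pthread_mutex_lock(&queue_lock);
	if (atomic_exchange(&q_live, 0))	/* stands in for test_and_clear_bit() */
		__stop_queue();
	pthread_mutex_unlock(&queue_lock);
	puts("safe to complete requests");
}

static void *timeout_handler(void *arg)
{
	(void)arg;
	stop_queue();
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, timeout_handler, NULL);
	pthread_create(&t2, NULL, timeout_handler, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	return 0;
}

In this model both threads print "safe to complete requests" only after "qp drained", mirroring how the mutex forces every nvme_rdma_stop_queue() caller to wait for the qp drain before any request is completed.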
Signed-off-by: Chao Leng <lengchao@huawei.com>
Tested-by: Israel Rukshin <israelr@nvidia.com>
Reviewed-by: Israel Rukshin <israelr@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
conflicts:
	drivers/nvme/host/rdma.c
[lrz: adjust context]
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/nvme/host/rdma.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 158d7417bcea6..91ff9262f6729 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -93,6 +93,7 @@ struct nvme_rdma_queue {
 	struct rdma_cm_id	*cm_id;
 	int			cm_error;
 	struct completion	cm_done;
+	struct mutex		queue_lock;
 };
 
 struct nvme_rdma_ctrl {
@@ -503,6 +504,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 	int ret;
 
 	queue = &ctrl->queues[idx];
+	mutex_init(&queue->queue_lock);
 	queue->ctrl = ctrl;
 	init_completion(&queue->cm_done);
 
@@ -518,7 +520,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 	if (IS_ERR(queue->cm_id)) {
 		dev_info(ctrl->ctrl.device,
 			"failed to create CM ID: %ld\n", PTR_ERR(queue->cm_id));
-		return PTR_ERR(queue->cm_id);
+		ret = PTR_ERR(queue->cm_id);
+		goto out_destroy_mutex;
 	}
 
 	if (ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
@@ -548,6 +551,8 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 out_destroy_cm_id:
 	rdma_destroy_id(queue->cm_id);
 	nvme_rdma_destroy_queue_ib(queue);
+out_destroy_mutex:
+	mutex_destroy(&queue->queue_lock);
 	return ret;
 }
 
@@ -559,10 +564,10 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 
 static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
 {
-	if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
-		return;
-
-	__nvme_rdma_stop_queue(queue);
+	mutex_lock(&queue->queue_lock);
+	if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
+		__nvme_rdma_stop_queue(queue);
+	mutex_unlock(&queue->queue_lock);
 }
 
 static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -572,6 +577,7 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
 
 	nvme_rdma_destroy_queue_ib(queue);
 	rdma_destroy_id(queue->cm_id);
+	mutex_destroy(&queue->queue_lock);
 }
 
 static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)