From: Max Gurtovoy <maxg@mellanox.com>
mainline inclusion
from mainline-v5.2-rc1
commit 87fd125344d68adf7699ec7396aa9f905ce79a80
category: bugfix
bugzilla: NA
CVE: NA
Link: https://gitee.com/openeuler/kernel/issues/I1WGZE
-------------------------------------------------
In the past, before commit f41725bb ("nvme-rdma: Use mr pool"), we needed a reference on the ib_device for as long as the tagset was alive, as the MRs in the request structures needed a valid ib_device. Now we allocate/deallocate the MR pool per QP and consume MRs on demand.
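
For reference, the per-QP pattern introduced by that commit looks roughly
like this (a simplified sketch based on the rdma/mr_pool.h API; exact call
sites vary across kernel versions):

	/* queue setup: pre-allocate one MR per outstanding request */
	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
			      queue->queue_size, IB_MR_TYPE_MEM_REG,
			      nvme_rdma_get_max_fr_pages(ibdev));

	/* per I/O: take an MR from the pool, return it on completion */
	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
	...
	ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);

	/* queue teardown: destroy the pool together with the QP */
	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);

Since the MRs now live and die with the QP rather than with the tagset,
the tagset no longer needs to pin the ib_device.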
Also remove the nvme_rdma_free_tagset() helper and call blk_mq_free_tag_set() directly, as the wrapper is no longer needed.
This commit also fixes a memory leak and a possible segmentation fault. When configuring the system with NIC teaming (aka bonding), we use one network interface to create an HA connection to the target side. In case one connection breaks down, the nvme-rdma driver gets a notification from the rdma-cm layer that the underlying address has changed, and starts the error recovery process. During this process, we reconnect to the target via the second interface in the bond without destroying the tagset. This causes a leak of the initial rdma device (ndev) and a miscount in the reference count of the newly created rdma device (new ndev). During the final destruction (or in another error flow), we get a warning dump from ib_dealloc_pd that we still have in-flight MRs related to that pd. This happens because the miscounted reference count of the rdma device leads to an access violation on its elements (some queues are not destroyed yet).
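
In other words, the refcount imbalance looks like this (an illustrative
sketch; new_ndev is a placeholder name, and nvme_rdma_dev_get()/
nvme_rdma_dev_put() are the helpers whose tagset-lifetime usage this
patch removes):

	/* old code, tagset allocation (controller running on ndev A) */
	nvme_rdma_dev_get(ctrl->device);	/* reference taken on A */

	/* bonding failover: error recovery reconnects via a new device */
	ctrl->device = new_ndev;		/* ctrl->device is now B */

	/* old code, tagset teardown */
	nvme_rdma_dev_put(ctrl->device);	/* drops a reference on B,
						 * not A: A is leaked, B's
						 * refcount is miscounted */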
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Israel Rukshin <israelr@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Leng <lengchao@huawei.com>
Reviewed-by: Jike Cheng <chengjike.cheng@huawei.com>
Conflicts:
	drivers/nvme/host/rdma.c
[lrz: adjust context]
Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Signed-off-by: Lijie <lijie34@huawei.com>
Reviewed-by: Tao Hou <houtao1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/nvme/host/rdma.c | 34 +++++-----------------------------
 1 file changed, 5 insertions(+), 29 deletions(-)
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 077c67816665..0bfc1875575d 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -666,15 +666,6 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 	return ret;
 }
 
-static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
-		struct blk_mq_tag_set *set)
-{
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-
-	blk_mq_free_tag_set(set);
-	nvme_rdma_dev_put(ctrl->device);
-}
-
 static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 		bool admin)
 {
@@ -712,24 +703,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
 
 	ret = blk_mq_alloc_tag_set(set);
 	if (ret)
-		goto out;
-
-	/*
-	 * We need a reference on the device as long as the tag_set is alive,
-	 * as the MRs in the request structures need a valid ib_device.
-	 */
-	ret = nvme_rdma_dev_get(ctrl->device);
-	if (!ret) {
-		ret = -EINVAL;
-		goto out_free_tagset;
-	}
+		return ERR_PTR(ret);
 
 	return set;
-
-out_free_tagset:
-	blk_mq_free_tag_set(set);
-out:
-	return ERR_PTR(ret);
 }
 
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
@@ -737,7 +713,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.admin_q);
-		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 	}
 	if (ctrl->async_event_sqe.data) {
 		cancel_work_sync(&ctrl->ctrl.async_event_work);
@@ -815,7 +791,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 out_free_tagset:
 	if (new)
-		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+		blk_mq_free_tag_set(ctrl->ctrl.admin_tagset);
 out_free_async_qe:
 	if (ctrl->async_event_sqe.data) {
 		nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
@@ -832,7 +808,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
 {
 	if (remove) {
 		blk_cleanup_queue(ctrl->ctrl.connect_q);
-		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
+		blk_mq_free_tag_set(ctrl->ctrl.tagset);
 	}
 	nvme_rdma_free_io_queues(ctrl);
 }
@@ -873,7 +849,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 	blk_cleanup_queue(ctrl->ctrl.connect_q);
 out_free_tag_set:
 	if (new)
-		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
+		blk_mq_free_tag_set(ctrl->ctrl.tagset);
 out_free_io_queues:
 	nvme_rdma_free_io_queues(ctrl);
 	return ret;