From: wenglianfa wenglianfa@huawei.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB8NKF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=377...
----------------------------------------------------------------------
QP needs to be modified to IB_QPS_ERROR to trigger HW flush cqe. But when this process races with destroy qp, the destroy-qp process may modify the QP to IB_QPS_RESET first. In this case flush cqe will fail since it is invalid to modify qp from IB_QPS_RESET to IB_QPS_ERROR.
Add lock and bit flag to make sure pending flush cqe work is completed first and no more new works will be added.
Fixes: ffd541d45726 ("RDMA/hns: Add the workqueue framework for flush cqe handler") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Link: https://patch.msgid.link/20241024124000.2931869-3-huangjunxian6@hisilicon.co... Reviewed-by: Zhu Yanjun yanjun.zhu@linux.dev Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 15 +++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a6c4ef631bb7..4a6707ed96ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -717,6 +717,7 @@ struct hns_roce_dev;
enum { HNS_ROCE_FLUSH_FLAG = 0, + HNS_ROCE_STOP_FLUSH_FLAG = 1, };
struct hns_roce_work { @@ -777,6 +778,7 @@ struct hns_roce_qp { bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; struct hns_roce_dip *dip; + spinlock_t flush_lock; };
struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 28feac784e49..8455f5d1ba07 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6330,7 +6330,13 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + unsigned long flags; int ret; + /* Make sure flush_cqe() is completed */ + spin_lock_irqsave(&hr_qp->flush_lock, flags); + set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + flush_work(&hr_qp->flush_work.work);
if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) put_dip_ctx_idx(hr_dev, hr_qp); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 98d9f3a77997..26af01f9dd84 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -71,11 +71,18 @@ static void flush_work_handle(struct work_struct *work) void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_work *flush_work = &hr_qp->flush_work; + unsigned long flags; + + spin_lock_irqsave(&hr_qp->flush_lock, flags); + /* Exit directly after destroy_qp() */ + if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) { + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + return; + }
- flush_work->hr_dev = hr_dev; - INIT_WORK(&flush_work->work, flush_work_handle); refcount_inc(&hr_qp->refcount); queue_work(hr_dev->irq_workq, &flush_work->work); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); }
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) @@ -1348,6 +1355,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata, struct hns_roce_qp *hr_qp) { + struct hns_roce_work *flush_work = &hr_qp->flush_work; struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; @@ -1356,9 +1364,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); spin_lock_init(&hr_qp->rq.lock); + spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + flush_work->hr_dev = hr_dev; + INIT_WORK(&flush_work->work, flush_work_handle);
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) {