From: Xinghai Cen cenxinghai@h-partners.com
[RDMA/hns] Backport some bugfixes from the Linux mailing list:
Xinghai Cen (1): Revert "RDMA/hns: Fix an AEQE overflow error caused by untimely update of eq_db_ci"
wenglianfa (2): RDMA/hns: Fix flush cqe error when racing with destroy qp RDMA/hns: Fix an AEQE overflow error caused by untimely update of eq_db_ci
drivers/infiniband/hw/hns/hns_roce_device.h | 3 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 +++- drivers/infiniband/hw/hns/hns_roce_qp.c | 69 ++++++++++++++------- 3 files changed, 63 insertions(+), 22 deletions(-)
From: wenglianfa wenglianfa@huawei.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB8NKF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=377...
----------------------------------------------------------------------
The QP needs to be modified to IB_QPS_ERR to trigger the HW flush of CQEs. But when this process races with destroy qp, the destroy-qp process may modify the QP to IB_QPS_RESET first. In this case the CQE flush will fail, since it is invalid to modify a QP from IB_QPS_RESET to IB_QPS_ERR.
Add a lock and a bit flag to make sure that any pending flush-CQE work is completed first and that no new flush work is queued afterwards.
Fixes: ffd541d45726 ("RDMA/hns: Add the workqueue framework for flush cqe handler") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Link: https://patch.msgid.link/20241024124000.2931869-3-huangjunxian6@hisilicon.co... Reviewed-by: Zhu Yanjun yanjun.zhu@linux.dev Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 15 +++++++++++++-- 3 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a6c4ef631bb7..4a6707ed96ce 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -717,6 +717,7 @@ struct hns_roce_dev;
enum { HNS_ROCE_FLUSH_FLAG = 0, + HNS_ROCE_STOP_FLUSH_FLAG = 1, };
struct hns_roce_work { @@ -777,6 +778,7 @@ struct hns_roce_qp { bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; struct hns_roce_dip *dip; + spinlock_t flush_lock; };
struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 28feac784e49..8455f5d1ba07 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6330,7 +6330,13 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + unsigned long flags; int ret; + /* Make sure flush_cqe() is completed */ + spin_lock_irqsave(&hr_qp->flush_lock, flags); + set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + flush_work(&hr_qp->flush_work.work);
if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) put_dip_ctx_idx(hr_dev, hr_qp); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 98d9f3a77997..26af01f9dd84 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -71,11 +71,18 @@ static void flush_work_handle(struct work_struct *work) void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_work *flush_work = &hr_qp->flush_work; + unsigned long flags; + + spin_lock_irqsave(&hr_qp->flush_lock, flags); + /* Exit directly after destroy_qp() */ + if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) { + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + return; + }
- flush_work->hr_dev = hr_dev; - INIT_WORK(&flush_work->work, flush_work_handle); refcount_inc(&hr_qp->refcount); queue_work(hr_dev->irq_workq, &flush_work->work); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); }
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) @@ -1348,6 +1355,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata, struct hns_roce_qp *hr_qp) { + struct hns_roce_work *flush_work = &hr_qp->flush_work; struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; @@ -1356,9 +1364,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); spin_lock_init(&hr_qp->rq.lock); + spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + flush_work->hr_dev = hr_dev; + INIT_WORK(&flush_work->work, flush_work_handle);
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) {
From: Xinghai Cen cenxinghai@h-partners.com
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IB8NKF
----------------------------------------------------------------------
This reverts commit fb3989b0ea1bf8414b712897f3a13481f9f87ba9.
Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 68 ++++++++-------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 5 -- 2 files changed, 24 insertions(+), 49 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8455f5d1ba07..874637606843 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6708,10 +6708,11 @@ static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key, return ret; }
-static void dump_aeqe_log(struct hns_roce_work *irq_work) +static void hns_roce_irq_work_handle(struct work_struct *work) { - struct hns_roce_dev *hr_dev = irq_work->hr_dev; - struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_work *irq_work = + container_of(work, struct hns_roce_work, work); + struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -6755,8 +6756,6 @@ static void dump_aeqe_log(struct hns_roce_work *irq_work) case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); break; - case HNS_ROCE_EVENT_TYPE_MB: - break; case HNS_ROCE_EVENT_TYPE_FLR: ibdev_warn(ibdev, "Function level reset.\n"); break; @@ -6766,43 +6765,10 @@ static void dump_aeqe_log(struct hns_roce_work *irq_work) case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: ibdev_err(ibdev, "invalid xrceth error.\n"); break; - default: - ibdev_info(ibdev, "Undefined event %d.\n", - irq_work->event_type); - break; - } -} - -static void hns_roce_irq_work_handle(struct work_struct *work) -{ - struct hns_roce_work *irq_work = - container_of(work, struct hns_roce_work, work); - struct hns_roce_dev *hr_dev = irq_work->hr_dev; - int event_type = irq_work->event_type; - u32 queue_num = irq_work->queue_num; - - switch (event_type) { - case HNS_ROCE_EVENT_TYPE_PATH_MIG: - case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - case HNS_ROCE_EVENT_TYPE_COMM_EST: - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: - hns_roce_qp_event(hr_dev, queue_num, event_type); - break; - case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, queue_num, event_type); - break; - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, queue_num, event_type); - break; default: break; }
- dump_aeqe_log(irq_work); - kfree(irq_work); }
@@ -6862,14 +6828,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { + struct device *dev = hr_dev->dev; struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); irqreturn_t aeqe_found = IRQ_NONE; - int num_aeqes = 0; int event_type; u32 queue_num; int sub_type;
- while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) { + while (aeqe) { /* Make sure we read AEQ entry after we have checked the * ownership bit */ @@ -6880,20 +6846,38 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + case HNS_ROCE_EVENT_TYPE_COMM_EST: + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: hns_roce_qp_event(hr_dev, queue_num, event_type); break; + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, queue_num, event_type); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + hns_roce_cq_event(hr_dev, queue_num, event_type); + break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, le16_to_cpu(aeqe->event.cmd.token), aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; + case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: + case HNS_ROCE_EVENT_TYPE_FLR: + break; default: + dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", + event_type, eq->eqn, eq->cons_index); break; }
@@ -6906,7 +6890,6 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq); - ++num_aeqes; }
update_eq_db(eq); @@ -7457,9 +7440,6 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) int ret; int i;
- if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET) - return -EINVAL; - other_num = hr_dev->caps.num_other_vectors; comp_num = hr_dev->caps.num_comp_vectors; aeq_num = hr_dev->caps.num_aeq_vectors; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 41f868d4b3f1..238e58c6000b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -85,11 +85,6 @@
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
-/* budget must be smaller than aeqe_depth to guarantee that we update - * the ci before we polled all the entries in the EQ. - */ -#define HNS_AEQ_POLLING_BUDGET 64 - enum { HNS_ROCE_CMD_FLAG_IN = BIT(0), HNS_ROCE_CMD_FLAG_OUT = BIT(1),
From: wenglianfa wenglianfa@huawei.com
maillist inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB8NKF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=571...
----------------------------------------------------------------------
eq_db_ci is updated only after all AEQEs have been processed in the AEQ interrupt handler, which is not timely enough and may result in AEQ overflow. Two optimizations are proposed: 1. Set an upper limit on the number of AEQEs processed per interrupt. 2. Move time-consuming operations, such as printing, to the bottom half of the interrupt.
cmd events and flush_cqe events are still fully processed in the top half to ensure timely handling.
Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Link: https://patch.msgid.link/20241024124000.2931869-2-huangjunxian6@hisilicon.co... Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 75 ++++++++++++++------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 5 ++ drivers/infiniband/hw/hns/hns_roce_qp.c | 54 +++++++++------ 4 files changed, 91 insertions(+), 44 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 4a6707ed96ce..a8083da693bf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1540,6 +1540,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn); void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); +void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 874637606843..7dc352b13509 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6708,11 +6708,10 @@ static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key, return ret; }
-static void hns_roce_irq_work_handle(struct work_struct *work) +static void dump_aeqe_log(struct hns_roce_work *irq_work) { - struct hns_roce_work *irq_work = - container_of(work, struct hns_roce_work, work); - struct ib_device *ibdev = &irq_work->hr_dev->ib_dev; + struct hns_roce_dev *hr_dev = irq_work->hr_dev; + struct ib_device *ibdev = &hr_dev->ib_dev;
switch (irq_work->event_type) { case HNS_ROCE_EVENT_TYPE_PATH_MIG: @@ -6756,6 +6755,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work) case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: ibdev_warn(ibdev, "DB overflow.\n"); break; + case HNS_ROCE_EVENT_TYPE_MB: + break; case HNS_ROCE_EVENT_TYPE_FLR: ibdev_warn(ibdev, "Function level reset.\n"); break; @@ -6766,8 +6767,46 @@ static void hns_roce_irq_work_handle(struct work_struct *work) ibdev_err(ibdev, "invalid xrceth error.\n"); break; default: + ibdev_info(ibdev, "Undefined event %d.\n", + irq_work->event_type); break; } +} + +static void hns_roce_irq_work_handle(struct work_struct *work) +{ + struct hns_roce_work *irq_work = + container_of(work, struct hns_roce_work, work); + struct hns_roce_dev *hr_dev = irq_work->hr_dev; + int event_type = irq_work->event_type; + u32 queue_num = irq_work->queue_num; + + switch (event_type) { + case HNS_ROCE_EVENT_TYPE_PATH_MIG: + case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: + case HNS_ROCE_EVENT_TYPE_COMM_EST: + case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: + case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: + case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: + case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: + case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: + case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: + case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: + hns_roce_qp_event(hr_dev, queue_num, event_type); + break; + case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: + case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: + hns_roce_srq_event(hr_dev, queue_num, event_type); + break; + case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: + case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: + hns_roce_cq_event(hr_dev, queue_num, event_type); + break; + default: + break; + } + + dump_aeqe_log(irq_work);
kfree(irq_work); } @@ -6828,14 +6867,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq) static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) { - struct device *dev = hr_dev->dev; struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq); irqreturn_t aeqe_found = IRQ_NONE; + int num_aeqes = 0; int event_type; u32 queue_num; int sub_type;
- while (aeqe) { + while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) { /* Make sure we read AEQ entry after we have checked the * ownership bit */ @@ -6846,25 +6885,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
switch (event_type) { - case HNS_ROCE_EVENT_TYPE_PATH_MIG: - case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED: - case HNS_ROCE_EVENT_TYPE_COMM_EST: - case HNS_ROCE_EVENT_TYPE_SQ_DRAINED: case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR: - case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH: case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR: case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR: case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION: case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH: - hns_roce_qp_event(hr_dev, queue_num, event_type); - break; - case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH: - case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR: - hns_roce_srq_event(hr_dev, queue_num, event_type); - break; - case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR: - case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW: - hns_roce_cq_event(hr_dev, queue_num, event_type); + hns_roce_flush_cqe(hr_dev, queue_num); break; case HNS_ROCE_EVENT_TYPE_MB: hns_roce_cmd_event(hr_dev, @@ -6872,12 +6898,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, aeqe->event.cmd.status, le64_to_cpu(aeqe->event.cmd.out_param)); break; - case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW: - case HNS_ROCE_EVENT_TYPE_FLR: - break; default: - dev_err(dev, "Unhandled event %d on EQ %d at idx %u.\n", - event_type, eq->eqn, eq->cons_index); break; }
@@ -6890,6 +6911,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev, hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
aeqe = next_aeqe_sw_v2(eq); + ++num_aeqes; }
update_eq_db(eq); @@ -7440,6 +7462,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev) int ret; int i;
+ if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET) + return -EINVAL; + other_num = hr_dev->caps.num_other_vectors; comp_num = hr_dev->caps.num_comp_vectors; aeq_num = hr_dev->caps.num_aeq_vectors; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 238e58c6000b..41f868d4b3f1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -85,6 +85,11 @@
#define HNS_ROCE_V2_TABLE_CHUNK_SIZE (1 << 18)
+/* budget must be smaller than aeqe_depth to guarantee that we update + * the ci before we polled all the entries in the EQ. + */ +#define HNS_AEQ_POLLING_BUDGET 64 + enum { HNS_ROCE_CMD_FLAG_IN = BIT(0), HNS_ROCE_CMD_FLAG_OUT = BIT(1), diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 26af01f9dd84..a3eb05e8471e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,6 +39,25 @@ #include "hns_roce_hem.h" #include "hns_roce_dca.h"
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev, + u32 qpn) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_qp *qp; + unsigned long flags; + + xa_lock_irqsave(&hr_dev->qp_table_xa, flags); + qp = __hns_roce_qp_lookup(hr_dev, qpn); + if (qp) + refcount_inc(&qp->refcount); + xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags); + + if (!qp) + dev_warn(dev, "async event for bogus QP %08x\n", qpn); + + return qp; +} + static void flush_work_handle(struct work_struct *work) { struct hns_roce_work *flush_work = container_of(work, @@ -102,31 +121,28 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type) { - struct device *dev = hr_dev->dev; struct hns_roce_qp *qp;
- xa_lock(&hr_dev->qp_table_xa); - qp = __hns_roce_qp_lookup(hr_dev, qpn); - if (qp) - refcount_inc(&qp->refcount); - xa_unlock(&hr_dev->qp_table_xa); - - if (!qp) { - dev_warn(dev, "Async event for bogus QP %08x\n", qpn); + qp = hns_roce_qp_lookup(hr_dev, qpn); + if (!qp) return; - }
- if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR || - event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION || - event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) { - qp->state = IB_QPS_ERR; + qp->event(qp, (enum hns_roce_event)event_type);
- flush_cqe(hr_dev, qp); - } + if (refcount_dec_and_test(&qp->refcount)) + complete(&qp->free); +}
- qp->event(qp, (enum hns_roce_event)event_type); +void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn) +{ + struct hns_roce_qp *qp; + + qp = hns_roce_qp_lookup(hr_dev, qpn); + if (!qp) + return; + + qp->state = IB_QPS_ERR; + flush_cqe(hr_dev, qp);
if (refcount_dec_and_test(&qp->refcount)) complete(&qp->free);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/14109 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/A...
Feedback: The patch(es) which you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/14109 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/A...