From: wenglianfa <wenglianfa@huawei.com>
maillist inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IB1I99
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=571...
----------------------------------------------------------------------
eq_db_ci is updated only after all AEQEs are processed in the AEQ
interrupt handler, which is not timely enough and may result in AEQ
overflow. Two optimization methods are proposed:
1. Set an upper limit for AEQE processing.
2. Move time-consuming operations such as log printing to the bottom
   half of the interrupt.
cmd events and flush_cqe events are still fully processed in the top half to ensure timely handling.
Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08")
Signed-off-by: wenglianfa <wenglianfa@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://patch.msgid.link/20241024124000.2931869-2-huangjunxian6@hisilicon.co...
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Xinghai Cen <cenxinghai@h-partners.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 75 ++++++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  5 ++
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 54 +++++++++------
 4 files changed, 91 insertions(+), 44 deletions(-)
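[ Reviewer note: the core of the change is a budgeted-polling pattern.
  Below is a standalone sketch of that idea in plain C with no kernel
  dependencies; every name in it is invented for illustration and none
  of it is driver code. The "top half" consumes at most a fixed budget
  of entries per pass and acknowledges the consumer index immediately;
  slow work is deferred to a "bottom half" processed outside the
  handler.

	#include <stdio.h>

	#define POLL_BUDGET 64	/* must stay below the queue depth */

	static int pending = 200;	/* events waiting in the queue */
	static int deferred;		/* slow work left for the bottom half */
	static unsigned int cons_index;	/* consumer index, acked every pass */

	/* "top half": bounded loop, cheap work only, ci acked every pass */
	static int top_half(void)
	{
		int polled = 0;

		while (pending > 0 && polled < POLL_BUDGET) {
			--pending;	/* cheap, latency-critical handling */
			++deferred;	/* slow part (e.g. logging) deferred */
			++cons_index;
			++polled;
		}
		printf("top half: polled %d, ci now %u\n", polled, cons_index);
		return polled;
	}

	/* "bottom half": drain the deferred slow work later */
	static void bottom_half(void)
	{
		printf("bottom half: handling %d deferred items\n", deferred);
		deferred = 0;
	}

	int main(void)
	{
		while (top_half() > 0)	/* repeat until the queue is drained */
			bottom_half();
		return 0;
	}

  Capping the per-pass budget bounds how long the consumer index goes
  unacknowledged, which is why HNS_AEQ_POLLING_BUDGET must stay below
  aeqe_depth: the doorbell is rung before the queue can wrap. ]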
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 8add4742c094..462849206bef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -1443,6 +1443,7 @@ void hns_roce_cq_completion(struct hns_roce_dev *hr_dev, u32 cqn);
 void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type);
 void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp);
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn);
 void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type);
 void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev);
 int hns_roce_init(struct hns_roce_dev *hr_dev);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index bc0b944ecfac..49445815a009 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -6392,11 +6392,10 @@ static int hns_roce_v2_query_mpt(struct hns_roce_dev *hr_dev, u32 key,
 	return ret;
 }
 
-static void hns_roce_irq_work_handle(struct work_struct *work)
+static void dump_aeqe_log(struct hns_roce_work *irq_work)
 {
-	struct hns_roce_work *irq_work =
-			container_of(work, struct hns_roce_work, work);
-	struct ib_device *ibdev = &irq_work->hr_dev->ib_dev;
+	struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
 
 	switch (irq_work->event_type) {
 	case HNS_ROCE_EVENT_TYPE_PATH_MIG:
@@ -6440,6 +6439,8 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
 	case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
 		ibdev_warn(ibdev, "DB overflow.\n");
 		break;
+	case HNS_ROCE_EVENT_TYPE_MB:
+		break;
 	case HNS_ROCE_EVENT_TYPE_FLR:
 		ibdev_warn(ibdev, "function level reset.\n");
 		break;
@@ -6450,8 +6451,46 @@ static void hns_roce_irq_work_handle(struct work_struct *work)
 		ibdev_err(ibdev, "invalid xrceth error.\n");
 		break;
 	default:
+		ibdev_info(ibdev, "Undefined event %d.\n",
+			   irq_work->event_type);
 		break;
 	}
+}
+
+static void hns_roce_irq_work_handle(struct work_struct *work)
+{
+	struct hns_roce_work *irq_work =
+			container_of(work, struct hns_roce_work, work);
+	struct hns_roce_dev *hr_dev = irq_work->hr_dev;
+	int event_type = irq_work->event_type;
+	u32 queue_num = irq_work->queue_num;
+
+	switch (event_type) {
+	case HNS_ROCE_EVENT_TYPE_PATH_MIG:
+	case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
+	case HNS_ROCE_EVENT_TYPE_COMM_EST:
+	case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
+	case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
+	case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
+	case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
+	case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
+	case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
+	case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
+		hns_roce_qp_event(hr_dev, queue_num, event_type);
+		break;
+	case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
+	case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
+		hns_roce_srq_event(hr_dev, queue_num, event_type);
+		break;
+	case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
+	case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
+		hns_roce_cq_event(hr_dev, queue_num, event_type);
+		break;
+	default:
+		break;
+	}
+
+	dump_aeqe_log(irq_work);
 
 	kfree(irq_work);
 }
@@ -6512,14 +6551,14 @@ static struct hns_roce_aeqe *next_aeqe_sw_v2(struct hns_roce_eq *eq)
 static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 				       struct hns_roce_eq *eq)
 {
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_aeqe *aeqe = next_aeqe_sw_v2(eq);
 	irqreturn_t aeqe_found = IRQ_NONE;
+	int num_aeqes = 0;
 	int event_type;
 	u32 queue_num;
 	int sub_type;
 
-	while (aeqe) {
+	while (aeqe && num_aeqes < HNS_AEQ_POLLING_BUDGET) {
 		/* Make sure we read AEQ entry after we have checked the
 		 * ownership bit
 		 */
@@ -6530,25 +6569,12 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		queue_num = hr_reg_read(aeqe, AEQE_EVENT_QUEUE_NUM);
 
 		switch (event_type) {
-		case HNS_ROCE_EVENT_TYPE_PATH_MIG:
-		case HNS_ROCE_EVENT_TYPE_PATH_MIG_FAILED:
-		case HNS_ROCE_EVENT_TYPE_COMM_EST:
-		case HNS_ROCE_EVENT_TYPE_SQ_DRAINED:
 		case HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR:
-		case HNS_ROCE_EVENT_TYPE_SRQ_LAST_WQE_REACH:
 		case HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR:
 		case HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR:
 		case HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION:
 		case HNS_ROCE_EVENT_TYPE_INVALID_XRCETH:
-			hns_roce_qp_event(hr_dev, queue_num, event_type);
-			break;
-		case HNS_ROCE_EVENT_TYPE_SRQ_LIMIT_REACH:
-		case HNS_ROCE_EVENT_TYPE_SRQ_CATAS_ERROR:
-			hns_roce_srq_event(hr_dev, queue_num, event_type);
-			break;
-		case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
-		case HNS_ROCE_EVENT_TYPE_CQ_OVERFLOW:
-			hns_roce_cq_event(hr_dev, queue_num, event_type);
+			hns_roce_flush_cqe(hr_dev, queue_num);
 			break;
 		case HNS_ROCE_EVENT_TYPE_MB:
 			hns_roce_cmd_event(hr_dev,
@@ -6556,12 +6582,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 					le16_to_cpu(aeqe->event.cmd.token),
 					aeqe->event.cmd.status,
 					le64_to_cpu(aeqe->event.cmd.out_param));
 			break;
-		case HNS_ROCE_EVENT_TYPE_DB_OVERFLOW:
-		case HNS_ROCE_EVENT_TYPE_FLR:
-			break;
 		default:
-			dev_err(dev, "unhandled event %d on EQ %d at idx %u.\n",
-				event_type, eq->eqn, eq->cons_index);
 			break;
 		}
@@ -6575,6 +6596,7 @@ static irqreturn_t hns_roce_v2_aeq_int(struct hns_roce_dev *hr_dev,
 		hns_roce_v2_init_irq_work(hr_dev, eq, queue_num);
 
 		aeqe = next_aeqe_sw_v2(eq);
+		++num_aeqes;
 	}
 
 	update_eq_db(eq);
@@ -7122,6 +7144,9 @@ static int hns_roce_v2_init_eq_table(struct hns_roce_dev *hr_dev)
 	int ret;
 	int i;
 
+	if (hr_dev->caps.aeqe_depth < HNS_AEQ_POLLING_BUDGET)
+		return -EINVAL;
+
 	other_num = hr_dev->caps.num_other_vectors;
 	comp_num = hr_dev->caps.num_comp_vectors;
 	aeq_num = hr_dev->caps.num_aeq_vectors;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 26db7dbf6da9..b7953614e69f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -85,6 +85,11 @@
 
 #define HNS_ROCE_V2_TABLE_CHUNK_SIZE	(1 << 18)
 
+/* budget must be smaller than aeqe_depth to guarantee that we update
+ * the ci before we polled all the entries in the EQ.
+ */
+#define HNS_AEQ_POLLING_BUDGET 64
+
 enum {
 	HNS_ROCE_CMD_FLAG_IN	= BIT(0),
 	HNS_ROCE_CMD_FLAG_OUT	= BIT(1),
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 2eb2694acf21..df7b40966731 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -39,6 +39,25 @@
 #include "hns_roce_hem.h"
 #include "hns_roce_dca.h"
 
+static struct hns_roce_qp *hns_roce_qp_lookup(struct hns_roce_dev *hr_dev,
+					      u32 qpn)
+{
+	struct device *dev = hr_dev->dev;
+	struct hns_roce_qp *qp;
+	unsigned long flags;
+
+	xa_lock_irqsave(&hr_dev->qp_table_xa, flags);
+	qp = __hns_roce_qp_lookup(hr_dev, qpn);
+	if (qp)
+		refcount_inc(&qp->refcount);
+	xa_unlock_irqrestore(&hr_dev->qp_table_xa, flags);
+
+	if (!qp)
+		dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+
+	return qp;
+}
+
 static void flush_work_handle(struct work_struct *work)
 {
 	struct hns_roce_work *flush_work = container_of(work,
@@ -95,31 +114,28 @@ void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp)
 void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type)
 {
-	struct device *dev = hr_dev->dev;
 	struct hns_roce_qp *qp;
 
-	xa_lock(&hr_dev->qp_table_xa);
-	qp = __hns_roce_qp_lookup(hr_dev, qpn);
-	if (qp)
-		refcount_inc(&qp->refcount);
-	xa_unlock(&hr_dev->qp_table_xa);
-
-	if (!qp) {
-		dev_warn(dev, "async event for bogus QP %08x\n", qpn);
+	qp = hns_roce_qp_lookup(hr_dev, qpn);
+	if (!qp)
 		return;
-	}
 
-	if (event_type == HNS_ROCE_EVENT_TYPE_WQ_CATAS_ERROR ||
-	    event_type == HNS_ROCE_EVENT_TYPE_INV_REQ_LOCAL_WQ_ERROR ||
-	    event_type == HNS_ROCE_EVENT_TYPE_LOCAL_WQ_ACCESS_ERROR ||
-	    event_type == HNS_ROCE_EVENT_TYPE_XRCD_VIOLATION ||
-	    event_type == HNS_ROCE_EVENT_TYPE_INVALID_XRCETH) {
-		qp->state = IB_QPS_ERR;
+	qp->event(qp, (enum hns_roce_event)event_type);
 
-		flush_cqe(hr_dev, qp);
-	}
+	if (refcount_dec_and_test(&qp->refcount))
+		complete(&qp->free);
+}
 
-	qp->event(qp, (enum hns_roce_event)event_type);
+void hns_roce_flush_cqe(struct hns_roce_dev *hr_dev, u32 qpn)
+{
+	struct hns_roce_qp *qp;
+
+	qp = hns_roce_qp_lookup(hr_dev, qpn);
+	if (!qp)
+		return;
+
+	qp->state = IB_QPS_ERR;
+	flush_cqe(hr_dev, qp);
 
 	if (refcount_dec_and_test(&qp->refcount))
 		complete(&qp->free);
 }