driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
By forcing each context to use the same CQ bank. Ensure that there is fixed mapping logic between all QP and CQ banks. Ensure that SQ, RQ, and CQ can share the QPC cache to improve the performance.
Fixes: bff8edc6dfe7 ("RDMA/hns: Fix CQ and QP cache affinity") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 57 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 2 + 3 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index b41f9788db24..72bf500f4b65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -37,6 +37,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h"
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP10) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP10) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +92,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; }
-static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, struct hns_roce_bank *bank, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + /* only apply for HIP10 now, and use bank 0 for kernel */ + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP10) + return uctx ? uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +114,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id;
mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid];
id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -523,7 +574,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; }
- ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 0bad0165aa21..69592ccda2d6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -287,6 +287,7 @@ struct hns_roce_ucontext { u32 config; struct hns_roce_dca_ctx dca_ctx; struct hns_dca_ctx_debugfs dca_dbgfs; + u8 cq_bank_id; };
struct hns_roce_pd { @@ -621,6 +622,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; };
struct hns_roce_srq_table { @@ -1537,4 +1539,6 @@ int hns_roce_register_poe_channel(struct hns_roce_dev *hr_dev, u8 channel, u64 poe_addr); int hns_roce_unregister_poe_channel(struct hns_roce_dev *hr_dev, u8 channel); bool hns_roce_is_srq_exist(struct hns_roce_dev *hr_dev, u32 srqn); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 2e005edd0eb2..e213689eea6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -622,6 +622,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_unlock(&hr_dev->uctx_list_mutex);
hns_roce_register_uctx_debugfs(hr_dev, context); + hns_roce_get_cq_bankid_for_uctx(context);
return 0;
@@ -660,6 +661,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex);
+ hns_roce_put_cq_bankid_for_uctx(context); hns_roce_dealloc_uar_entry(context); hns_roce_dealloc_reset_entry(context);