From: Xinghai Cen cenxinghai@h-partners.com
Some patches for RDMA/hns
Chengchang Tang (6): RDMA/hns: Fix HW UAF when destroy context timeout RDMA/hns: Fix integer overflow in calc_loading_percent() RDMA/hns: Fix possible RAS when DCA is not attached RDMA/hns: Fix a meaningless loop in active_dca_pages_proc() RDMA/hns: Fix list_*_careful() not being used in pairs RDMA/hns: Use one CQ bank per context
Junxian Huang (5): RDMA/hns: Fix wrong output of sysfs scc pram when configuration failed RDMA/hns: Fix concurrency between sysfs store and FW configuration of scc params RDMA/hns: Fix mixed use of u32 and __le32 in sysfs RDMA/hns: Fix dereference of noderef expression RDMA/hns: Fix "Should it be static?" warnings
wenglianfa (2): RDMA/hns: Fix the modification of max_send_sge RDMA/hns: Fix RoCEE hang when multiple QP banks use EXT_SGE
drivers/infiniband/hw/hns/hns_roce_bond.c | 8 ++- drivers/infiniband/hw/hns/hns_roce_cq.c | 73 +++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_dca.c | 60 ++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_dca.h | 2 + drivers/infiniband/hw/hns/hns_roce_debugfs.c | 10 ++- drivers/infiniband/hw/hns/hns_roce_device.h | 17 ++++- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 26 +++++-- drivers/infiniband/hw/hns/hns_roce_main.c | 20 ++++++ drivers/infiniband/hw/hns/hns_roce_mr.c | 6 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 74 ++++++++++++++++---- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 +- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 41 +++++++---- 12 files changed, 288 insertions(+), 55 deletions(-)
-- 2.33.0
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
If an mbox command times out while destroying a resource, the HW may still access that resource, which causes a UAF.
To fix it, if resource destruction fails, the resource will be retained until driver uninit.
Fixes: 04c5d76e4f15 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_mr.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 +++--- 4 files changed, 12 insertions(+), 14 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 6bb5f4b6c7c2..dd24f2d991ee 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -178,12 +178,11 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, hr_cq->cqn); - if (ret) + if (ret) { + hr_cq->delayed_destroy_flag = true; dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); - - if (ret == -EBUSY) - hr_cq->delayed_destroy_flag = true; + }
xa_erase(&cq_table->array, hr_cq->cqn); xa_erase_irq(&cq_table->array, hr_cq->cqn); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 88bd75c5743e..3f12310bda3f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6001,10 +6001,12 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET, udata); - if (ret) + if (ret) { + hr_qp->delayed_destroy_flag = true; ibdev_err_ratelimited(ibdev, "failed to modify QP to RST, ret = %d.\n", ret); + } }
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -6068,9 +6070,6 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret);
- if (ret == -EBUSY) - hr_qp->delayed_destroy_flag = true; - hns_roce_qp_destroy(hr_dev, hr_qp, udata);
return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1f29377b52b8..5c4b6c4f4ca7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -153,11 +153,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); - if (ret) + if (ret) { + mr->delayed_destroy_flag = true; ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", ret); - if (ret == -EBUSY) - mr->delayed_destroy_flag = true; + } }
free_mr_pbl(hr_dev, mr); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 0a8e71431246..0ab99aa9f9d5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -149,12 +149,12 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, srq->srqn); - if (ret) + if (ret) { + srq->delayed_destroy_flag = true; dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); - if (ret == -EBUSY) - srq->delayed_destroy_flag = true; + }
xa_erase_irq(&srq_table->xa, srq->srqn);
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
param[] in struct hns_roce_scc_param is used both to store and to show scc params. But when the configuration to HW fails, the params stored in this array will become different from the ones in HW.
Add a member latest_param[] to struct hns_roce_scc_param to store the latest configured value of scc params. It will be modified only after the configuration has succeeded to ensure the shown result from sysfs is always the correct param in HW even if the previous configuration failed. The original member param[] is only used to store the temporary value of sysfs input now.
Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 ++++++++++--- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e3303cc3584a..95fbc174e1ba 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1076,6 +1076,7 @@ struct hns_roce_scc_param { enum hns_roce_scc_algo algo_type; struct delayed_work scc_cfg_dwork; struct hns_roce_dev *hr_dev; + __le32 latest_param[HNS_ROCE_SCC_PARAM_SIZE]; };
struct hns_roce_dev { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 3f12310bda3f..1630e4713764 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7305,11 +7305,16 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, memcpy(&desc.data, scc_param, sizeof(scc_param->param));
ret = hns_roce_cmq_send(hr_dev, &desc, 1); - if (ret) + if (ret) { ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to configure scc param, opcode: 0x%x, ret = %d.\n", le16_to_cpu(desc.opcode), ret); - return ret; + return ret; + } + + memcpy(scc_param->latest_param, &desc.data, + sizeof(scc_param->latest_param)); + return 0; }
static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, @@ -7337,7 +7342,9 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, }
scc_param = &hr_dev->scc_param[algo]; - memcpy(scc_param, &desc.data, sizeof(scc_param->param)); + memcpy(scc_param->param, &desc.data, sizeof(scc_param->param)); + memcpy(scc_param->latest_param, &desc.data, + sizeof(scc_param->latest_param));
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index d36f05ac5f1e..4126a744f539 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -110,7 +110,11 @@ static ssize_t scc_attr_show(struct ib_device *ibdev, u32 port_num,
scc_param = &hr_dev->scc_param[scc_attr->algo_type];
- memcpy(&val, (void *)scc_param + scc_attr->offset, scc_attr->size); + if (scc_attr->offset == offsetof(typeof(*scc_param), lifespan)) + val = scc_param->lifespan; + else + memcpy(&val, (void *)scc_param->latest_param + scc_attr->offset, + scc_attr->size);
return sysfs_emit(buf, "%u\n", le32_to_cpu(val)); }
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
The FW configuration of scc param is delayed with a workqueue. This may lead to scc params being modified by sysfs store callback while they're being configured to FW. Use a mutex to solve this.
Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 6 ++++++ drivers/infiniband/hw/hns/hns_roce_sysfs.c | 9 ++++++++- 3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 95fbc174e1ba..f98c41c41623 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1077,6 +1077,7 @@ struct hns_roce_scc_param { struct delayed_work scc_cfg_dwork; struct hns_roce_dev *hr_dev; __le32 latest_param[HNS_ROCE_SCC_PARAM_SIZE]; + struct mutex scc_mutex; /* protect @param and @lastest_param */ };
struct hns_roce_dev { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1630e4713764..052b14835643 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7302,6 +7302,7 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev,
hns_roce_cmq_setup_basic_desc(&desc, scc_opcode[algo], false); scc_param = &hr_dev->scc_param[algo]; + mutex_lock(&scc_param->scc_mutex); memcpy(&desc.data, scc_param, sizeof(scc_param->param));
ret = hns_roce_cmq_send(hr_dev, &desc, 1); @@ -7309,11 +7310,14 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to configure scc param, opcode: 0x%x, ret = %d.\n", le16_to_cpu(desc.opcode), ret); + mutex_unlock(&scc_param->scc_mutex); return ret; }
memcpy(scc_param->latest_param, &desc.data, sizeof(scc_param->latest_param)); + mutex_unlock(&scc_param->scc_mutex); + return 0; }
@@ -7342,9 +7346,11 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, }
scc_param = &hr_dev->scc_param[algo]; + mutex_lock(&scc_param->scc_mutex); memcpy(scc_param->param, &desc.data, sizeof(scc_param->param)); memcpy(scc_param->latest_param, &desc.data, sizeof(scc_param->latest_param)); + mutex_unlock(&scc_param->scc_mutex);
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 4126a744f539..3a8a98097042 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -46,6 +46,7 @@ int hns_roce_alloc_scc_param(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) { scc_param[i].algo_type = i; scc_param[i].hr_dev = hr_dev; + mutex_init(&scc_param[i].scc_mutex); INIT_DELAYED_WORK(&scc_param[i].scc_cfg_dwork, scc_param_config_work); } @@ -63,8 +64,10 @@ void hns_roce_dealloc_scc_param(struct hns_roce_dev *hr_dev) if (!hr_dev->scc_param) return;
- for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) + for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) { cancel_delayed_work_sync(&hr_dev->scc_param[i].scc_cfg_dwork); + mutex_destroy(&hr_dev->scc_param[i].scc_mutex); + }
kvfree(hr_dev->scc_param); hr_dev->scc_param = NULL; @@ -110,11 +113,13 @@ static ssize_t scc_attr_show(struct ib_device *ibdev, u32 port_num,
scc_param = &hr_dev->scc_param[scc_attr->algo_type];
+ mutex_lock(&scc_param->scc_mutex); if (scc_attr->offset == offsetof(typeof(*scc_param), lifespan)) val = scc_param->lifespan; else memcpy(&val, (void *)scc_param->latest_param + scc_attr->offset, scc_attr->size); + mutex_unlock(&scc_param->scc_mutex);
return sysfs_emit(buf, "%u\n", le32_to_cpu(val)); } @@ -145,8 +150,10 @@ static ssize_t scc_attr_store(struct ib_device *ibdev, u32 port_num,
attr_val = cpu_to_le32(val); scc_param = &hr_dev->scc_param[scc_attr->algo_type]; + mutex_lock(&scc_param->scc_mutex); memcpy((void *)scc_param + scc_attr->offset, &attr_val, scc_attr->size); + mutex_unlock(&scc_param->scc_mutex);
/* lifespan is only used for driver */ if (scc_attr->offset >= offsetof(typeof(*scc_param), lifespan))
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
lifespan is declared as u32 but is read and written as an __le32 via memcpy(). Change it to __le32 and add le32_to_cpu() where needed.
Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f98c41c41623..f434bf349887 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1071,7 +1071,7 @@ struct hns_roce_hw { #define HNS_ROCE_SCC_PARAM_SIZE 4 struct hns_roce_scc_param { __le32 param[HNS_ROCE_SCC_PARAM_SIZE]; - u32 lifespan; + __le32 lifespan; unsigned long timestamp; enum hns_roce_scc_algo algo_type; struct delayed_work scc_cfg_dwork; diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 3a8a98097042..e8161ea0001f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -159,7 +159,7 @@ static ssize_t scc_attr_store(struct ib_device *ibdev, u32 port_num, if (scc_attr->offset >= offsetof(typeof(*scc_param), lifespan)) return count;
- lifespan_jiffies = msecs_to_jiffies(scc_param->lifespan); + lifespan_jiffies = msecs_to_jiffies(le32_to_cpu(scc_param->lifespan)); exp_time = scc_param->timestamp + lifespan_jiffies;
if (time_is_before_eq_jiffies(exp_time)) {
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
In calc_loading_percent(), multiplying two u32 values can result in an integer overflow. To fix it, convert all variables to u64.
Since total and free are both size_t, all_pages and free_pages may overflow. In addition, because there is multiplication in the calculation of percent, it may also cause overflow of u32. In this patch all relevant variables are converted to u64.
This patch also makes the callers handle the failure case of calc_loading_percent() (a return value of U64_MAX) to avoid printing a wrong result.
Fixes: 640cb0880216 ("RDMA/hns: Add debugfs support for DCA") Signed-off-by: Yuyu Li liyuyu6@huawei.com Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index 3c2d7096fe13..7023c3cefaa7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -187,8 +187,8 @@ static void dca_setup_pool_name(pid_t pid, bool is_kdca, char *name, int size)
static u64 calc_loading_percent(size_t total, size_t free, u32 *out_rem) { - u32 all_pages, used_pages, free_pages, scale; - u64 percent = 0; + u64 used_pages, scale, all_pages, free_pages; + u64 percent = U64_MAX; u32 rem = 0;
all_pages = total >> HNS_HW_PAGE_SHIFT; @@ -214,6 +214,9 @@ static void dca_print_pool_stats(struct hns_roce_dca_ctx *ctx, pid_t pid, u32 rem = 0;
percent = calc_loading_percent(ctx->total_size, ctx->free_size, &rem); + if (percent == U64_MAX) + return; + dca_setup_pool_name(pid, is_kdca, name, sizeof(name)); seq_printf(file, "%-10s %-16ld %-16ld %-16u %llu.%0*u\n", name, ctx->total_size / KB, ctx->free_size / KB, ctx->free_mems, @@ -366,6 +369,9 @@ static void dca_stats_ctx_mem_in_seqfile(struct hns_roce_dca_ctx *ctx,
dca_ctx_stats_mem(ctx, &stats); percent = calc_loading_percent(stats.total_size, stats.free_size, &rem); + if (percent == U64_MAX) + return; + seq_printf(file, DCA_STAT_NAME_FMT "%llu.%0*u\n", "Loading:", percent, LOADING_PERCENT_SHIFT, rem); dca_ctx_print_mem_kb(file, "Total:", stats.total_size);
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
A RAS error may occur if the DB (doorbell) is rung while the DCA buffer is not attached.
This patch adds a safe page for DCA, which will be attached to QP if no DCA buffer is attached to avoid the HW accessing illegal addresses.
Fixes: 10bb3b802412 ("RDMA/hns: Add method for attaching WQE buffer") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 56 ++++++++++++++++++++- drivers/infiniband/hw/hns/hns_roce_dca.h | 2 + drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 13 +++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 23 +++++++++ 5 files changed, 95 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index 4cef41591795..f435f6f5d8a3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -309,6 +309,33 @@ hr_qp_to_dca_ctx(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) return to_hr_dca_ctx(hr_dev, uctx); }
+int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned int page_count = hr_qp->dca_cfg.npages; + struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; + unsigned int i; + int ret; + + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (IS_ERR_OR_NULL(pages)) { + ibdev_err(ibdev, "failed to alloc DCA safe page array.\n"); + return -ENOMEM; + } + + for (i = 0; i < page_count; i++) + pages[i] = hr_dev->dca_safe_page; + + ret = hns_roce_mtr_map(hr_dev, &hr_qp->mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map safe page for DCA, ret = %d.\n", + ret); + + kvfree(pages); + return ret; +} + static int config_dca_qpc(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, dma_addr_t *pages, int page_count) @@ -335,6 +362,29 @@ static int config_dca_qpc(struct hns_roce_dev *hr_dev, return 0; }
+static int config_dca_qpc_to_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned int page_count = hr_qp->dca_cfg.npages; + dma_addr_t *pages; + unsigned int i; + int ret; + + might_sleep(); + + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (IS_ERR_OR_NULL(pages)) + return -ENOMEM; + + for (i = 0; i < page_count; i++) + pages[i] = hr_dev->dca_safe_page; + + ret = config_dca_qpc(hr_dev, hr_qp, pages, page_count); + + kvfree(pages); + return ret; +} + static int setup_dca_buf_to_hw(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_dca_ctx *ctx, u32 buf_id, @@ -980,8 +1030,10 @@ static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, spin_unlock(&ctx->aging_lock);
if (start_free_dca_buf(ctx, cfg->dcan)) { - if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) - free_buf_from_dca_mem(ctx, cfg); + if (hr_dev->hw->chk_dca_buf_inactive(hr_dev, hr_qp)) { + if (!config_dca_qpc_to_safe_page(hr_dev, hr_qp)) + free_buf_from_dca_mem(ctx, cfg); + }
stop_free_dca_buf(ctx, cfg->dcan); } diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.h b/drivers/infiniband/hw/hns/hns_roce_dca.h index 7733887ce5e1..36f03f5357d7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.h +++ b/drivers/infiniband/hw/hns/hns_roce_dca.h @@ -75,4 +75,6 @@ void hns_roce_modify_dca(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
void hns_roce_enum_dca_pool(struct hns_roce_dca_ctx *dca_ctx, void *param, hns_dca_enum_callback cb); +int hns_roce_map_dca_safe_page(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp); #endif diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f434bf349887..e2dfab08cc7a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1156,6 +1156,9 @@ struct hns_roce_dev { struct mutex mtr_unfree_list_mutex; /* protect mtr_unfree_list */ struct list_head umem_unfree_list; /* list of unfree umem on this dev */ struct mutex umem_unfree_list_mutex; /* protect umem_unfree_list */ + + void *dca_safe_buf; + dma_addr_t dca_safe_page; };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 520112bd43d5..9c6651fc87c7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1368,6 +1368,17 @@ static void hns_roce_dealloc_dfx_cnt(struct hns_roce_dev *hr_dev) kvfree(hr_dev->dfx_cnt); }
+static void hns_roce_free_dca_safe_buf(struct hns_roce_dev *hr_dev) +{ + if (!hr_dev->dca_safe_buf) + return; + + dma_free_coherent(hr_dev->dev, PAGE_SIZE, hr_dev->dca_safe_buf, + hr_dev->dca_safe_page); + hr_dev->dca_safe_page = 0; + hr_dev->dca_safe_buf = NULL; +} + int hns_roce_init(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; @@ -1481,6 +1492,8 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup) hns_roce_dealloc_scc_param(hr_dev); hns_roce_unregister_debugfs(hr_dev);
+ hns_roce_free_dca_safe_buf(hr_dev); + if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); hns_roce_free_unfree_umem(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 77ec0c8678d3..94124a2dd00b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -844,6 +844,8 @@ static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_disable_dca(hr_dev, hr_qp, udata); kvfree(hr_qp->mtr_node); hr_qp->mtr_node = NULL; + } else if (dca_en) { + ret = hns_roce_map_dca_safe_page(hr_dev, hr_qp); }
return ret; @@ -864,6 +866,21 @@ static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_disable_dca(hr_dev, hr_qp, udata); }
+static int alloc_dca_safe_page(struct hns_roce_dev *hr_dev) +{ + struct ib_device *ibdev = &hr_dev->ib_dev; + + hr_dev->dca_safe_buf = dma_alloc_coherent(hr_dev->dev, PAGE_SIZE, + &hr_dev->dca_safe_page, + GFP_KERNEL); + if (!hr_dev->dca_safe_buf) { + ibdev_err(ibdev, "failed to alloc dca safe page.\n"); + return -ENOMEM; + } + + return 0; +} + static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, @@ -882,6 +899,12 @@ static int alloc_qp_wqe(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
dca_en = check_dca_is_enable(hr_dev, hr_qp, init_attr, !!udata, ucmd->buf_addr); + if (dca_en && !hr_dev->dca_safe_buf) { + ret = alloc_dca_safe_page(hr_dev); + if (ret) + return ret; + } + ret = set_wqe_buf_attr(hr_dev, hr_qp, dca_en, page_shift, &buf_attr); if (ret) { ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
The iterated element does not change, making the loop in active_dca_pages_proc() meaningless.
Fixes: ef35d79d91ed ("RDMA/hns: Add DCA support for kernel space") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index f435f6f5d8a3..e76a7e6e8ad4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -617,7 +617,7 @@ static int active_dca_pages_proc(struct dca_mem *mem, int index, void *param) }
for (; changed && i < mem->page_count; i++) - if (dca_page_is_free(state)) + if (dca_page_is_free(&mem->states[i])) free_pages++;
/* Clean mem changed to dirty */
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
list_del_init_careful() is designed to be used together with list_empty_careful().
Fixes: 10bb3b802412 ("RDMA/hns: Add method for attaching WQE buffer") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_dca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_dca.c b/drivers/infiniband/hw/hns/hns_roce_dca.c index e76a7e6e8ad4..eb408130329b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_dca.c +++ b/drivers/infiniband/hw/hns/hns_roce_dca.c @@ -1022,7 +1022,7 @@ static void process_aging_dca_mem(struct hns_roce_dev *hr_dev, list_for_each_entry_safe(cfg, tmp_cfg, &ctx->aging_new_list, aging_node) list_move(&cfg->aging_node, &ctx->aging_proc_list);
- while (!ctx->exit_aging && !list_empty(&ctx->aging_proc_list)) { + while (!ctx->exit_aging && !list_empty_careful(&ctx->aging_proc_list)) { cfg = list_first_entry(&ctx->aging_proc_list, struct hns_roce_dca_cfg, aging_node); list_del_init_careful(&cfg->aging_node);
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
Fix sparse warnings: dereference of noderef expression. This is because curr_active_slave is defined as:
struct bonding { ... struct slave __rcu *curr_active_slave; ... };
__rcu contains __attribute__((noderef)) inside, which prevents callers from dereferencing it directly.
Fixes: 2004b3f9092a ("RDMA/hns: Support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_bond.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_bond.c b/drivers/infiniband/hw/hns/hns_roce_bond.c index 0fc026eb40e8..7adae8990acd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_bond.c +++ b/drivers/infiniband/hw/hns/hns_roce_bond.c @@ -93,10 +93,16 @@ bool hns_roce_bond_is_active(struct hns_roce_dev *hr_dev) static inline bool is_active_slave(struct net_device *net_dev, struct hns_roce_bond_group *bond_grp) { + struct net_device *slave_dev; + if (!bond_grp || !bond_grp->bond || !bond_grp->bond->curr_active_slave) return false;
- return net_dev == bond_grp->bond->curr_active_slave->dev; + rcu_read_lock(); + slave_dev = bond_option_active_slave_get_rcu(bond_grp->bond); + rcu_read_unlock(); + + return net_dev == slave_dev; }
struct net_device *hns_roce_get_bond_netdev(struct hns_roce_dev *hr_dev)
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
Fix sparse warnings: Should it be static?
Fixes: 41da9cd8456d ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index e8161ea0001f..0ccc75ccb434 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -204,11 +204,11 @@ static umode_t scc_attr_is_visible(struct kobject *kobj, .max = _max, \ }
-#define HNS_PORT_DCQCN_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_dcqcn_##_name = \ - __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_DCQCN, \ - HNS_ROCE_DCQCN_##NAME##_OFS, \ - HNS_ROCE_DCQCN_##NAME##_SZ, \ +#define HNS_PORT_DCQCN_CC_ATTR_RW(_name, NAME) \ + static struct hns_port_cc_attr hns_roce_port_attr_dcqcn_##_name = \ + __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_DCQCN, \ + HNS_ROCE_DCQCN_##NAME##_OFS, \ + HNS_ROCE_DCQCN_##NAME##_SZ, \ 0, HNS_ROCE_DCQCN_##NAME##_MAX)
HNS_PORT_DCQCN_CC_ATTR_RW(ai, AI); @@ -244,11 +244,11 @@ static const struct attribute_group dcqcn_cc_param_group = { .is_visible = scc_attr_is_visible, };
-#define HNS_PORT_LDCP_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_ldcp_##_name = \ - __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_LDCP, \ - HNS_ROCE_LDCP_##NAME##_OFS, \ - HNS_ROCE_LDCP_##NAME##_SZ, \ +#define HNS_PORT_LDCP_CC_ATTR_RW(_name, NAME) \ + static struct hns_port_cc_attr hns_roce_port_attr_ldcp_##_name = \ + __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_LDCP, \ + HNS_ROCE_LDCP_##NAME##_OFS, \ + HNS_ROCE_LDCP_##NAME##_SZ, \ 0, HNS_ROCE_LDCP_##NAME##_MAX)
HNS_PORT_LDCP_CC_ATTR_RW(cwd0, CWD0); @@ -275,7 +275,7 @@ static const struct attribute_group ldcp_cc_param_group = { };
#define HNS_PORT_HC3_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_hc3_##_name = \ + static struct hns_port_cc_attr hns_roce_port_attr_hc3_##_name = \ __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_HC3, \ HNS_ROCE_HC3_##NAME##_OFS, \ HNS_ROCE_HC3_##NAME##_SZ, \ @@ -309,7 +309,7 @@ static const struct attribute_group hc3_cc_param_group = { };
#define HNS_PORT_DIP_CC_ATTR_RW(_name, NAME) \ - struct hns_port_cc_attr hns_roce_port_attr_dip_##_name = \ + static struct hns_port_cc_attr hns_roce_port_attr_dip_##_name = \ __HNS_SCC_ATTR(_name, HNS_ROCE_SCC_ALGO_DIP, \ HNS_ROCE_DIP_##NAME##_OFS, \ HNS_ROCE_DIP_##NAME##_SZ, \
From: wenglianfa wenglianfa@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
The attribute max_send_sge describes the requested maximum number of SGEs in a WR in the SQ. It is a necessary attribute for creating a QP. Return it to the caller directly, without modifying its value.
Fixes: 0c5e259b06a8 ("RDMA/hns: Fix incorrect sge nums calculation") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_qp.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 94124a2dd00b..fd534d4273c6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -668,7 +668,6 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; - cap->max_send_sge = hr_qp->sq.max_gs;
return 0; } @@ -780,7 +779,6 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
/* sync the parameters of kernel QP to user's configuration */ cap->max_send_wr = cnt; - cap->max_send_sge = hr_qp->sq.max_gs;
return 0; }
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
Force each context to use the same CQ bank for all of its CQs, so that there is a fixed mapping between all QP and CQ banks. This ensures that SQ, RQ, and CQ can share the QPC cache in QMM, which avoids the timer deadlock.
Since the upload strategy for this issue (DTS2024032521959) has not yet been clarified and it involves 920B/C, the patch is currently marked as noup.
Fixes: 9e03dbea2b06 ("RDMA/hns: Fix CQ and QP cache affinity") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 57 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 2 + 3 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index dd24f2d991ee..d34fd7122b3d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -37,6 +37,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h"
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP09) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +92,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; }
-static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, struct hns_roce_bank *bank, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + /* only apply for HIP09 and HIP10 now, and use bank 0 for kernel */ + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return uctx ? uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +114,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id;
mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid];
id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -416,7 +467,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; }
- ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index e2dfab08cc7a..45aa8f4df4ae 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -265,6 +265,7 @@ struct hns_roce_ucontext { struct list_head list; /* link all uctx to uctx_list on hr_dev */ pid_t pid; /* process id to which the uctx belongs */ struct hns_dca_ctx_debugfs dca_dbgfs; + u8 cq_bank_id; };
struct hns_roce_pd { @@ -586,6 +587,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; };
struct hns_roce_srq_table { @@ -1469,4 +1471,6 @@ void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos, void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev); int hns_roce_alloc_scc_param(struct hns_roce_dev *hr_dev); void hns_roce_dealloc_scc_param(struct hns_roce_dev *hr_dev); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 9c6651fc87c7..97202b8ac57c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -598,6 +598,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_unlock(&hr_dev->uctx_list_mutex);
hns_roce_register_uctx_debugfs(hr_dev, context); + hns_roce_get_cq_bankid_for_uctx(context);
return 0;
@@ -634,6 +635,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex);
+ hns_roce_put_cq_bankid_for_uctx(context); hns_roce_unregister_uctx_debugfs(context);
hns_roce_unregister_udca(hr_dev, context);
From: wenglianfa wenglianfa@huawei.com
driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IB30V8
----------------------------------------------------------------------
When QPs of multiple banks are used, the RoCEE may hang. This is because QPs of different banks may interfere with each other in certain cases when processing extended SGEs.
To solve this problem, the QP-bank-limit mechanism is introduced. When this mechanism is enabled, the number of QP banks must be limited to ensure that extended SGEs can be used.
If this mechanism is not applied, the FW will limit the maximum number of SGEs and make extended SGEs unavailable to avoid the HW hang.
Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 15 +++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +++ drivers/infiniband/hw/hns/hns_roce_main.c | 5 +++ drivers/infiniband/hw/hns/hns_roce_qp.c | 49 ++++++++++++++++----- 4 files changed, 62 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index d34fd7122b3d..7cda55debe62 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -52,9 +52,10 @@ void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx)
void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) { +#define INVALID_LOAD_CQNUM 0xFFFFFFFF struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; - u32 least_load = cq_table->ctx_num[0]; + u32 least_load = INVALID_LOAD_CQNUM; u8 bankid = 0; u8 i;
@@ -62,7 +63,10 @@ void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) return;
mutex_lock(&cq_table->bank_mutex); - for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + for (i = 0; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (!(cq_table->valid_cq_bank_mask & BIT(i))) + continue; + if (cq_table->ctx_num[i] < least_load) { least_load = cq_table->ctx_num[i]; bankid = i; @@ -98,7 +102,7 @@ static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, struct hns_roce_bank *ba struct hns_roce_ucontext *uctx = udata ? rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, ibucontext) : NULL; - /* only apply for HIP09 and HIP10 now, and use bank 0 for kernel */ + /* only HIP08 is not applied now, and use bank 0 for kernel */ if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) return uctx ? uctx->cq_bank_id : 0;
@@ -600,6 +604,11 @@ void hns_roce_init_cq_table(struct hns_roce_dev *hr_dev) cq_table->bank[i].max = hr_dev->caps.num_cqs / HNS_ROCE_CQ_BANK_NUM - 1; } + + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) + cq_table->valid_cq_bank_mask = VALID_CQ_BANK_MASK_LIMIT; + else + cq_table->valid_cq_bank_mask = VALID_CQ_BANK_MASK_DEFAULT; }
void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 45aa8f4df4ae..8f2527642318 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -105,6 +105,10 @@
#define CQ_BANKID_SHIFT 2 #define CQ_BANKID_MASK GENMASK(1, 0) +#define VALID_CQ_BANK_MASK_DEFAULT 0xF +#define VALID_CQ_BANK_MASK_LIMIT 0x9 + +#define VALID_EXT_SGE_QP_BANK_MASK_LIMIT 0x41
#define HNS_ROCE_MAX_CQ_COUNT 0xFFFF #define HNS_ROCE_MAX_CQ_PERIOD 0xFFFF @@ -168,6 +172,7 @@ enum { HNS_ROCE_CAP_FLAG_CQE_INLINE = BIT(19), HNS_ROCE_CAP_FLAG_BOND = BIT(21), HNS_ROCE_CAP_FLAG_SRQ_RECORD_DB = BIT(22), + HNS_ROCE_CAP_FLAG_LIMIT_BANK = BIT(23), };
#define HNS_ROCE_DB_TYPE_COUNT 2 @@ -588,6 +593,7 @@ struct hns_roce_cq_table { struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; + u8 valid_cq_bank_mask; };
struct hns_roce_srq_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 97202b8ac57c..f211b2ebed28 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -262,6 +262,11 @@ static int hns_roce_query_device(struct ib_device *ib_dev, props->max_srq_sge = hr_dev->caps.max_srq_sges; }
+ if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) { + props->max_cq >>= 1; + props->max_qp >>= 1; + } + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_FRMR && hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index fd534d4273c6..9a3ac810be44 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -198,22 +198,16 @@ static u8 get_affinity_cq_bank(u8 qp_bank) return (qp_bank >> 1) & CQ_BANKID_MASK; }
-static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr, - struct hns_roce_bank *bank) +static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank, u8 valid_qp_bank_mask) { #define INVALID_LOAD_QPNUM 0xFFFFFFFF - struct ib_cq *scq = init_attr->send_cq; u32 least_load = INVALID_LOAD_QPNUM; - unsigned long cqn = 0; u8 bankid = 0; u32 bankcnt; u8 i;
- if (scq) - cqn = to_hr_cq(scq)->cqn; - for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { - if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK))) + if (!(valid_qp_bank_mask & BIT(i))) continue;
bankcnt = bank[i].inuse; @@ -247,6 +241,42 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid,
return 0; } + +static bool use_ext_sge(struct ib_qp_init_attr *init_attr) +{ + return init_attr->cap.max_send_sge > HNS_ROCE_SGE_IN_WQE || + init_attr->qp_type == IB_QPT_UD || + init_attr->qp_type == IB_QPT_GSI; +} + +static u8 select_qp_bankid(struct hns_roce_dev *hr_dev, + struct ib_qp_init_attr *init_attr) +{ + struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; + struct hns_roce_bank *bank = qp_table->bank; + struct ib_cq *scq = init_attr->send_cq; + u8 valid_qp_bank_mask = 0; + unsigned long cqn = 0; + u8 i; + + if (scq) + cqn = to_hr_cq(scq)->cqn; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK))) + continue; + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_LIMIT_BANK) && + use_ext_sge(init_attr) && + !(VALID_EXT_SGE_QP_BANK_MASK_LIMIT & BIT(i))) + continue; + + valid_qp_bank_mask |= BIT(i); + } + + return get_least_load_bankid_for_qp(bank, valid_qp_bank_mask); +} + static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr) { @@ -259,8 +289,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, num = 1; } else { mutex_lock(&qp_table->bank_mutex); - bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank); - + bankid = select_qp_bankid(hr_dev, init_attr); ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, &num); if (ret) {
high-performance-network@openeuler.org