Some bugfix patches for HNS RoCE:
Chengchang Tang (4): RDMA/hns: Use one CQ bank per context RDMA/hns: Fix HW UAF when destroy context timeout RDMA/hns: Fix spin_unlock_irqrestore() called with IRQs enabled RDMA/hns: Fix integer overflow in calc_loading_percent()
Feng Fang (1): RDMA/hns: Fix different dgids mapping to the same dip_idx
Junxian Huang (6): RDMA/hns: Fix a potential Sleep-in-Atomic-Context RDMA/hns: Fix soft lockup under heavy CEQE load RDMA/hns: Fix mixed use of u32 and __le32 in sysfs RDMA/hns: Fix wrong output of sysfs scc param when configuration failed RDMA/hns: Fix concurrency between sysfs store and FW configuration of scc params RDMA/hns: Fix creating sysfs before allocating resources
wenglianfa (3): RDMA/hns: Fix the overflow risk of hem_list_calc_ba_range() RDMA/hns: Fix long waiting cmd event when reset RDMA/hns: Fix sleeping in spin_lock critical section
drivers/infiniband/hw/hns/hns_roce_cq.c | 63 ++++++- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 10 +- drivers/infiniband/hw/hns/hns_roce_device.h | 19 +- drivers/infiniband/hw/hns/hns_roce_hem.c | 12 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 189 ++++++++++++++----- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_main.c | 16 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 22 +-- drivers/infiniband/hw/hns/hns_roce_qp.c | 33 ++-- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 +- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 40 ++-- 11 files changed, 294 insertions(+), 117 deletions(-)
-- 2.33.0
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
Force each context to use a single CQ bank so that there is a fixed mapping between QP banks and CQ banks. This ensures that the SQ, RQ, and CQ can share the QPC cache, improving performance.
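For illustration, a minimal sketch of the per-context bank balancing (hypothetical names and structures, not the driver code; the real change is in the diff below). Each user context is pinned at creation time to the bank with the fewest contexts, and every CQ created by that context then uses the same bank:

#include <linux/mutex.h>
#include <linux/types.h>

#define CQ_BANK_NUM 4

/* Hypothetical table: one counter per bank, tracking assigned contexts. */
struct cq_bank_table {
	struct mutex bank_mutex;	/* init with mutex_init() */
	u32 ctx_num[CQ_BANK_NUM];
};

/* Pin a new user context to the least-loaded bank. */
static u8 pick_cq_bank(struct cq_bank_table *tbl)
{
	u8 bankid = 0;
	u8 i;

	mutex_lock(&tbl->bank_mutex);
	for (i = 1; i < CQ_BANK_NUM; i++)
		if (tbl->ctx_num[i] < tbl->ctx_num[bankid])
			bankid = i;
	tbl->ctx_num[bankid]++;		/* decremented when the context is freed */
	mutex_unlock(&tbl->bank_mutex);

	return bankid;
}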
Fixes: bff8edc6dfe7 ("RDMA/hns: Fix CQ and QP cache affinity") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 57 +++++++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++ drivers/infiniband/hw/hns/hns_roce_main.c | 2 + 3 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index b41f9788db24..72bf500f4b65 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -37,6 +37,43 @@ #include "hns_roce_hem.h" #include "hns_roce_common.h"
+void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP10) + return; + + mutex_lock(&cq_table->bank_mutex); + cq_table->ctx_num[uctx->cq_bank_id]--; + mutex_unlock(&cq_table->bank_mutex); +} + +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(uctx->ibucontext.device); + struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; + u32 least_load = cq_table->ctx_num[0]; + u8 bankid = 0; + u8 i; + + if (hr_dev->pci_dev->revision < PCI_REVISION_ID_HIP10) + return; + + mutex_lock(&cq_table->bank_mutex); + for (i = 1; i < HNS_ROCE_CQ_BANK_NUM; i++) { + if (cq_table->ctx_num[i] < least_load) { + least_load = cq_table->ctx_num[i]; + bankid = i; + } + } + cq_table->ctx_num[bankid]++; + mutex_unlock(&cq_table->bank_mutex); + + uctx->cq_bank_id = bankid; +} + static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) { u32 least_load = bank[0].inuse; @@ -55,7 +92,21 @@ static u8 get_least_load_bankid_for_cq(struct hns_roce_bank *bank) return bankid; }
-static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) +static u8 select_cq_bankid(struct hns_roce_dev *hr_dev, struct hns_roce_bank *bank, + struct ib_udata *udata) +{ + struct hns_roce_ucontext *uctx = udata ? + rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext) : NULL; + /* only apply for HIP10 now, and use bank 0 for kernel */ + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP10) + return uctx ? uctx->cq_bank_id : 0; + + return get_least_load_bankid_for_cq(bank); +} + +static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, + struct ib_udata *udata) { struct hns_roce_cq_table *cq_table = &hr_dev->cq_table; struct hns_roce_bank *bank; @@ -63,7 +114,7 @@ static int alloc_cqn(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) int id;
mutex_lock(&cq_table->bank_mutex); - bankid = get_least_load_bankid_for_cq(cq_table->bank); + bankid = select_cq_bankid(hr_dev, cq_table->bank, udata); bank = &cq_table->bank[bankid];
id = ida_alloc_range(&bank->ida, bank->min, bank->max, GFP_KERNEL); @@ -523,7 +574,7 @@ int hns_roce_create_cq(struct ib_cq *ib_cq, const struct ib_cq_init_attr *attr, goto err_cq_buf; }
- ret = alloc_cqn(hr_dev, hr_cq); + ret = alloc_cqn(hr_dev, hr_cq, udata); if (ret) { ibdev_err(ibdev, "failed to alloc CQN, ret = %d.\n", ret); goto err_cq_db; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 75ad7975322c..1b0825cd4111 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -293,6 +293,7 @@ struct hns_roce_ucontext { u32 config; struct hns_roce_dca_ctx dca_ctx; struct hns_dca_ctx_debugfs dca_dbgfs; + u8 cq_bank_id; };
struct hns_roce_pd { @@ -627,6 +628,7 @@ struct hns_roce_cq_table { struct hns_roce_hem_table table; struct hns_roce_bank bank[HNS_ROCE_CQ_BANK_NUM]; struct mutex bank_mutex; + u32 ctx_num[HNS_ROCE_CQ_BANK_NUM]; };
struct hns_roce_srq_table { @@ -1553,4 +1555,6 @@ int hns_roce_register_poe_channel(struct hns_roce_dev *hr_dev, u8 channel, u64 poe_addr); int hns_roce_unregister_poe_channel(struct hns_roce_dev *hr_dev, u8 channel); bool hns_roce_is_srq_exist(struct hns_roce_dev *hr_dev, u32 srqn); +void hns_roce_put_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); +void hns_roce_get_cq_bankid_for_uctx(struct hns_roce_ucontext *uctx); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 2e005edd0eb2..e213689eea6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -622,6 +622,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, mutex_unlock(&hr_dev->uctx_list_mutex);
hns_roce_register_uctx_debugfs(hr_dev, context); + hns_roce_get_cq_bankid_for_uctx(context);
return 0;
@@ -660,6 +661,7 @@ static void hns_roce_dealloc_ucontext(struct ib_ucontext *ibcontext) hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&context->page_mutex);
+ hns_roce_put_cq_bankid_for_uctx(context); hns_roce_dealloc_uar_entry(context); hns_roce_dealloc_reset_entry(context);
From: wenglianfa wenglianfa@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
The maximum value of 'unit' is 2^24 and the current maximum value of 'hopnum' is 2, so the 'step' value may exceed the range of u32. To fix it, change 'step' and the related variables to u64.
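As a standalone arithmetic check of the overflow (plain C, assuming the stated maximums unit = 2^24 and hopnum = 2):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t unit = 1ULL << 24;	/* maximum 'unit' */
	int hops = 2;			/* maximum 'hopnum' */
	uint64_t step = 1;
	int i;

	/* step grows as unit^hops: 2^48 here, far beyond UINT32_MAX */
	for (i = 0; i < hops; i++)
		step *= unit;

	printf("step = 0x%llx (a u32 would wrap to 0x%x)\n",
	       (unsigned long long)step, (uint32_t)step);
	return 0;
}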
Fixes: 38389eaa4db1 ("RDMA/hns: Add mtr support for mixed multihop addressing") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_hem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 7705feea2cb9..bc7b555c230b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1100,9 +1100,9 @@ static bool hem_list_is_bottom_bt(int hopnum, int bt_level) * @bt_level: base address table level * @unit: ba entries per bt page */ -static u32 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) +static u64 hem_list_calc_ba_range(int hopnum, int bt_level, int unit) { - u32 step; + u64 step; int max; int i;
@@ -1138,7 +1138,7 @@ int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, { struct hns_roce_buf_region *r; int total = 0; - int step; + u64 step; int i;
for (i = 0; i < region_cnt; i++) { @@ -1169,7 +1169,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, int ret = 0; int max_ofs; int level; - u32 step; + u64 step; int end;
if (hopnum <= 1) @@ -1206,7 +1206,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, }
start_aligned = (distance / step) * step + r->offset; - end = min_t(int, start_aligned + step - 1, max_ofs); + end = min_t(u64, start_aligned + step - 1, max_ofs); cur = hem_list_alloc_item(hr_dev, start_aligned, end, unit, true); if (!cur) { @@ -1294,7 +1294,7 @@ static int setup_middle_bt(struct hns_roce_dev *hr_dev, void *cpu_base, struct hns_roce_hem_item *hem, *temp_hem; int total = 0; int offset; - int step; + u64 step;
step = hem_list_calc_ba_range(r->hopnum, 1, unit); if (step < 1)
From: wenglianfa wenglianfa@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
During reset, the cmd event cannot be reported, so threads may wait on the cmd event for a long time. To fix it, complete the pending cmd events when the reset starts so that threads do not keep waiting.
Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d75328515458..f1983e089352 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -8013,6 +8013,20 @@ static void hns_roce_v2_reset_notify_user(struct hns_roce_dev *hr_dev) mutex_unlock(&hr_dev->uctx_list_mutex); }
+static void hns_roce_v2_reset_notify_cmd(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_cmdq *hr_cmd = &hr_dev->cmd; + int i; + + if (!hr_dev->cmd_mod) + return; + + for (i = 0; i < hr_cmd->max_cmds; i++) { + hr_cmd->context[i].result = -EBUSY; + complete(&hr_cmd->context[i].done); + } +} + static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle) { struct hns_roce_dev *hr_dev; @@ -8036,6 +8050,9 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
+ /* Complete the CMDQ event in advance during the reset. */ + hns_roce_v2_reset_notify_cmd(hr_dev); + return 0; }
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
In hns_roce_get_bond_netdev(), a bond_mutex is taken. Since hns_roce_query_port() calls it while holding the iboe.lock spinlock, this may lead to a potential sleep-in-atomic-context.
Since hns_roce_get_bond_netdev() doesn't involve iboe, move the call out of the critical section of iboe.lock.
Fixes: e62a20278f18 ("RDMA/hns: support RoCE bonding") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index e213689eea6c..10ae5ac9cc2c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -341,9 +341,10 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num, if (ret) ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret);
+ net_dev = hr_dev->hw->get_bond_netdev(hr_dev); + spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- net_dev = hr_dev->hw->get_bond_netdev(hr_dev); if (!net_dev) net_dev = get_hr_netdev(hr_dev, port); if (!net_dev) {
From: wenglianfa wenglianfa@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
Sleeping is not allowed in a spinlock critical section, but ib_umem_release() may sleep while the spinlock is held. To fix it, use mutex_lock() instead of spin_lock().
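A minimal sketch of the resulting locking discipline (hypothetical list and names, not the driver code): because ib_umem_release() may sleep, the list that collects deferred releases has to be protected by a mutex rather than a spinlock:

#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/slab.h>
#include <rdma/ib_umem.h>

struct umem_node {
	struct list_head list;
	struct ib_umem *umem;
};

static LIST_HEAD(unfree_umem_list);
static DEFINE_MUTEX(unfree_umem_mutex);	/* holder may sleep, so not a spinlock */

static void free_unfree_umem(void)
{
	struct umem_node *pos, *next;

	mutex_lock(&unfree_umem_mutex);
	list_for_each_entry_safe(pos, next, &unfree_umem_list, list) {
		list_del(&pos->list);
		ib_umem_release(pos->umem);	/* can sleep: safe under a mutex */
		kfree(pos);
	}
	mutex_unlock(&unfree_umem_mutex);
}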
Fixes: 431c875e4b02 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 ++-- drivers/infiniband/hw/hns/hns_roce_main.c | 11 ++++++++--- drivers/infiniband/hw/hns/hns_roce_mr.c | 16 ++++++++-------- 3 files changed, 18 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1b0825cd4111..b287f3a5bd59 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1228,9 +1228,9 @@ struct hns_roce_dev { struct rdma_notify_mem *notify_tbl; size_t notify_num; struct list_head mtr_unfree_list; /* list of unfree mtr on this dev */ - spinlock_t mtr_unfree_list_lock; /* protect mtr_unfree_list */ + struct mutex mtr_unfree_list_mutex; /* protect mtr_unfree_list */ struct list_head umem_unfree_list; /* list of unfree umem on this dev */ - spinlock_t umem_unfree_list_lock; /* protect umem_unfree_list */ + struct mutex umem_unfree_list_mutex; /* protect umem_unfree_list */ };
static inline struct hns_roce_dev *to_hr_dev(struct ib_device *ib_dev) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 10ae5ac9cc2c..f203ffdfd4b3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1314,6 +1314,8 @@ static void hns_roce_teardown_hca(struct hns_roce_dev *hr_dev) hns_roce_cleanup_dca(hr_dev);
hns_roce_cleanup_bitmap(hr_dev); + mutex_destroy(&hr_dev->umem_unfree_list_mutex); + mutex_destroy(&hr_dev->mtr_unfree_list_mutex); mutex_destroy(&hr_dev->uctx_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || @@ -1342,10 +1344,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) mutex_init(&hr_dev->uctx_list_mutex);
INIT_LIST_HEAD(&hr_dev->mtr_unfree_list); - spin_lock_init(&hr_dev->mtr_unfree_list_lock); + mutex_init(&hr_dev->mtr_unfree_list_mutex);
INIT_LIST_HEAD(&hr_dev->umem_unfree_list); - spin_lock_init(&hr_dev->umem_unfree_list_lock); + mutex_init(&hr_dev->umem_unfree_list_mutex);
if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) { @@ -1387,7 +1389,10 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
err_uar_table_free: ida_destroy(&hr_dev->uar_ida.ida); + mutex_destroy(&hr_dev->umem_unfree_list_mutex); + mutex_destroy(&hr_dev->mtr_unfree_list_mutex); mutex_destroy(&hr_dev->uctx_list_mutex); + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_CQ_RECORD_DB || hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_QP_RECORD_DB) mutex_destroy(&hr_dev->pgdir_mutex); @@ -1597,9 +1602,9 @@ void hns_roce_exit(struct hns_roce_dev *hr_dev, bool bond_cleanup)
if (hr_dev->hw->hw_exit) hr_dev->hw->hw_exit(hr_dev); - hns_roce_teardown_hca(hr_dev); hns_roce_free_unfree_umem(hr_dev); hns_roce_free_unfree_mtr(hr_dev); + hns_roce_teardown_hca(hr_dev); hns_roce_cleanup_hem(hr_dev);
if (hr_dev->cmd_mod) diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 3a3956fa76d8..bc5eba02b358 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1296,22 +1296,22 @@ void hns_roce_add_unfree_mtr(struct hns_roce_mtr_node *pos, { hns_roce_copy_mtr(&pos->mtr, mtr);
- spin_lock(&hr_dev->mtr_unfree_list_lock); + mutex_lock(&hr_dev->mtr_unfree_list_mutex); list_add_tail(&pos->list, &hr_dev->mtr_unfree_list); - spin_unlock(&hr_dev->mtr_unfree_list_lock); + mutex_unlock(&hr_dev->mtr_unfree_list_mutex); }
void hns_roce_free_unfree_mtr(struct hns_roce_dev *hr_dev) { struct hns_roce_mtr_node *pos, *next;
- spin_lock(&hr_dev->mtr_unfree_list_lock); + mutex_lock(&hr_dev->mtr_unfree_list_mutex); list_for_each_entry_safe(pos, next, &hr_dev->mtr_unfree_list, list) { list_del(&pos->list); hns_roce_mtr_destroy(hr_dev, &pos->mtr); kvfree(pos); } - spin_unlock(&hr_dev->mtr_unfree_list_lock); + mutex_unlock(&hr_dev->mtr_unfree_list_mutex); }
void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page, @@ -1321,20 +1321,20 @@ void hns_roce_add_unfree_umem(struct hns_roce_user_db_page *user_page,
pos->umem = user_page->umem;
- spin_lock(&hr_dev->umem_unfree_list_lock); + mutex_lock(&hr_dev->umem_unfree_list_mutex); list_add_tail(&pos->list, &hr_dev->umem_unfree_list); - spin_unlock(&hr_dev->umem_unfree_list_lock); + mutex_unlock(&hr_dev->umem_unfree_list_mutex); }
void hns_roce_free_unfree_umem(struct hns_roce_dev *hr_dev) { struct hns_roce_umem_node *pos, *next;
- spin_lock(&hr_dev->umem_unfree_list_lock); + mutex_lock(&hr_dev->umem_unfree_list_mutex); list_for_each_entry_safe(pos, next, &hr_dev->umem_unfree_list, list) { list_del(&pos->list); ib_umem_release(pos->umem); kvfree(pos); } - spin_unlock(&hr_dev->umem_unfree_list_lock); + mutex_unlock(&hr_dev->umem_unfree_list_mutex); }
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
CEQEs are currently handled in the interrupt handler. This may keep the CPU core in interrupt context for too long and lead to a soft lockup under heavy load.

Handle CEQEs in a tasklet instead, and set an upper limit on the number of CEQEs handled by a single invocation of the tasklet.
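A minimal sketch of the deferral pattern (hypothetical names; the budget value is illustrative): the hard IRQ handler only schedules a tasklet, and the tasklet processes at most a bounded number of events per run:

#include <linux/interrupt.h>

#define CEQE_BUDGET 256	/* illustrative per-run upper bound */

struct my_eq {
	struct tasklet_struct tasklet;	/* set up with tasklet_setup() at IRQ request time */
	/* ... event queue state ... */
};

/* Hypothetical helper: consume one CEQE, return false when the queue is empty. */
static bool poll_one_ceqe(struct my_eq *eq)
{
	return false;
}

static void my_eq_tasklet(struct tasklet_struct *t)
{
	struct my_eq *eq = from_tasklet(eq, t, tasklet);
	int handled = 0;

	/* A flood of completions can no longer monopolize the CPU core. */
	while (handled < CEQE_BUDGET && poll_one_ceqe(eq))
		handled++;
}

static irqreturn_t my_eq_irq(int irq, void *data)
{
	struct my_eq *eq = data;

	tasklet_schedule(&eq->tasklet);	/* defer the real work out of hard IRQ context */
	return IRQ_HANDLED;
}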
Fixes: a5073d6054f7 ("RDMA/hns: Add eq support of hip08") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 89 ++++++++++++--------- 2 files changed, 54 insertions(+), 36 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b287f3a5bd59..7060583535df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -836,6 +836,7 @@ struct hns_roce_eq { int shift; int event_type; int sub_type; + struct tasklet_struct tasklet; };
struct hns_roce_eq_table { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f1983e089352..b43eaf2c5e5d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6855,33 +6855,11 @@ static struct hns_roce_ceqe *next_ceqe_sw_v2(struct hns_roce_eq *eq) !!(eq->cons_index & eq->entries)) ? ceqe : NULL; }
-static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_dev *hr_dev, - struct hns_roce_eq *eq) +static irqreturn_t hns_roce_v2_ceq_int(struct hns_roce_eq *eq) { - struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); - irqreturn_t ceqe_found = IRQ_NONE; - u32 cqn; - - while (ceqe) { - /* Make sure we read CEQ entry after we have checked the - * ownership bit - */ - dma_rmb(); - - cqn = hr_reg_read(ceqe, CEQE_CQN); - - hns_roce_cq_completion(hr_dev, cqn); - - ++eq->cons_index; - ceqe_found = IRQ_HANDLED; - atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); - - ceqe = next_ceqe_sw_v2(eq); - } + tasklet_schedule(&eq->tasklet);
- update_eq_db(eq); - - return IRQ_RETVAL(ceqe_found); + return IRQ_HANDLED; }
static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr) @@ -6892,7 +6870,7 @@ static irqreturn_t hns_roce_v2_msix_interrupt_eq(int irq, void *eq_ptr)
if (eq->type_flag == HNS_ROCE_CEQ) /* Completion event interrupt */ - int_work = hns_roce_v2_ceq_int(hr_dev, eq); + int_work = hns_roce_v2_ceq_int(eq); else /* Asychronous event interrupt */ int_work = hns_roce_v2_aeq_int(hr_dev, eq); @@ -7264,6 +7242,34 @@ static int hns_roce_v2_create_eq(struct hns_roce_dev *hr_dev, return ret; }
+static void hns_roce_ceq_task(struct tasklet_struct *task) +{ + struct hns_roce_eq *eq = from_tasklet(eq, task, tasklet); + struct hns_roce_ceqe *ceqe = next_ceqe_sw_v2(eq); + struct hns_roce_dev *hr_dev = eq->hr_dev; + int ceqe_num = 0; + u32 cqn; + + while (ceqe && ceqe_num < hr_dev->caps.ceqe_depth) { + /* Make sure we read CEQ entry after we have checked the + * ownership bit + */ + dma_rmb(); + + cqn = hr_reg_read(ceqe, CEQE_CQN); + + hns_roce_cq_completion(hr_dev, cqn); + + ++eq->cons_index; + ++ceqe_num; + atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CEQE_CNT]); + + ceqe = next_ceqe_sw_v2(eq); + } + + update_eq_db(eq); +} + static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, int comp_num, int aeq_num, int other_num) { @@ -7295,21 +7301,24 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, j - other_num - aeq_num);
for (j = 0; j < irq_num; j++) { - if (j < other_num) + if (j < other_num) { ret = request_irq(hr_dev->irq[j], hns_roce_v2_msix_interrupt_abn, 0, hr_dev->irq_names[j], hr_dev); - - else if (j < (other_num + comp_num)) + } else if (j < (other_num + comp_num)) { + tasklet_setup(&eq_table->eq[j - other_num].tasklet, + hns_roce_ceq_task); ret = request_irq(eq_table->eq[j - other_num].irq, hns_roce_v2_msix_interrupt_eq, 0, hr_dev->irq_names[j + aeq_num], &eq_table->eq[j - other_num]); - else + } else { ret = request_irq(eq_table->eq[j - other_num].irq, hns_roce_v2_msix_interrupt_eq, 0, hr_dev->irq_names[j - comp_num], &eq_table->eq[j - other_num]); + } + if (ret) { dev_err(hr_dev->dev, "Request irq error!\n"); goto err_request_failed; @@ -7319,12 +7328,16 @@ static int __hns_roce_request_irq(struct hns_roce_dev *hr_dev, int irq_num, return 0;
err_request_failed: - for (j -= 1; j >= 0; j--) - if (j < other_num) + for (j -= 1; j >= 0; j--) { + if (j < other_num) { free_irq(hr_dev->irq[j], hr_dev); - else - free_irq(eq_table->eq[j - other_num].irq, - &eq_table->eq[j - other_num]); + continue; + } + free_irq(eq_table->eq[j - other_num].irq, + &eq_table->eq[j - other_num]); + if (j < other_num + comp_num) + tasklet_kill(&eq_table->eq[j - other_num].tasklet); + }
err_kzalloc_failed: for (i -= 1; i >= 0; i--) @@ -7345,8 +7358,12 @@ static void __hns_roce_free_irq(struct hns_roce_dev *hr_dev) for (i = 0; i < hr_dev->caps.num_other_vectors; i++) free_irq(hr_dev->irq[i], hr_dev);
- for (i = 0; i < eq_num; i++) + for (i = 0; i < eq_num; i++) { free_irq(hr_dev->eq_table.eq[i].irq, &hr_dev->eq_table.eq[i]); + if (i < hr_dev->caps.num_comp_vectors) + tasklet_kill(&hr_dev->eq_table.eq[i].tasklet); + } +
for (i = 0; i < irq_num; i++) kfree(hr_dev->irq_names[i]);
From: Feng Fang fangfeng4@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
The mapping between dgid and dip_idx should be one-to-one. However, when two QPs use the same dgid and the first QP is then repeatedly destroyed and re-created with different dgids, different dgids can end up mapping to the same dip_idx.

This patch adds two bitmaps, qpn_bitmap and dip_idx_bitmap, to track qp_num and dip_idx usage. A bit in qpn_bitmap is set only when the corresponding qp_num is not currently used as a dip_idx. When a dip_idx is no longer used, its bit in dip_idx_bitmap is cleared and the corresponding bit in qpn_bitmap is set. When a dip_idx needs to be allocated, the first set bit in qpn_bitmap is chosen: its bit in dip_idx_bitmap is set and its bit in qpn_bitmap is cleared.
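A minimal sketch of the two-bitmap protocol described above (kernel bitmap API, hypothetical wrapper; callers are expected to hold the dip list lock):

#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/types.h>

struct dip_idx_table {
	unsigned long *qpn_bitmap;	/* set bit: index is free to use as a dip_idx */
	unsigned long *dip_idx_bitmap;	/* set bit: index is currently used as a dip_idx */
	u32 num_qps;
};

/* Allocate a dip_idx: take the first candidate from qpn_bitmap and mark it
 * busy in dip_idx_bitmap. Returns num_qps when nothing is available.
 */
static u32 alloc_dip_idx(struct dip_idx_table *t)
{
	u32 idx = find_first_bit(t->qpn_bitmap, t->num_qps);

	if (idx < t->num_qps) {
		clear_bit(idx, t->qpn_bitmap);
		set_bit(idx, t->dip_idx_bitmap);
	}
	return idx;
}

/* Release a dip_idx once no QP references it any more. */
static void free_dip_idx(struct dip_idx_table *t, u32 idx)
{
	clear_bit(idx, t->dip_idx_bitmap);
	set_bit(idx, t->qpn_bitmap);
}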
Fixes: 2493138e413f ("RDMA/hns: Bugfix for incorrect association between dip_idx and dgid") Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Feng Fang fangfeng4@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 58 ++++++++++++++++++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 + drivers/infiniband/hw/hns/hns_roce_qp.c | 17 ++++-- 4 files changed, 67 insertions(+), 15 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 7060583535df..ef4400638556 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -607,9 +607,8 @@ struct hns_roce_bank { };
struct hns_roce_idx_table { - u32 *spare_idx; - u32 head; - u32 tail; + unsigned long *qpn_bitmap; + unsigned long *dip_idx_bitmap; };
struct hns_roce_qp_table { @@ -770,6 +769,7 @@ struct hns_roce_qp { u8 priority; bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; + struct hns_roce_dip *dip; };
struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index b43eaf2c5e5d..f940e94645cf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5308,21 +5308,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, { const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx; - u32 *head = &hr_dev->qp_table.idx_table.head; - u32 *tail = &hr_dev->qp_table.idx_table.tail; + unsigned long *dip_idx_bitmap = hr_dev->qp_table.idx_table.dip_idx_bitmap; + unsigned long *qpn_bitmap = hr_dev->qp_table.idx_table.qpn_bitmap; + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_dip *hr_dip; unsigned long flags; int ret = 0; + u32 idx;
spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
- spare_idx[*tail] = ibqp->qp_num; - *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1); + if (!test_bit(ibqp->qp_num, dip_idx_bitmap)) + set_bit(ibqp->qp_num, qpn_bitmap);
list_for_each_entry(hr_dip, &hr_dev->dip_list, node) { if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { *dip_idx = hr_dip->dip_idx; + hr_dip->qp_cnt++; + hr_qp->dip = hr_dip; goto out; } } @@ -5336,9 +5339,21 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; }
+ idx = find_first_bit(qpn_bitmap, hr_dev->caps.num_qps); + if (idx < hr_dev->caps.num_qps) { + *dip_idx = idx; + clear_bit(idx, qpn_bitmap); + set_bit(idx, dip_idx_bitmap); + } else { + ret = -ENOENT; + kfree(hr_dip); + goto out; + } + memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - hr_dip->dip_idx = *dip_idx = spare_idx[*head]; - *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1); + hr_dip->dip_idx = *dip_idx; + hr_dip->qp_cnt++; + hr_qp->dip = hr_dip; list_add_tail(&hr_dip->node, &hr_dev->dip_list);
out: @@ -6278,12 +6293,41 @@ int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return ret; }
+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + unsigned long *dip_idx_bitmap = hr_dev->qp_table.idx_table.dip_idx_bitmap; + unsigned long *qpn_bitmap = hr_dev->qp_table.idx_table.qpn_bitmap; + struct hns_roce_dip *hr_dip = hr_qp->dip; + unsigned long flags; + + spin_lock_irqsave(&hr_dev->dip_list_lock, flags); + + if (hr_dip) { + hr_dip->qp_cnt--; + if (!hr_dip->qp_cnt) { + clear_bit(hr_dip->dip_idx, dip_idx_bitmap); + set_bit(hr_dip->dip_idx, qpn_bitmap); + + list_del(&hr_dip->node); + } else { + hr_dip = NULL; + } + } + + spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); + kfree(hr_dip); +} + int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); int ret;
+ if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) + put_dip_ctx_idx(hr_dev, hr_qp); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err_ratelimited(&hr_dev->ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index de96d7fdcb24..abe295ce26c8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1451,6 +1451,7 @@ struct hns_roce_v2_priv { struct hns_roce_dip { u8 dgid[GID_LEN_V2]; u32 dip_idx; + u32 qp_cnt; struct list_head node; /* all dips are on a list */ };
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 87007e29e593..e2d3a3c0cc6c 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1760,11 +1760,18 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) unsigned int reserved_from_bot; unsigned int i;
- qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps, - sizeof(u32), GFP_KERNEL); - if (!qp_table->idx_table.spare_idx) + qp_table->idx_table.qpn_bitmap = bitmap_zalloc(hr_dev->caps.num_qps, + GFP_KERNEL); + if (!qp_table->idx_table.qpn_bitmap) return -ENOMEM;
+ qp_table->idx_table.dip_idx_bitmap = bitmap_zalloc(hr_dev->caps.num_qps, + GFP_KERNEL); + if (!qp_table->idx_table.dip_idx_bitmap) { + bitmap_free(qp_table->idx_table.qpn_bitmap); + return -ENOMEM; + } + mutex_init(&qp_table->scc_mutex); mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); @@ -1793,6 +1800,6 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); mutex_destroy(&hr_dev->qp_table.bank_mutex); - mutex_destroy(&hr_dev->qp_table.scc_mutex); - kfree(hr_dev->qp_table.idx_table.spare_idx); + bitmap_free(hr_dev->qp_table.idx_table.qpn_bitmap); + bitmap_free(hr_dev->qp_table.idx_table.dip_idx_bitmap); }
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
If a mailbox command times out while destroying a resource, the HW may still access the related resource, which causes a use-after-free (UAF).

To fix it, if resource destruction fails, the resource will be retained until the driver is uninitialized.
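A minimal sketch of the delayed-destroy pattern (hypothetical resource type; destroy_hw_ctx() is a stand-in for the mailbox call): when the firmware fails to destroy a context, the software object is only flagged, and its backing memory is kept until driver teardown instead of being freed while the HW may still touch it:

#include <linux/types.h>

struct hw_resource {
	bool delayed_destroy_flag;	/* when set, memory is kept until driver uninit */
};

/* Hypothetical stand-in for the mailbox DESTROY command. */
static int destroy_hw_ctx(struct hw_resource *res)
{
	return 0;	/* may return -EBUSY/-ETIMEDOUT if the mailbox times out */
}

static void teardown_resource(struct hw_resource *res)
{
	if (destroy_hw_ctx(res)) {
		/* HW may still reference the context: defer the free. */
		res->delayed_destroy_flag = true;
		return;
	}
	/* Only here is it safe to release the backing memory immediately. */
}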
Fixes: 431c875e4b02 ("RDMA/hns: Fix simultaneous reset and resource deregistration") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++---- drivers/infiniband/hw/hns/hns_roce_mr.c | 6 +++--- drivers/infiniband/hw/hns/hns_roce_srq.c | 6 +++--- 4 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 72bf500f4b65..6fdbe34ecd20 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -229,12 +229,12 @@ static void free_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_CQC, hr_cq->cqn); - if (ret) + if (ret) { + hr_cq->delayed_destroy_flag = true; dev_err_ratelimited(dev, "DESTROY_CQ failed (%d) for CQN %06lx\n", ret, hr_cq->cqn); - if (ret == -EBUSY) - hr_cq->delayed_destroy_flag = true; + }
xa_erase_irq(&cq_table->array, hr_cq->cqn);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f940e94645cf..9db4753c0365 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6262,10 +6262,12 @@ int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, /* Modify qp to reset before destroying qp */ ret = hns_roce_v2_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET, udata); - if (ret) + if (ret) { + hr_qp->delayed_destroy_flag = true; ibdev_err_ratelimited(ibdev, "failed to modify QP to RST, ret = %d.\n", ret); + } }
send_cq = hr_qp->ibqp.send_cq ? to_hr_cq(hr_qp->ibqp.send_cq) : NULL; @@ -6334,9 +6336,6 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) "failed to destroy QP, QPN = 0x%06lx, ret = %d.\n", hr_qp->qpn, ret);
- if (ret == -EBUSY) - hr_qp->delayed_destroy_flag = true; - hns_roce_qp_destroy(hr_dev, hr_qp, udata);
return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index bc5eba02b358..d625b586fb60 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -152,11 +152,11 @@ static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT, key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1)); - if (ret) + if (ret) { + mr->delayed_destroy_flag = true; ibdev_warn_ratelimited(ibdev, "failed to destroy mpt, ret = %d.\n", ret); - if (ret == -EBUSY) - mr->delayed_destroy_flag = true; + } }
free_mr_pbl(hr_dev, mr); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 12b69f150d6c..454a7370a8b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -161,12 +161,12 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_SRQ, srq->srqn); - if (ret) + if (ret) { + srq->delayed_destroy_flag = true; dev_err_ratelimited(hr_dev->dev, "DESTROY_SRQ failed (%d) for SRQN %06lx\n", ret, srq->srqn); - if (ret == -EBUSY) - srq->delayed_destroy_flag = true; + }
xa_erase_irq(&srq_table->xa, srq->srqn);
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
Fix the misuse of spin_lock_irq()/spin_unlock_irq() in a region where spin_lock_irqsave()/spin_unlock_irqrestore() is already held.
This was discovered through lock debugging, and the corresponding log is as follows:

raw_local_irq_restore() called with IRQs enabled
Call trace:
 warn_bogus_irq_restore+0x30/0x40
 _raw_spin_unlock_irqrestore+0x84/0xc8
 add_qp_to_list+0x11c/0x148 [hns_roce_hw_v2]
 hns_roce_create_qp_common.constprop.0+0x240/0x780 [hns_roce_hw_v2]
 hns_roce_create_qp+0x98/0x160 [hns_roce_hw_v2]
 create_qp+0x138/0x258
 ib_create_qp_kernel+0x50/0xe8
 create_mad_qp+0xa8/0x128
 ib_mad_port_open+0x218/0x448
 ib_mad_init_device+0x70/0x1f8
 add_client_context+0xfc/0x220
 enable_device_and_get+0xd0/0x140
 ib_register_device.part.0+0xf4/0x1c8
 ib_register_device+0x34/0x50
 hns_roce_register_device+0x174/0x3d0 [hns_roce_hw_v2]
 hns_roce_init+0xfc/0x2c0 [hns_roce_hw_v2]
 __hns_roce_hw_v2_init_instance+0x7c/0x1d0 [hns_roce_hw_v2]
 hns_roce_hw_v2_init_instance+0x9c/0x180 [hns_roce_hw_v2]
 hclge_init_roce_client_instance+0x78/0x1d8 [hclge]
 hclge_init_client_instance+0x108/0x168 [hclge]
 hnae3_register_client+0x120/0x5d8 [hnae3]
 hns_roce_hw_v2_init+0x28/0xff8 [hns_roce_hw_v2]
 do_one_initcall+0x7c/0x368
 do_init_module+0x60/0x220
 load_module+0x570/0x668
 init_module_from_file+0x8c/0xd0
 idempotent_init_module+0x188/0x260
 __arm64_sys_finit_module+0x70/0xe0
 invoke_syscall+0x50/0x128
 el0_svc_common.constprop.0+0xc8/0xf0
 do_el0_svc+0x24/0x38
 el0_svc+0x64/0x268
 el0t_64_sync_handler+0xc0/0xc8
 el0t_64_sync+0x188/0x190
irq event stamp: 0
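A minimal sketch of the corrected nesting (illustrative locks, not the driver code): once interrupts are already disabled by an outer spin_lock_irqsave(), the inner CQ locks must use plain spin_lock()/spin_unlock(); calling spin_unlock_irq() there would re-enable IRQs too early and trigger the warning above:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(outer_lock);	/* e.g. the QP list lock */
static DEFINE_SPINLOCK(send_cq_lock);
static DEFINE_SPINLOCK(recv_cq_lock);

static void locked_region(void)
{
	unsigned long flags;

	spin_lock_irqsave(&outer_lock, flags);	/* IRQs disabled from here on */

	spin_lock(&send_cq_lock);		/* plain lock: keep the IRQ state */
	spin_lock_nested(&recv_cq_lock, SINGLE_DEPTH_NESTING);

	/* ... work under all three locks ... */

	spin_unlock(&recv_cq_lock);
	spin_unlock(&send_cq_lock);		/* NOT spin_unlock_irq() */

	spin_unlock_irqrestore(&outer_lock, flags);
}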
Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver") Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_qp.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e2d3a3c0cc6c..13705d3de9a7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1665,19 +1665,19 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) __acquire(&send_cq->lock); __acquire(&recv_cq->lock); } else if (unlikely(send_cq != NULL && recv_cq == NULL)) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (unlikely(send_cq == NULL && recv_cq != NULL)) { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); __acquire(&send_cq->lock); } else if (send_cq == recv_cq) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } @@ -1697,13 +1697,13 @@ void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, spin_unlock(&recv_cq->lock); } else if (send_cq == recv_cq) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->cqn < recv_cq->cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } }
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
lifespan is declared as u32 but is handled as an __le32 by memcpy(). Change it to __le32 and add le32_to_cpu() where needed.
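A small illustration of the endianness rule being applied (generic kernel byte-order helpers, hypothetical struct): a field that is copied to or from little-endian firmware data should be declared __le32 and converted at the boundary:

#include <linux/types.h>
#include <asm/byteorder.h>

struct fw_param {
	__le32 lifespan;	/* kept in FW (little-endian) byte order */
};

static u32 read_lifespan_ms(const struct fw_param *p)
{
	return le32_to_cpu(p->lifespan);	/* convert before CPU arithmetic */
}

static void write_lifespan_ms(struct fw_param *p, u32 ms)
{
	p->lifespan = cpu_to_le32(ms);
}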
Fixes: 523f34d81ea7 ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 +- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ef4400638556..391d28f953b0 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1124,7 +1124,7 @@ struct hns_roce_cnp_pri_param { #define HNS_ROCE_SCC_PARAM_SIZE 4 struct hns_roce_scc_param { __le32 param[HNS_ROCE_SCC_PARAM_SIZE]; - u32 lifespan; + __le32 lifespan; unsigned long timestamp; enum hns_roce_scc_algo algo_type; struct delayed_work scc_cfg_dwork; diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 2710f97e3af5..12b1eaee72b3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -239,7 +239,7 @@ static ssize_t scc_attr_store(struct hns_roce_port *pdata, if (scc_attr->offset >= offsetof(typeof(*scc_param), lifespan)) return count;
- lifespan_jiffies = msecs_to_jiffies(scc_param->lifespan); + lifespan_jiffies = msecs_to_jiffies(le32_to_cpu(scc_param->lifespan)); exp_time = scc_param->timestamp + lifespan_jiffies;
if (time_is_before_eq_jiffies(exp_time)) {
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
param[] in struct hns_roce_scc_param is used both to store and to show scc params. But when the configuration of HW fails, the params stored in this array become different from the ones actually in HW.

Add a member latest_param[] to struct hns_roce_scc_param to store the latest successfully configured scc params. It is modified only after a configuration succeeds, so the value shown in sysfs always reflects the params in HW even if the previous configuration failed. The original member param[] is now only used to stage the temporary value from sysfs input.
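A minimal sketch of the commit-on-success pattern described above (hypothetical names; send_to_fw() is a stand-in for the real command): the shadow copy shown to sysfs is updated only after the firmware accepts the new values:

#include <linux/string.h>
#include <linux/types.h>

#define PARAM_SIZE 4

struct scc_param_shadow {
	__le32 param[PARAM_SIZE];		/* staging area for sysfs input */
	__le32 latest_param[PARAM_SIZE];	/* last values accepted by FW */
};

/* Hypothetical stand-in for the firmware configuration command. */
static int send_to_fw(const __le32 *data, size_t len)
{
	return 0;
}

static int apply_scc_param(struct scc_param_shadow *s)
{
	int ret = send_to_fw(s->param, sizeof(s->param));

	if (ret)
		return ret;	/* latest_param keeps the old, valid values */

	memcpy(s->latest_param, s->param, sizeof(s->latest_param));
	return 0;
}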
Fixes: 523f34d81ea7 ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 ++++++++++--- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 6 +++++- 3 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 391d28f953b0..ef5c72051a9a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1130,6 +1130,7 @@ struct hns_roce_scc_param { struct delayed_work scc_cfg_dwork; struct hns_roce_dev *hr_dev; u8 port_num; + __le32 latest_param[HNS_ROCE_SCC_PARAM_SIZE]; };
struct hns_roce_port { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 9db4753c0365..a5f6e7685c2e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7600,11 +7600,16 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, memcpy(&desc.data, scc_param, sizeof(scc_param->param));
ret = hns_roce_cmq_send(hr_dev, &desc, 1); - if (ret) + if (ret) { ibdev_err_ratelimited(&hr_dev->ib_dev, "failed to configure scc param, opcode: 0x%x, ret = %d.\n", le16_to_cpu(desc.opcode), ret); - return ret; + return ret; + } + + memcpy(scc_param->latest_param, &desc.data, + sizeof(scc_param->latest_param)); + return 0; }
static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev, @@ -7640,7 +7645,9 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev,
pdata = &hr_dev->port_data[port_num - 1]; scc_param = &pdata->scc_param[algo]; - memcpy(scc_param, &desc.data, sizeof(scc_param->param)); + memcpy(scc_param->param, &desc.data, sizeof(scc_param->param)); + memcpy(scc_param->latest_param, &desc.data, + sizeof(scc_param->latest_param));
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 12b1eaee72b3..a43426717765 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -202,7 +202,11 @@ static ssize_t scc_attr_show(struct hns_roce_port *pdata,
scc_param = &pdata->scc_param[scc_attr->algo_type];
- memcpy(&val, (void *)scc_param + scc_attr->offset, scc_attr->size); + if (scc_attr->offset == offsetof(typeof(*scc_param), lifespan)) + val = scc_param->lifespan; + else + memcpy(&val, (void *)scc_param->latest_param + scc_attr->offset, + scc_attr->size);
return sysfs_emit(buf, "%u\n", le32_to_cpu(val)); }
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
The FW configuration of scc params is deferred to a workqueue. This may allow the scc params to be modified by the sysfs store callback while they are being configured to FW. Use a mutex to solve this.
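A minimal sketch of the serialization (hypothetical names, not the driver code): the sysfs store path and the delayed work that pushes values to FW take the same mutex, so the staged parameters cannot change in the middle of a configuration:

#include <linux/jiffies.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/workqueue.h>

struct scc_cfg {
	struct mutex lock;		/* protects param[]; init with mutex_init() */
	struct delayed_work dwork;	/* deferred FW configuration; INIT_DELAYED_WORK() */
	u32 param[4];
};

static void scc_cfg_work(struct work_struct *work)
{
	struct scc_cfg *cfg = container_of(work, struct scc_cfg, dwork.work);

	mutex_lock(&cfg->lock);
	/* snapshot param[] and push it to FW here, with the store path excluded */
	mutex_unlock(&cfg->lock);
}

static void scc_store(struct scc_cfg *cfg, int idx, u32 val)
{
	mutex_lock(&cfg->lock);
	cfg->param[idx] = val;		/* cannot race with scc_cfg_work() */
	mutex_unlock(&cfg->lock);

	schedule_delayed_work(&cfg->dwork, msecs_to_jiffies(100));
}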
Fixes: 523f34d81ea7 ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 1 + drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +++++ drivers/infiniband/hw/hns/hns_roce_sysfs.c | 9 ++++++++- 3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ef5c72051a9a..92a9dfc04a80 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1131,6 +1131,7 @@ struct hns_roce_scc_param { struct hns_roce_dev *hr_dev; u8 port_num; __le32 latest_param[HNS_ROCE_SCC_PARAM_SIZE]; + struct mutex scc_mutex; /* protect @param and @latest_param */ };
struct hns_roce_port { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index a5f6e7685c2e..33af1fc89892 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -7597,6 +7597,7 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev, hns_roce_cmq_setup_basic_desc(&desc, scc_opcode[algo], false); pdata = &hr_dev->port_data[port_num - 1]; scc_param = &pdata->scc_param[algo]; + mutex_lock(&scc_param->scc_mutex); memcpy(&desc.data, scc_param, sizeof(scc_param->param));
ret = hns_roce_cmq_send(hr_dev, &desc, 1); @@ -7609,6 +7610,8 @@ static int hns_roce_v2_config_scc_param(struct hns_roce_dev *hr_dev,
memcpy(scc_param->latest_param, &desc.data, sizeof(scc_param->latest_param)); + mutex_unlock(&scc_param->scc_mutex); + return 0; }
@@ -7645,9 +7648,11 @@ static int hns_roce_v2_query_scc_param(struct hns_roce_dev *hr_dev,
pdata = &hr_dev->port_data[port_num - 1]; scc_param = &pdata->scc_param[algo]; + mutex_lock(&scc_param->scc_mutex); memcpy(scc_param->param, &desc.data, sizeof(scc_param->param)); memcpy(scc_param->latest_param, &desc.data, sizeof(scc_param->latest_param)); + mutex_unlock(&scc_param->scc_mutex);
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index a43426717765..42d6e888cc90 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -92,6 +92,7 @@ static int alloc_scc_param(struct hns_roce_dev *hr_dev, scc_param[i].algo_type = i; scc_param[i].hr_dev = hr_dev; scc_param[i].port_num = pdata->port_num; + mutex_init(&scc_param[i].scc_mutex); INIT_DELAYED_WORK(&scc_param[i].scc_cfg_dwork, scc_param_config_work); } @@ -202,11 +203,13 @@ static ssize_t scc_attr_show(struct hns_roce_port *pdata,
scc_param = &pdata->scc_param[scc_attr->algo_type];
+ mutex_lock(&scc_param->scc_mutex); if (scc_attr->offset == offsetof(typeof(*scc_param), lifespan)) val = scc_param->lifespan; else memcpy(&val, (void *)scc_param->latest_param + scc_attr->offset, scc_attr->size); + mutex_unlock(&scc_param->scc_mutex);
return sysfs_emit(buf, "%u\n", le32_to_cpu(val)); } @@ -236,8 +239,10 @@ static ssize_t scc_attr_store(struct hns_roce_port *pdata,
attr_val = cpu_to_le32(val); scc_param = &pdata->scc_param[scc_attr->algo_type]; + mutex_lock(&scc_param->scc_mutex); memcpy((void *)scc_param + scc_attr->offset, &attr_val, scc_attr->size); + mutex_unlock(&scc_param->scc_mutex);
/* lifespan is only used for driver */ if (scc_attr->offset >= offsetof(typeof(*scc_param), lifespan)) @@ -606,8 +611,10 @@ static void hns_roce_unregister_port_sysfs(struct hns_roce_dev *hr_dev, pdata = &hr_dev->port_data[port_num]; sysfs_remove_groups(&pdata->kobj, hns_attr_port_groups); scc_param = pdata->scc_param; - for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) + for (i = 0; i < HNS_ROCE_SCC_ALGO_TOTAL; i++) { cancel_delayed_work_sync(&scc_param[i].scc_cfg_dwork); + mutex_destroy(&scc_param[i].scc_mutex); + } kobject_put(&pdata->kobj); }
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
The sysfs files should be created only after all necessary resources have been initialized.
Fixes: 523f34d81ea7 ("RDMA/hns: Support congestion control algorithm parameter configuration") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_sysfs.c | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_sysfs.c b/drivers/infiniband/hw/hns/hns_roce_sysfs.c index 42d6e888cc90..8950e7c4a2e4 100644 --- a/drivers/infiniband/hw/hns/hns_roce_sysfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_sysfs.c @@ -568,32 +568,29 @@ int hns_roce_create_port_files(struct ib_device *ibdev, u8 port_num, } kobject_uevent(&pdata->kobj, KOBJ_ADD);
- ret = sysfs_create_groups(&pdata->kobj, hns_attr_port_groups); - if (ret) { - ibdev_err(ibdev, - "fail to create port(%u) cc param sysfs, ret = %d.\n", - port_num, ret); - goto fail_kobj; - } - ret = alloc_scc_param(hr_dev, pdata); if (ret) { dev_err(hr_dev->dev, "alloc scc param failed, ret = %d!\n", ret); - goto fail_group; + goto fail_kobj; }
ret = alloc_cnp_pri_param(hr_dev, pdata); if (ret) { dev_err(hr_dev->dev, "alloc cnp pri param failed, ret = %d!\n", ret); - goto fail_group; + goto fail_kobj; }
- return ret; + ret = sysfs_create_groups(&pdata->kobj, hns_attr_port_groups); + if (ret) { + ibdev_err(ibdev, + "fail to create port(%u) cc param sysfs, ret = %d.\n", + port_num, ret); + goto fail_kobj; + }
-fail_group: - sysfs_remove_groups(&pdata->kobj, hns_attr_port_groups); + return ret;
fail_kobj: kobject_put(&pdata->kobj);
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IAL7SX
----------------------------------------------------------------------
In calc_loading_percent(), multiplying two u32 values may cause an integer overflow. To fix it, convert all relevant variables to u64.

Since total and free are both size_t, all_pages and free_pages may overflow. In addition, the multiplication used to compute percent may also overflow u32. In this patch all relevant variables are converted to u64.

This patch also handles the error case of calc_loading_percent() to avoid printing a wrong result.
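A standalone illustration of why 64-bit math is needed (plain C; the pool sizes are illustrative): with page counts derived from size_t byte totals, the intermediate used_pages * 100 * scale product can exceed 32 bits, so all operands are widened to u64 and the division keeps a remainder for the fractional digits:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define LOADING_PERCENT_SCALE 100	/* two fractional digits */

int main(void)
{
	size_t total = (size_t)64 << 30;	/* 64 GiB pool */
	size_t free_sz = (size_t)16 << 30;	/* 16 GiB free */
	uint64_t all_pages = total >> PAGE_SHIFT;
	uint64_t free_pages = free_sz >> PAGE_SHIFT;
	uint64_t used_pages = all_pages - free_pages;
	uint64_t percent;
	uint32_t rem;

	/* 64-bit multiply: 12582912 * 100 * 100 already exceeds UINT32_MAX */
	percent = used_pages * 100 * LOADING_PERCENT_SCALE / all_pages;
	rem = (uint32_t)(percent % LOADING_PERCENT_SCALE);
	percent /= LOADING_PERCENT_SCALE;

	printf("loading: %llu.%02u%%\n", (unsigned long long)percent, rem);
	return 0;
}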
Fixes: a21781182f77 ("RDMA/hns: Add debugfs support for DCA") Signed-off-by: Yuyu Li liyuyu6@huawei.com Signed-off-by: Chengchang Tang tangchengchang@huawei.com Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_debugfs.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_debugfs.c b/drivers/infiniband/hw/hns/hns_roce_debugfs.c index 85fa88e8a657..f46aeab4195a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_debugfs.c +++ b/drivers/infiniband/hw/hns/hns_roce_debugfs.c @@ -243,8 +243,8 @@ static void dca_setup_pool_name(pid_t pid, bool is_kdca, char *name, int size)
static u64 calc_loading_percent(size_t total, size_t free, u32 *out_rem) { - u32 all_pages, used_pages, free_pages, scale; - u64 percent = 0; + u64 used_pages, scale, all_pages, free_pages; + u64 percent = U64_MAX; u32 rem = 0;
all_pages = total >> HNS_HW_PAGE_SHIFT; @@ -270,6 +270,9 @@ static void dca_print_pool_stats(struct hns_roce_dca_ctx *ctx, pid_t pid, u32 rem = 0;
percent = calc_loading_percent(ctx->total_size, ctx->free_size, &rem); + if (percent == U64_MAX) + return; + dca_setup_pool_name(pid, is_kdca, name, sizeof(name)); seq_printf(file, "%-10s %-16ld %-16ld %-16u %llu.%0*u\n", name, ctx->total_size / KB, ctx->free_size / KB, ctx->free_mems, @@ -422,6 +425,9 @@ static void dca_stats_ctx_mem_in_seqfile(struct hns_roce_dca_ctx *ctx,
dca_ctx_stats_mem(ctx, &stats); percent = calc_loading_percent(stats.total_size, stats.free_size, &rem); + if (percent == U64_MAX) + return; + seq_printf(file, DCA_STAT_NAME_FMT "%llu.%0*u\n", "Loading:", percent, LOADING_PERCENT_SHIFT, rem); dca_ctx_print_mem_kb(file, "Total:", stats.total_size);
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/11036 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...