From: Xinghai Cen cenxinghai@h-partners.com
Mainline patch to olk-5.10:
Feng Fang (1): RDMA/hns: Fix different dgids mapping to the same dip_idx
Xinghai Cen (2): Revert "RDMA/hns: Fix flush cqe error when racing with destroy qp" Revert "RDMA/hns: Fix different dgids mapping to the same dip_idx"
wenglianfa (1): RDMA/hns: Fix flush cqe error when racing with destroy qp
drivers/infiniband/hw/hns/hns_roce_device.h | 11 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 117 ++++++++++---------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 - drivers/infiniband/hw/hns/hns_roce_main.c | 2 - drivers/infiniband/hw/hns/hns_roce_qp.c | 17 +-- 5 files changed, 61 insertions(+), 87 deletions(-)
From: Xinghai Cen cenxinghai@h-partners.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBIXIN CVE: NA
----------------------------------------------------------------------
This reverts commit a31744454c28ff2bc6bbfec17fd3fb216649c000 and commit 2b39c452903342e0821b767e2b7fbd0f2d02c25a.
Fixes: a31744454c28 ("RDMA/hns: Fix flush cqe error when racing with destroy qp") Fixes: 2b39c4529033 ("RDMA/hns: Fix flush cqe error when racing with destroy qp") Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 -- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 12 ------------ drivers/infiniband/hw/hns/hns_roce_qp.c | 15 ++------------- 3 files changed, 2 insertions(+), 27 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f12e56969fd3..f20054b36dc6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -717,7 +717,6 @@ struct hns_roce_dev;
enum { HNS_ROCE_FLUSH_FLAG = 0, - HNS_ROCE_STOP_FLUSH_FLAG = 1, };
struct hns_roce_work { @@ -778,7 +777,6 @@ struct hns_roce_qp { bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; struct hns_roce_dip *dip; - spinlock_t flush_lock; };
struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d3b0dccf7c48..bd27795d33c1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6330,23 +6330,11 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - unsigned long flags; int ret; - /* Make sure flush_cqe() is completed */ - spin_lock_irqsave(&hr_qp->flush_lock, flags); - set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); - spin_unlock_irqrestore(&hr_qp->flush_lock, flags); - flush_work(&hr_qp->flush_work.work);
if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) put_dip_ctx_idx(hr_dev, hr_qp);
- /* Make sure flush_cqe() is completed */ - spin_lock_irqsave(&hr_qp->flush_lock, flags); - set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); - spin_unlock_irqrestore(&hr_qp->flush_lock, flags); - flush_work(&hr_qp->flush_work.work); - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err_ratelimited(&hr_dev->ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a3eb05e8471e..f6fa66340c3e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -90,18 +90,11 @@ static void flush_work_handle(struct work_struct *work) void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_work *flush_work = &hr_qp->flush_work; - unsigned long flags; - - spin_lock_irqsave(&hr_qp->flush_lock, flags); - /* Exit directly after destroy_qp() */ - if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) { - spin_unlock_irqrestore(&hr_qp->flush_lock, flags); - return; - }
+ flush_work->hr_dev = hr_dev; + INIT_WORK(&flush_work->work, flush_work_handle); refcount_inc(&hr_qp->refcount); queue_work(hr_dev->irq_workq, &flush_work->work); - spin_unlock_irqrestore(&hr_qp->flush_lock, flags); }
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) @@ -1371,7 +1364,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata, struct hns_roce_qp *hr_qp) { - struct hns_roce_work *flush_work = &hr_qp->flush_work; struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; @@ -1380,12 +1372,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); spin_lock_init(&hr_qp->rq.lock); - spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; - flush_work->hr_dev = hr_dev; - INIT_WORK(&flush_work->work, flush_work_handle);
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) {
From: wenglianfa wenglianfa@huawei.com
mainline inclusion from mainline-v6.13-rc2 commit 377a2097705b915325a67e4d44f9f2844e567809 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB7JSL
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=377...
----------------------------------------------------------------------
QP needs to be modified to IB_QPS_ERROR to trigger HW flush cqe. But when this process races with destroy qp, the destroy-qp process may modify the QP to IB_QPS_RESET first. In this case flush cqe will fail since it is invalid to modify qp from IB_QPS_RESET to IB_QPS_ERROR.
Add a lock and a bit flag to make sure that pending flush cqe work is completed first and that no new work is queued afterwards.
Fixes: ffd541d45726 ("RDMA/hns: Add the workqueue framework for flush cqe handler") Signed-off-by: wenglianfa wenglianfa@huawei.com Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Link: https://patch.msgid.link/20241024124000.2931869-3-huangjunxian6@hisilicon.co... Reviewed-by: Zhu Yanjun yanjun.zhu@linux.dev Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 2 ++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 7 +++++++ drivers/infiniband/hw/hns/hns_roce_qp.c | 15 +++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f20054b36dc6..f12e56969fd3 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -717,6 +717,7 @@ struct hns_roce_dev;
enum { HNS_ROCE_FLUSH_FLAG = 0, + HNS_ROCE_STOP_FLUSH_FLAG = 1, };
struct hns_roce_work { @@ -777,6 +778,7 @@ struct hns_roce_qp { bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; struct hns_roce_dip *dip; + spinlock_t flush_lock; };
struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index bd27795d33c1..27f14fc9a34a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6330,8 +6330,15 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + unsigned long flags; int ret;
+ /* Make sure flush_cqe() is completed */ + spin_lock_irqsave(&hr_qp->flush_lock, flags); + set_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + flush_work(&hr_qp->flush_work.work); + if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) put_dip_ctx_idx(hr_dev, hr_qp);
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f6fa66340c3e..a3eb05e8471e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -90,11 +90,18 @@ static void flush_work_handle(struct work_struct *work) void init_flush_work(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) { struct hns_roce_work *flush_work = &hr_qp->flush_work; + unsigned long flags; + + spin_lock_irqsave(&hr_qp->flush_lock, flags); + /* Exit directly after destroy_qp() */ + if (test_bit(HNS_ROCE_STOP_FLUSH_FLAG, &hr_qp->flush_flag)) { + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); + return; + }
- flush_work->hr_dev = hr_dev; - INIT_WORK(&flush_work->work, flush_work_handle); refcount_inc(&hr_qp->refcount); queue_work(hr_dev->irq_workq, &flush_work->work); + spin_unlock_irqrestore(&hr_qp->flush_lock, flags); }
void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp) @@ -1364,6 +1371,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_udata *udata, struct hns_roce_qp *hr_qp) { + struct hns_roce_work *flush_work = &hr_qp->flush_work; struct hns_roce_ib_create_qp_resp resp = {}; struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_qp ucmd = {}; @@ -1372,9 +1380,12 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); spin_lock_init(&hr_qp->rq.lock); + spin_lock_init(&hr_qp->flush_lock);
hr_qp->state = IB_QPS_RESET; hr_qp->flush_flag = 0; + flush_work->hr_dev = hr_dev; + INIT_WORK(&flush_work->work, flush_work_handle);
ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) {
From: Xinghai Cen cenxinghai@h-partners.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBIXIN CVE: NA
----------------------------------------------------------------------
This reverts commit ae7d7dfce702c70b20bacda46315193709cd1771.
Fixes: ae7d7dfce702 ("RDMA/hns: Fix different dgids mapping to the same dip_idx") Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 6 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 58 +++------------------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 17 ++---- 4 files changed, 15 insertions(+), 67 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index f12e56969fd3..b56cdb9d1d85 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -613,8 +613,9 @@ struct hns_roce_bank { };
struct hns_roce_idx_table { - unsigned long *qpn_bitmap; - unsigned long *dip_idx_bitmap; + u32 *spare_idx; + u32 head; + u32 tail; };
struct hns_roce_qp_table { @@ -777,7 +778,6 @@ struct hns_roce_qp { u8 priority; bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; - struct hns_roce_dip *dip; spinlock_t flush_lock; };
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 27f14fc9a34a..fd600711d9df 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5313,24 +5313,21 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, { const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - unsigned long *dip_idx_bitmap = hr_dev->qp_table.idx_table.dip_idx_bitmap; - unsigned long *qpn_bitmap = hr_dev->qp_table.idx_table.qpn_bitmap; - struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); + u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx; + u32 *head = &hr_dev->qp_table.idx_table.head; + u32 *tail = &hr_dev->qp_table.idx_table.tail; struct hns_roce_dip *hr_dip; unsigned long flags; int ret = 0; - u32 idx;
spin_lock_irqsave(&hr_dev->dip_list_lock, flags);
- if (!test_bit(ibqp->qp_num, dip_idx_bitmap)) - set_bit(ibqp->qp_num, qpn_bitmap); + spare_idx[*tail] = ibqp->qp_num; + *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1);
list_for_each_entry(hr_dip, &hr_dev->dip_list, node) { if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { *dip_idx = hr_dip->dip_idx; - hr_dip->qp_cnt++; - hr_qp->dip = hr_dip; goto out; } } @@ -5344,21 +5341,9 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; }
- idx = find_first_bit(qpn_bitmap, hr_dev->caps.num_qps); - if (idx < hr_dev->caps.num_qps) { - *dip_idx = idx; - clear_bit(idx, qpn_bitmap); - set_bit(idx, dip_idx_bitmap); - } else { - ret = -ENOENT; - kfree(hr_dip); - goto out; - } - memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - hr_dip->dip_idx = *dip_idx; - hr_dip->qp_cnt++; - hr_qp->dip = hr_dip; + hr_dip->dip_idx = *dip_idx = spare_idx[*head]; + *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1); list_add_tail(&hr_dip->node, &hr_dev->dip_list);
out: @@ -6300,32 +6285,6 @@ int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return ret; }
-static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) -{ - unsigned long *dip_idx_bitmap = hr_dev->qp_table.idx_table.dip_idx_bitmap; - unsigned long *qpn_bitmap = hr_dev->qp_table.idx_table.qpn_bitmap; - struct hns_roce_dip *hr_dip = hr_qp->dip; - unsigned long flags; - - spin_lock_irqsave(&hr_dev->dip_list_lock, flags); - - if (hr_dip) { - hr_dip->qp_cnt--; - if (!hr_dip->qp_cnt) { - clear_bit(hr_dip->dip_idx, dip_idx_bitmap); - set_bit(hr_dip->dip_idx, qpn_bitmap); - - list_del(&hr_dip->node); - } else { - hr_dip = NULL; - } - } - - spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); - kfree(hr_dip); -} - int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); @@ -6339,9 +6298,6 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) spin_unlock_irqrestore(&hr_qp->flush_lock, flags); flush_work(&hr_qp->flush_work.work);
- if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) - put_dip_ctx_idx(hr_dev, hr_qp); - ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err_ratelimited(&hr_dev->ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 41f868d4b3f1..570b972354b6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1457,7 +1457,6 @@ struct hns_roce_v2_priv { struct hns_roce_dip { u8 dgid[GID_LEN_V2]; u32 dip_idx; - u32 qp_cnt; struct list_head node; /* all dips are on a list */ };
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index a3eb05e8471e..06e46d82c828 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1837,18 +1837,11 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) unsigned int reserved_from_bot; unsigned int i;
- qp_table->idx_table.qpn_bitmap = bitmap_zalloc(hr_dev->caps.num_qps, - GFP_KERNEL); - if (!qp_table->idx_table.qpn_bitmap) + qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps, + sizeof(u32), GFP_KERNEL); + if (!qp_table->idx_table.spare_idx) return -ENOMEM;
- qp_table->idx_table.dip_idx_bitmap = bitmap_zalloc(hr_dev->caps.num_qps, - GFP_KERNEL); - if (!qp_table->idx_table.dip_idx_bitmap) { - bitmap_free(qp_table->idx_table.qpn_bitmap); - return -ENOMEM; - } - mutex_init(&qp_table->scc_mutex); mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); @@ -1877,6 +1870,6 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev) for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); mutex_destroy(&hr_dev->qp_table.bank_mutex); - bitmap_free(hr_dev->qp_table.idx_table.qpn_bitmap); - bitmap_free(hr_dev->qp_table.idx_table.dip_idx_bitmap); + mutex_destroy(&hr_dev->qp_table.scc_mutex); + kfree(hr_dev->qp_table.idx_table.spare_idx); }
From: Feng Fang fangfeng4@huawei.com
mainline inclusion from mainline-v6.12-rc2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBIXIN
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/commit/?id=faa...
---------------------------------------------------------------------
DIP algorithm requires a one-to-one mapping between dgid and dip_idx. Currently a queue 'spare_idx' is used to store QPN of QPs that use DIP algorithm. For a new dgid, use a QPN from spare_idx as dip_idx. This method lacks a mechanism for deduplicating QPN, which may result in different dgids sharing the same dip_idx and break the one-to-one mapping requirement.
This patch replaces spare_idx with an xarray and introduces a refcnt for each dip_idx to indicate the number of QPs that are using that dip_idx.
The state machine for dip_idx management is implemented as:
* The entry at an index in xarray is empty -- This indicates that the corresponding dip_idx hasn't been created.
* The entry at an index in xarray is not empty but with 0 refcnt -- This indicates that the corresponding dip_idx has been created but not used as dip_idx yet.
* The entry at an index in xarray is not empty and with non-0 refcnt -- This indicates that the corresponding dip_idx is being used by refcnt number of DIP QPs.
Fixes: eb653eda1e91 ("RDMA/hns: Bugfix for incorrect association between dip_idx and dgid") Fixes: f91696f2f053 ("RDMA/hns: Support congestion control type selection according to the FW") Signed-off-by: Feng Fang fangfeng4@huawei.com Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com Link: https://patch.msgid.link/20241112055553.3681129-1-huangjunxian6@hisilicon.co... Signed-off-by: Leon Romanovsky leon@kernel.org Signed-off-by: Xinghai Cen cenxinghai@h-partners.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 11 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 98 +++++++++++++++------ drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 2 +- drivers/infiniband/hw/hns/hns_roce_main.c | 2 - drivers/infiniband/hw/hns/hns_roce_qp.c | 8 +- 5 files changed, 76 insertions(+), 45 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index b56cdb9d1d85..7d30b8380af6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -612,12 +612,6 @@ struct hns_roce_bank { u32 next; /* Next ID to allocate. */ };
-struct hns_roce_idx_table { - u32 *spare_idx; - u32 head; - u32 tail; -}; - struct hns_roce_qp_table { struct hns_roce_hem_table qp_table; struct hns_roce_hem_table irrl_table; @@ -626,7 +620,7 @@ struct hns_roce_qp_table { struct mutex scc_mutex; struct hns_roce_bank bank[HNS_ROCE_QP_BANK_NUM]; struct mutex bank_mutex; - struct hns_roce_idx_table idx_table; + struct xarray dip_xa; };
struct hns_roce_cq_table { @@ -779,6 +773,7 @@ struct hns_roce_qp { bool delayed_destroy_flag; struct hns_roce_mtr_node *mtr_node; spinlock_t flush_lock; + struct hns_roce_dip *dip; };
struct hns_roce_ib_iboe { @@ -1176,8 +1171,6 @@ struct hns_roce_dev { enum hns_roce_device_state state; struct list_head qp_list; /* list of all qps on this dev */ spinlock_t qp_list_lock; /* protect qp_list */ - struct list_head dip_list; /* list of all dest ips on this dev */ - spinlock_t dip_list_lock; /* protect dip_list */
struct list_head pgdir_list; struct mutex pgdir_mutex; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index fd600711d9df..05a21dce9116 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2903,20 +2903,19 @@ static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev) free_link_table_buf(hr_dev, &priv->ext_llm); }
-static void free_dip_list(struct hns_roce_dev *hr_dev) +static void free_dip_entry(struct hns_roce_dev *hr_dev) { struct hns_roce_dip *hr_dip; - struct hns_roce_dip *tmp; - unsigned long flags; + unsigned long idx;
- spin_lock_irqsave(&hr_dev->dip_list_lock, flags); + xa_lock(&hr_dev->qp_table.dip_xa);
- list_for_each_entry_safe(hr_dip, tmp, &hr_dev->dip_list, node) { - list_del(&hr_dip->node); - kfree(hr_dip); + xa_for_each(&hr_dev->qp_table.dip_xa, idx, hr_dip) { + __xa_erase(&hr_dev->qp_table.dip_xa, hr_dip->dip_idx); + kfree(hr_dip); }
- spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); + xa_unlock(&hr_dev->qp_table.dip_xa); }
static int hns_roce_v2_get_reset_page(struct hns_roce_dev *hr_dev) @@ -3377,7 +3376,7 @@ static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev) hns_roce_v2_put_reset_page(hr_dev);
if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP09) - free_dip_list(hr_dev); + free_dip_entry(hr_dev); }
static inline void mbox_desc_init(struct hns_roce_post_mbox *mb, @@ -5308,26 +5307,49 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, int attr_mask, return 0; }
+static int alloc_dip_entry(struct xarray *dip_xa, u32 qpn) +{ + struct hns_roce_dip *hr_dip; + int ret; + + hr_dip = xa_load(dip_xa, qpn); + if (hr_dip) + return 0; + + hr_dip = kzalloc(sizeof(*hr_dip), GFP_KERNEL); + if (!hr_dip) + return -ENOMEM; + + ret = xa_err(xa_store(dip_xa, qpn, hr_dip, GFP_KERNEL)); + if (ret) + kfree(hr_dip); + + return ret; +} + static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, u32 *dip_idx) { const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); - u32 *spare_idx = hr_dev->qp_table.idx_table.spare_idx; - u32 *head = &hr_dev->qp_table.idx_table.head; - u32 *tail = &hr_dev->qp_table.idx_table.tail; + struct xarray *dip_xa = &hr_dev->qp_table.dip_xa; + struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_dip *hr_dip; - unsigned long flags; + unsigned long idx; int ret = 0;
- spin_lock_irqsave(&hr_dev->dip_list_lock, flags); + ret = alloc_dip_entry(dip_xa, ibqp->qp_num); + if (ret) + return ret;
- spare_idx[*tail] = ibqp->qp_num; - *tail = (*tail == hr_dev->caps.num_qps - 1) ? 0 : (*tail + 1); + xa_lock(dip_xa);
- list_for_each_entry(hr_dip, &hr_dev->dip_list, node) { - if (!memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { + xa_for_each(dip_xa, idx, hr_dip) { + if (hr_dip->qp_cnt && + !memcmp(grh->dgid.raw, hr_dip->dgid, GID_LEN_V2)) { *dip_idx = hr_dip->dip_idx; + hr_dip->qp_cnt++; + hr_qp->dip = hr_dip; goto out; } } @@ -5335,19 +5357,24 @@ static int get_dip_ctx_idx(struct ib_qp *ibqp, const struct ib_qp_attr *attr, /* If no dgid is found, a new dip and a mapping between dgid and * dip_idx will be created. */ - hr_dip = kzalloc(sizeof(*hr_dip), GFP_ATOMIC); - if (!hr_dip) { - ret = -ENOMEM; - goto out; + xa_for_each(dip_xa, idx, hr_dip) { + if (hr_dip->qp_cnt) + continue; + + *dip_idx = idx; + memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); + hr_dip->dip_idx = idx; + hr_dip->qp_cnt++; + hr_qp->dip = hr_dip; + break; }
- memcpy(hr_dip->dgid, grh->dgid.raw, sizeof(grh->dgid.raw)); - hr_dip->dip_idx = *dip_idx = spare_idx[*head]; - *head = (*head == hr_dev->caps.num_qps - 1) ? 0 : (*head + 1); - list_add_tail(&hr_dip->node, &hr_dev->dip_list); + /* This should never happen. */ + if (WARN_ON_ONCE(!hr_qp->dip)) + ret = -ENOSPC;
out: - spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); + xa_unlock(dip_xa); return ret; }
@@ -6285,6 +6312,20 @@ int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return ret; }
+static void put_dip_ctx_idx(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp) +{ + struct hns_roce_dip *hr_dip = hr_qp->dip; + + xa_lock(&hr_dev->qp_table.dip_xa); + + hr_dip->qp_cnt--; + if (!hr_dip->qp_cnt) + memset(hr_dip->dgid, 0, GID_LEN_V2); + + xa_unlock(&hr_dev->qp_table.dip_xa); +} + int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); @@ -6298,6 +6339,9 @@ int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) spin_unlock_irqrestore(&hr_qp->flush_lock, flags); flush_work(&hr_qp->flush_work.work);
+ if (hr_qp->congest_type == HNS_ROCE_CONGEST_TYPE_DIP) + put_dip_ctx_idx(hr_dev, hr_qp); + ret = hns_roce_v2_destroy_qp_common(hr_dev, hr_qp, udata); if (ret) ibdev_err_ratelimited(&hr_dev->ib_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 570b972354b6..c59e40649454 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -1457,7 +1457,7 @@ struct hns_roce_v2_priv { struct hns_roce_dip { u8 dgid[GID_LEN_V2]; u32 dip_idx; - struct list_head node; /* all dips are on a list */ + u32 qp_cnt; };
struct fmea_ram_ecc { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 3872e777c74b..99accce12665 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -1319,8 +1319,6 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
INIT_LIST_HEAD(&hr_dev->qp_list); spin_lock_init(&hr_dev->qp_list_lock); - INIT_LIST_HEAD(&hr_dev->dip_list); - spin_lock_init(&hr_dev->dip_list_lock);
INIT_LIST_HEAD(&hr_dev->uctx_list); mutex_init(&hr_dev->uctx_list_mutex); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 06e46d82c828..fc123b16797b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1837,14 +1837,10 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev) unsigned int reserved_from_bot; unsigned int i;
- qp_table->idx_table.spare_idx = kcalloc(hr_dev->caps.num_qps, - sizeof(u32), GFP_KERNEL); - if (!qp_table->idx_table.spare_idx) - return -ENOMEM; - mutex_init(&qp_table->scc_mutex); mutex_init(&qp_table->bank_mutex); xa_init(&hr_dev->qp_table_xa); + xa_init(&qp_table->dip_xa);
reserved_from_bot = hr_dev->caps.reserved_qps;
@@ -1869,7 +1865,7 @@ void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev)
for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) ida_destroy(&hr_dev->qp_table.bank[i].ida); + xa_destroy(&hr_dev->qp_table.dip_xa); mutex_destroy(&hr_dev->qp_table.bank_mutex); mutex_destroy(&hr_dev->qp_table.scc_mutex); - kfree(hr_dev->qp_table.idx_table.spare_idx); }