There are some issues with SRQ on HIP08/HIP09; the first part of this series fixes them.
In addition, the RQ/SRQ code, including the creation and post recv flows, is hard to follow, so the rest of the series refactors it.
Lang Cheng (2):
  RDMA/hns: Allocate one more recv SGE for HIP08
  RDMA/hns: Use new interfaces to write SRQC

Wenpeng Liang (8):
  RDMA/hns: Bugfix for checking whether the srq is full when post wr
  RDMA/hns: Force srq_limit to 0 when creating SRQ
  RDMA/hns: Fixed wrong judgments in the goto branch
  RDMA/hns: Remove the reserved WQE of SRQ
  RDMA/hns: Refactor hns_roce_create_srq()
  RDMA/hns: Refactor code about SRQ Context
  RDMA/hns: Refactor hns_roce_v2_post_srq_recv()
  RDMA/hns: Add verification of QP type when post_recv

Xi Wang (2):
  RDMA/hns: Refactor post recv flow
  RDMA/hns: Clear remaining unused sges when post_recv

 drivers/infiniband/hw/hns/hns_roce_device.h |  16 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 407 +++++++++++++++-------------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  72 +++--
 drivers/infiniband/hw/hns/hns_roce_main.c   |   3 +-
 drivers/infiniband/hw/hns/hns_roce_qp.c     |  37 ++-
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 329 ++++++++++++----------
 6 files changed, 510 insertions(+), 354 deletions(-)
From: Lang Cheng <chenglang@huawei.com>
The RQ/SRQ of HIP08 needs one special SGE to stop receiving reliably. So the driver needs to allocate one more SGE when creating the RQ/SRQ and ensure that at least one SGE is filled with the special value during post_recv.

Besides, the kernel driver should only do this for kernel ULPs. For userspace ULPs, the userspace driver will allocate the reserved SGE in the buffer, and the kernel driver just needs to pin the corresponding amount of memory based on the userspace driver's request.
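As a minimal sketch of this accounting (standalone C, not driver code; roundup_pow_of_two() here is a local stand-in for the kernel helper), a kernel ULP that asks for three SGEs on HIP08 gets a WQE sized for four, while the capability reported back excludes the reserved slot:

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int requested_sge = 3;	/* what the ULP asks for */
	unsigned int rsv_sge = 1;	/* HIP08 kernel ULP only */
	unsigned int max_gs;

	/* The WQE buffer is sized with the reserved SGE included... */
	max_gs = roundup_pow_of_two(requested_sge + rsv_sge);
	/* ...but the ULP is only told about the usable part. */
	printf("allocated %u SGEs per WQE, %u usable\n",
	       max_gs, max_gs - rsv_sge);
	return 0;
}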
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  4 +++
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 28 +++++++++++++-------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h  |  3 ++-
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 37 +++++++++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 40 ++++++++++++++++++++++++++---
 5 files changed, 93 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index f62851f..72961e4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -65,6 +65,8 @@
 #define HNS_ROCE_CQE_WCMD_EMPTY_BIT	0x2
 #define HNS_ROCE_MIN_CQE_CNT		16

+#define HNS_ROCE_RESERVED_SGE		1
+
 #define HNS_ROCE_MAX_IRQ_NUM		128

 #define HNS_ROCE_SGE_IN_WQE		2
@@ -395,6 +397,7 @@ struct hns_roce_wq {
	spinlock_t	lock;
	u32		wqe_cnt;  /* WQE num */
	u32		max_gs;
+	u32		rsv_sge;
	int		offset;
	int		wqe_shift;  /* WQE size */
	u32		head;
@@ -498,6 +501,7 @@ struct hns_roce_srq {
	unsigned long		srqn;
	u32			wqe_cnt;
	int			max_gs;
+	u32			rsv_sge;
	int			wqe_shift;
	void __iomem		*db_reg_l;

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a5bbfb1..2245d25 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -741,6 +741,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
	unsigned long flags;
	void *wqe = NULL;
	u32 wqe_idx;
+	u32 max_sge;
	int nreq;
	int ret;
	int i;
@@ -754,6 +755,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
		goto out;
	}

+	max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq,
						  hr_qp->ibqp.recv_cq))) {
@@ -764,9 +766,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,

		wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);

-		if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) {
+		if (unlikely(wr->num_sge > max_sge)) {
			ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
-				  wr->num_sge, hr_qp->rq.max_gs);
+				  wr->num_sge, max_sge);
			ret = -EINVAL;
			*bad_wr = wr;
			goto out;
@@ -781,9 +783,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
			dseg++;
		}

-		if (wr->num_sge < hr_qp->rq.max_gs) {
+		if (hr_qp->rq.rsv_sge) {
			dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
			dseg->addr = 0;
+			dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
		}

		/* rq support inline data */
@@ -879,6 +882,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
	__le32 *srq_idx;
	int ret = 0;
	int wqe_idx;
+	u32 max_sge;
	void *wqe;
	int nreq;
	int i;
@@ -886,9 +890,13 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
	spin_lock_irqsave(&srq->lock, flags);

	ind = srq->head & (srq->wqe_cnt - 1);
+	max_sge = srq->max_gs - srq->rsv_sge;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (unlikely(wr->num_sge >= srq->max_gs)) {
+		if (unlikely(wr->num_sge > max_sge)) {
+			ibdev_err(&hr_dev->ib_dev,
+				  "srq: num_sge = %d, max_sge = %u.\n",
+				  wr->num_sge, max_sge);
			ret = -EINVAL;
			*bad_wr = wr;
			break;
@@ -916,9 +924,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
			dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
		}

-		if (wr->num_sge < srq->max_gs) {
-			dseg[i].len = 0;
-			dseg[i].lkey = cpu_to_le32(0x100);
+		if (srq->rsv_sge) {
+			dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+			dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
			dseg[i].addr = 0;
		}

@@ -1999,10 +2007,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
	caps->max_sq_sg		= le16_to_cpu(resp_a->max_sq_sg);
	caps->max_sq_inline	= le16_to_cpu(resp_a->max_sq_inline);
	caps->max_rq_sg		= le16_to_cpu(resp_a->max_rq_sg);
+	caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg);
	caps->max_extend_sg	= le32_to_cpu(resp_a->max_extend_sg);
	caps->num_qpc_timer	= le16_to_cpu(resp_a->num_qpc_timer);
	caps->num_cqc_timer	= le16_to_cpu(resp_a->num_cqc_timer);
	caps->max_srq_sges	= le16_to_cpu(resp_a->max_srq_sges);
+	caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges);
	caps->num_aeq_vectors	= resp_a->num_aeq_vectors;
	caps->num_other_vectors	= resp_a->num_other_vectors;
	caps->max_sq_desc_sz	= resp_a->max_sq_desc_sz;
@@ -5071,7 +5081,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 done:
	qp_attr->cur_qp_state = qp_attr->qp_state;
	qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt;
-	qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs;
+	qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;

	if (!ibqp->uobject) {
		qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt;
@@ -5383,7 +5393,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)

	attr->srq_limit = limit_wl;
	attr->max_wr = srq->wqe_cnt - 1;
-	attr->max_sge = srq->max_gs;
+	attr->max_sge = srq->max_gs - srq->rsv_sge;

 out:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 69bc072..cd9abdd 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -96,7 +96,8 @@
 #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ		PAGE_SIZE
 #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED		0xFFFFF000
 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM		2
-#define HNS_ROCE_INVALID_LKEY			0x100
+#define HNS_ROCE_INVALID_LKEY			0x0
+#define HNS_ROCE_INVALID_SGE_LENGTH		0x80000000
 #define HNS_ROCE_CMQ_TX_TIMEOUT			30000
 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE	2
 #define HNS_ROCE_V2_RSV_QPS			8
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 9988ca9..8af411f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
	spin_unlock(&hr_dev->qp_table.bank_lock);
 }

+static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp,
+		       bool user)
+{
+	u32 max_sge = dev->caps.max_rq_sg;
+
+	if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		return max_sge;
+
+	/* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+	 * calculate number of max_sge with reserved SGEs when allocating wqe
+	 * buf, so there is no need to do this again in kernel. But the number
+	 * may exceed the capacity of SGEs recorded in the firmware, so the
+	 * kernel driver should just adapt the value accordingly.
+	 */
+	if (user)
+		max_sge = roundup_pow_of_two(max_sge + 1);
+	else
+		hr_qp->rq.rsv_sge = 1;
+
+	return max_sge;
+}
+
 static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
-		       struct hns_roce_qp *hr_qp, int has_rq)
+		       struct hns_roce_qp *hr_qp, int has_rq, bool user)
 {
+	u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user);
	u32 cnt;

	/* If srq exist, set zero for relative number of rq */
@@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,

	/* Check the validity of QP support capacity */
	if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes ||
-	    cap->max_recv_sge > hr_dev->caps.max_rq_sg) {
-		ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n",
+	    cap->max_recv_sge > max_sge) {
+		ibdev_err(&hr_dev->ib_dev,
+			  "RQ config error, depth = %u, sge = %u\n",
			  cap->max_recv_wr, cap->max_recv_sge);
		return -EINVAL;
	}
@@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
		return -EINVAL;
	}

-	hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge));
+	hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) +
+					      hr_qp->rq.rsv_sge);

	if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE)
		hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz);
@@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
		hr_qp->rq_inl_buf.wqe_cnt = 0;

	cap->max_recv_wr = cnt;
-	cap->max_recv_sge = hr_qp->rq.max_gs;
+	cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;

	return 0;
 }
@@ -918,7 +943,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
		hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;

	ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
-			  hns_roce_qp_has_rq(init_attr));
+			  hns_roce_qp_has_rq(init_attr), !!udata);
	if (ret) {
		ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n",
			  ret);
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 9403828..1be6812 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2018 Hisilicon Limited.
  */

+#include <linux/pci.h>
 #include <rdma/ib_umem.h>
 #include "hns_roce_device.h"
 #include "hns_roce_cmd.h"
@@ -277,6 +278,28 @@ static void free_srq_wrid(struct hns_roce_srq *srq)
	srq->wrid = NULL;
 }

+static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
+			bool user)
+{
+	u32 max_sge = dev->caps.max_srq_sges;
+
+	if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		return max_sge;
+
+	/* Reserve SGEs only for HIP08 in kernel; The userspace driver will
+	 * calculate number of max_sge with reserved SGEs when allocating wqe
+	 * buf, so there is no need to do this again in kernel. But the number
+	 * may exceed the capacity of SGEs recorded in the firmware, so the
+	 * kernel driver should just adapt the value accordingly.
+	 */
+	if (user)
+		max_sge = roundup_pow_of_two(max_sge + 1);
+	else
+		hr_srq->rsv_sge = 1;
+
+	return max_sge;
+}
+
 int hns_roce_create_srq(struct ib_srq *ib_srq,
			struct ib_srq_init_attr *init_attr,
			struct ib_udata *udata)
@@ -286,6 +309,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
	struct hns_roce_srq *srq = to_hr_srq(ib_srq);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_ib_create_srq ucmd = {};
+	u32 max_sge;
	int ret;
	u32 cqn;

@@ -293,16 +317,24 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
	    init_attr->srq_type != IB_SRQT_XRC)
		return -EOPNOTSUPP;

-	/* Check the actual SRQ wqe and SRQ sge num */
+	max_sge = proc_srq_sge(hr_dev, srq, !!udata);
+
	if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
-	    init_attr->attr.max_sge > hr_dev->caps.max_srq_sges)
+	    init_attr->attr.max_sge > max_sge) {
+		ibdev_err(&hr_dev->ib_dev,
+			  "SRQ config error, depth = %u, sge = %d\n",
+			  init_attr->attr.max_wr, init_attr->attr.max_sge);
		return -EINVAL;
+	}

	mutex_init(&srq->mutex);
	spin_lock_init(&srq->lock);

	srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
-	srq->max_gs = init_attr->attr.max_sge;
+	srq->max_gs =
+		roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
+	init_attr->attr.max_wr = srq->wqe_cnt;
+	init_attr->attr.max_sge = srq->max_gs;

	if (udata) {
		ret = ib_copy_from_udata(&ucmd, udata,
@@ -349,6 +381,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,

	srq->event = hns_roce_ib_srq_event;
	resp.srqn = srq->srqn;
+	srq->max_gs = init_attr->attr.max_sge;
+	init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge;

	if (udata) {
		ret = ib_copy_to_udata(udata, &resp,
From: Wenpeng Liang <liangwenpeng@huawei.com>
If a user posts WRs as a wr_list, the head pointer of the idx_queue won't be updated until all WQEs in the list are filled, so judging fullness by whether head equals tail gives a wrong result. Fix the above issue, and move the head and tail pointers from the srq structure into the idx_queue structure. After the idx_queue is filled with WQE indexes, its head pointer is increased.
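A toy model of the corrected fullness test (standalone C, not driver code; at this point in the series one WQE is still reserved, hence the wqe_cnt - 1 bound):

#include <stdbool.h>
#include <stdio.h>

struct idx_que {
	unsigned int head;	/* advanced once per posted batch */
	unsigned int tail;	/* advanced as WQEs are consumed */
};

static bool srqwq_overflow(struct idx_que *q, unsigned int wqe_cnt,
			   unsigned int nreq)
{
	/* Unsigned subtraction handles head/tail wraparound. */
	return q->head - q->tail + nreq >= wqe_cnt - 1;
}

int main(void)
{
	struct idx_que q = { .head = 5, .tail = 0 };

	/* Only two usable WQEs remain out of 8, yet the old
	 * "head == tail" test would accept a batch of three. */
	printf("overflow: %d\n", srqwq_overflow(&q, 8, 3));
	return 0;
}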
Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  4 ++--
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 19 ++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_srq.c    |  5 +++--
 3 files changed, 19 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 72961e4..916e031 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -494,6 +494,8 @@ struct hns_roce_idx_que {
	struct hns_roce_mtr		mtr;
	int				entry_shift;
	unsigned long			*bitmap;
+	u32				head;
+	u32				tail;
 };

 struct hns_roce_srq {
@@ -513,8 +515,6 @@ struct hns_roce_srq {
	u64		       *wrid;
	struct hns_roce_idx_que idx_que;
	spinlock_t		lock;
-	u16			head;
-	u16			tail;
	struct mutex		mutex;
	void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event);
 };
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 2245d25..b13775a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -849,11 +849,20 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
	spin_lock(&srq->lock);

	bitmap_clear(srq->idx_que.bitmap, wqe_index, 1);
-	srq->tail++;
+	srq->idx_que.tail++;

	spin_unlock(&srq->lock);
 }

+int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
+{
+	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	unsigned int cur;
+
+	cur = idx_que->head - idx_que->tail;
+	return cur + nreq >= srq->wqe_cnt - 1;
+}
+
 static int find_empty_entry(struct hns_roce_idx_que *idx_que,
			    unsigned long size)
 {
@@ -889,7 +898,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,

	spin_lock_irqsave(&srq->lock, flags);

-	ind = srq->head & (srq->wqe_cnt - 1);
+	ind = srq->idx_que.head & (srq->wqe_cnt - 1);
	max_sge = srq->max_gs - srq->rsv_sge;

	for (nreq = 0; wr; ++nreq, wr = wr->next) {
@@ -902,7 +911,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
			break;
		}

-		if (unlikely(srq->head == srq->tail)) {
+		if (unlikely(hns_roce_srqwq_overflow(srq, nreq))) {
			ret = -ENOMEM;
			*bad_wr = wr;
			break;
@@ -938,7 +947,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
	}

	if (likely(nreq)) {
-		srq->head += nreq;
+		srq->idx_que.head += nreq;

		/*
		 * Make sure that descriptors are written before
@@ -950,7 +959,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
			cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S |
				    (srq->srqn & V2_DB_BYTE_4_TAG_M));
		srq_db.parameter =
-			cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M);
+			cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M);

		hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l);
	}
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 1be6812..e622fd1d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -245,6 +245,9 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
		}
	}

+	idx_que->head = 0;
+	idx_que->tail = 0;
+
	return 0;
 err_idx_mtr:
	hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
@@ -263,8 +266,6 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)

 static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 {
-	srq->head = 0;
-	srq->tail = srq->wqe_cnt - 1;
	srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
	if (!srq->wrid)
		return -ENOMEM;
From: Wenpeng Liang <liangwenpeng@huawei.com>
According to the IB Specification, srq_limit shouldn't be configured during SRQ creation. If a user sets srq_limit at this time, the driver should force it to zero; otherwise the result of creating the SRQ will conflict with the result of querying it.
Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_srq.c | 1 +
 1 file changed, 1 insertion(+)
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index e622fd1d..47e66fe 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -336,6 +336,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
		roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
	init_attr->attr.max_wr = srq->wqe_cnt;
	init_attr->attr.max_sge = srq->max_gs;
+	init_attr->attr.srq_limit = 0;

	if (udata) {
		ret = ib_copy_from_udata(&ucmd, udata,
From: Wenpeng Liang <liangwenpeng@huawei.com>
When an error occurs, the qp_table must be cleared, regardless of whether the SRQ feature is enabled.
Fixes: 5c1f167af112 ("RDMA/hns: Init SRQ table for hip08")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 2b78b1f..7978220 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -768,8 +768,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev)
	return 0;

 err_qp_table_free:
-	if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ)
-		hns_roce_cleanup_qp_table(hr_dev);
+	hns_roce_cleanup_qp_table(hr_dev);

 err_cq_table_free:
	hns_roce_cleanup_cq_table(hr_dev);
From: Wenpeng Liang <liangwenpeng@huawei.com>
Each SRQ contains a reserved WQE; this is inappropriate and should be removed.
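A worked example of the depth accounting (standalone C sketch; roundup_pow_of_two() stands in for the kernel helper): with the reserved WQE, a request for 8 WRs costs 16 WQEs after rounding, while without it 8 suffice:

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
	unsigned int r = 1;

	while (r < n)
		r <<= 1;
	return r;
}

int main(void)
{
	unsigned int max_wr = 8;

	/* Old scheme: one WQE reserved, so the +1 pushes the count to
	 * the next power of two and query_srq reported wqe_cnt - 1. */
	printf("old wqe_cnt = %u\n", roundup_pow_of_two(max_wr + 1));
	/* New scheme: no reserved WQE, every slot is usable. */
	printf("new wqe_cnt = %u\n", roundup_pow_of_two(max_wr));
	return 0;
}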
Fixes: c7bcb13442e1 ("RDMA/hns: Add SRQ support for hip08 kernel mode")
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h | 1 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 6 +++---
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 6 ++++--
 3 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 916e031..d6a846b 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -54,6 +54,7 @@
 /* Hardware specification only for v1 engine */
 #define HNS_ROCE_MIN_CQE_NUM			0x40
 #define HNS_ROCE_MIN_WQE_NUM			0x20
+#define HNS_ROCE_MIN_SRQ_WQE_NUM		1

 /* Hardware specification only for v1 engine */
 #define HNS_ROCE_MAX_INNER_MTPT_NUM		0x7
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index b13775a..49a8456 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -860,7 +860,7 @@ int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
	unsigned int cur;

	cur = idx_que->head - idx_que->tail;
-	return cur + nreq >= srq->wqe_cnt - 1;
+	return cur + nreq >= srq->wqe_cnt;
 }

 static int find_empty_entry(struct hns_roce_idx_que *idx_que,
@@ -5338,7 +5338,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
		return -EINVAL;

	if (srq_attr_mask & IB_SRQ_LIMIT) {
-		if (srq_attr->srq_limit >= srq->wqe_cnt)
+		if (srq_attr->srq_limit > srq->wqe_cnt)
			return -EINVAL;

		mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
@@ -5401,7 +5401,7 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
				  SRQC_BYTE_8_SRQ_LIMIT_WL_S);

	attr->srq_limit = limit_wl;
-	attr->max_wr = srq->wqe_cnt - 1;
+	attr->max_wr = srq->wqe_cnt;
	attr->max_sge = srq->max_gs - srq->rsv_sge;

 out:
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 47e66fe..5d20b30 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -320,7 +320,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,

	max_sge = proc_srq_sge(hr_dev, srq, !!udata);

-	if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs ||
+	if (init_attr->attr.max_wr > hr_dev->caps.max_srq_wrs ||
	    init_attr->attr.max_sge > max_sge) {
		ibdev_err(&hr_dev->ib_dev,
			  "SRQ config error, depth = %u, sge = %d\n",
@@ -331,7 +331,9 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
	mutex_init(&srq->mutex);
	spin_lock_init(&srq->lock);

-	srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1);
+	init_attr->attr.max_wr = max_t(u32, init_attr->attr.max_wr,
+				       HNS_ROCE_MIN_SRQ_WQE_NUM);
+	srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr);
	srq->max_gs =
		roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
	init_attr->attr.max_wr = srq->wqe_cnt;
From: Wenpeng Liang <liangwenpeng@huawei.com>
Split the SRQ creation process into multiple steps and encapsulate them into functions.
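The resulting shape of the creation path can be modeled with stubs (standalone C sketch; the real steps take the device, SRQ, and udata arguments shown in the diff below):

#include <stdio.h>

static int set_srq_param(void)  { return 0; } /* validate + derive sizes */
static int alloc_srq_buf(void)  { return 0; } /* idx que, WQE buf, wrid */
static int alloc_srqc(void)     { return 0; } /* write + post SRQ context */
static void free_srq_buf(void)  { }

static int create_srq(void)
{
	int ret;

	ret = set_srq_param();
	if (ret)
		return ret;
	ret = alloc_srq_buf();
	if (ret)
		return ret;
	ret = alloc_srqc();
	if (ret)
		goto err_srq_buf;
	return 0;

err_srq_buf:
	free_srq_buf();
	return ret;
}

int main(void)
{
	printf("create_srq -> %d\n", create_srq());
	return 0;
}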
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |   5 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  12 +-
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 269 ++++++++++++++++------------
 3 files changed, 161 insertions(+), 125 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index d6a846b..b325b9c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -506,6 +506,7 @@ struct hns_roce_srq {
	int			max_gs;
	u32			rsv_sge;
	int			wqe_shift;
+	u32			cqn;
	void __iomem		*db_reg_l;

	atomic_t		refcount;
@@ -953,8 +954,8 @@ struct hns_roce_hw {
	int (*init_eq)(struct hns_roce_dev *hr_dev);
	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
	void (*write_srqc)(struct hns_roce_dev *hr_dev,
-			   struct hns_roce_srq *srq, u32 pdn, u16 xrcd, u32 cqn,
-			   void *mb_buf, u64 *mtts_wqe, u64 *mtts_idx,
+			   struct hns_roce_srq *srq, void *mb_buf,
+			   u64 *mtts_wqe, u64 *mtts_idx,
			   dma_addr_t dma_handle_wqe,
			   dma_addr_t dma_handle_idx);
	int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 49a8456..ec2d64c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5228,9 +5228,9 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
 }

 static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
-				   struct hns_roce_srq *srq, u32 pdn, u16 xrcd,
-				   u32 cqn, void *mb_buf, u64 *mtts_wqe,
-				   u64 *mtts_idx, dma_addr_t dma_handle_wqe,
+				   struct hns_roce_srq *srq, void *mb_buf,
+				   u64 *mtts_wqe, u64 *mtts_idx,
+				   dma_addr_t dma_handle_wqe,
				   dma_addr_t dma_handle_idx)
 {
	struct hns_roce_srq_context *srq_context;
@@ -5257,7 +5257,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
		       SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);

	roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
-		       SRQC_BYTE_12_SRQ_XRCD_S, xrcd);
+		       SRQC_BYTE_12_SRQ_XRCD_S, 0);

	srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));

@@ -5267,7 +5267,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
		       dma_handle_wqe >> 35);

	roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
-		       SRQC_BYTE_28_PD_S, pdn);
+		       SRQC_BYTE_28_PD_S, to_hr_pd(srq->ibsrq.pd)->pdn);
	roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
		       SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
		       fls(srq->max_gs - 1));
@@ -5307,7 +5307,7 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
		       upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
	roce_set_field(srq_context->byte_56_xrc_cqn,
		       SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
-		       cqn);
+		       srq->cqn);
	roce_set_field(srq_context->byte_56_xrc_cqn,
		       SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
		       SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 5d20b30..5069b81 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -77,8 +77,7 @@ static int hns_roce_hw_destroy_srq(struct hns_roce_dev *dev,
				 HNS_ROCE_CMD_TIMEOUT_MSECS);
 }

-static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
-		      u32 pdn, u32 cqn, u16 xrcd, u64 db_rec_addr)
+static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 {
	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
	struct ib_device *ibdev = &hr_dev->ib_dev;
@@ -133,9 +132,8 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
		goto err_xa;
	}

-	hr_dev->hw->write_srqc(hr_dev, srq, pdn, xrcd, cqn, mailbox->buf,
-			       mtts_wqe, mtts_idx, dma_handle_wqe,
-			       dma_handle_idx);
+	hr_dev->hw->write_srqc(hr_dev, srq, mailbox->buf, mtts_wqe, mtts_idx,
+			       dma_handle_wqe, dma_handle_idx);

	ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
@@ -144,9 +142,7 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
		goto err_xa;
	}

-	atomic_set(&srq->refcount, 1);
-	init_completion(&srq->free);
-	return ret;
+	return 0;

 err_xa:
	xa_erase(&srq_table->xa, srq->srqn);
@@ -179,45 +175,13 @@ static void free_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
 }

-static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
-			 struct ib_udata *udata, unsigned long addr)
-{
-	struct ib_device *ibdev = &hr_dev->ib_dev;
-	struct hns_roce_buf_attr buf_attr = {};
-	int err;
-
-	srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
-						      HNS_ROCE_SGE_SIZE *
-						      srq->max_gs)));
-
-	buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
-	buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
-							 srq->wqe_shift);
-	buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
-	buf_attr.region_count = 1;
-
-	err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
-				  hr_dev->caps.srqwqe_ba_pg_sz +
-				  HNS_HW_PAGE_SHIFT, udata, addr);
-	if (err)
-		ibdev_err(ibdev,
-			  "failed to alloc SRQ buf mtr, ret = %d.\n", err);
-
-	return err;
-}
-
-static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
-{
-	hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
-}
-
 static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
			 struct ib_udata *udata, unsigned long addr)
 {
	struct hns_roce_idx_que *idx_que = &srq->idx_que;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_attr buf_attr = {};
-	int err;
+	int ret;

	srq->idx_que.entry_shift = ilog2(HNS_ROCE_IDX_QUE_ENTRY_SZ);

@@ -227,20 +191,20 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
	buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num;
	buf_attr.region_count = 1;

-	err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
+	ret = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr,
				  hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
				  udata, addr);
-	if (err) {
+	if (ret) {
		ibdev_err(ibdev,
-			  "failed to alloc SRQ idx mtr, ret = %d.\n", err);
-		return err;
+			  "failed to alloc SRQ idx mtr, ret = %d.\n", ret);
+		return ret;
	}

	if (!udata) {
		idx_que->bitmap = bitmap_zalloc(srq->wqe_cnt, GFP_KERNEL);
		if (!idx_que->bitmap) {
			ibdev_err(ibdev, "failed to alloc SRQ idx bitmap.\n");
-			err = -ENOMEM;
+			ret = -ENOMEM;
			goto err_idx_mtr;
		}
	}
@@ -252,7 +216,7 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
 err_idx_mtr:
	hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);

-	return err;
+	return ret;
 }

 static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
@@ -264,6 +228,40 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
	hns_roce_mtr_destroy(hr_dev, &idx_que->mtr);
 }

+static int alloc_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_srq *srq,
+			     struct ib_udata *udata, unsigned long addr)
+{
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_buf_attr buf_attr = {};
+	int ret;
+
+	srq->wqe_shift = ilog2(roundup_pow_of_two(max(HNS_ROCE_SGE_SIZE,
+						      HNS_ROCE_SGE_SIZE *
+						      srq->max_gs)));
+
+	buf_attr.page_shift = hr_dev->caps.srqwqe_buf_pg_sz + HNS_HW_PAGE_SHIFT;
+	buf_attr.region[0].size = to_hr_hem_entries_size(srq->wqe_cnt,
+							 srq->wqe_shift);
+	buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num;
+	buf_attr.region_count = 1;
+
+	ret = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr,
+				  hr_dev->caps.srqwqe_ba_pg_sz +
+				  HNS_HW_PAGE_SHIFT, udata, addr);
+	if (ret)
+		ibdev_err(ibdev,
+			  "failed to alloc SRQ buf mtr, ret = %d.\n", ret);
+
+	return ret;
+}
+
+static void free_srq_wqe_buf(struct hns_roce_dev *hr_dev,
+			     struct hns_roce_srq *srq)
+{
+	hns_roce_mtr_destroy(hr_dev, &srq->buf_mtr);
+}
+
 static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
 {
	srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL);
@@ -301,110 +299,149 @@ static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq,
	return max_sge;
 }

-int hns_roce_create_srq(struct ib_srq *ib_srq,
-			struct ib_srq_init_attr *init_attr,
-			struct ib_udata *udata)
+static int set_srq_basic_param(struct hns_roce_srq *srq,
+			       struct ib_srq_init_attr *init_attr,
+			       struct ib_udata *udata)
 {
-	struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
-	struct hns_roce_ib_create_srq_resp resp = {};
-	struct hns_roce_srq *srq = to_hr_srq(ib_srq);
-	struct ib_device *ibdev = &hr_dev->ib_dev;
-	struct hns_roce_ib_create_srq ucmd = {};
+	struct hns_roce_dev *hr_dev = to_hr_dev(srq->ibsrq.device);
+	struct ib_srq_attr *attr = &init_attr->attr;
	u32 max_sge;
-	int ret;
-	u32 cqn;
-
-	if (init_attr->srq_type != IB_SRQT_BASIC &&
-	    init_attr->srq_type != IB_SRQT_XRC)
-		return -EOPNOTSUPP;

	max_sge = proc_srq_sge(hr_dev, srq, !!udata);
-
-	if (init_attr->attr.max_wr > hr_dev->caps.max_srq_wrs ||
-	    init_attr->attr.max_sge > max_sge) {
+	if (attr->max_wr > hr_dev->caps.max_srq_wrs ||
+	    attr->max_sge > max_sge) {
		ibdev_err(&hr_dev->ib_dev,
-			  "SRQ config error, depth = %u, sge = %d\n",
-			  init_attr->attr.max_wr, init_attr->attr.max_sge);
+			  "invalid SRQ attr, depth = %u, sge = %u.\n",
+			  attr->max_wr, attr->max_sge);
		return -EINVAL;
	}

-	mutex_init(&srq->mutex);
-	spin_lock_init(&srq->lock);
+	attr->max_wr = max_t(u32, attr->max_wr, HNS_ROCE_MIN_SRQ_WQE_NUM);
+	srq->wqe_cnt = roundup_pow_of_two(attr->max_wr);
+	srq->max_gs = roundup_pow_of_two(attr->max_sge + srq->rsv_sge);
+
+	attr->max_wr = srq->wqe_cnt;
+	attr->max_sge = srq->max_gs - srq->rsv_sge;
+	attr->srq_limit = 0;

-	init_attr->attr.max_wr = max_t(u32, init_attr->attr.max_wr,
-				       HNS_ROCE_MIN_SRQ_WQE_NUM);
-	srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr);
-	srq->max_gs =
-		roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge);
-	init_attr->attr.max_wr = srq->wqe_cnt;
-	init_attr->attr.max_sge = srq->max_gs;
-	init_attr->attr.srq_limit = 0;
+	return 0;
+}
+
+static void set_srq_ext_param(struct hns_roce_srq *srq,
+			      struct ib_srq_init_attr *init_attr)
+{
+	srq->cqn = ib_srq_has_cq(init_attr->srq_type) ?
+		   to_hr_cq(init_attr->ext.cq)->cqn : 0;
+}
+
+static int set_srq_param(struct hns_roce_srq *srq,
+			 struct ib_srq_init_attr *init_attr,
+			 struct ib_udata *udata)
+{
+	int ret;
+
+	ret = set_srq_basic_param(srq, init_attr, udata);
+	if (ret)
+		return ret;
+
+	set_srq_ext_param(srq, init_attr);
+
+	return 0;
+}
+
+static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq,
+			 struct ib_udata *udata)
+{
+	struct hns_roce_ib_create_srq ucmd = {};
+	int ret;

	if (udata) {
		ret = ib_copy_from_udata(&ucmd, udata,
					 min(udata->inlen, sizeof(ucmd)));
		if (ret) {
-			ibdev_err(ibdev, "failed to copy SRQ udata, ret = %d.\n",
+			ibdev_err(&hr_dev->ib_dev,
+				  "failed to copy SRQ udata, ret = %d.\n",
				  ret);
			return ret;
		}
	}

-	ret = alloc_srq_buf(hr_dev, srq, udata, ucmd.buf_addr);
-	if (ret) {
-		ibdev_err(ibdev,
-			  "failed to alloc SRQ buffer, ret = %d.\n", ret);
+	ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
+	if (ret)
		return ret;
-	}

-	ret = alloc_srq_idx(hr_dev, srq, udata, ucmd.que_addr);
-	if (ret) {
-		ibdev_err(ibdev, "failed to alloc SRQ idx, ret = %d.\n", ret);
-		goto err_buf_alloc;
-	}
+	ret = alloc_srq_wqe_buf(hr_dev, srq, udata, ucmd.buf_addr);
+	if (ret)
+		goto err_idx;

	if (!udata) {
		ret = alloc_srq_wrid(hr_dev, srq);
-		if (ret) {
-			ibdev_err(ibdev, "failed to alloc SRQ wrid, ret = %d.\n",
-				  ret);
-			goto err_idx_alloc;
-		}
+		if (ret)
+			goto err_wqe_buf;
	}

-	cqn = ib_srq_has_cq(init_attr->srq_type) ?
-	      to_hr_cq(init_attr->ext.cq)->cqn : 0;
-	srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+	return 0;

-	ret = alloc_srqc(hr_dev, srq, to_hr_pd(ib_srq->pd)->pdn, cqn, 0, 0);
-	if (ret) {
-		ibdev_err(ibdev,
-			  "failed to alloc SRQ context, ret = %d.\n", ret);
-		goto err_wrid_alloc;
-	}
+err_wqe_buf:
+	free_srq_wqe_buf(hr_dev, srq);
+err_idx:
+	free_srq_idx(hr_dev, srq);

-	srq->event = hns_roce_ib_srq_event;
-	resp.srqn = srq->srqn;
-	srq->max_gs = init_attr->attr.max_sge;
-	init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge;
+	return ret;
+}
+
+static void free_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
+{
+	free_srq_wrid(srq);
+	free_srq_wqe_buf(hr_dev, srq);
+	free_srq_idx(hr_dev, srq);
+}
+
+int hns_roce_create_srq(struct ib_srq *ib_srq,
+			struct ib_srq_init_attr *init_attr,
+			struct ib_udata *udata)
+{
+	struct hns_roce_dev *hr_dev = to_hr_dev(ib_srq->device);
+	struct hns_roce_ib_create_srq_resp resp = {};
+	struct hns_roce_srq *srq = to_hr_srq(ib_srq);
+	int ret;
+
+	mutex_init(&srq->mutex);
+	spin_lock_init(&srq->lock);
+
+	ret = set_srq_param(srq, init_attr, udata);
+	if (ret)
+		return ret;
+
+	ret = alloc_srq_buf(hr_dev, srq, udata);
+	if (ret)
+		return ret;
+
+	ret = alloc_srqc(hr_dev, srq);
+	if (ret)
+		goto err_srq_buf;

	if (udata) {
-		ret = ib_copy_to_udata(udata, &resp,
-				       min(udata->outlen, sizeof(resp)));
-		if (ret)
-			goto err_srqc_alloc;
+		resp.srqn = srq->srqn;
+		if (ib_copy_to_udata(udata, &resp,
+				     min(udata->outlen, sizeof(resp)))) {
+			ret = -EFAULT;
+			goto err_srqc;
+		}
	}

+	srq->db_reg_l = hr_dev->reg_base + SRQ_DB_REG;
+	srq->event = hns_roce_ib_srq_event;
+	atomic_set(&srq->refcount, 1);
+	init_completion(&srq->free);
+
	return 0;

-err_srqc_alloc:
+err_srqc:
	free_srqc(hr_dev, srq);
-err_wrid_alloc:
-	free_srq_wrid(srq);
-err_idx_alloc:
-	free_srq_idx(hr_dev, srq);
-err_buf_alloc:
+err_srq_buf:
	free_srq_buf(hr_dev, srq);
+
	return ret;
 }

@@ -414,8 +451,6 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
	struct hns_roce_srq *srq = to_hr_srq(ibsrq);

	free_srqc(hr_dev, srq);
-	free_srq_idx(hr_dev, srq);
-	free_srq_wrid(srq);
	free_srq_buf(hr_dev, srq);
	return 0;
 }
From: Wenpeng Liang <liangwenpeng@huawei.com>
Reduce the number of parameters of write_srqc() and move some related code into it from alloc_srqc().
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |  6 +---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  | 33 ++++++++++++++++++----
 drivers/infiniband/hw/hns/hns_roce_srq.c    | 44 +++++++++--------------------
 3 files changed, 42 insertions(+), 41 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index b325b9c..d51641a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -953,11 +953,7 @@ struct hns_roce_hw {
	int (*modify_cq)(struct ib_cq *cq, u16 cq_count, u16 cq_period);
	int (*init_eq)(struct hns_roce_dev *hr_dev);
	void (*cleanup_eq)(struct hns_roce_dev *hr_dev);
-	void (*write_srqc)(struct hns_roce_dev *hr_dev,
-			   struct hns_roce_srq *srq, void *mb_buf,
-			   u64 *mtts_wqe, u64 *mtts_idx,
-			   dma_addr_t dma_handle_wqe,
-			   dma_addr_t dma_handle_idx);
+	int (*write_srqc)(struct hns_roce_srq *srq, void *mb_buf);
	int (*modify_srq)(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr,
			  enum ib_srq_attr_mask srq_attr_mask,
			  struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index ec2d64c..dd5f7b5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5227,17 +5227,38 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
	return ret;
 }

-static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,
-				   struct hns_roce_srq *srq, void *mb_buf,
-				   u64 *mtts_wqe, u64 *mtts_idx,
-				   dma_addr_t dma_handle_wqe,
-				   dma_addr_t dma_handle_idx)
+static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
 {
+	struct ib_device *ibdev = srq->ibsrq.device;
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
	struct hns_roce_srq_context *srq_context;
+	u64 mtts_wqe[MTT_MIN_COUNT] = {};
+	u64 mtts_idx[MTT_MIN_COUNT] = {};
+	dma_addr_t dma_handle_wqe = 0;
+	dma_addr_t dma_handle_idx = 0;
+	int ret;

	srq_context = mb_buf;
	memset(srq_context, 0, sizeof(*srq_context));

+	/* Get the physical address of srq buf */
+	ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
+				ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
+	if (ret < 1) {
+		ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
+			  ret);
+		return -ENOBUFS;
+	}
+
+	/* Get physical address of idx que buf */
+	ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
+				ARRAY_SIZE(mtts_idx), &dma_handle_idx);
+	if (ret < 1) {
+		ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+			  ret);
+		return -ENOBUFS;
+	}
+
	roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
		       SRQC_BYTE_4_SRQ_ST_S, 1);

@@ -5319,6 +5340,8 @@ static void hns_roce_v2_write_srqc(struct hns_roce_dev *hr_dev,

	roce_set_bit(srq_context->db_record_addr_record_en,
		     SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+
+	return 0;
 }

 static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c
index 5069b81..d5a6de0 100644
--- a/drivers/infiniband/hw/hns/hns_roce_srq.c
+++ b/drivers/infiniband/hw/hns/hns_roce_srq.c
@@ -82,34 +82,11 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)
	struct hns_roce_srq_table *srq_table = &hr_dev->srq_table;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_cmd_mailbox *mailbox;
-	u64 mtts_wqe[MTT_MIN_COUNT] = { 0 };
-	u64 mtts_idx[MTT_MIN_COUNT] = { 0 };
-	dma_addr_t dma_handle_wqe = 0;
-	dma_addr_t dma_handle_idx = 0;
	int ret;

-	/* Get the physical address of srq buf */
-	ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
-				ARRAY_SIZE(mtts_wqe), &dma_handle_wqe);
-	if (ret < 1) {
-		ibdev_err(ibdev, "failed to find mtr for SRQ WQE, ret = %d.\n",
-			  ret);
-		return -ENOBUFS;
-	}
-
-	/* Get physical address of idx que buf */
-	ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
-				ARRAY_SIZE(mtts_idx), &dma_handle_idx);
-	if (ret < 1) {
-		ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
-			  ret);
-		return -ENOBUFS;
-	}
-
	ret = hns_roce_bitmap_alloc(&srq_table->bitmap, &srq->srqn);
	if (ret) {
-		ibdev_err(ibdev,
-			  "failed to alloc SRQ number, ret = %d.\n", ret);
+		ibdev_err(ibdev, "failed to alloc SRQ number.\n");
		return -ENOMEM;
	}

@@ -127,31 +104,36 @@ static int alloc_srqc(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq)

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR_OR_NULL(mailbox)) {
-		ret = -ENOMEM;
		ibdev_err(ibdev, "failed to alloc mailbox for SRQC.\n");
+		ret = -ENOMEM;
		goto err_xa;
	}

-	hr_dev->hw->write_srqc(hr_dev, srq, mailbox->buf, mtts_wqe, mtts_idx,
-			       dma_handle_wqe, dma_handle_idx);
+	ret = hr_dev->hw->write_srqc(srq, mailbox->buf);
+	if (ret) {
+		ibdev_err(ibdev, "failed to write SRQC.\n");
+		goto err_mbox;
+	}

	ret = hns_roce_hw_create_srq(hr_dev, mailbox, srq->srqn);
-	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
	if (ret) {
		ibdev_err(ibdev, "failed to config SRQC, ret = %d.\n", ret);
-		goto err_xa;
+		goto err_mbox;
	}

+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
+
	return 0;

+err_mbox:
+	hns_roce_free_cmd_mailbox(hr_dev, mailbox);
 err_xa:
	xa_erase(&srq_table->xa, srq->srqn);
-
 err_put:
	hns_roce_table_put(hr_dev, &srq_table->table, srq->srqn);
-
 err_out:
	hns_roce_bitmap_free(&srq_table->bitmap, srq->srqn, BITMAP_NO_RR);
+
	return ret;
 }
From: Lang Cheng <chenglang@huawei.com>
Use the new register operation interfaces to simplify the process of writing the SRQ context.
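The idea behind the new interface can be modeled in standalone C: one macro names an absolute (high, low) bit range in the context instead of a byte/mask/shift triple. This is only an illustration; the driver's actual hr_reg_write()/FIELD_LOC() helpers encode fields differently:

#include <stdint.h>
#include <stdio.h>

static void reg_write(uint32_t *ctx, unsigned int h, unsigned int l,
		      uint32_t val)
{
	/* Assumes the field does not cross a 32-bit word boundary,
	 * which holds for all SRQC fields in this patch. */
	unsigned int word = l / 32;
	unsigned int shift = l % 32;
	uint32_t mask = (h - l == 31) ? ~0u : ((1u << (h - l + 1)) - 1);

	ctx[word] &= ~(mask << shift);
	ctx[word] |= (val & mask) << shift;
}

int main(void)
{
	uint32_t ctx[16] = {};

	/* SRQC_SRQN occupies bits 8..31 of the context. */
	reg_write(ctx, 31, 8, 0x1234);
	printf("byte_4_srqn_srqst = 0x%08x\n", ctx[0]);
	return 0;
}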
Signed-off-by: Lang Cheng <chenglang@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 160 ++++++++++++-----------------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  69 ++++++++++---
 2 files changed, 118 insertions(+), 111 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index dd5f7b5..105019c5 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5227,19 +5227,59 @@ static int hns_roce_v2_qp_flow_control_init(struct hns_roce_dev *hr_dev,
	return ret;
 }

+static int hns_roce_v2_write_srqc_index_queue(struct hns_roce_srq *srq,
+					      struct hns_roce_srq_context *ctx)
+{
+	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	struct ib_device *ibdev = srq->ibsrq.device;
+	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
+	u64 mtts_idx[MTT_MIN_COUNT] = {};
+	dma_addr_t dma_handle_idx = 0;
+	int ret;
+
+	/* Get physical address of idx que buf */
+	ret = hns_roce_mtr_find(hr_dev, &idx_que->mtr, 0, mtts_idx,
+				ARRAY_SIZE(mtts_idx), &dma_handle_idx);
+	if (ret < 1) {
+		ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
+			  ret);
+		return -ENOBUFS;
+	}
+
+	hr_reg_write(ctx, SRQC_IDX_HOP_NUM,
+		     to_hr_hem_hopnum(hr_dev->caps.idx_hop_num, srq->wqe_cnt));
+
+	hr_reg_write(ctx, SRQC_IDX_BT_BA_L, dma_handle_idx >> 3);
+	hr_reg_write(ctx, SRQC_IDX_BT_BA_H, upper_32_bits(dma_handle_idx >> 3));
+
+	hr_reg_write(ctx, SRQC_IDX_BA_PG_SZ,
+		     to_hr_hw_page_shift(idx_que->mtr.hem_cfg.ba_pg_shift));
+	hr_reg_write(ctx, SRQC_IDX_BUF_PG_SZ,
+		     to_hr_hw_page_shift(idx_que->mtr.hem_cfg.buf_pg_shift));
+
+	hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_L,
+		     to_hr_hw_page_addr(mtts_idx[0]));
+	hr_reg_write(ctx, SRQC_IDX_CUR_BLK_ADDR_H,
+		     upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
+
+	hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_L,
+		     to_hr_hw_page_addr(mtts_idx[1]));
+	hr_reg_write(ctx, SRQC_IDX_NXT_BLK_ADDR_H,
+		     upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
+
+	return 0;
+}
+
 static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
 {
	struct ib_device *ibdev = srq->ibsrq.device;
	struct hns_roce_dev *hr_dev = to_hr_dev(ibdev);
-	struct hns_roce_srq_context *srq_context;
+	struct hns_roce_srq_context *ctx = mb_buf;
	u64 mtts_wqe[MTT_MIN_COUNT] = {};
-	u64 mtts_idx[MTT_MIN_COUNT] = {};
	dma_addr_t dma_handle_wqe = 0;
-	dma_addr_t dma_handle_idx = 0;
	int ret;

-	srq_context = mb_buf;
-	memset(srq_context, 0, sizeof(*srq_context));
+	memset(ctx, 0, sizeof(*ctx));

	/* Get the physical address of srq buf */
	ret = hns_roce_mtr_find(hr_dev, &srq->buf_mtr, 0, mtts_wqe,
@@ -5250,98 +5290,28 @@ static int hns_roce_v2_write_srqc(struct hns_roce_srq *srq, void *mb_buf)
		return -ENOBUFS;
	}

-	/* Get physical address of idx que buf */
-	ret = hns_roce_mtr_find(hr_dev, &srq->idx_que.mtr, 0, mtts_idx,
-				ARRAY_SIZE(mtts_idx), &dma_handle_idx);
-	if (ret < 1) {
-		ibdev_err(ibdev, "failed to find mtr for SRQ idx, ret = %d.\n",
-			  ret);
-		return -ENOBUFS;
-	}
+	hr_reg_write(ctx, SRQC_SRQ_ST, 1);
+	hr_reg_write(ctx, SRQC_PD, to_hr_pd(srq->ibsrq.pd)->pdn);
+	hr_reg_write(ctx, SRQC_SRQN, srq->srqn);
+	hr_reg_write(ctx, SRQC_XRCD, 0);
+	hr_reg_write(ctx, SRQC_XRC_CQN, srq->cqn);
+	hr_reg_write(ctx, SRQC_SHIFT, ilog2(srq->wqe_cnt));
+	hr_reg_write(ctx, SRQC_RQWS,
+		     srq->max_gs <= 0 ? 0 : fls(srq->max_gs - 1));

-	roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQ_ST_M,
-		       SRQC_BYTE_4_SRQ_ST_S, 1);
-
-	roce_set_field(srq_context->byte_4_srqn_srqst,
-		       SRQC_BYTE_4_SRQ_WQE_HOP_NUM_M,
-		       SRQC_BYTE_4_SRQ_WQE_HOP_NUM_S,
-		       to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
-					srq->wqe_cnt));
-	roce_set_field(srq_context->byte_4_srqn_srqst,
-		       SRQC_BYTE_4_SRQ_SHIFT_M, SRQC_BYTE_4_SRQ_SHIFT_S,
-		       ilog2(srq->wqe_cnt));
-
-	roce_set_field(srq_context->byte_4_srqn_srqst, SRQC_BYTE_4_SRQN_M,
-		       SRQC_BYTE_4_SRQN_S, srq->srqn);
-
-	roce_set_field(srq_context->byte_8_limit_wl, SRQC_BYTE_8_SRQ_LIMIT_WL_M,
-		       SRQC_BYTE_8_SRQ_LIMIT_WL_S, 0);
-
-	roce_set_field(srq_context->byte_12_xrcd, SRQC_BYTE_12_SRQ_XRCD_M,
-		       SRQC_BYTE_12_SRQ_XRCD_S, 0);
-
-	srq_context->wqe_bt_ba = cpu_to_le32((u32)(dma_handle_wqe >> 3));
-
-	roce_set_field(srq_context->byte_24_wqe_bt_ba,
-		       SRQC_BYTE_24_SRQ_WQE_BT_BA_M,
-		       SRQC_BYTE_24_SRQ_WQE_BT_BA_S,
-		       dma_handle_wqe >> 35);
-
-	roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_PD_M,
-		       SRQC_BYTE_28_PD_S, to_hr_pd(srq->ibsrq.pd)->pdn);
-	roce_set_field(srq_context->byte_28_rqws_pd, SRQC_BYTE_28_RQWS_M,
-		       SRQC_BYTE_28_RQWS_S, srq->max_gs <= 0 ? 0 :
-		       fls(srq->max_gs - 1));
-
-	srq_context->idx_bt_ba = cpu_to_le32(dma_handle_idx >> 3);
-	roce_set_field(srq_context->rsv_idx_bt_ba,
-		       SRQC_BYTE_36_SRQ_IDX_BT_BA_M,
-		       SRQC_BYTE_36_SRQ_IDX_BT_BA_S,
-		       dma_handle_idx >> 35);
-
-	srq_context->idx_cur_blk_addr =
-		cpu_to_le32(to_hr_hw_page_addr(mtts_idx[0]));
-	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
-		       SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_M,
-		       SRQC_BYTE_44_SRQ_IDX_CUR_BLK_ADDR_S,
-		       upper_32_bits(to_hr_hw_page_addr(mtts_idx[0])));
-	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
-		       SRQC_BYTE_44_SRQ_IDX_HOP_NUM_M,
-		       SRQC_BYTE_44_SRQ_IDX_HOP_NUM_S,
-		       to_hr_hem_hopnum(hr_dev->caps.idx_hop_num,
-					srq->wqe_cnt));
-
-	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
-		       SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_M,
-		       SRQC_BYTE_44_SRQ_IDX_BA_PG_SZ_S,
-		       to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.ba_pg_shift));
-	roce_set_field(srq_context->byte_44_idxbufpgsz_addr,
-		       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_M,
-		       SRQC_BYTE_44_SRQ_IDX_BUF_PG_SZ_S,
-		       to_hr_hw_page_shift(srq->idx_que.mtr.hem_cfg.buf_pg_shift));
-
-	srq_context->idx_nxt_blk_addr =
-		cpu_to_le32(to_hr_hw_page_addr(mtts_idx[1]));
-	roce_set_field(srq_context->rsv_idxnxtblkaddr,
-		       SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_M,
-		       SRQC_BYTE_52_SRQ_IDX_NXT_BLK_ADDR_S,
-		       upper_32_bits(to_hr_hw_page_addr(mtts_idx[1])));
-	roce_set_field(srq_context->byte_56_xrc_cqn,
-		       SRQC_BYTE_56_SRQ_XRC_CQN_M, SRQC_BYTE_56_SRQ_XRC_CQN_S,
-		       srq->cqn);
-	roce_set_field(srq_context->byte_56_xrc_cqn,
-		       SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_M,
-		       SRQC_BYTE_56_SRQ_WQE_BA_PG_SZ_S,
-		       to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
-	roce_set_field(srq_context->byte_56_xrc_cqn,
-		       SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_M,
-		       SRQC_BYTE_56_SRQ_WQE_BUF_PG_SZ_S,
-		       to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
-
-	roce_set_bit(srq_context->db_record_addr_record_en,
-		     SRQC_BYTE_60_SRQ_RECORD_EN_S, 0);
+	hr_reg_write(ctx, SRQC_WQE_HOP_NUM,
+		     to_hr_hem_hopnum(hr_dev->caps.srqwqe_hop_num,
+				      srq->wqe_cnt));

-	return 0;
+	hr_reg_write(ctx, SRQC_WQE_BT_BA_L, dma_handle_wqe >> 3);
+	hr_reg_write(ctx, SRQC_WQE_BT_BA_H, upper_32_bits(dma_handle_wqe >> 3));
+
+	hr_reg_write(ctx, SRQC_WQE_BA_PG_SZ,
+		     to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.ba_pg_shift));
+	hr_reg_write(ctx, SRQC_WQE_BUF_PG_SZ,
+		     to_hr_hw_page_shift(srq->buf_mtr.hem_cfg.buf_pg_shift));
+
+	return hns_roce_v2_write_srqc_index_queue(srq, ctx);
 }

 static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index cd9abdd..e46c935 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -367,24 +367,61 @@ struct hns_roce_v2_cq_context {
 #define CQC_STASH CQC_FIELD_LOC(63, 63)

 struct hns_roce_srq_context {
-	__le32	byte_4_srqn_srqst;
-	__le32	byte_8_limit_wl;
-	__le32	byte_12_xrcd;
-	__le32	byte_16_pi_ci;
-	__le32	wqe_bt_ba;
-	__le32	byte_24_wqe_bt_ba;
-	__le32	byte_28_rqws_pd;
-	__le32	idx_bt_ba;
-	__le32	rsv_idx_bt_ba;
-	__le32	idx_cur_blk_addr;
-	__le32	byte_44_idxbufpgsz_addr;
-	__le32	idx_nxt_blk_addr;
-	__le32	rsv_idxnxtblkaddr;
-	__le32	byte_56_xrc_cqn;
-	__le32	db_record_addr_record_en;
-	__le32	db_record_addr;
+	__le32 byte_4_srqn_srqst;
+	__le32 byte_8_limit_wl;
+	__le32 byte_12_xrcd;
+	__le32 byte_16_pi_ci;
+	__le32 wqe_bt_ba;
+	__le32 byte_24_wqe_bt_ba;
+	__le32 byte_28_rqws_pd;
+	__le32 idx_bt_ba;
+	__le32 rsv_idx_bt_ba;
+	__le32 idx_cur_blk_addr;
+	__le32 byte_44_idxbufpgsz_addr;
+	__le32 idx_nxt_blk_addr;
+	__le32 rsv_idxnxtblkaddr;
+	__le32 byte_56_xrc_cqn;
+	__le32 db_record_addr_record_en;
+	__le32 db_record_addr;
 };

+#define SRQC_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_srq_context, h, l)
+
+#define SRQC_SRQ_ST SRQC_FIELD_LOC(1, 0)
+#define SRQC_WQE_HOP_NUM SRQC_FIELD_LOC(3, 2)
+#define SRQC_SHIFT SRQC_FIELD_LOC(7, 4)
+#define SRQC_SRQN SRQC_FIELD_LOC(31, 8)
+#define SRQC_LIMIT_WL SRQC_FIELD_LOC(47, 32)
+#define SRQC_RSV0 SRQC_FIELD_LOC(63, 48)
+#define SRQC_XRCD SRQC_FIELD_LOC(87, 64)
+#define SRQC_RSV1 SRQC_FIELD_LOC(95, 88)
+#define SRQC_PRODUCER_IDX SRQC_FIELD_LOC(111, 96)
+#define SRQC_CONSUMER_IDX SRQC_FIELD_LOC(127, 112)
+#define SRQC_WQE_BT_BA_L SRQC_FIELD_LOC(159, 128)
+#define SRQC_WQE_BT_BA_H SRQC_FIELD_LOC(188, 160)
+#define SRQC_RSV2 SRQC_FIELD_LOC(191, 189)
+#define SRQC_PD SRQC_FIELD_LOC(215, 192)
+#define SRQC_RQWS SRQC_FIELD_LOC(219, 216)
+#define SRQC_RSV3 SRQC_FIELD_LOC(223, 220)
+#define SRQC_IDX_BT_BA_L SRQC_FIELD_LOC(255, 224)
+#define SRQC_IDX_BT_BA_H SRQC_FIELD_LOC(284, 256)
+#define SRQC_RSV4 SRQC_FIELD_LOC(287, 285)
+#define SRQC_IDX_CUR_BLK_ADDR_L SRQC_FIELD_LOC(319, 288)
+#define SRQC_IDX_CUR_BLK_ADDR_H SRQC_FIELD_LOC(339, 320)
+#define SRQC_RSV5 SRQC_FIELD_LOC(341, 340)
+#define SRQC_IDX_HOP_NUM SRQC_FIELD_LOC(343, 342)
+#define SRQC_IDX_BA_PG_SZ SRQC_FIELD_LOC(347, 344)
+#define SRQC_IDX_BUF_PG_SZ SRQC_FIELD_LOC(351, 348)
+#define SRQC_IDX_NXT_BLK_ADDR_L SRQC_FIELD_LOC(383, 352)
+#define SRQC_IDX_NXT_BLK_ADDR_H SRQC_FIELD_LOC(403, 384)
+#define SRQC_RSV6 SRQC_FIELD_LOC(415, 404)
+#define SRQC_XRC_CQN SRQC_FIELD_LOC(439, 416)
+#define SRQC_WQE_BA_PG_SZ SRQC_FIELD_LOC(443, 440)
+#define SRQC_WQE_BUF_PG_SZ SRQC_FIELD_LOC(447, 444)
+#define SRQC_DB_RECORD_EN SRQC_FIELD_LOC(448, 448)
+#define SRQC_DB_RECORD_ADDR_L SRQC_FIELD_LOC(479, 449)
+#define SRQC_DB_RECORD_ADDR_H SRQC_FIELD_LOC(511, 480)
+
 #define SRQC_BYTE_4_SRQ_ST_S 0
 #define SRQC_BYTE_4_SRQ_ST_M GENMASK(1, 0)
From: Xi Wang <wangxi11@huawei.com>
Refactor the post recv flow by removing unnecessary checks and duplicated code.
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 81 +++++++++++++++---------------
 1 file changed, 41 insertions(+), 40 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 105019c5..1f70422 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -48,8 +48,8 @@
 #include "hns_roce_hem.h"
 #include "hns_roce_hw_v2.h"

-static void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
-			    struct ib_sge *sg)
+static inline void set_data_seg_v2(struct hns_roce_v2_wqe_data_seg *dseg,
+				   struct ib_sge *sg)
 {
	dseg->lkey = cpu_to_le32(sg->lkey);
	dseg->addr = cpu_to_le64(sg->addr);
@@ -729,6 +729,40 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev,
	return 0;
 }

+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+			u32 wqe_idx)
+{
+	struct hns_roce_v2_wqe_data_seg *dseg;
+	struct hns_roce_rinl_sge *sge_list;
+	void *wqe = NULL;
+	int i;
+
+	wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+	dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
+	for (i = 0; i < wr->num_sge; i++) {
+		if (!wr->sg_list[i].length)
+			continue;
+		set_data_seg_v2(dseg, wr->sg_list + i);
+		dseg++;
+	}
+
+	if (hr_qp->rq.rsv_sge) {
+		dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+		dseg->addr = 0;
+		dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+	}
+
+	/* rq support inline data */
+	if (hr_qp->rq_inl_buf.wqe_cnt) {
+		sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
+		hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt = (u32)wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++) {
+			sge_list[i].addr = (void *)(u64)wr->sg_list[i].addr;
+			sge_list[i].len = wr->sg_list[i].length;
+		}
+	}
+}
+
 static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
				 const struct ib_recv_wr *wr,
				 const struct ib_recv_wr **bad_wr)
@@ -736,15 +770,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
	struct ib_device *ibdev = &hr_dev->ib_dev;
-	struct hns_roce_v2_wqe_data_seg *dseg;
-	struct hns_roce_rinl_sge *sge_list;
+	u32 wqe_idx, nreq, max_sge;
	unsigned long flags;
-	void *wqe = NULL;
-	u32 wqe_idx;
-	u32 max_sge;
-	int nreq;
	int ret;
-	int i;

	spin_lock_irqsave(&hr_qp->rq.lock, flags);

@@ -764,8 +792,6 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
			goto out;
		}

-		wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
-
		if (unlikely(wr->num_sge > max_sge)) {
			ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n",
				  wr->num_sge, max_sge);
@@ -774,32 +800,8 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
			goto out;
		}

-		wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
-		dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-		for (i = 0; i < wr->num_sge; i++) {
-			if (!wr->sg_list[i].length)
-				continue;
-			set_data_seg_v2(dseg, wr->sg_list + i);
-			dseg++;
-		}
-
-		if (hr_qp->rq.rsv_sge) {
-			dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-			dseg->addr = 0;
-			dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
-		}
-
-		/* rq support inline data */
-		if (hr_qp->rq_inl_buf.wqe_cnt) {
-			sge_list = hr_qp->rq_inl_buf.wqe_list[wqe_idx].sg_list;
-			hr_qp->rq_inl_buf.wqe_list[wqe_idx].sge_cnt =
-				(u32)wr->num_sge;
-			for (i = 0; i < wr->num_sge; i++) {
-				sge_list[i].addr =
-					(void *)(u64)wr->sg_list[i].addr;
-				sge_list[i].len = wr->sg_list[i].length;
-			}
-		}
+		wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
+		fill_rq_wqe(hr_qp, wr, wqe_idx);

		hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
	}
@@ -928,9 +930,8 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
		dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;

		for (i = 0; i < wr->num_sge; ++i) {
-			dseg[i].len = cpu_to_le32(wr->sg_list[i].length);
-			dseg[i].lkey = cpu_to_le32(wr->sg_list[i].lkey);
-			dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr);
+			set_data_seg_v2(dseg, wr->sg_list + i);
+			dseg++;
		}

		if (srq->rsv_sge) {
From: Xi Wang <wangxi11@huawei.com>
HIP09 requires the driver to clear the unused data segments in the WQE buffer to make the hns ROCEE stop reading the remaining invalid SGEs for the RQ.
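A standalone model of the two tail-handling strategies (field names mirror the patch; the 16-byte segment size here stands in for HNS_ROCE_SGE_SIZE):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define INVALID_LKEY	0x0
#define INVALID_LEN	0x80000000u

struct wqe_data_seg {
	uint32_t len;
	uint32_t lkey;
	uint64_t addr;
};

static void fill_tail(struct wqe_data_seg *dseg, unsigned int cnt,
		      unsigned int max_sge, int rsv)
{
	if (rsv) {			/* HIP08: terminate with invalid SGE */
		dseg[cnt].lkey = INVALID_LKEY;
		dseg[cnt].addr = 0;
		dseg[cnt].len = INVALID_LEN;
	} else if (cnt < max_sge) {	/* HIP09: zero the leftover slots */
		memset(&dseg[cnt], 0, (max_sge - cnt) * sizeof(*dseg));
	}
}

int main(void)
{
	struct wqe_data_seg wqe[4];

	memset(wqe, 0xff, sizeof(wqe));	/* stale data from a prior WR */
	fill_tail(wqe, 2, 4, 0);	/* 2 valid SGEs on HIP09 */
	printf("sge[2].len = 0x%x\n", wqe[2].len);
	return 0;
}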
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 99 ++++++++++++++----------------
 1 file changed, 47 insertions(+), 52 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1f70422..7eba9b5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -729,28 +729,42 @@ static int check_recv_valid(struct hns_roce_dev *hr_dev, return 0; }
-static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr, - u32 wqe_idx) +static void fill_recv_sge_to_wqe(const struct ib_recv_wr *wr, void *wqe, + u32 max_sge, bool rsv) { - struct hns_roce_v2_wqe_data_seg *dseg; - struct hns_roce_rinl_sge *sge_list; - void *wqe = NULL; - int i; + struct hns_roce_v2_wqe_data_seg *dseg = wqe; + u32 i, cnt;
- wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx); - dseg = (struct hns_roce_v2_wqe_data_seg *)wqe; - for (i = 0; i < wr->num_sge; i++) { + for (i = 0, cnt = 0; i < wr->num_sge; i++) { + /* Skip zero-length sge */ if (!wr->sg_list[i].length) continue; - set_data_seg_v2(dseg, wr->sg_list + i); - dseg++; + set_data_seg_v2(dseg + cnt, wr->sg_list + i); + cnt++; }
-	if (hr_qp->rq.rsv_sge) {
-		dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-		dseg->addr = 0;
-		dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+	/* Fill a reserved sge to make hw stop reading remaining segments */
+	if (rsv) {
+		dseg[cnt].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
+		dseg[cnt].addr = 0;
+		dseg[cnt].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
+	} else {
+		/* Clear remaining segments to make ROCEE ignore sges */
+		if (cnt < max_sge)
+			memset(dseg + cnt, 0,
+			       (max_sge - cnt) * HNS_ROCE_SGE_SIZE);
	}
+}
+
+static void fill_rq_wqe(struct hns_roce_qp *hr_qp, const struct ib_recv_wr *wr,
+			u32 wqe_idx, u32 max_sge)
+{
+	struct hns_roce_rinl_sge *sge_list;
+	void *wqe = NULL;
+	u32 i;
+
+	wqe = hns_roce_get_recv_wqe(hr_qp, wqe_idx);
+	fill_recv_sge_to_wqe(wr, wqe, max_sge, hr_qp->rq.rsv_sge);
	/* rq support inline data */
	if (hr_qp->rq_inl_buf.wqe_cnt) {
@@ -801,8 +815,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
		}
		wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1);
-		fill_rq_wqe(hr_qp, wr, wqe_idx);
-
+		fill_rq_wqe(hr_qp, wr, wqe_idx, max_sge);
		hr_qp->rq.wrid[wqe_idx] = wr->wr_id;
	}
@@ -834,18 +847,18 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp,
	return ret;
 }
-static void *get_srq_wqe(struct hns_roce_srq *srq, int n)
+static void *get_srq_wqe_buf(struct hns_roce_srq *srq, u32 n)
 {
	return hns_roce_buf_offset(srq->buf_mtr.kmem, n << srq->wqe_shift);
 }
-static void *get_idx_buf(struct hns_roce_idx_que *idx_que, unsigned int n)
+static void *get_idx_buf(struct hns_roce_idx_que *idx_que, u32 n)
 {
	return hns_roce_buf_offset(idx_que->mtr.kmem,
				   n << idx_que->entry_shift);
 }
-static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
+static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
 {
	/* always called with interrupts disabled. */
	spin_lock(&srq->lock);
@@ -856,7 +869,7 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index)
	spin_unlock(&srq->lock);
 }
-int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
+int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, u32 nreq)
 {
	struct hns_roce_idx_que *idx_que = &srq->idx_que;
	unsigned int cur;
@@ -865,19 +878,18 @@ int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq)
	return cur + nreq >= srq->wqe_cnt;
 }
-static int find_empty_entry(struct hns_roce_idx_que *idx_que,
-			    unsigned long size)
+static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
 {
-	int wqe_idx;
+	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	u32 pos;
-	if (unlikely(bitmap_full(idx_que->bitmap, size)))
+	pos = find_first_zero_bit(idx_que->bitmap, srq->wqe_cnt);
+	if (unlikely(pos == srq->wqe_cnt))
		return -ENOSPC;
-	wqe_idx = find_first_zero_bit(idx_que->bitmap, size);
-
-	bitmap_set(idx_que->bitmap, wqe_idx, 1);
-
-	return wqe_idx;
+	bitmap_set(idx_que->bitmap, pos, 1);
+	*wqe_idx = pos;
+	return 0;
 }
 static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
@@ -886,17 +898,12 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
 {
	struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
	struct hns_roce_srq *srq = to_hr_srq(ibsrq);
-	struct hns_roce_v2_wqe_data_seg *dseg;
+	u32 wqe_idx, ind, nreq, max_sge;
	struct hns_roce_v2_db srq_db;
	unsigned long flags;
-	unsigned int ind;
	__le32 *srq_idx;
	int ret = 0;
-	int wqe_idx;
-	u32 max_sge;
	void *wqe;
-	int nreq;
-	int i;
spin_lock_irqsave(&srq->lock, flags);
@@ -919,26 +926,14 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
			break;
		}
-		wqe_idx = find_empty_entry(&srq->idx_que, srq->wqe_cnt);
-		if (unlikely(wqe_idx < 0)) {
-			ret = -ENOMEM;
+		ret = get_srq_wqe_idx(srq, &wqe_idx);
+		if (unlikely(ret)) {
			*bad_wr = wr;
			break;
		}
-		wqe = get_srq_wqe(srq, wqe_idx);
-		dseg = (struct hns_roce_v2_wqe_data_seg *)wqe;
-
-		for (i = 0; i < wr->num_sge; ++i) {
-			set_data_seg_v2(dseg, wr->sg_list + i);
-			dseg++;
-		}
-
-		if (srq->rsv_sge) {
-			dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH);
-			dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY);
-			dseg[i].addr = 0;
-		}
+		wqe = get_srq_wqe_buf(srq, wqe_idx);
+		fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
		srq_idx = get_idx_buf(&srq->idx_que, ind);
		*srq_idx = cpu_to_le32(wqe_idx);
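To contrast the two stop strategies side by side, here is a distilled userspace sketch of the logic that fill_recv_sge_to_wqe() implements. The struct layout and the two constants are simplified stand-ins for the driver's real definitions, not the driver API:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/* Simplified stand-in for struct hns_roce_v2_wqe_data_seg. */
struct data_seg {
	uint32_t len;
	uint32_t lkey;
	uint64_t addr;
};

#define INVALID_LKEY	   0x100 /* stand-in for HNS_ROCE_INVALID_LKEY */
#define INVALID_SGE_LENGTH 0xa0	 /* stand-in for HNS_ROCE_INVALID_SGE_LENGTH */

static void fill_recv_sges(const struct data_seg *sg_list, int num_sge,
			   struct data_seg *wqe, uint32_t max_sge, bool rsv)
{
	uint32_t cnt = 0;

	/* Copy the caller's sges, skipping zero-length ones. */
	for (int i = 0; i < num_sge; i++) {
		if (!sg_list[i].len)
			continue;
		wqe[cnt++] = sg_list[i];
	}

	if (rsv) {
		/* HIP08: one "invalid" sge tells the hardware to stop. */
		wqe[cnt].lkey = INVALID_LKEY;
		wqe[cnt].addr = 0;
		wqe[cnt].len = INVALID_SGE_LENGTH;
	} else if (cnt < max_sge) {
		/* HIP09: zero the unused tail so the hardware ignores it. */
		memset(&wqe[cnt], 0, (max_sge - cnt) * sizeof(wqe[cnt]));
	}
}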
From: Wenpeng Liang <liangwenpeng@huawei.com>
The SRQ in the hns driver consists of the following four parts:

* wqe buf: the buffer that stores the WQEs.

* wqe_idx buf: the CQEs of an SRQ may not be generated in the same order
  as its WQEs, so the wqe_idx corresponding to each idle WQE needs to be
  pushed into an index queue (a FIFO), which instructs the hardware to
  obtain the corresponding WQE.

* bitmap: the bitmap is used to allocate and release wqe_idx. When the
  user posts a new WR, the driver finds the idx of an idle WQE in the
  bitmap; when the CQE of that WQE is generated, the driver releases the
  idx again.

* wr_id buf: the wr_id buf stores the user's wr_id and returns it to the
  user when the poll_cq verb is invoked.
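As a rough illustration of how these four parts cooperate, a hedged sketch follows; the struct and helper names are simplified stand-ins, not the driver's actual hns_roce_srq/hns_roce_idx_que layout:

#include <linux/bitmap.h>
#include <linux/errno.h>
#include <linux/types.h>

/* Simplified view of the four SRQ parts described above. */
struct srq_sketch {
	void *wqe_buf;		/* wqe buf: WQE storage, indexed by wqe_idx */
	__le32 *idx_que;	/* wqe_idx buf: FIFO of posted wqe_idx for hw */
	unsigned long *bitmap;	/* allocates and releases wqe_idx */
	u64 *wrid;		/* wr_id buf: returned to user at poll_cq */
	u32 wqe_cnt;		/* power of two */
	u32 head;		/* FIFO producer counter */
	u32 tail;		/* FIFO consumer counter */
};

/* Allocate an idle wqe_idx from the bitmap (cf. get_srq_wqe_idx()). */
static int sketch_get_wqe_idx(struct srq_sketch *srq, u32 *wqe_idx)
{
	u32 pos = find_first_zero_bit(srq->bitmap, srq->wqe_cnt);

	if (pos == srq->wqe_cnt)
		return -ENOSPC;

	bitmap_set(srq->bitmap, pos, 1);
	*wqe_idx = pos;
	return 0;
}

/* Release the wqe_idx once the CQE for this WQE has been consumed. */
static void sketch_put_wqe_idx(struct srq_sketch *srq, u32 wqe_idx)
{
	bitmap_clear(srq->bitmap, wqe_idx, 1);
	srq->tail++;
}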
The process of posting an SRQ recv WR is refactored along these parts to make the code clearer.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 69 +++++++++++++++++++-----------
 1 file changed, 43 insertions(+), 26 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 7eba9b5..d5a63e4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -869,13 +869,32 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, u32 wqe_index)
	spin_unlock(&srq->lock);
 }
-int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, u32 nreq)
+static int hns_roce_srqwq_overflow(struct hns_roce_srq *srq)
 {
	struct hns_roce_idx_que *idx_que = &srq->idx_que;
-	unsigned int cur;
-	cur = idx_que->head - idx_que->tail;
-	return cur + nreq >= srq->wqe_cnt;
+	return idx_que->head - idx_que->tail >= srq->wqe_cnt;
+}
+
+static int check_post_srq_valid(struct hns_roce_srq *srq, u32 max_sge,
+				const struct ib_recv_wr *wr)
+{
+	struct ib_device *ib_dev = srq->ibsrq.device;
+
+	if (unlikely(wr->num_sge > max_sge)) {
+		ibdev_err(ib_dev,
+			  "failed to check sge, wr->num_sge = %d, max_sge = %u.\n",
+			  wr->num_sge, max_sge);
+		return -EINVAL;
+	}
+
+	if (unlikely(hns_roce_srqwq_overflow(srq))) {
+		ibdev_err(ib_dev,
+			  "failed to check srqwq status, srqwq is full.\n");
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
@@ -892,36 +911,40 @@ static int get_srq_wqe_idx(struct hns_roce_srq *srq, u32 *wqe_idx)
	return 0;
 }
+static void fill_wqe_idx(struct hns_roce_srq *srq, unsigned int wqe_idx)
+{
+	struct hns_roce_idx_que *idx_que = &srq->idx_que;
+	unsigned int head;
+	__le32 *buf;
+
+	head = idx_que->head & (srq->wqe_cnt - 1);
+
+	buf = get_idx_buf(idx_que, head);
+	*buf = cpu_to_le32(wqe_idx);
+
+	idx_que->head++;
+}
+
 static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
				     const struct ib_recv_wr *wr,
				     const struct ib_recv_wr **bad_wr)
 {
	struct hns_roce_dev *hr_dev = to_hr_dev(ibsrq->device);
	struct hns_roce_srq *srq = to_hr_srq(ibsrq);
-	u32 wqe_idx, ind, nreq, max_sge;
	struct hns_roce_v2_db srq_db;
	unsigned long flags;
-	__le32 *srq_idx;
	int ret = 0;
+	u32 max_sge;
+	u32 wqe_idx;
	void *wqe;
+	u32 nreq;
spin_lock_irqsave(&srq->lock, flags);
-	ind = srq->idx_que.head & (srq->wqe_cnt - 1);
-
	max_sge = srq->max_gs - srq->rsv_sge;
	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (unlikely(wr->num_sge > max_sge)) {
-			ibdev_err(&hr_dev->ib_dev,
-				  "srq: num_sge = %d, max_sge = %u.\n",
-				  wr->num_sge, max_sge);
-			ret = -EINVAL;
-			*bad_wr = wr;
-			break;
-		}
-
-		if (unlikely(hns_roce_srqwq_overflow(srq, nreq))) {
-			ret = -ENOMEM;
+		ret = check_post_srq_valid(srq, max_sge, wr);
+		if (ret) {
			*bad_wr = wr;
			break;
		}
@@ -934,17 +957,11 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq,
		wqe = get_srq_wqe_buf(srq, wqe_idx);
		fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
-
-		srq_idx = get_idx_buf(&srq->idx_que, ind);
-		*srq_idx = cpu_to_le32(wqe_idx);
-
+		fill_wqe_idx(srq, wqe_idx);
		srq->wrid[wqe_idx] = wr->wr_id;
-		ind = (ind + 1) & (srq->wqe_cnt - 1);
	}
	if (likely(nreq)) {
-		srq->idx_que.head += nreq;
-
		/*
		 * Make sure that descriptors are written before
		 * doorbell record.
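Putting the helpers together, the posting loop now reads roughly as follows. This is a condensed view for orientation, not a drop-in replacement: locking is omitted and the final doorbell write is folded into a hypothetical ring_srq_db() stand-in.

	max_sge = srq->max_gs - srq->rsv_sge;
	for (nreq = 0; wr; ++nreq, wr = wr->next) {
		/* 1. Validate the sge count and queue fullness. */
		ret = check_post_srq_valid(srq, max_sge, wr);
		if (ret) {
			*bad_wr = wr;
			break;
		}

		/* 2. Allocate an idle wqe_idx from the bitmap. */
		ret = get_srq_wqe_idx(srq, &wqe_idx);
		if (unlikely(ret)) {
			*bad_wr = wr;
			break;
		}

		/* 3. Fill the WQE, push wqe_idx into the FIFO, save wr_id. */
		wqe = get_srq_wqe_buf(srq, wqe_idx);
		fill_recv_sge_to_wqe(wr, wqe, max_sge, srq->rsv_sge);
		fill_wqe_idx(srq, wqe_idx);
		srq->wrid[wqe_idx] = wr->wr_id;
	}

	/* 4. Ring the doorbell once for the whole chain. */
	if (likely(nreq))
		ring_srq_db(srq);	/* hypothetical stand-in */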
From: Wenpeng Liang <liangwenpeng@huawei.com>
post_recv only supports QPs of type RC, GSI and UD, so verify the QP type before handling a recv WR.
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index d5a63e4..3adb77d7 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -721,9 +721,21 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp,
 static int check_recv_valid(struct hns_roce_dev *hr_dev,
			    struct hns_roce_qp *hr_qp)
 {
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_qp *ibqp = &hr_qp->ibqp;
+
+	if (unlikely(ibqp->qp_type != IB_QPT_RC &&
+		     ibqp->qp_type != IB_QPT_GSI &&
+		     ibqp->qp_type != IB_QPT_UD)) {
+		ibdev_err(ibdev, "unsupported qp type, qp_type = %d.\n",
+			  ibqp->qp_type);
+		return -EOPNOTSUPP;
+	}
+
	if (unlikely(hr_dev->state >= HNS_ROCE_DEVICE_STATE_RST_DOWN))
		return -EIO;
-	else if (hr_qp->state == IB_QPS_RESET)
+
+	if (hr_qp->state == IB_QPS_RESET)
		return -EINVAL;
return 0;
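From a kernel ULP's point of view, the effect is simply that posting a recv WR to an unsupported QP type now fails fast. A minimal, hypothetical caller would see something like:

	const struct ib_recv_wr *bad_wr;
	int ret;

	/* ib_post_recv() now returns -EOPNOTSUPP from hns for e.g. XRC QPs. */
	ret = ib_post_recv(qp, &wr, &bad_wr);
	if (ret)
		pr_err("post_recv failed: %d\n", ret);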
On Sat, Jan 30, 2021 at 04:57:58PM +0800, Weihang Li wrote:
> There are some issues when using SRQ on HIP08/HIP09, the first part of
> this series is some fixes on them.
>
> In addition, the codes about RQ/SRQ including the creation and post
> recv flow are a bit hard to understand, they need to be refactored.
>
> Lang Cheng (2):
>   RDMA/hns: Allocate one more recv SGE for HIP08
>   RDMA/hns: Use new interfaces to write SRQC
>
> Wenpeng Liang (8):
>   RDMA/hns: Bugfix for checking whether the srq is full when post wr
>   RDMA/hns: Force srq_limit to 0 when creating SRQ
>   RDMA/hns: Fixed wrong judgments in the goto branch
>   RDMA/hns: Remove the reserved WQE of SRQ
>   RDMA/hns: Refactor hns_roce_create_srq()
>   RDMA/hns: Refactor code about SRQ Context
>   RDMA/hns: Refactor hns_roce_v2_post_srq_recv()
>   RDMA/hns: Add verification of QP type when post_recv
>
> Xi Wang (2):
>   RDMA/hns: Refactor post recv flow
>   RDMA/hns: Clear remaining unused sges when post_recv
Applied to for-next, thanks
Jason