From: Xi Wang <wangxi11@huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
This helper iterates over a DMA-mapped SGL and returns contiguous memory blocks aligned to a HW supported page size.
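For illustration, a driver could walk an already DMA-mapped umem in HW-page-sized blocks roughly as below. This is only a usage sketch, not part of the patch: program_hw_page() is a made-up helper and the page size choice is up to the caller.

static void program_umem_blocks(struct hns_roce_dev *hr_dev,
				struct ib_umem *umem, unsigned long pgsz)
{
	struct ib_block_iter biter;

	/* each iteration yields one pgsz-aligned DMA block */
	rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, pgsz)
		program_hw_page(hr_dev, rdma_block_iter_dma_address(&biter));
}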
Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/core/verbs.c | 34 ++++++++++++++++++++++++
 include/rdma/ib_verbs.h | 47 +++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e1ecd4682c096..6467645c84d09 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -2628,3 +2628,37 @@ void ib_drain_qp(struct ib_qp *qp) ib_drain_rq(qp); } EXPORT_SYMBOL(ib_drain_qp); + +void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, unsigned int nents, + unsigned long pgsz) +{ + memset(biter, 0, sizeof(struct ib_block_iter)); + biter->__sg = sglist; + biter->__sg_nents = nents; + + /* Driver provides best block size to use */ + biter->__pg_bit = __fls(pgsz); +} +EXPORT_SYMBOL(__rdma_block_iter_start); + +bool __rdma_block_iter_next(struct ib_block_iter *biter) +{ + unsigned int block_offset; + + if (!biter->__sg_nents || !biter->__sg) + return false; + + biter->__dma_addr = sg_dma_address(biter->__sg) + biter->__sg_advance; + block_offset = biter->__dma_addr & (BIT_ULL(biter->__pg_bit) - 1); + biter->__sg_advance += BIT_ULL(biter->__pg_bit) - block_offset; + + if (biter->__sg_advance >= sg_dma_len(biter->__sg)) { + biter->__sg_advance = 0; + biter->__sg = sg_next(biter->__sg); + biter->__sg_nents--; + } + + return true; +} +EXPORT_SYMBOL(__rdma_block_iter_next); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index c1c1a61d81f6e..43a7426b43a15 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2629,6 +2629,21 @@ struct ib_client { struct list_head list; };
+/* + * IB block DMA iterator + * + * Iterates the DMA-mapped SGL in contiguous memory blocks aligned + * to a HW supported page size. + */ +struct ib_block_iter { + /* internal states */ + struct scatterlist *__sg; /* sg holding the current aligned block */ + dma_addr_t __dma_addr; /* unaligned DMA address of this block */ + unsigned int __sg_nents; /* number of SG entries */ + unsigned int __sg_advance; /* number of bytes to advance in sg in next step */ + unsigned int __pg_bit; /* alignment of current block */ +}; + struct ib_device *ib_alloc_device(size_t size); void ib_dealloc_device(struct ib_device *device);
@@ -2642,6 +2657,38 @@ void ib_unregister_device(struct ib_device *device); int ib_register_client (struct ib_client *client); void ib_unregister_client(struct ib_client *client);
+void __rdma_block_iter_start(struct ib_block_iter *biter, + struct scatterlist *sglist, + unsigned int nents, + unsigned long pgsz); +bool __rdma_block_iter_next(struct ib_block_iter *biter); + +/** + * rdma_block_iter_dma_address - get the aligned dma address of the current + * block held by the block iterator. + * @biter: block iterator holding the memory block + */ +static inline dma_addr_t +rdma_block_iter_dma_address(struct ib_block_iter *biter) +{ + return biter->__dma_addr & ~(BIT_ULL(biter->__pg_bit) - 1); +} + +/** + * rdma_for_each_block - iterate over contiguous memory blocks of the sg list + * @sglist: sglist to iterate over + * @biter: block iterator holding the memory block + * @nents: maximum number of sg entries to iterate over + * @pgsz: best HW supported page size to use + * + * Callers may use rdma_block_iter_dma_address() to get each + * blocks aligned DMA address. + */ +#define rdma_for_each_block(sglist, biter, nents, pgsz) \ + for (__rdma_block_iter_start(biter, sglist, nents, \ + pgsz); \ + __rdma_block_iter_next(biter);) + void *ib_get_client_data(struct ib_device *device, struct ib_client *client); void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data);
From: Xi Wang <wangxi11@huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
This helper does the same as rdma_for_each_block(), except it works on a umem. This simplifies most of the call sites.
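A minimal sketch of the simplification, with illustrative names (hns_roce_mr and set_mr_page() are stand-ins, not from this patch): the umem wrapper hides the sg_head.sgl and nmap arguments of the plain iterator.

static void set_mr_pages(struct hns_roce_mr *mr, struct ib_umem *umem,
			 unsigned long pgsz)
{
	struct ib_block_iter biter;

	/* same walk as rdma_for_each_block(), but driven by the umem */
	rdma_umem_for_each_dma_block(umem, &biter, pgsz)
		set_mr_page(mr, rdma_block_iter_dma_address(&biter));
}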
Link: https://lore.kernel.org/r/4-v2-270386b7e60b+28f4-umem_1_jgg@nvidia.com
Acked-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Acked-by: Shiraz Saleem <shiraz.saleem@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/rdma/ib_umem.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index a1fd63871d172..c9fd050e74f4d 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -36,6 +36,7 @@ #include <linux/list.h> #include <linux/scatterlist.h> #include <linux/workqueue.h> +#include <rdma/ib_verbs.h>
struct ib_ucontext; struct ib_umem_odp; @@ -79,6 +80,28 @@ static inline size_t ib_umem_num_pages(struct ib_umem *umem) return (ib_umem_end(umem) - ib_umem_start(umem)) >> umem->page_shift; }
+static inline void __rdma_umem_block_iter_start(struct ib_block_iter *biter, + struct ib_umem *umem, + unsigned long pgsz) +{ + __rdma_block_iter_start(biter, umem->sg_head.sgl, umem->nmap, pgsz); +} + +/** + * rdma_umem_for_each_dma_block - iterate over contiguous DMA blocks of the umem + * @umem: umem to iterate over + * @pgsz: Page size to split the list into + * + * pgsz must be <= PAGE_SIZE or computed by ib_umem_find_best_pgsz(). The + * returned DMA blocks will be aligned to pgsz and span the range: + * ALIGN_DOWN(umem->address, pgsz) to ALIGN(umem->address + umem->length, pgsz) + * + * Performs exactly ib_umem_num_dma_blocks() iterations. + */ +#define rdma_umem_for_each_dma_block(umem, biter, pgsz) \ + for (__rdma_umem_block_iter_start(biter, umem, pgsz); \ + __rdma_block_iter_next(biter);) + #ifdef CONFIG_INFINIBAND_USER_MEM
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
From: Jason Gunthorpe <jgg@mellanox.com>
mainline inclusion
from mainline-v5.5
commit 515f60004ed985d2b2f03659365752e0b6142986
category: bugfix
bugzilla: NA
CVE: NA
The "ucmd->log_sq_bb_count" variable is a user controlled variable in the 0-255 range. If we shift more than then number of bits in an int then it's undefined behavior (it shift wraps), and potentially the int could become negative.
Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
Link: https://lore.kernel.org/r/20190608092514.GC28890@mwanda
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_qp.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index e0f2413c2a0a3..387f475b4c537 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -435,9 +435,8 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, u8 max_sq_stride = ilog2(roundup_sq_stride);
/* Sanity check SQ size before proceeding */ - if ((u32)(1 << ucmd->log_sq_bb_count) > hr_dev->caps.max_wqes || - ucmd->log_sq_stride > max_sq_stride || - ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { + if (ucmd->log_sq_stride > max_sq_stride || + ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { dev_err(hr_dev->dev, "Check SQ size error! Log sq stride 0x%x\n", ucmd->log_sq_stride); @@ -464,6 +463,10 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, u32 max_cnt; int ret;
+ if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || + hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + return -EINVAL; + ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { dev_err(hr_dev->dev, "Sanity check sq(0x%lx) size fail\n", @@ -471,7 +474,6 @@ static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, return ret; }
- hr_qp->sq.wqe_cnt = 1 << ucmd->log_sq_bb_count; hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) {
From: Xi Wang <wangxi11@huawei.com>
mainline inclusion
from mainline-v5.5
commit 99441ab552f1e1713a562838215c080387a94267
category: bugfix
bugzilla: NA
CVE: NA
Currently, more than 20 lines of duplicate code exist in function 'modify_qp_init_to_init' and function 'modify_qp_reset_to_init', which affects the readability of the code. Consolidate them.
Link: https://lore.kernel.org/r/1562593285-8037-6-git-send-email-oulijun@huawei.co...
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 82 ++++++++++------------
 1 file changed, 38 insertions(+), 44 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d3a70dd82d337..d55158eec47e5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -3884,6 +3884,43 @@ static void hns_roce_get_cqs(struct ib_qp *ibqp, struct hns_roce_cq **send_cq, } }
+static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, + struct hns_roce_v2_qp_context *context, + struct hns_roce_v2_qp_context *qpc_mask) +{ + if (hr_qp->ibqp.qp_type == IB_QPT_GSI) + roce_set_field(context->byte_4_sqpn_tst, + V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, + ilog2((unsigned int)hr_qp->sge.sge_cnt)); + else + roce_set_field(context->byte_4_sqpn_tst, + V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, + hr_qp->sq.max_gs > + HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? + ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + + roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, + ilog2((unsigned int)hr_qp->sq.wqe_cnt)); + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); + + roce_set_field(context->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, + (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || + hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || + hr_qp->ibqp.srq) ? 0 : + ilog2((unsigned int)hr_qp->rq.wqe_cnt)); + + roce_set_field(qpc_mask->byte_20_smac_sgid_idx, + V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); +} + static void modify_qp_reset_to_init(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, @@ -3906,21 +3943,6 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); - - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, 0); - roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, V2_QPC_BYTE_4_SQPN_S, hr_qp->qpn); roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SQPN_M, @@ -3942,19 +3964,7 @@ static void modify_qp_reset_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQWS_M, V2_QPC_BYTE_20_RQWS_S, 0);
- roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, - ilog2((unsigned int)hr_qp->sq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SQ_SHIFT_M, V2_QPC_BYTE_20_SQ_SHIFT_S, 0); - - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, - (hr_qp->ibqp.qp_type == IB_QPT_XRC_INI || - hr_qp->ibqp.qp_type == IB_QPT_XRC_TGT || ibqp->srq) ? 0 : - ilog2((unsigned int)hr_qp->rq.wqe_cnt)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_RQ_SHIFT_M, V2_QPC_BYTE_20_RQ_SHIFT_S, 0); + set_qpc_wqe_cnt(hr_qp, context, qpc_mask);
/* No VLAN need to set 0xFFF */ roce_set_field(context->byte_24_mtu_tc, V2_QPC_BYTE_24_VLAN_ID_M, @@ -4258,22 +4268,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_TST_M, V2_QPC_BYTE_4_TST_S, 0);
- if (ibqp->qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); - - roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, 0); - if (attr_mask & IB_QP_ACCESS_FLAGS) { roce_set_bit(context->byte_76_srqn_op_en, V2_QPC_BYTE_76_RRE_S, !!(attr->qp_access_flags & IB_ACCESS_REMOTE_READ));
From: Yixian Liu <liuyixian@huawei.com>
mainline inclusion
from mainline-v5.5
commit ec6adad0a1e3ef3064c12146b00c2bd1e6835b0c
category: bugfix
bugzilla: NA
CVE: NA
There is no need to define max_post in hns_roce_wq, as it does the same thing as wqe_cnt.
Link: https://lore.kernel.org/r/1572952082-6681-2-git-send-email-liweihang@hisilic...
Signed-off-by: Yixian Liu <liuyixian@huawei.com>
Signed-off-by: Weihang Li <liweihang@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h | 1 -
 drivers/infiniband/hw/hns/hns_roce_qp.c | 8 ++++----
 2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c5edeaa6ee8e8..a1f1bd855d921 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -491,7 +491,6 @@ struct hns_roce_wq { u64 *wrid; /* Work request ID */ spinlock_t lock; int wqe_cnt; /* WQE num */ - u32 max_post; int max_gs; u32 rsv_sge; int offset; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 387f475b4c537..153f42a4e3c8b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -421,7 +421,7 @@ static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, * hr_qp->rq.max_gs); }
- cap->max_recv_wr = hr_qp->rq.max_post = hr_qp->rq.wqe_cnt; + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0; @@ -736,7 +736,7 @@ static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, hr_qp->buff_size = size;
/* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.max_post = hr_qp->sq.wqe_cnt; + cap->max_send_wr = hr_qp->sq.wqe_cnt; cap->max_send_sge = hr_qp->sq.max_gs;
/* We don't support inline sends for kernel QPs (yet) */ @@ -1522,7 +1522,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, u32 cur;
cur = hr_wq->head - hr_wq->tail; - if (likely(cur + nreq < hr_wq->max_post)) + if (likely(cur + nreq < hr_wq->wqe_cnt)) return false;
hr_cq = to_hr_cq(ib_cq); @@ -1530,7 +1530,7 @@ bool hns_roce_wq_overflow(struct hns_roce_wq *hr_wq, int nreq, cur = hr_wq->head - hr_wq->tail; spin_unlock(&hr_cq->lock);
- return cur + nreq >= hr_wq->max_post; + return cur + nreq >= hr_wq->wqe_cnt; } EXPORT_SYMBOL_GPL(hns_roce_wq_overflow);
From: Lijun Ou <oulijun@huawei.com>
mainline inclusion
from mainline-v5.6
commit 468d020e2f02867b8ec561461a1689cd4365e493
category: bugfix
bugzilla: NA
CVE: NA
The driver should first check whether the sge is valid, then fill the valid sge and the calculated total into the hardware; otherwise, invalid sges will cause an error.
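A minimal sketch of the counting step this patch adds (count_valid_sges() is an illustrative name): only sges with a non-zero length are summed and counted before the WQE fields are written.

static int count_valid_sges(const struct ib_send_wr *wr, u32 *msg_len)
{
	int valid_num_sge = 0;
	int i;

	*msg_len = 0;
	for (i = 0; i < wr->num_sge; i++) {
		/* skip zero-length sges instead of passing them to HW */
		if (likely(wr->sg_list[i].length)) {
			*msg_len += wr->sg_list[i].length;
			valid_num_sge++;
		}
	}

	return valid_num_sge;
}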
Fixes: 52e3b42a2f58 ("RDMA/hns: Filter for zero length of sge in hip08 kernel mode")
Fixes: 7bdee4158b37 ("RDMA/hns: Fill sq wqe context of ud type in hip08")
Link: https://lore.kernel.org/r/1578571852-13704-1-git-send-email-liweihang@huawei...
Signed-off-by: Lijun Ou <oulijun@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 44 +++++++++++++---------
 1 file changed, 27 insertions(+), 17 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index d55158eec47e5..1c741a3485c56 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -130,10 +130,10 @@ static void set_frmr_seg(struct hns_roce_v2_rc_send_wqe *rc_sq_wqe,
#ifdef CONFIG_KERNEL_419 static void set_extend_sge(struct hns_roce_qp *qp, const struct ib_send_wr *wr, - unsigned int *sge_ind) + unsigned int *sge_ind, int valid_num_sge) #else static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, - unsigned int *sge_ind) + unsigned int *sge_ind, int valid_num_sge) #endif { struct hns_roce_v2_wqe_data_seg *dseg; @@ -147,7 +147,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr,
if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; - extend_sge_num = wr->num_sge - num_in_wqe; + extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; shift = qp->hr_buf.page_shift;
@@ -248,20 +248,23 @@ static int set_atomic_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, static int set_rwqe_data_seg(struct ib_qp *ibqp, const struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, + int valid_num_sge, const struct ib_send_wr **bad_wr) #else static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, struct hns_roce_v2_rc_send_wqe *rc_sq_wqe, void *wqe, unsigned int *sge_ind, + int valid_num_sge, struct ib_send_wr **bad_wr) #endif { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_v2_wqe_data_seg *dseg = wqe; struct hns_roce_qp *qp = to_hr_qp(ibqp); + int j = 0; int i;
- if (wr->send_flags & IB_SEND_INLINE && wr->num_sge) { + if (wr->send_flags & IB_SEND_INLINE && valid_num_sge) { if (le32_to_cpu(rc_sq_wqe->msg_len) > hr_dev->caps.max_sq_inline) { *bad_wr = wr; @@ -285,7 +288,7 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_bit(rc_sq_wqe->byte_4, V2_RC_SEND_WQE_BYTE_4_INLINE_S, 1); } else { - if (wr->num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { + if (valid_num_sge <= HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) { for (i = 0; i < wr->num_sge; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); @@ -293,19 +296,21 @@ static int set_rwqe_data_seg(struct ib_qp *ibqp, struct ib_send_wr *wr, } } } else { - for (i = 0; i < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { + for (i = 0; i < wr->num_sge && + j < HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; i++) { if (likely(wr->sg_list[i].length)) { set_data_seg_v2(dseg, wr->sg_list + i); dseg++; + j++; } }
- set_extend_sge(qp, wr, sge_ind); + set_extend_sge(qp, wr, sge_ind, valid_num_sge); }
roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, - V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, wr->num_sge); + V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, valid_num_sge); }
return 0; @@ -342,6 +347,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, unsigned int sge_ind; unsigned int owner_bit; unsigned long flags = 0; + int valid_num_sge; unsigned int ind; void *wqe = NULL; u32 tmp_len; @@ -404,8 +410,16 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
owner_bit = ~(((qp->sq.head + nreq) >> ilog2(qp->sq.wqe_cnt)) & 0x1); + valid_num_sge = 0; tmp_len = 0;
+ for (i = 0; i < wr->num_sge; i++) { + if (likely(wr->sg_list[i].length)) { + tmp_len += wr->sg_list[i].length; + valid_num_sge++; + } + } + /* Corresponding to the QP type, wqe process separately */ if (ibqp->qp_type == IB_QPT_GSI) { ah = to_hr_ah(ud_wr(wr)->ah); @@ -434,9 +448,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, V2_UD_SEND_WQE_BYTE_40_LBI_S, hr_dev->loop_idc);
- for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length; - ud_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(ud_sq_wqe->msg_len) + tmp_len);
@@ -480,7 +491,7 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_field(ud_sq_wqe->byte_16, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_M, V2_UD_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge);
roce_set_field(ud_sq_wqe->byte_20, V2_UD_SEND_WQE_BYTE_20_MSG_START_SGE_IDX_M, @@ -534,14 +545,12 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, memcpy(&ud_sq_wqe->dgid[0], &ah->av.dgid[0], GID_LEN_V2);
- set_extend_sge(qp, wr, &sge_ind); + set_extend_sge(qp, wr, &sge_ind, valid_num_sge); ind++; } else if (ibqp->qp_type == IB_QPT_RC || ibqp->qp_type == IB_QPT_UC) { rc_sq_wqe = wqe; memset(rc_sq_wqe, 0, sizeof(*rc_sq_wqe)); - for (i = 0; i < wr->num_sge; i++) - tmp_len += wr->sg_list[i].length;
rc_sq_wqe->msg_len = cpu_to_le32(le32_to_cpu(rc_sq_wqe->msg_len) + tmp_len); @@ -668,10 +677,11 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, roce_set_field(rc_sq_wqe->byte_16, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_M, V2_RC_SEND_WQE_BYTE_16_SGE_NUM_S, - wr->num_sge); + valid_num_sge); } else if (wr->opcode != IB_WR_REG_MR) { ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, - wqe, &sge_ind, bad_wr); + wqe, &sge_ind, + valid_num_sge, bad_wr); if (ret) goto out; }
From: Leon Romanovsky <leonro@mellanox.com>
mainline inclusion
from mainline-v5.2
commit 574258222281221444b561b05c3a5fa85947a80c
category: bugfix
bugzilla: NA
CVE: NA
Verbs destroy callbacks are synchronous operations and can't be delayed. The expectation is that after the driver returns from the destroy function, the memory can be freed and the user won't be able to access it again.
Ditch the workqueue implementation used in the HNS driver.
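A hedged sketch of the synchronous flow this patch switches to; the CQ cleaning, locking and buffer teardown of the real hns_roce_v1_destroy_qp() are omitted for brevity and the function name is illustrative.

static int sketch_v1_destroy_qp(struct ib_qp *ibqp)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
	int ret;

	/* Move the QP to RESET synchronously instead of via a work item. */
	ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state,
				    IB_QPS_RESET);
	if (ret)
		return ret;

	hns_roce_qp_remove(hr_dev, hr_qp);
	hns_roce_qp_free(hr_dev, hr_qp);

	/* RC QP, release QPN */
	if (hr_qp->ibqp.qp_type == IB_QPT_RC)
		hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);

	kfree(hr_qp);

	return 0;
}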
Fixes: d838c481e025 ("IB/hns: Fix the bug when destroy qp")
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: oulijun <oulijun@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_common.h | 33 --
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 365 +-------------------
 drivers/infiniband/hw/hns/hns_roce_hw_v1.h | 12 -
 3 files changed, 12 insertions(+), 398 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 981d723b7f264..8eb4b8efcff2b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -59,32 +59,6 @@ #define roce_set_bit(origin, shift, val) \ roce_set_field((origin), (1ul << (shift)), (shift), (val))
-/* - * roce_hw_index_cmp_lt - Compare two hardware index values in hisilicon - * SOC, check if a is less than b. - * @a: hardware index value - * @b: hardware index value - * @bits: the number of bits of a and b, range: 0~31. - * - * Hardware index increases continuously till max value, and then restart - * from zero, again and again. Because the bits of reg field is often - * limited, the reg field can only hold the low bits of the hardware index - * in hisilicon SOC. - * In some scenes we need to compare two values(a,b) getted from two reg - * fields in this driver, for example: - * If a equals 0xfffe, b equals 0x1 and bits equals 16, we think b has - * incresed from 0xffff to 0x1 and a is less than b. - * If a equals 0xfffe, b equals 0x0xf001 and bits equals 16, we think a - * is bigger than b. - * - * Return true on a less than b, otherwise false. - */ -#define roce_hw_index_mask(bits) ((1ul << (bits)) - 1) -#define roce_hw_index_shift(bits) (32 - (bits)) -#define roce_hw_index_cmp_lt(a, b, bits) \ - ((int)((((a) - (b)) & roce_hw_index_mask(bits)) << \ - roce_hw_index_shift(bits)) < 0) - #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
@@ -273,8 +247,6 @@ #define ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M \ (((1UL << 28) - 1) << ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)
-#define ROCEE_SDB_PTR_CMP_BITS 28 - #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_S 0 #define ROCEE_SDB_INV_CNT_SDB_INV_CNT_M \ (((1UL << 16) - 1) << ROCEE_SDB_INV_CNT_SDB_INV_CNT_S) @@ -355,13 +327,8 @@ #define ROCEE_CAEP_AE_MASK_REG 0x6C8 #define ROCEE_CAEP_AE_ST_REG 0x6CC
-#define ROCEE_SDB_ISSUE_PTR_REG 0x758 -#define ROCEE_SDB_SEND_PTR_REG 0x75C #define ROCEE_CAEP_CQE_WCMD_EMPTY 0x850 #define ROCEE_SCAEP_WR_CQE_CNT 0x8D0 -#define ROCEE_SDB_INV_CNT_REG 0x9A4 -#define ROCEE_SDB_RETRY_CNT_REG 0x9AC -#define ROCEE_TSP_BP_ST_REG 0x9EC #define ROCEE_ECC_UCERR_ALM0_REG 0xB34 #define ROCEE_ECC_CERR_ALM0_REG 0xB40
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 6aca77ca533ce..117a6118befd2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -1527,38 +1527,6 @@ static int hns_roce_v1_reset(struct hns_roce_dev *hr_dev, bool dereset) return ret; }
-static int hns_roce_des_qp_init(struct hns_roce_dev *hr_dev) -{ - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_v1_priv *priv; - struct hns_roce_des_qp *des_qp; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - des_qp = &priv->des_qp; - - des_qp->requeue_flag = 1; - des_qp->qp_wq = create_singlethread_workqueue("hns_roce_destroy_qp"); - if (!des_qp->qp_wq) { - dev_err(dev, "Create destroy qp workqueue failed!\n"); - return -ENOMEM; - } - - return 0; -} - -static void hns_roce_des_qp_free(struct hns_roce_dev *hr_dev) -{ - struct hns_roce_v1_priv *priv; - struct hns_roce_des_qp *des_qp; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - des_qp = &priv->des_qp; - - des_qp->requeue_flag = 0; - flush_workqueue(des_qp->qp_wq); - destroy_workqueue(des_qp->qp_wq); -} - static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev) { int i = 0; @@ -1678,12 +1646,6 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) goto error_failed_tptr_init; }
- ret = hns_roce_des_qp_init(hr_dev); - if (ret) { - dev_err(dev, "des qp init failed!\n"); - goto error_failed_des_qp_init; - } - ret = hns_roce_free_mr_init(hr_dev); if (ret) { dev_err(dev, "free mr init failed!\n"); @@ -1695,9 +1657,6 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev) return 0;
error_failed_free_mr_init: - hns_roce_des_qp_free(hr_dev); - -error_failed_des_qp_init: hns_roce_tptr_free(hr_dev);
error_failed_tptr_init: @@ -1715,7 +1674,6 @@ static void hns_roce_v1_exit(struct hns_roce_dev *hr_dev) { hns_roce_port_enable(hr_dev, HNS_ROCE_PORT_DOWN); hns_roce_free_mr_free(hr_dev); - hns_roce_des_qp_free(hr_dev); hns_roce_tptr_free(hr_dev); hns_roce_bt_free(hr_dev); hns_roce_raq_free(hr_dev); @@ -3675,281 +3633,6 @@ static int hns_roce_v1_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, hns_roce_v1_q_qp(ibqp, qp_attr, qp_attr_mask, qp_init_attr); }
-static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev, - u32 *old_send, u32 *old_retry, - u32 *tsp_st, u32 *success_flags) -{ - __le32 *old_send_tmp, *old_retry_tmp; - u32 sdb_retry_cnt; - u32 sdb_send_ptr; - u32 cur_cnt, old_cnt; - __le32 tmp, tmp1; - u32 send_ptr; - - sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - tmp = cpu_to_le32(sdb_send_ptr); - tmp1 = cpu_to_le32(sdb_retry_cnt); - cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - - old_send_tmp = (__le32 *)old_send; - old_retry_tmp = (__le32 *)old_retry; - tmp = cpu_to_le32(*tsp_st); - if (!roce_get_bit(tmp, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) { - old_cnt = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(*old_retry_tmp, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) - *success_flags = 1; - } else { - old_cnt = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S); - if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) { - *success_flags = 1; - } else { - send_ptr = roce_get_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) + - roce_get_field(tmp1, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M, - ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S); - roce_set_field(*old_send_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S, - send_ptr); - } - } -} - -static int check_qp_db_process_status(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - u32 sdb_issue_ptr, - u32 *sdb_inv_cnt, - u32 *wait_stage) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_send_ptr, old_send; - __le32 sdb_issue_ptr_tmp; - __le32 sdb_send_ptr_tmp; - u32 success_flags = 0; - unsigned long end; - u32 old_retry; - u32 inv_cnt; - u32 tsp_st; - __le32 tmp; - - if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 || - *wait_stage < HNS_ROCE_V1_DB_STAGE1) { - dev_err(dev, "QP(0x%lx) db status wait stage(%d) error!\n", - hr_qp->qpn, *wait_stage); - return -EINVAL; - } - - /* Calculate the total timeout for the entire verification process */ - end = msecs_to_jiffies(HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS) + jiffies; - - if (*wait_stage == HNS_ROCE_V1_DB_STAGE1) { - /* Query db process status, until hw process completely */ - sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - while (roce_hw_index_cmp_lt(sdb_send_ptr, sdb_issue_ptr, - ROCEE_SDB_PTR_CMP_BITS)) { - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage1 timeout.\n", - hr_qp->qpn); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - sdb_send_ptr = roce_read(hr_dev, - ROCEE_SDB_SEND_PTR_REG); - } - - sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr); - sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr); - if (roce_get_field(sdb_issue_ptr_tmp, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M, - ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) == - roce_get_field(sdb_send_ptr_tmp, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M, - ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) { - old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG); - old_retry = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG); - - do { - tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG); - tmp = cpu_to_le32(tsp_st); - if (roce_get_bit(tmp, - ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) { - *wait_stage = 
HNS_ROCE_V1_DB_WAIT_OK; - return 0; - } - - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n" - "issue 0x%x send 0x%x.\n", - hr_qp->qpn, - le32_to_cpu(sdb_issue_ptr_tmp), - le32_to_cpu(sdb_send_ptr_tmp)); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - - hns_roce_check_sdb_status(hr_dev, &old_send, - &old_retry, &tsp_st, - &success_flags); - } while (!success_flags); - } - - *wait_stage = HNS_ROCE_V1_DB_STAGE2; - - /* Get list pointer */ - *sdb_inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - dev_dbg(dev, "QP(0x%lx) db process stage2. inv cnt = 0x%x.\n", - hr_qp->qpn, *sdb_inv_cnt); - } - - if (*wait_stage == HNS_ROCE_V1_DB_STAGE2) { - /* Query db's list status, until hw reversal */ - inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - while (roce_hw_index_cmp_lt(inv_cnt, - *sdb_inv_cnt + SDB_INV_CNT_OFFSET, - ROCEE_SDB_CNT_CMP_BITS)) { - if (!time_before(jiffies, end)) { - dev_dbg(dev, "QP(0x%lx) db process stage2 timeout. inv cnt 0x%x.\n", - hr_qp->qpn, inv_cnt); - return 0; - } - - msleep(HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS); - inv_cnt = roce_read(hr_dev, ROCEE_SDB_INV_CNT_REG); - } - - *wait_stage = HNS_ROCE_V1_DB_WAIT_OK; - } - - return 0; -} - -static int check_qp_reset_state(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_qp_work *qp_work_entry, - int *is_timeout) -{ - struct device *dev = &hr_dev->pdev->dev; - u32 sdb_issue_ptr; - int ret; - - if (hr_qp->state != IB_QPS_RESET) { - /* Set qp to ERR, waiting for hw complete processing all dbs */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_ERR); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to ERR failed!\n", - hr_qp->qpn); - return ret; - } - - /* Record issued doorbell */ - sdb_issue_ptr = roce_read(hr_dev, ROCEE_SDB_ISSUE_PTR_REG); - qp_work_entry->sdb_issue_ptr = sdb_issue_ptr; - qp_work_entry->db_wait_stage = HNS_ROCE_V1_DB_STAGE1; - - /* Query db process status, until hw process completely */ - ret = check_qp_db_process_status(hr_dev, hr_qp, sdb_issue_ptr, - &qp_work_entry->sdb_inv_cnt, - &qp_work_entry->db_wait_stage); - if (ret) { - dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - hr_qp->qpn); - return ret; - } - - if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK) { - qp_work_entry->sche_cnt = 0; - *is_timeout = 1; - return 0; - } - - /* Modify qp to reset before destroying qp */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_RESET); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", - hr_qp->qpn); - return ret; - } - } - - return 0; -} - -static void hns_roce_v1_destroy_qp_work_fn(struct work_struct *work) -{ - struct hns_roce_qp_work *qp_work_entry; - struct hns_roce_v1_priv *priv; - struct hns_roce_dev *hr_dev; - struct hns_roce_qp *hr_qp; - struct device *dev; - unsigned long qpn; - int ret; - - qp_work_entry = container_of(work, struct hns_roce_qp_work, work); - hr_dev = to_hr_dev(qp_work_entry->ib_dev); - dev = &hr_dev->pdev->dev; - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - hr_qp = qp_work_entry->qp; - qpn = hr_qp->qpn; - - dev_dbg(dev, "Schedule destroy QP(0x%lx) work.\n", qpn); - - qp_work_entry->sche_cnt++; - - /* Query db process status, until hw process completely */ - ret = check_qp_db_process_status(hr_dev, hr_qp, - qp_work_entry->sdb_issue_ptr, - &qp_work_entry->sdb_inv_cnt, - &qp_work_entry->db_wait_stage); - if (ret) { - dev_err(dev, "Check QP(0x%lx) db process status failed!\n", - qpn); - 
return; - } - - if (qp_work_entry->db_wait_stage != HNS_ROCE_V1_DB_WAIT_OK && - priv->des_qp.requeue_flag) { - queue_work(priv->des_qp.qp_wq, work); - return; - } - - /* Modify qp to reset before destroying qp */ - ret = hns_roce_v1_modify_qp(&hr_qp->ibqp, NULL, 0, hr_qp->state, - IB_QPS_RESET); - if (ret) { - dev_err(dev, "Modify QP(0x%lx) to RST failed!\n", qpn); - return; - } - - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); - - /* RC QP, release QPN */ - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - hns_roce_release_range_qp(hr_dev, qpn, 1); - - kfree(hr_qp); - kfree(qp_work_entry); - - dev_dbg(dev, "Accomplished destroy QP(0x%lx) work.\n", qpn); -} - static void get_cqs(struct ib_qp *ibqp, struct hns_roce_cq **send_cq, struct hns_roce_cq **recv_cq) { @@ -3961,20 +3644,13 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); - struct device *dev = &hr_dev->pdev->dev; - struct hns_roce_qp_work qp_work_entry; - struct hns_roce_qp_work *qp_work; - struct hns_roce_v1_priv *priv; struct hns_roce_cq *send_cq, *recv_cq; - int is_user = !!ibqp->pd->uobject; - int is_timeout = 0; + bool is_user = ibqp->uobject; int ret;
- ret = check_qp_reset_state(hr_dev, hr_qp, &qp_work_entry, &is_timeout); - if (ret) { - dev_err(dev, "QP reset state check failed(%d)!\n", ret); + ret = hns_roce_v1_modify_qp(ibqp, NULL, 0, hr_qp->state, IB_QPS_RESET); + if (ret) return ret; - }
get_cqs(&hr_qp->ibqp, &send_cq, &recv_cq);
@@ -3990,14 +3666,12 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) } hns_roce_unlock_cqs(send_cq, recv_cq);
- if (!is_timeout) { - hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp); + hns_roce_qp_remove(hr_dev, hr_qp); + hns_roce_qp_free(hr_dev, hr_qp);
- /* RC QP, release QPN */ - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - } + /* RC QP, release QPN */ + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt);
@@ -4010,25 +3684,10 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); }
- if (!is_timeout) { - kfree(hr_qp); - } else { - qp_work = kzalloc(sizeof(*qp_work), GFP_KERNEL); - if (!qp_work) - return -ENOMEM; - - INIT_WORK(&qp_work->work, hns_roce_v1_destroy_qp_work_fn); - qp_work->ib_dev = &hr_dev->ib_dev; - qp_work->qp = hr_qp; - qp_work->db_wait_stage = qp_work_entry.db_wait_stage; - qp_work->sdb_issue_ptr = qp_work_entry.sdb_issue_ptr; - qp_work->sdb_inv_cnt = qp_work_entry.sdb_inv_cnt; - qp_work->sche_cnt = qp_work_entry.sche_cnt; - - priv = (struct hns_roce_v1_priv *)hr_dev->priv; - queue_work(priv->des_qp.qp_wq, &qp_work->work); - dev_dbg(dev, "Begin destroy QP(0x%lx) work.\n", hr_qp->qpn); - } + if (hr_qp->ibqp.qp_type == IB_QPT_RC) + kfree(hr_qp); + else + kfree(hr_qp);
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h index 66440147d9eb2..7edd2eb8214e8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h @@ -110,11 +110,6 @@ #define HNS_ROCE_V1_EXT_ODB_ALFUL \ (HNS_ROCE_V1_EXT_ODB_DEPTH - HNS_ROCE_V1_DB_RSVD)
-#define HNS_ROCE_V1_DB_WAIT_OK 0 -#define HNS_ROCE_V1_DB_STAGE1 1 -#define HNS_ROCE_V1_DB_STAGE2 2 -#define HNS_ROCE_V1_CHECK_DB_TIMEOUT_MSECS 10000 -#define HNS_ROCE_V1_CHECK_DB_SLEEP_MSECS 20 #define HNS_ROCE_V1_FREE_MR_TIMEOUT_MSECS 50000 #define HNS_ROCE_V1_RECREATE_LP_QP_TIMEOUT_MSECS 10000 #define HNS_ROCE_V1_FREE_MR_WAIT_VALUE 5 @@ -162,7 +157,6 @@ #define SQ_PSN_SHIFT 8 #define QKEY_VAL 0x80010000 #define SDB_INV_CNT_OFFSET 8 -#define SDB_ST_CMP_VAL 8
#define HNS_ROCE_CEQ_DEFAULT_INTERVAL 0x10 #define HNS_ROCE_CEQ_DEFAULT_BURST_NUM 0x10 @@ -1068,11 +1062,6 @@ struct hns_roce_qp_work { u32 sche_cnt; };
-struct hns_roce_des_qp { - struct workqueue_struct *qp_wq; - int requeue_flag; -}; - struct hns_roce_mr_free_work { struct work_struct work; struct ib_device *ib_dev; @@ -1100,7 +1089,6 @@ struct hns_roce_v1_priv { struct hns_roce_raq_table raq_table; struct hns_roce_bt_table bt_table; struct hns_roce_tptr_table tptr_table; - struct hns_roce_des_qp des_qp; struct hns_roce_free_mr free_mr; };
From: Xi Wang <wangxi11@huawei.com>
mainline inclusion
from mainline-v5.7
commit e365b26c6b66fe2bc800399912f7cbc875edf693
category: bugfix
bugzilla: NA
CVE: NA
Wrap the duplicate code in the hip08 and hip06 qp destruction processes as hns_roce_qp_destroy() to simplify the qp destroy flow.
Link: https://lore.kernel.org/r/1582526258-13825-2-git-send-email-liweihang@huawei...
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h | 4 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 22 +-----------
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 37 +-------------------
 drivers/infiniband/hw/hns/hns_roce_qp.c | 38 +++++++++++++++++++++
 4 files changed, 41 insertions(+), 60 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index a1f1bd855d921..1276502dfe431 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -1471,9 +1471,7 @@ void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); void hns_roce_qp_remove(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void hns_roce_qp_free(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); -void hns_roce_release_range_qp(struct hns_roce_dev *hr_dev, int base_qpn, - int cnt); +void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp); __be32 send_ieth(struct ib_send_wr *wr); int to_hr_qp_type(int qp_type);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 117a6118befd2..de7571a056895 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -3664,30 +3664,10 @@ int hns_roce_v1_destroy_qp(struct ib_qp *ibqp) if (send_cq && send_cq != recv_cq) __hns_roce_v1_cq_clean(send_cq, hr_qp->qpn, NULL); } - hns_roce_unlock_cqs(send_cq, recv_cq);
hns_roce_qp_remove(hr_dev, hr_qp); - hns_roce_qp_free(hr_dev, hr_qp);
- /* RC QP, release QPN */ - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - - hns_roce_mtt_cleanup(hr_dev, &hr_qp->mtt); - - if (is_user) - ib_umem_release(hr_qp->umem); - else { - kfree(hr_qp->sq.wrid); - kfree(hr_qp->rq.wrid); - - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - } - - if (hr_qp->ibqp.qp_type == IB_QPT_RC) - kfree(hr_qp); - else - kfree(hr_qp); + hns_roce_qp_destroy(hr_dev, hr_qp);
return 0; } diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1c741a3485c56..afca2b885977f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5500,41 +5500,6 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, hns_roce_unlock_cqs(send_cq, recv_cq); spin_unlock_irqrestore(&hr_dev->qp_lock, flags);
- hns_roce_qp_free(hr_dev, hr_qp); - - /* Not special_QP, free their QPN */ - if ((hr_qp->ibqp.qp_type == IB_QPT_RC) || - (hr_qp->ibqp.qp_type == IB_QPT_UC) || - (hr_qp->ibqp.qp_type == IB_QPT_UD)) - hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); - - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - - if (is_user) { - if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) - hns_roce_db_unmap_user( - to_hr_ucontext(hr_qp->ibqp.uobject->context), - &hr_qp->sdb); - - if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) - hns_roce_db_unmap_user( - to_hr_ucontext(hr_qp->ibqp.uobject->context), - &hr_qp->rdb); - ib_umem_release(hr_qp->umem); - } else { - kfree(hr_qp->sq.wrid); - kfree(hr_qp->rq.wrid); - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) - hns_roce_free_db(hr_dev, &hr_qp->rdb); - } - - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hr_qp->rq.wqe_cnt) { - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); - kfree(hr_qp->rq_inl_buf.wqe_list); - } - return ret; }
@@ -5554,7 +5519,7 @@ static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp) dev_err(hr_dev->dev, "Destroy qp 0x%06lx failed(%d)\n", hr_qp->qpn, ret);
- kfree(hr_qp); + hns_roce_qp_destroy(hr_dev, hr_qp); return 0; }
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 153f42a4e3c8b..45fc6ee5b67e7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -1189,6 +1189,44 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, return ret; }
+void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + hns_roce_qp_free(hr_dev, hr_qp); + + /* Not special_QP, free their QPN */ + if (hr_qp->ibqp.qp_type != IB_QPT_GSI) + hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1); + + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); + + if (hr_qp->ibqp.pd->uobject) { + struct hns_roce_ucontext *context = + to_hr_ucontext(hr_qp->ibqp.pd->uobject->context); + + if (hr_qp->sq.wqe_cnt && (hr_qp->sdb_en == 1)) + hns_roce_db_unmap_user(context, &hr_qp->sdb); + + if (hr_qp->rq.wqe_cnt && (hr_qp->rdb_en == 1)) + hns_roce_db_unmap_user(context, &hr_qp->rdb); + } else { + kfree(hr_qp->sq.wrid); + kfree(hr_qp->rq.wrid); + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + if (hr_qp->rq.wqe_cnt) + hns_roce_free_db(hr_dev, &hr_qp->rdb); + } + ib_umem_release(hr_qp->umem); + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) { + kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); + kfree(hr_qp->rq_inl_buf.wqe_list); + } + + kfree(hr_qp); +} +EXPORT_SYMBOL_GPL(hns_roce_qp_destroy); + struct ib_qp *hns_roce_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata)
From: Xi Wang <wangxi11@huawei.com>
mainline inclusion
from mainline-v5.7
commit 24c22112b9c2084a362c6fe7d7ddb2bbef5b4a2e
category: bugfix
bugzilla: NA
CVE: NA
Encapsulate qp buffer allocation related code into 3 functions: alloc_qp_buf(), map_wqe_buf() and free_qp_buf().
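A simplified restatement of the new allocation path, for orientation only: the inline-RQ handling and error unwinding of the real alloc_qp_buf() are trimmed, and sketch_alloc_qp_buf() is a made-up name.

static int sketch_alloc_qp_buf(struct hns_roce_dev *hr_dev,
			       struct hns_roce_qp *hr_qp,
			       struct ib_udata *udata, unsigned long addr)
{
	u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
	int ret;

	if (udata) {
		/* user QP: pin the user-space WQE buffer */
		hr_qp->umem = ib_umem_get(hr_qp->ibqp.pd->uobject->context,
					  addr, hr_qp->buff_size, 0, 0);
		if (IS_ERR(hr_qp->umem))
			return PTR_ERR(hr_qp->umem);
	} else {
		/* kernel QP: allocate a kernel WQE buffer */
		ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size,
					 (1 << page_shift) * 2,
					 &hr_qp->hr_buf, page_shift);
		if (ret)
			return ret;
	}

	/* map_wqe_buf() then attaches the pages to the QP's MTR */
	return map_wqe_buf(hr_dev, hr_qp, page_shift, !!udata);
}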
Link: https://lore.kernel.org/r/1582526258-13825-5-git-send-email-liweihang@huawei...
Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Weihang Li <liweihang@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h | 1 -
 drivers/infiniband/hw/hns/hns_roce_qp.c | 269 +++++++++++---------
 2 files changed, 143 insertions(+), 127 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 1276502dfe431..0b1d3a5e4ca51 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -772,7 +772,6 @@ struct hns_roce_qp { /* this define must less than HNS_ROCE_MAX_BT_REGION */ #define HNS_ROCE_WQE_REGION_MAX 3 struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX]; - int region_cnt; int wqe_bt_pg_shift;
u32 buff_size; diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 45fc6ee5b67e7..3a615c32749f6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -837,22 +837,145 @@ static void hns_roce_add_cq_to_qp(struct hns_roce_dev *hr_dev, } }
+static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u32 page_shift, bool is_user) +{ + dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL }; + struct hns_roce_buf_region *r; + int region_count; + int buf_count; + int ret; + int i; + + region_count = split_wqe_buf_region(hr_dev, hr_qp, hr_qp->regions, + ARRAY_SIZE(hr_qp->regions), page_shift); + + /* alloc a tmp list to store WQE buffers address */ + ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, region_count); + if (ret) { + dev_err(hr_dev->dev, "Failed to alloc WQE buffer list\n"); + return ret; + } + + for (i = 0; i < region_count; i++) { + r = &hr_qp->regions[i]; + if (is_user) + buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i], + r->count, r->offset, hr_qp->umem, + page_shift); + else + buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], + r->count, r->offset, &hr_qp->hr_buf); + + if (buf_count != r->count) { + dev_err(hr_dev->dev, "Failed to get %s WQE buf, expect %d = %d.\n", + is_user ? "user" : "kernel", + r->count, buf_count); + ret = -ENOBUFS; + goto done; + } + } + + hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions, + region_count); + hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, + page_shift); + ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, hr_qp->regions, + region_count); + if (ret) + dev_err(hr_dev->dev, "Failed to attatch WQE's mtr\n"); + + goto done; + + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); +done: + hns_roce_free_buf_list(buf_list, region_count); + + return ret; +} + +static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, unsigned long addr) +{ + u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + bool is_rq_buf_inline; + int ret; + + is_rq_buf_inline = (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hns_roce_qp_has_rq(init_attr); + if (is_rq_buf_inline) { + ret = hns_roce_alloc_recv_inline_buffer(hr_qp, init_attr); + if (ret) { + dev_err(hr_dev->dev, "Failed to alloc inline RQ buffer\n"); + return ret; + } + } + + if (hr_qp->ibqp.pd->uobject->context) { + hr_qp->umem = ib_umem_get(hr_qp->ibqp.pd->uobject->context, addr, hr_qp->buff_size, 0, 0); + if (IS_ERR(hr_qp->umem)) { + ret = PTR_ERR(hr_qp->umem); + goto err_inline; + } + } else { + ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, + (1 << page_shift) * 2, + &hr_qp->hr_buf, page_shift); + if (ret) + goto err_inline; + } + + ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata); + if (ret) + goto err_alloc; + + return 0; + +err_inline: + if (is_rq_buf_inline) + hns_roce_free_recv_inline_buffer(hr_qp); + +err_alloc: + if (udata) { + ib_umem_release(hr_qp->umem); + hr_qp->umem = NULL; + } else { + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + } + + dev_err(hr_dev->dev, "Failed to alloc WQE buffer, ret %d.\n", ret); + + return ret; +} + +static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +{ + hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); + if (hr_qp->umem) { + ib_umem_release(hr_qp->umem); + hr_qp->umem = NULL; + } + + if (hr_qp->hr_buf.nbufs > 0) + hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + + if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && + hr_qp->rq.wqe_cnt) + hns_roce_free_recv_inline_buffer(hr_qp); +} + static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata, unsigned long sqpn, struct 
hns_roce_qp *hr_qp) { - dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = {}; struct device *dev = hr_dev->dev; struct hns_roce_ib_create_qp ucmd; struct hns_roce_ib_create_qp_resp resp = {}; - struct hns_roce_buf_region *r; unsigned long qpn = 0; - u32 page_shift; - int buf_count; int ret; - int i;
mutex_init(&hr_qp->mutex); spin_lock_init(&hr_qp->sq.lock); @@ -875,21 +998,11 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_out; }
- if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hns_roce_qp_has_rq(init_attr)) { - ret = hns_roce_alloc_recv_inline_buffer(hr_qp, init_attr); - if (ret) { - dev_err(dev, "allocate receive inline buffer failed\n"); - goto err_out; - } - } - - page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; if (ib_pd->uobject) { if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { dev_err(dev, "ib_copy_from_udata error for create qp.\n"); ret = -EFAULT; - goto err_alloc_recv_inline_buffer; + goto err_out; }
ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, @@ -897,40 +1010,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ret) { dev_err(dev, "hns_roce_set_user_sq_size error(%d) for create qp.\n", ret); - goto err_alloc_recv_inline_buffer; - } - - hr_qp->umem = ib_umem_get(ib_pd->uobject->context, - ucmd.buf_addr, hr_qp->buff_size, 0, - 0); - if (IS_ERR(hr_qp->umem)) { - dev_err(dev, "ib_umem_get error for create qp.\n"); - ret = PTR_ERR(hr_qp->umem); - goto err_alloc_recv_inline_buffer; - } - hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, - hr_qp->regions, ARRAY_SIZE(hr_qp->regions), - page_shift); - ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, - hr_qp->region_cnt); - if (ret) { - dev_err(dev, "alloc buf_list error(%d) for create qp.\n", - ret); - goto err_alloc_list; - } - - for (i = 0; i < hr_qp->region_cnt; i++) { - r = &hr_qp->regions[i]; - buf_count = hns_roce_get_umem_bufs(hr_dev, - buf_list[i], r->count, r->offset, - hr_qp->umem, page_shift); - if (buf_count != r->count) { - dev_err(dev, - "get umem buf err, expect %d,ret %d.\n", - r->count, buf_count); - ret = -ENOBUFS; - goto err_get_bufs; - } + goto err_out; }
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && @@ -943,7 +1023,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ret) { dev_err(dev, "SQ record doorbell map failed(%d)!\n", ret); - goto err_get_bufs; + goto err_out; }
/* indicate kernel supports sq record db */ @@ -973,14 +1053,14 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, dev_err(dev, "init_attr->create_flags error(%d) for BLOCK_MULTICAST_LOOPBACK!\n", init_attr->create_flags); ret = -EINVAL; - goto err_alloc_recv_inline_buffer; + goto err_out; }
if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { dev_err(dev, "init_attr->create_flags error(%d) for IPOIB_UD_LSO!\n", init_attr->create_flags); ret = -EINVAL; - goto err_alloc_recv_inline_buffer; + goto err_out; }
/* Set SQ size */ @@ -989,7 +1069,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ret) { dev_err(dev, "hns_roce_set_kernel_sq_size error(%d)!\n", ret); - goto err_alloc_recv_inline_buffer; + goto err_out; }
/* QP doorbell register address */ @@ -1004,50 +1084,17 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (ret) { dev_err(dev, "RQ record doorbell alloc failed(%d)!\n", ret); - goto err_alloc_recv_inline_buffer; + goto err_out; } *hr_qp->rdb.db_record = 0; hr_qp->rdb_en = 1; }
- /* Allocate QP buf */ - if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, - (1 << page_shift) * 2, - &hr_qp->hr_buf, page_shift)) { - dev_err(dev, "hns_roce_buf_alloc error!\n"); - ret = -ENOMEM; - goto err_db; - } - hr_qp->region_cnt = split_wqe_buf_region(hr_dev, hr_qp, - hr_qp->regions, ARRAY_SIZE(hr_qp->regions), - page_shift); - ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, - hr_qp->region_cnt); - if (ret) { - dev_err(dev, "Alloc buf_list error(%d) for create qp!\n", - ret); - goto err_alloc_list; - } - - for (i = 0; i < hr_qp->region_cnt; i++) { - r = &hr_qp->regions[i]; - buf_count = hns_roce_get_kmem_bufs(hr_dev, - buf_list[i], r->count, r->offset, - &hr_qp->hr_buf); - if (buf_count != r->count) { - dev_err(dev, - "get kmem buf err, expect %d,ret %d.\n", - r->count, buf_count); - ret = -ENOBUFS; - goto err_get_bufs; - } - } - hr_qp->sq.wrid = kcalloc(hr_qp->sq.wqe_cnt, sizeof(u64), GFP_KERNEL); if (ZERO_OR_NULL_PTR(hr_qp->sq.wrid)) { ret = -ENOMEM; - goto err_get_bufs; + goto err_db; }
if (hr_qp->rq.wqe_cnt) { @@ -1067,19 +1114,14 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, ret = hns_roce_reserve_range_qp(hr_dev, 1, 1, &qpn); if (ret) { dev_err(dev, "hns_roce_reserve_range_qp alloc qpn error\n"); - goto err_wrid; + goto err_buf; } }
- hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions, - hr_qp->region_cnt); - hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, - hr_qp->regions, hr_qp->region_cnt); + ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); if (ret) { - dev_err(dev, "mtr attatch error for create qp\n"); - goto err_mtr; + dev_err(hr_dev->dev, "Failed to alloc QP buffer\n"); + goto err_db; }
if (init_attr->qp_type == IB_QPT_GSI && @@ -1122,8 +1164,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hns_roce_add_cq_to_qp(hr_dev, hr_qp, init_attr->send_cq, init_attr->recv_cq);
- hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); - return 0;
err_qp: @@ -1137,10 +1177,9 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (!sqpn) hns_roce_release_range_qp(hr_dev, qpn, 1);
-err_mtr: - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); +err_buf: + free_qp_buf(hr_dev, hr_qp);
-err_wrid: if (ib_pd->uobject) { if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && (udata->outlen >= sizeof(resp)) && @@ -1166,25 +1205,11 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, if (!ib_pd->uobject) kfree(hr_qp->sq.wrid);
-err_get_bufs: - hns_roce_free_buf_list(buf_list, hr_qp->region_cnt); - -err_alloc_list: - if (ib_pd->uobject) - ib_umem_release(hr_qp->umem); - else - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - err_db: if (!ib_pd->uobject && hns_roce_qp_has_rq(init_attr) && (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB)) hns_roce_free_db(hr_dev, &hr_qp->rdb);
-err_alloc_recv_inline_buffer: - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hns_roce_qp_has_rq(init_attr)) - hns_roce_free_recv_inline_buffer(hr_qp); - err_out: return ret; } @@ -1197,7 +1222,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->ibqp.qp_type != IB_QPT_GSI) hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
- hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); + free_qp_buf(hr_dev, hr_qp);
if (hr_qp->ibqp.pd->uobject) { struct hns_roce_ucontext *context = @@ -1211,17 +1236,9 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) } else { kfree(hr_qp->sq.wrid); kfree(hr_qp->rq.wrid); - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); if (hr_qp->rq.wqe_cnt) hns_roce_free_db(hr_dev, &hr_qp->rdb); } - ib_umem_release(hr_qp->umem); - - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && - hr_qp->rq.wqe_cnt) { - kfree(hr_qp->rq_inl_buf.wqe_list[0].sg_list); - kfree(hr_qp->rq_inl_buf.wqe_list); - }
kfree(hr_qp); }
From: Xi Wang wangxi11@huawei.com
mainline inclusion from mainline-v5.7 commit ae85bf92effc1e4d4f5b3a3a291d2440a7200e25 category: bugfix bugzilla: NA CVE: NA
Encapsulate the QP parameter setup code into set_qp_param().
Link: https://lore.kernel.org/r/1582526258-13825-6-git-send-email-liweihang@huawei... Signed-off-by: Xi Wang wangxi11@huawei.com Signed-off-by: Weihang Li liweihang@huawei.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Shunfeng Yang yangshunfeng2@huawei.com Reviewed-by: chunzhi hu huchunzhi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_qp.c | 116 ++++++++++++------------ 1 file changed, 58 insertions(+), 58 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 3a615c32749f6..2da8e4d2509b2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -366,9 +366,7 @@ static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, return max_sge; }
- - -static int hns_roce_set_rq_size(struct hns_roce_dev *hr_dev, +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, int is_user, int has_rq, struct hns_roce_qp *hr_qp) { @@ -452,10 +450,9 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, return 0; }
-static int hns_roce_set_user_sq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, - struct hns_roce_ib_create_qp *ucmd) +static int set_user_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, + struct hns_roce_ib_create_qp *ucmd) { u32 ex_sge_num; u32 page_size; @@ -668,9 +665,8 @@ static int set_extend_sge_param(struct hns_roce_dev *hr_dev, return 0; }
-static int hns_roce_set_kernel_sq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp) +static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; u32 buf_size; @@ -965,6 +961,56 @@ static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) hns_roce_free_recv_inline_buffer(hr_qp); }
+static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata, + struct hns_roce_ib_create_qp *ucmd) +{ + int ret; + + hr_qp->ibqp.qp_type = init_attr->qp_type; + + if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) + hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; + else + hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; + + ret = set_rq_size(hr_dev, &init_attr->cap, !!udata, + hns_roce_qp_has_rq(init_attr), hr_qp); + if (ret) { + dev_err(hr_dev->dev, "Failed to set user RQ size\n"); + return ret; + } + + if (udata) { + if (ib_copy_from_udata(ucmd, udata, sizeof(*ucmd))) { + dev_err(hr_dev->dev, "Failed to copy QP ucmd\n"); + return -EFAULT; + } + + ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); + if (ret) + dev_err(hr_dev->dev, "Failed to set user SQ size\n"); + } else { + if (init_attr->create_flags & + IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { + dev_err(hr_dev->dev, "Failed to check multicast loopback\n"); + return -EINVAL; + } + + if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { + dev_err(hr_dev->dev, "Failed to check ipoib ud lso\n"); + return -EINVAL; + } + + ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); + if (ret) + dev_err(hr_dev->dev, "Failed to set kernel SQ size\n"); + } + + return ret; +} + static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, struct ib_pd *ib_pd, struct ib_qp_init_attr *init_attr, @@ -984,35 +1030,13 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->state = IB_QPS_RESET; hr_qp->next_state = IB_QPS_RESET;
- hr_qp->ibqp.qp_type = init_attr->qp_type; - - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) - hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_ALL_WR); - else - hr_qp->sq_signal_bits = cpu_to_le32(IB_SIGNAL_REQ_WR); - - ret = hns_roce_set_rq_size(hr_dev, &init_attr->cap, !!ib_pd->uobject, - hns_roce_qp_has_rq(init_attr), hr_qp); + ret = set_qp_param(hr_dev, hr_qp, init_attr, udata, &ucmd); if (ret) { dev_err(dev, "hns_roce_set_rq_size failed(%d).\n", ret); - goto err_out; + return ret; }
if (ib_pd->uobject) { - if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) { - dev_err(dev, "ib_copy_from_udata error for create qp.\n"); - ret = -EFAULT; - goto err_out; - } - - ret = hns_roce_set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, - &ucmd); - if (ret) { - dev_err(dev, "hns_roce_set_user_sq_size error(%d) for create qp.\n", - ret); - goto err_out; - } - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SQ_RECORD_DB) && (udata->inlen >= sizeof(ucmd)) && (udata->outlen >= sizeof(resp)) && @@ -1048,30 +1072,6 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hr_qp->rdb_en = 1; } } else { - if (init_attr->create_flags & - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK) { - dev_err(dev, "init_attr->create_flags error(%d) for BLOCK_MULTICAST_LOOPBACK!\n", - init_attr->create_flags); - ret = -EINVAL; - goto err_out; - } - - if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) { - dev_err(dev, "init_attr->create_flags error(%d) for IPOIB_UD_LSO!\n", - init_attr->create_flags); - ret = -EINVAL; - goto err_out; - } - - /* Set SQ size */ - ret = hns_roce_set_kernel_sq_size(hr_dev, &init_attr->cap, - hr_qp); - if (ret) { - dev_err(dev, "hns_roce_set_kernel_sq_size error(%d)!\n", - ret); - goto err_out; - } - /* QP doorbell register address */ hr_qp->sq.db_reg_l = hr_dev->reg_base + hr_dev->sdb_offset + DB_REG_OFFSET * hr_dev->priv_uar.index;
From: Lang Cheng chenglang@huawei.com
mainline inclusion from mainline-v5.7 commit 026ded373483c07983a6a30b70034ad0f3667a44 category: bugfix bugzilla: NA CVE: NA
The QP depth should not be allowed to be set to zero; once that is guaranteed, the subsequent logic can be simplified. Also, when a QP is modified from RESET to RESET, the minimum QP depth capability was being used to identify hip06 hardware; express this check in a more readable form.
Link: https://lore.kernel.org/r/1584006624-11846-1-git-send-email-liweihang@huawei... Signed-off-by: Lang Cheng chenglang@huawei.com Signed-off-by: Weihang Li liweihang@huawei.com Signed-off-by: Jason Gunthorpe jgg@mellanox.com Signed-off-by: Shunfeng Yang yangshunfeng2@huawei.com Reviewed-by: chunzhi hu huchunzhi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_qp.c | 69 ++++++++++--------------- 1 file changed, 27 insertions(+), 42 deletions(-)
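For clarity, the sizing rule this change converges on can be sketched as follows. This is illustrative only: the helper name set_wqe_cnt() is hypothetical, and the real logic lives in set_rq_size() and set_kernel_sq_size() in the diff below. Note that max() with a zero min_wqes is a no-op, which is why the old "if (hr_dev->caps.min_wqes)" conditional can be dropped.

/* Reject a zero or oversized depth, clamp to the HW minimum, round up to 2^n. */
static int set_wqe_cnt(u32 requested, u32 min_wqes, u32 max_wqes, u32 *wqe_cnt)
{
	u32 cnt;

	if (!requested || requested > max_wqes)
		return -EINVAL;

	cnt = roundup_pow_of_two(max(requested, min_wqes));
	if (cnt > max_wqes)
		return -EINVAL;

	*wqe_cnt = cnt;
	return 0;
}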
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 2da8e4d2509b2..ecfae6e7c6033 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -374,51 +374,40 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct device *dev = hr_dev->dev; u32 max_cnt;
- /* Check the validity of QP support capacity */ - if (cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > max_sge) { - dev_err(dev, "RQ(0x%lx) WR or sge error, depth = %u, sge = %u\n", - hr_qp->qpn, cap->max_recv_wr, cap->max_recv_sge); - return -EINVAL; - } - /* If srq exist, set zero for relative number of rq */ if (!has_rq) { hr_qp->rq.wqe_cnt = 0; hr_qp->rq.max_gs = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; - } else { - if (is_user && (!cap->max_recv_wr || !cap->max_recv_sge)) { - dev_err(dev, "User space no need config max_recv_wr max_recv_sge\n"); - return -EINVAL; - } - - if (hr_dev->caps.min_wqes) - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - else - max_cnt = cap->max_recv_wr; + return 0; + }
- hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); + if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || + cap->max_recv_sge > max_sge) { + dev_err(dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_wr, cap->max_recv_sge); + return -EINVAL; + }
- if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "while setting rq(0x%lx) size, rq.wqe_cnt too large\n", - hr_qp->qpn); - return -EINVAL; - } + max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes);
- max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt + - hr_qp->rq.rsv_sge); - if (hr_dev->caps.max_rq_sg <= HNS_ROCE_MAX_SGE_NUM) - hr_qp->rq.wqe_shift = - ilog2(hr_dev->caps.max_rq_desc_sz); - else - hr_qp->rq.wqe_shift = - ilog2(hr_dev->caps.max_rq_desc_sz - * hr_qp->rq.max_gs); + hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); + if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "rq depth %u too large\n", + cap->max_recv_wr); + return -EINVAL; }
+ max_cnt = max(1U, cap->max_recv_sge); + hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + + if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); + else + hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * + hr_qp->rq.max_gs); + cap->max_recv_wr = hr_qp->rq.wqe_cnt; cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
@@ -675,7 +664,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, int size; int ret;
- if (cap->max_send_wr > hr_dev->caps.max_wqes || + if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || cap->max_send_sge > hr_dev->caps.max_sq_sg || cap->max_inline_data > hr_dev->caps.max_sq_inline) { dev_err(dev, "SQ WR or sge or inline data error!\n"); @@ -684,10 +673,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
- if (hr_dev->caps.min_wqes) - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); - else - max_cnt = cap->max_send_wr; + max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes);
hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { @@ -1473,10 +1459,9 @@ int hns_roce_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, goto out;
if (cur_state == new_state && cur_state == IB_QPS_RESET) { - if (hr_dev->caps.min_wqes) { + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { ret = -EPERM; - dev_err(dev, "cur_state=%d new_state=%d.\n", cur_state, - new_state); + dev_err(dev, "RST2RST state is not supported\n"); }
goto out;
From: Xi Wang wangxi11@huawei.com
driver inclusion category: bugfix bugzilla: NA CVE: NA
When nbufs is 1, the buffer is in direct mode, which can be confusing. Rework the current code to make it easier to maintain. Signed-off-by: Xi Wang wangxi11@huawei.com Signed-off-by: Shunfeng Yang yangshunfeng2@huawei.com Reviewed-by: chunzhi hu huchunzhi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 224 ++++++++---------- drivers/infiniband/hw/hns/hns_roce_cq.c | 25 +- drivers/infiniband/hw/hns/hns_roce_device.h | 66 ++++-- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 10 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 8 +- drivers/infiniband/hw/hns/hns_roce_mr.c | 8 +- drivers/infiniband/hw/hns/hns_roce_qp.c | 44 ++-- drivers/infiniband/hw/hns/hns_roce_srq.c | 34 +-- .../hw/hns/roce-customer/rdfx_entry.c | 2 +- .../hw/hns/roce-customer/rdfx_intf.c | 44 ++-- 10 files changed, 230 insertions(+), 235 deletions(-)
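For reference, a minimal usage sketch of the reworked buffer interface. The wrapper example_alloc_wqe_buf() is made up for illustration; only the hns_roce_buf_* calls come from this patch.

static int example_alloc_wqe_buf(struct hns_roce_dev *hr_dev, u32 buf_size,
				 u32 page_shift)
{
	struct hns_roce_buf *kbuf;

	/* The allocator now returns a handle or an ERR_PTR() on failure. */
	kbuf = hns_roce_buf_alloc(hr_dev, buf_size, page_shift, 0);
	if (IS_ERR(kbuf))
		return PTR_ERR(kbuf);

	/* Offsets and page addresses always go through the trunk list,
	 * with no direct/indirect (nbufs == 1) special case.
	 */
	(void)hns_roce_buf_offset(kbuf, 0);
	(void)hns_roce_buf_page(kbuf, 0);

	/* The buffer size is no longer needed when freeing. */
	hns_roce_buf_free(hr_dev, kbuf);
	return 0;
}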
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 89547c8bb82d8..1ad0cab94491f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -158,83 +158,101 @@ void hns_roce_bitmap_cleanup(struct hns_roce_bitmap *bitmap) kfree(bitmap->table); }
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf) +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf) { - int i; - struct device *dev = hr_dev->dev; + struct hns_roce_buf_list *trunks; + u32 i;
- if (buf->nbufs == 1) { - dma_free_coherent(dev, size, buf->direct.buf, buf->direct.map); - } else { - for (i = 0; i < buf->nbufs; ++i) - if (buf->page_list[i].buf) - dma_free_coherent(dev, 1 << buf->page_shift, - buf->page_list[i].buf, - buf->page_list[i].map); - kfree(buf->page_list); + if (!buf) + return; + + trunks = buf->trunk_list; + if (trunks) { + buf->trunk_list = NULL; + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, 1 << buf->trunk_shift, + trunks[i].buf, trunks[i].map); + + kfree(trunks); } + + kfree(buf); } EXPORT_SYMBOL_GPL(hns_roce_buf_free);
-int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift) +/* + * Allocate the dma buffer for storing ROCEE table entries + * + * @size: required size + * @page_shift: the unit size in a continuous dma address range + * @flags: HNS_ROCE_BUF_ flags to control the allocation flow. + */ +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags) { - int i = 0; - dma_addr_t t; - struct device *dev = hr_dev->dev; - u32 page_size = 1 << page_shift; - u32 order; - - /* buf for SQ/RQ both at lease one page, SQ + RQ is 2 pages */ - if (size <= max_direct) { - buf->nbufs = 1; - /* Npages calculated by page_size */ - order = get_order(size); - if (order <= page_shift - PAGE_SHIFT) - order = 0; - else - order -= page_shift - PAGE_SHIFT; - buf->npages = 1 << order; - buf->page_shift = page_shift; - /* MTT PA must be recorded in 4k alignment, t is 4k aligned */ - buf->direct.buf = dma_zalloc_coherent(dev, - size, &t, GFP_KERNEL); - if (!buf->direct.buf) - return -ENOMEM; - - buf->direct.map = t; - - while (t & ((1 << buf->page_shift) - 1)) { - --buf->page_shift; - buf->npages *= 2; - } + u32 trunk_size, page_size, alloced_size; + struct hns_roce_buf_list *trunks; + struct hns_roce_buf *buf; + gfp_t gfp_flags; + u32 ntrunk, i; + + /* The minimum shift of the page accessed by hw is HNS_HW_PAGE_SHIFT */ + if (WARN_ON(page_shift < HNS_HW_PAGE_SHIFT)) + return ERR_PTR(-EINVAL); + + gfp_flags = (flags & HNS_ROCE_BUF_NOSLEEP) ? GFP_ATOMIC : GFP_KERNEL; + buf = kzalloc(sizeof(*buf), gfp_flags); + if (!buf) + return ERR_PTR(-ENOMEM); + + buf->page_shift = page_shift; + page_size = 1 << buf->page_shift; + + /* Calc the trunk size and num by required size and page_shift */ + if (flags & HNS_ROCE_BUF_DIRECT) { + buf->trunk_shift = order_base_2(ALIGN(size, PAGE_SIZE)); + ntrunk = 1; } else { - buf->nbufs = (size + page_size - 1) / page_size; - buf->npages = buf->nbufs; - buf->page_shift = page_shift; - buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list), - GFP_KERNEL); - if (!buf->page_list) - return -ENOMEM; - - for (i = 0; i < buf->nbufs; ++i) { - buf->page_list[i].buf = dma_zalloc_coherent(dev, - page_size, &t, - GFP_KERNEL); - if (!buf->page_list[i].buf) - goto err_free; - - buf->page_list[i].map = t; - } + buf->trunk_shift = order_base_2(ALIGN(page_size, PAGE_SIZE)); + ntrunk = DIV_ROUND_UP(size, 1 << buf->trunk_shift); }
- return 0; + trunks = kcalloc(ntrunk, sizeof(*trunks), gfp_flags); + if (!trunks) { + kfree(buf); + return ERR_PTR(-ENOMEM); + }
-err_free: - hns_roce_buf_free(hr_dev, size, buf); - return -ENOMEM; + trunk_size = 1 << buf->trunk_shift; + alloced_size = 0; + for (i = 0; i < ntrunk; i++) { + trunks[i].buf = dma_alloc_coherent(hr_dev->dev, trunk_size, + &trunks[i].map, gfp_flags); + if (!trunks[i].buf) + break; + + alloced_size += trunk_size; + } + + buf->ntrunks = i; + + /* In nofail mode, it's only failed when the alloced size is 0 */ + if ((flags & HNS_ROCE_BUF_NOFAIL) ? i == 0 : i != ntrunk) { + for (i = 0; i < buf->ntrunks; i++) + dma_free_coherent(hr_dev->dev, trunk_size, + trunks[i].buf, trunks[i].map); + + kfree(trunks); + kfree(buf); + return ERR_PTR(-ENOMEM); + } + + buf->npages = DIV_ROUND_UP(alloced_size, page_size); + buf->trunk_list = trunks; + + return buf; } +EXPORT_SYMBOL_GPL(hns_roce_buf_alloc);
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf) @@ -245,80 +263,46 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, end = start + buf_cnt; if (end > buf->npages) { dev_err(hr_dev->dev, - "Invalid kmem region,offset 0x%x plus buf_cnt 0x%x larger than total 0x%x!\n", + "failed to check kmem bufs, end %d + %d total %u!\n", start, buf_cnt, buf->npages); return -EINVAL; }
total = 0; for (i = start; i < end; i++) - if (buf->nbufs == 1) - bufs[total++] = buf->direct.map + - (i << buf->page_shift); - else - bufs[total++] = buf->page_list[i].map; + bufs[total++] = hns_roce_buf_page(buf, i);
return total; } +EXPORT_SYMBOL_GPL(hns_roce_get_kmem_bufs);
int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct ib_umem *umem, - int page_shift) + unsigned int page_shift) { - struct scatterlist *sg; - int npage_per_buf; - int npage_per_sg; - dma_addr_t addr; - int n, entry; - int idx, end; - int npage; - int total; - - if (page_shift < PAGE_SHIFT || page_shift > umem->page_shift) { - dev_err(hr_dev->dev, "Invalid page shift %d, umem shift %d!\n", - page_shift, umem->page_shift); + struct ib_block_iter biter; + int total = 0; + int idx = 0; + u64 addr; + + if (page_shift < HNS_HW_PAGE_SHIFT) { + dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", + page_shift); return -EINVAL; }
/* convert system page cnt to hw page cnt */ - npage_per_buf = (1 << (page_shift - PAGE_SHIFT)); - total = DIV_ROUND_UP(ib_umem_page_count(umem), npage_per_buf); - end = start + buf_cnt; - if (end > total) { - dev_err(hr_dev->dev, - "Invalid umem region,offset 0x%x plus buf_cnt 0x%x larger than total 0x%x!\n", - start, buf_cnt, total); - return -EINVAL; - } - - idx = 0; - npage = 0; - total = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - npage_per_sg = sg_dma_len(sg) >> PAGE_SHIFT; - for (n = 0; n < npage_per_sg; n++) { - if (!(npage % npage_per_buf)) { - addr = sg_dma_address(sg) + - (n << umem->page_shift); - if (addr & ((1 << page_shift) - 1)) { - dev_err(hr_dev->dev, - "Umem addr not align to page_shift %d!\n", - page_shift); - return -ENOBUFS; - } - - /* get buf addr between start and end */ - if (start <= idx && idx < end) { - bufs[total++] = addr; - if (total >= buf_cnt) - goto done; - } - - idx++; - } - npage++; + rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, + 1 << page_shift) { + addr = rdma_block_iter_dma_address(&biter); + if (idx >= start) { + bufs[total++] = addr; + if (total >= buf_cnt) + goto done; } + idx++; } + done: return total; } diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 75afed8ee7a62..5420bec32da88 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -275,29 +275,32 @@ static int hns_roce_ib_get_cq_umem(struct hns_roce_dev *hr_dev, static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq_buf *buf, u32 nent) { - int ret; u32 page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; + struct hns_roce_buf *kbuf; + int ret;
- ret = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, - (1 << page_shift) * 2, &buf->hr_buf, - page_shift); - if (ret) + kbuf = hns_roce_buf_alloc(hr_dev, nent * hr_dev->caps.cq_entry_sz, + page_shift, 0); + if (IS_ERR(kbuf)) { + ret = -ENOMEM; goto out; + }
+ buf->hr_buf = kbuf; if (hns_roce_check_whether_mhop(hr_dev, HEM_TYPE_CQE)) buf->hr_mtt.mtt_type = MTT_TYPE_CQE; else buf->hr_mtt.mtt_type = MTT_TYPE_WQE;
- ret = hns_roce_mtt_init(hr_dev, buf->hr_buf.npages, - buf->hr_buf.page_shift, &buf->hr_mtt); + ret = hns_roce_mtt_init(hr_dev, kbuf->npages, kbuf->page_shift, + &buf->hr_mtt); if (ret) { dev_err(hr_dev->dev, "hns_roce_mtt_init error(%d) for kernel create cq.\n", ret); goto err_buf; }
- ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, &buf->hr_buf); + ret = hns_roce_buf_write_mtt(hr_dev, &buf->hr_mtt, buf->hr_buf); if (ret) { dev_err(hr_dev->dev, "hns_roce_ib_umem_write_mtt error(%d) for kernel create cq.\n", ret); @@ -310,8 +313,7 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, hns_roce_mtt_cleanup(hr_dev, &buf->hr_mtt);
err_buf: - hns_roce_buf_free(hr_dev, nent * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, buf->hr_buf); out: return ret; } @@ -319,8 +321,7 @@ static int hns_roce_ib_alloc_cq_buf(struct hns_roce_dev *hr_dev, static void hns_roce_ib_free_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq_buf *buf, int cqe) { - hns_roce_buf_free(hr_dev, (cqe + 1) * hr_dev->caps.cq_entry_sz, - &buf->hr_buf); + hns_roce_buf_free(hr_dev, buf->hr_buf); }
static int create_user_cq(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 0b1d3a5e4ca51..ff76c0bcd1a68 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -318,6 +318,10 @@ static inline void hns_roce_inc_rdma_hw_stats(struct ib_device *dev, int stats)
#define HNS_ROCE_MTT_ENTRY_PER_SEG 8
+/* The minimum page size is 4K for hardware */ +#define HNS_HW_PAGE_SHIFT 12 +#define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) + #define PAGE_ADDR_SHIFT 12
#define HNS_ROCE_IS_RESETTING 1 @@ -511,12 +515,27 @@ struct hns_roce_buf_list { dma_addr_t map; };
+/* + * %HNS_ROCE_BUF_DIRECT indicates that the all memory must be in a continuous + * dma address range. + * + * %HNS_ROCE_BUF_NOSLEEP indicates that the caller cannot sleep. + * + * %HNS_ROCE_BUF_NOFAIL allocation only failed when allocated size is zero, even + * the allocated size is smaller than the required size. + */ +enum { + HNS_ROCE_BUF_DIRECT = BIT(0), + HNS_ROCE_BUF_NOSLEEP = BIT(1), + HNS_ROCE_BUF_NOFAIL = BIT(2), +}; + struct hns_roce_buf { - struct hns_roce_buf_list direct; - struct hns_roce_buf_list *page_list; - int nbufs; + struct hns_roce_buf_list *trunk_list; + u32 ntrunks; u32 npages; - int page_shift; + unsigned int trunk_shift; + unsigned int page_shift; };
struct hns_roce_db_pgdir { @@ -548,7 +567,7 @@ struct hns_roce_db { };
struct hns_roce_cq_buf { - struct hns_roce_buf hr_buf; + struct hns_roce_buf *hr_buf; struct hns_roce_mtt hr_mtt; };
@@ -587,7 +606,7 @@ struct hns_roce_cq { };
struct hns_roce_idx_que { - struct hns_roce_buf idx_buf; + struct hns_roce_buf *idx_buf; int entry_sz; u32 buf_size; struct ib_umem *umem; @@ -608,7 +627,7 @@ struct hns_roce_srq { refcount_t refcount; struct completion free;
- struct hns_roce_buf buf; + struct hns_roce_buf *buf; u64 *wrid; struct ib_umem *umem; struct hns_roce_mtt mtt; @@ -754,7 +773,7 @@ enum hns_roce_qp_dfx_cnt {
struct hns_roce_qp { struct ib_qp ibqp; - struct hns_roce_buf hr_buf; + struct hns_roce_buf *hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; @@ -1305,15 +1324,23 @@ static inline struct hns_roce_qp qpn & (hr_dev->caps.num_qps - 1)); }
-static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, int offset) +static inline void *hns_roce_buf_offset(struct hns_roce_buf *buf, + unsigned int offset) +{ + return (char *)(buf->trunk_list[offset >> buf->trunk_shift].buf) + + (offset & ((1 << buf->trunk_shift) - 1)); +} + +static inline dma_addr_t hns_roce_buf_dma_addr(struct hns_roce_buf *buf, + unsigned int offset) { - u32 page_size = 1 << buf->page_shift; + return buf->trunk_list[offset >> buf->trunk_shift].map + + (offset & ((1 << buf->trunk_shift) - 1)); +}
- if (buf->nbufs == 1) - return (char *)(buf->direct.buf) + offset; - else - return (char *)(buf->page_list[offset >> buf->page_shift].buf) + - (offset & (page_size - 1)); +static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) +{ + return hns_roce_buf_dma_addr(buf, idx << buf->page_shift); }
static inline u8 to_rdma_port_num(u8 phy_port_num) @@ -1425,10 +1452,9 @@ struct ib_mw *hns_roce_alloc_mw(struct ib_pd *pd, enum ib_mw_type, struct ib_udata *udata); int hns_roce_dealloc_mw(struct ib_mw *ibmw);
-void hns_roce_buf_free(struct hns_roce_dev *hr_dev, u32 size, - struct hns_roce_buf *buf); -int hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, u32 max_direct, - struct hns_roce_buf *buf, u32 page_shift); +void hns_roce_buf_free(struct hns_roce_dev *hr_dev, struct hns_roce_buf *buf); +struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, + u32 page_shift, u32 flags);
int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem); @@ -1443,7 +1469,7 @@ int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct hns_roce_buf *buf); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int buf_cnt, int start, struct ib_umem *umem, - int page_shift); + unsigned int page_shift);
struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, struct ib_srq_init_attr *srq_init_attr, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index de7571a056895..4cf732ff150ed 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2003,7 +2003,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
static void *get_cqe(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, + return hns_roce_buf_offset(hr_cq->hr_buf.hr_buf, n * HNS_ROCE_V1_CQE_ENTRY_SIZE); }
@@ -3679,7 +3679,6 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq) struct device *dev = &hr_dev->pdev->dev; u32 cqe_cnt_ori; u32 cqe_cnt_cur; - u32 cq_buf_size; int wait_time = 0; int ret = 0;
@@ -3713,11 +3712,8 @@ static int hns_roce_v1_destroy_cq(struct ib_cq *ibcq)
if (ibcq->uobject) ib_umem_release(hr_cq->umem); - else { - /* Free the buff of stored cq */ - cq_buf_size = (ibcq->cqe + 1) * hr_dev->caps.cq_entry_sz; - hns_roce_buf_free(hr_dev, cq_buf_size, &hr_cq->hr_buf.hr_buf); - } + else + hns_roce_buf_free(hr_dev, hr_cq->hr_buf.hr_buf);
kfree(hr_cq);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index afca2b885977f..f2c4304399239 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -149,7 +149,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; - shift = qp->hr_buf.page_shift; + shift = qp->hr_buf->page_shift;
/* * Check whether wr->num_sge sges are in the same page. If not, we @@ -3001,7 +3001,7 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
static void *get_cqe_v2(struct hns_roce_cq *hr_cq, int n) { - return hns_roce_buf_offset(&hr_cq->hr_buf.hr_buf, + return hns_roce_buf_offset(hr_cq->hr_buf.hr_buf, n * HNS_ROCE_V2_CQE_ENTRY_SIZE); }
@@ -3021,7 +3021,7 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *hr_cq)
static void *get_srq_wqe(struct hns_roce_srq *srq, int n) { - return hns_roce_buf_offset(&srq->buf, n << srq->wqe_shift); + return hns_roce_buf_offset(srq->buf, n << srq->wqe_shift); }
static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) @@ -7052,7 +7052,7 @@ static void fill_idx_queue(struct hns_roce_idx_que *idx_que, { unsigned int *addr;
- addr = (unsigned int *)hns_roce_buf_offset(&idx_que->idx_buf, + addr = (unsigned int *)hns_roce_buf_offset(idx_que->idx_buf, cur_idx * idx_que->entry_sz); *addr = wqe_idx; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index ce5c0544fc1b3..bef0ceb24b72d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -908,13 +908,9 @@ int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, if (!page_list) return -ENOMEM;
- for (i = 0; i < buf->npages; ++i) { - if (buf->nbufs == 1) - page_list[i] = buf->direct.map + (i << buf->page_shift); - else - page_list[i] = buf->page_list[i].map; + for (i = 0; i < buf->npages; ++i) + page_list[i] = hns_roce_buf_page(buf, i);
- } ret = hns_roce_write_mtt(hr_dev, mtt, 0, buf->npages, page_list);
kfree(page_list); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index ecfae6e7c6033..62fe471a81ca7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -400,7 +400,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, }
max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt); + hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt) + hr_qp->rq.rsv_sge;
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -847,7 +847,7 @@ static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, page_shift); else buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], - r->count, r->offset, &hr_qp->hr_buf); + r->count, r->offset, hr_qp->hr_buf);
if (buf_count != r->count) { dev_err(hr_dev->dev, "Failed to get %s WQE buf, expect %d = %d.\n", @@ -878,7 +878,7 @@ static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, unsigned long addr) + struct ib_uobject *uobject, unsigned long addr) { u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; bool is_rq_buf_inline; @@ -894,21 +894,26 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } }
- if (hr_qp->ibqp.pd->uobject->context) { - hr_qp->umem = ib_umem_get(hr_qp->ibqp.pd->uobject->context, addr, hr_qp->buff_size, 0, 0); + if (uobject) { + hr_qp->umem = ib_umem_get(uobject->context, addr, + hr_qp->buff_size, 0, 0); if (IS_ERR(hr_qp->umem)) { ret = PTR_ERR(hr_qp->umem); goto err_inline; } } else { - ret = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, - (1 << page_shift) * 2, - &hr_qp->hr_buf, page_shift); - if (ret) + struct hns_roce_buf *kmem; + + kmem = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, page_shift, + 0); + if (IS_ERR(kmem)) { + ret = PTR_ERR(kmem); goto err_inline; + } + hr_qp->hr_buf = kmem; }
- ret = map_wqe_buf(hr_dev, hr_qp, page_shift, udata); + ret = map_wqe_buf(hr_dev, hr_qp, page_shift, !!uobject); if (ret) goto err_alloc;
@@ -919,11 +924,12 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hns_roce_free_recv_inline_buffer(hr_qp);
err_alloc: - if (udata) { + if (uobject) { ib_umem_release(hr_qp->umem); hr_qp->umem = NULL; } else { - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); + hns_roce_buf_free(hr_dev, hr_qp->hr_buf); + hr_qp->hr_buf = NULL; }
dev_err(hr_dev->dev, "Failed to alloc WQE buffer, ret %d.\n", ret); @@ -937,11 +943,11 @@ static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->umem) { ib_umem_release(hr_qp->umem); hr_qp->umem = NULL; + } else { + hns_roce_buf_free(hr_dev, hr_qp->hr_buf); + hr_qp->hr_buf = NULL; }
- if (hr_qp->hr_buf.nbufs > 0) - hns_roce_buf_free(hr_dev, hr_qp->buff_size, &hr_qp->hr_buf); - if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) hns_roce_free_recv_inline_buffer(hr_qp); @@ -1104,7 +1110,8 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, } }
- ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr); + ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, ib_pd->uobject, + ucmd.buf_addr); if (ret) { dev_err(hr_dev->dev, "Failed to alloc QP buffer\n"); goto err_db; @@ -1532,8 +1539,7 @@ EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs);
static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { - - return hns_roce_buf_offset(&hr_qp->hr_buf, offset); + return hns_roce_buf_offset(hr_qp->hr_buf, offset); }
void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) @@ -1550,7 +1556,7 @@ EXPORT_SYMBOL_GPL(get_send_wqe);
void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n) { - return hns_roce_buf_offset(&hr_qp->hr_buf, hr_qp->sge.offset + + return hns_roce_buf_offset(hr_qp->hr_buf, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } EXPORT_SYMBOL_GPL(get_send_extend_sge); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 9d4aec18be0fc..630bf17c281c9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -310,6 +310,7 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct hns_roce_idx_que *idx_que = &srq->idx_que; + struct hns_roce_buf *kbuf; u32 bitmap_num; int i;
@@ -324,12 +325,13 @@ static int hns_roce_create_idx_que(struct ib_pd *pd, struct hns_roce_srq *srq,
idx_que->buf_size = srq->max * idx_que->entry_sz;
- if (hns_roce_buf_alloc(hr_dev, idx_que->buf_size, (1 << page_shift) * 2, - &idx_que->idx_buf, page_shift)) { + kbuf = hns_roce_buf_alloc(hr_dev, idx_que->buf_size, page_shift, 0); + if (IS_ERR(kbuf)) { kfree(idx_que->bitmap); return -ENOMEM; }
+ idx_que->idx_buf = kbuf; for (i = 0; i < bitmap_num; i++) idx_que->bitmap[i] = ~(0UL);
@@ -341,17 +343,19 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq, { struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); u32 page_shift = PAGE_SHIFT + hr_dev->caps.srqwqe_buf_pg_sz; + struct hns_roce_buf *kbuf; int ret;
- if (hns_roce_buf_alloc(hr_dev, srq_buf_size, (1 << page_shift) * 2, - &srq->buf, page_shift)) + kbuf = hns_roce_buf_alloc(hr_dev, srq_buf_size, page_shift, 0); + if (IS_ERR(kbuf)) return -ENOMEM;
+ srq->buf = kbuf; srq->head = 0; srq->tail = srq->max - 1; srq->wqe_ctr = 0;
- ret = hns_roce_mtt_init(hr_dev, srq->buf.npages, srq->buf.page_shift, + ret = hns_roce_mtt_init(hr_dev, kbuf->npages, kbuf->page_shift, &srq->mtt); if (ret) { dev_err(hr_dev->dev, "Mtt init error(%d) when create srq.\n", @@ -359,7 +363,7 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq, goto err_kernel_buf; }
- ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, &srq->buf); + ret = hns_roce_buf_write_mtt(hr_dev, &srq->mtt, srq->buf); if (ret) goto err_kernel_srq_mtt;
@@ -371,8 +375,8 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq, }
/* Init mtt table for idx_que */ - ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf.npages, - srq->idx_que.idx_buf.page_shift, + ret = hns_roce_mtt_init(hr_dev, srq->idx_que.idx_buf->npages, + srq->idx_que.idx_buf->page_shift, &srq->idx_que.mtt); if (ret) { dev_err(hr_dev->dev, "Kernel mtt init error(%d) for idx que.\n", @@ -381,7 +385,7 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq, } /* Write buffer address into the mtt table */ ret = hns_roce_buf_write_mtt(hr_dev, &srq->idx_que.mtt, - &srq->idx_que.idx_buf); + srq->idx_que.idx_buf); if (ret) { dev_err(hr_dev->dev, "Write mtt error(%d) for idx que.\n", ret); goto err_kernel_idx_buf; @@ -398,15 +402,14 @@ static int create_kernel_srq(struct ib_pd *pd, struct hns_roce_srq *srq, hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt);
err_kernel_create_idx: - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, - &srq->idx_que.idx_buf); + hns_roce_buf_free(hr_dev, srq->idx_que.idx_buf); kfree(srq->idx_que.bitmap);
err_kernel_srq_mtt: hns_roce_mtt_cleanup(hr_dev, &srq->mtt);
err_kernel_buf: - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + hns_roce_buf_free(hr_dev, srq->buf);
return ret; } @@ -425,10 +428,10 @@ static void destroy_kernel_srq(struct hns_roce_dev *hr_dev, { kfree(srq->wrid); hns_roce_mtt_cleanup(hr_dev, &srq->idx_que.mtt); - hns_roce_buf_free(hr_dev, srq->idx_que.buf_size, &srq->idx_que.idx_buf); + hns_roce_buf_free(hr_dev, srq->idx_que.idx_buf); kfree(srq->idx_que.bitmap); hns_roce_mtt_cleanup(hr_dev, &srq->mtt); - hns_roce_buf_free(hr_dev, srq_buf_size, &srq->buf); + hns_roce_buf_free(hr_dev, srq->buf); }
static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, @@ -564,8 +567,7 @@ int hns_roce_destroy_srq(struct ib_srq *ibsrq) ib_umem_release(srq->umem); } else { kfree(srq->wrid); - hns_roce_buf_free(hr_dev, srq->max << srq->wqe_shift, - &srq->buf); + hns_roce_buf_free(hr_dev, srq->buf); }
kfree(srq); diff --git a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c index 8c2de43bcf14b..bb8c4d7d2449a 100644 --- a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c +++ b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c @@ -145,7 +145,7 @@ void rdfx_cp_sq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, memcpy(dfx_qp_buf, dfx_hns_wqe_sge, 2 * sizeof(struct hns_roce_v2_wqe_data_seg)); dfx_qp_buf = hns_roce_buf_offset(rdfx_qp->buf, qp->sge.offset); - dfx_hns_wqe_sge = hns_roce_buf_offset(&qp->hr_buf, qp->sge.offset); + dfx_hns_wqe_sge = hns_roce_buf_offset(qp->hr_buf, qp->sge.offset); rdfx_change_sq_buf(wr, atomic_en, dfx_qp_buf, dfx_hns_wqe_sge, sq, hr_dev, qp); } diff --git a/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c b/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c index 9a84e7ce64170..d658621d93a22 100644 --- a/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c +++ b/drivers/infiniband/hw/hns/roce-customer/rdfx_intf.c @@ -23,17 +23,17 @@ struct rdfx_cq_info *rdfx_find_rdfx_cq(struct rdfx_info *rdfx, static void rdfx_v2_free_cqe_dma_buf(struct rdfx_cq_info *rdfx_cq) { struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)rdfx_cq->priv; - u32 size = (rdfx_cq->cq_depth) * hr_dev->caps.cq_entry_sz;
- hns_roce_buf_free(hr_dev, size, (struct hns_roce_buf *)rdfx_cq->buf); + hns_roce_buf_free(hr_dev, (struct hns_roce_buf *)rdfx_cq->buf); + rdfx_cq->buf = NULL; }
static void rdfx_v2_free_wqe_dma_buf(struct rdfx_qp_info *rdfx_qp) { struct hns_roce_dev *hr_dev = (struct hns_roce_dev *)rdfx_qp->priv; - u32 size = rdfx_qp->buf_size;
- hns_roce_buf_free(hr_dev, size, (struct hns_roce_buf *)rdfx_qp->buf); + hns_roce_buf_free(hr_dev, (struct hns_roce_buf *)rdfx_qp->buf); + rdfx_qp->buf = NULL; }
void qp_release(struct kref *ref) @@ -41,7 +41,6 @@ void qp_release(struct kref *ref) struct rdfx_qp_info *rdfx_qp = container_of(ref, struct rdfx_qp_info, cnt); rdfx_v2_free_wqe_dma_buf(rdfx_qp); - kfree(rdfx_qp->buf); kfree(rdfx_qp); } EXPORT_SYMBOL_GPL(qp_release); @@ -51,7 +50,6 @@ void cq_release(struct kref *ref) struct rdfx_cq_info *rdfx_cq = container_of(ref, struct rdfx_cq_info, cnt); rdfx_v2_free_cqe_dma_buf(rdfx_cq); - kfree(rdfx_cq->buf); kfree(rdfx_cq); } EXPORT_SYMBOL_GPL(cq_release); @@ -647,21 +645,16 @@ void rdfx_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) unsigned long flags; u32 page_shift; int cq_entries; - int ret;
cq_entries = hr_cq->cq_depth;
- dfx_cq_buf = kzalloc(sizeof(struct hns_roce_buf), GFP_KERNEL); - if (ZERO_OR_NULL_PTR(dfx_cq_buf)) - return; - page_shift = PAGE_SHIFT + hr_dev->caps.cqe_buf_pg_sz; - - ret = hns_roce_buf_alloc(hr_dev, cq_entries * hr_dev->caps.cq_entry_sz, - (1 << page_shift) * 2, dfx_cq_buf, page_shift); - if (ret) { + dfx_cq_buf = hns_roce_buf_alloc(hr_dev, + cq_entries * hr_dev->caps.cq_entry_sz, + page_shift, 0); + if (IS_ERR(dfx_cq_buf)) { dev_err(hr_dev->dev, "hns_roce_dfx_buf_alloc error!\n"); - goto err_dfx_buf; + return; }
#ifdef CONFIG_INFINIBAND_HNS_DFX_ENHANCE @@ -690,10 +683,7 @@ void rdfx_alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) return;
err_buf: - hns_roce_buf_free(hr_dev, cq_entries * hr_dev->caps.cq_entry_sz, - dfx_cq_buf); -err_dfx_buf: - kfree(dfx_cq_buf); + hns_roce_buf_free(hr_dev, dfx_cq_buf); }
void rdfx_free_cq_buff(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) @@ -711,15 +701,10 @@ void rdfx_alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) u32 page_shift = 0; unsigned long flags;
- dfx_qp_buf = kzalloc(sizeof(struct hns_roce_buf), GFP_KERNEL); - if (ZERO_OR_NULL_PTR(dfx_qp_buf)) - return; - page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - - if (hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, (1 << page_shift) * 2, - dfx_qp_buf, page_shift)) { - kfree(dfx_qp_buf); + dfx_qp_buf = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, page_shift, + 0); + if (IS_ERR(dfx_qp_buf)) { dev_err(hr_dev->dev, "alloc dfx qp 0x%lx buff failed!\n", hr_qp->qpn); return; @@ -727,8 +712,7 @@ void rdfx_alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
rdfx_qp = kzalloc(sizeof(*rdfx_qp), GFP_KERNEL); if (ZERO_OR_NULL_PTR(rdfx_qp)) { - hns_roce_buf_free(hr_dev, hr_qp->buff_size, dfx_qp_buf); - kfree(dfx_qp_buf); + hns_roce_buf_free(hr_dev, dfx_qp_buf); return; }
From: Xi Wang wangxi11@huawei.com
driver inclusion category: bugfix bugzilla: NA CVE: NA
Wrap the public logic into three functions: hns_roce_mtr_create(), hns_roce_mtr_destroy() and hns_roce_mtr_map(), supporting hop numbers ranging from 0 to 3. This also makes the mtr interfaces easier to use. Signed-off-by: Xi Wang wangxi11@huawei.com Signed-off-by: Shunfeng Yang yangshunfeng2@huawei.com Reviewed-by: chunzhi hu huchunzhi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_alloc.c | 93 +-- drivers/infiniband/hw/hns/hns_roce_device.h | 108 +++- drivers/infiniband/hw/hns/hns_roce_hem.c | 33 +- drivers/infiniband/hw/hns/hns_roce_hem.h | 5 +- drivers/infiniband/hw/hns/hns_roce_hw_v1.c | 88 +-- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 90 +-- drivers/infiniband/hw/hns/hns_roce_mr.c | 477 ++++++++++++++-- drivers/infiniband/hw/hns/hns_roce_qp.c | 538 ++++++------------ .../hw/hns/roce-customer/rdfx_entry.c | 2 +- 9 files changed, 776 insertions(+), 658 deletions(-)
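For reference, a rough caller sketch of the new interface. example_create_mtr() and its parameters are illustrative, not part of the patch; the hns_roce_mtr_* prototypes and struct hns_roce_buf_attr layout come from the diff below.

static int example_create_mtr(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mtr *mtr,
			      size_t sq_size, size_t rq_size, int hopnum,
			      unsigned int ba_page_shift,
			      struct ib_ucontext *uctx, unsigned long addr)
{
	struct hns_roce_buf_attr attr = {};
	int ret;

	/* Describe the WQE buffer as regions with their hop numbers. */
	attr.page_shift = HNS_HW_PAGE_SHIFT;
	attr.region[0].size = sq_size;
	attr.region[0].hopnum = hopnum;
	attr.region[1].size = rq_size;
	attr.region[1].hopnum = hopnum;
	attr.region_count = 2;

	/* A non-NULL ucontext selects the user buffer at 'addr',
	 * otherwise a kernel buffer is allocated internally.
	 */
	ret = hns_roce_mtr_create(hr_dev, mtr, &attr, ba_page_shift,
				  uctx, addr);
	if (ret)
		return ret;

	/* ... hns_roce_mtr_find() can now return the mapped addresses ... */

	hns_roce_mtr_destroy(hr_dev, mtr);
	return 0;
}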
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 1ad0cab94491f..595e8e6663e5b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -255,102 +255,49 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, EXPORT_SYMBOL_GPL(hns_roce_buf_alloc);
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct hns_roce_buf *buf) + int buf_cnt, struct hns_roce_buf *buf, + unsigned int page_shift) { - int i, end; - int total; - - end = start + buf_cnt; - if (end > buf->npages) { - dev_err(hr_dev->dev, - "failed to check kmem bufs, end %d + %d total %u!\n", - start, buf_cnt, buf->npages); + unsigned int offset, max_size; + int total = 0; + int i; + + if (page_shift > buf->trunk_shift) { + dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n", + page_shift, buf->trunk_shift); return -EINVAL; }
- total = 0; - for (i = start; i < end; i++) - bufs[total++] = hns_roce_buf_page(buf, i); + offset = 0; + max_size = buf->ntrunks << buf->trunk_shift; + for (i = 0; i < buf_cnt && offset < max_size; i++) { + bufs[total++] = hns_roce_buf_dma_addr(buf, offset); + offset += (1 << page_shift); + }
return total; } EXPORT_SYMBOL_GPL(hns_roce_get_kmem_bufs);
int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct ib_umem *umem, + int buf_cnt, struct ib_umem *umem, unsigned int page_shift) { struct ib_block_iter biter; int total = 0; - int idx = 0; - u64 addr; - - if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", - page_shift); - return -EINVAL; - }
/* convert system page cnt to hw page cnt */ rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, 1 << page_shift) { - addr = rdma_block_iter_dma_address(&biter); - if (idx >= start) { - bufs[total++] = addr; - if (total >= buf_cnt) - goto done; - } - idx++; + bufs[total++] = rdma_block_iter_dma_address(&biter); + if (total >= buf_cnt) + goto done; }
done: return total; } - -void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt) -{ - if (hopnum == HNS_ROCE_HOP_NUM_0) - region->hopnum = 0; - else - region->hopnum = hopnum; - - region->offset = offset; - region->count = buf_cnt; -} - -void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt) -{ - int i; - - for (i = 0; i < region_cnt; i++) { - if (bufs[i]) { - kfree(bufs[i]); - bufs[i] = NULL; - } - } -} - -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int region_cnt) -{ - struct hns_roce_buf_region *r; - int i; - - for (i = 0; i < region_cnt; i++) { - r = ®ions[i]; - bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL); - if (!bufs[i]) - goto err_alloc; - } - - return 0; - -err_alloc: - hns_roce_free_buf_list(bufs, i); - - return -ENOMEM; -} +EXPORT_SYMBOL_GPL(hns_roce_get_umem_bufs);
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ff76c0bcd1a68..6e1ec2d77ce2a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -415,7 +415,7 @@ struct hns_roce_mtt { };
struct hns_roce_buf_region { - int offset; /* page offset */ + u32 offset; /* page offset */ u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -428,13 +428,36 @@ struct hns_roce_hem_list { struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL]; struct list_head btm_bt; /* link all bottom bt in @mid_bt */ dma_addr_t root_ba; /* pointer to the root ba table */ - int bt_pg_shift; +}; + +struct hns_roce_buf_attr { + struct { + size_t size; /* region size */ + int hopnum; /* multi-hop addressing hop num */ + } region[HNS_ROCE_MAX_BT_REGION]; + unsigned int region_count; /* valid region count */ + unsigned int page_shift; /* buffer page shift */ + unsigned int user_access; /* umem access flag */ + int user_dmasync; /* umem dma sync flag */ + bool mtt_only; /* only alloc buffer-required MTT memory */ +}; + +struct hns_roce_hem_cfg { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + unsigned int ba_pg_shift; /* BA table page shift */ + unsigned int buf_pg_shift; /* buffer page shift */ + unsigned int buf_pg_count; /* buffer page count */ + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; + unsigned int region_count; };
/* memory translate region */ struct hns_roce_mtr { - struct hns_roce_hem_list hem_list; - int buf_pg_shift; + struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ + struct ib_umem *umem; /* user space buffer */ + struct hns_roce_buf *kmem; /* kernel space buffer */ + struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ };
struct hns_roce_mw { @@ -773,7 +796,6 @@ enum hns_roce_qp_dfx_cnt {
struct hns_roce_qp { struct ib_qp ibqp; - struct hns_roce_buf *hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; @@ -784,16 +806,9 @@ struct hns_roce_qp { u32 sq_next_wqe; struct hns_roce_wq sq;
- struct ib_umem *umem; - struct hns_roce_mtt mtt; struct hns_roce_mtr mtr; - - /* this define must less than HNS_ROCE_MAX_BT_REGION */ -#define HNS_ROCE_WQE_REGION_MAX 3 - struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX]; - int wqe_bt_pg_shift; - u32 buff_size; + struct mutex mutex; u16 xrcdn; u8 port; @@ -1343,6 +1358,45 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) return hns_roce_buf_dma_addr(buf, idx << buf->page_shift); }
+#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) + +static inline u64 to_hr_hw_page_addr(u64 addr) +{ + return addr >> HNS_HW_PAGE_SHIFT; +} + +static inline u32 to_hr_hw_page_shift(u32 page_shift) +{ + return page_shift - HNS_HW_PAGE_SHIFT; +} + +static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count) +{ + if (count > 0) + return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum; + + return 0; +} + +static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift); +} + +static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift) >> buf_shift; +} + +static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) +{ + if (!count) + return 0; + + return ilog2(buf_shift ? + to_hr_hem_entries_count(count, buf_shift) : count); +} + static inline u8 to_rdma_port_num(u8 phy_port_num) { return phy_port_num + 1; @@ -1373,18 +1427,19 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
-void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift); -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt); -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr); - /* hns roce hw need current block and next block addr from mtt */ #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + unsigned int ba_page_shift, + struct ib_ucontext *ucontext, + unsigned long user_addr); +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr); +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, unsigned int page_cnt);
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev); @@ -1459,16 +1514,11 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem);
-void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt); -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int count); -void hns_roce_free_buf_list(dma_addr_t **bufs, int count); - int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct hns_roce_buf *buf); + int buf_cnt, struct hns_roce_buf *buf, + unsigned int page_shift); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct ib_umem *umem, + int buf_cnt, struct ib_umem *umem, unsigned int page_shift);
struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 1911470ef26c6..a810fde3e356d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1177,9 +1177,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL;
if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1351,7 +1350,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, }
if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d,min %u!\n", offset, r->offset); return -EINVAL; } @@ -1415,8 +1414,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1438,12 +1437,16 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM;
+ if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ r = ®ions[region_cnt - 1]; root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, - ba_num, true, 0); + ba_num, true, 0); if (!root_hem) return -ENOMEM; list_add(&root_hem->list, &temp_root); @@ -1487,9 +1490,9 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, } /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, - &hem_list->mid_bt[i][1], list) { - offset = ((hem->start - r->offset) / step) * - BA_BYTE_LEN; + &hem_list->mid_bt[i][1], list) { + offset = (hem->start - r->offset) / step * + BA_BYTE_LEN; hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); total++; @@ -1517,12 +1520,12 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt) + int region_cnt, unsigned int bt_pg_shift) { const struct hns_roce_buf_region *r; int ofs, end; - int ret = 0; int unit; + int ret; int i;
if (region_cnt > HNS_ROCE_MAX_BT_REGION) { @@ -1531,7 +1534,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, return -EINVAL; }
- unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN; + unit = (1 << bt_pg_shift) / BA_BYTE_LEN; for (i = 0; i < region_cnt; i++) { r = ®ions[i]; if (!r->count) @@ -1578,8 +1581,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, hem_list->root_ba = 0; }
-void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order) +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) { int i, j;
@@ -1588,8 +1590,6 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) INIT_LIST_HEAD(&hem_list->mid_bt[i][j]); - - hem_list->bt_pg_shift = bt_page_order; }
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, @@ -1620,4 +1620,3 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
return cpu_base; } - diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 292ead0eb2d5b..8033c123175b2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -133,14 +133,13 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop); bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
-void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order); +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list); int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, int region_cnt, int unit); int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt); + int region_cnt, unsigned int bt_pg_shift); void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 4cf732ff150ed..d1b21f71ee296 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2513,7 +2513,6 @@ static void hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, }
static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, enum hns_roce_qp_state cur_state, enum hns_roce_qp_state new_state, struct hns_roce_qp_context *context, @@ -2560,7 +2559,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret;
if (cur_state >= HNS_ROCE_QP_NUM_STATE || new_state >= HNS_ROCE_QP_NUM_STATE || @@ -2594,6 +2593,27 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, return ret; }
+static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) +{ + int count; + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); + if (count < 1) { + dev_err(hr_dev->dev, "Failed to find SQ ba\n"); + return -ENOBUFS; + } + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, + 1, NULL); + if (!count) { + dev_err(hr_dev->dev, "Failed to find RQ ba\n"); + return -ENOBUFS; + } + + return 0; +} + static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2601,25 +2621,20 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_sqp_context *context; - struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; u32 __iomem *addr; - int rq_pa_start; + u64 sq_ba = 0; + u64 rq_ba = 0; __le32 tmp; u32 reg_val; - u64 *mtts;
context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM;
/* Search QP buf's MTTs */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - }
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qp1c_bytes_4, @@ -2633,11 +2648,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle));
roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); @@ -2645,7 +2660,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - le32_to_cpu(hr_qp->sq_signal_bits)); + hr_qp->sq_signal_bits); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2658,14 +2673,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, - (mtts[rq_pa_start]) >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_RQ_CUR_IDX_M, QP1C_BYTES_28_RQ_CUR_IDX_S, 0); @@ -2679,12 +2692,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn);
- context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); + context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_SQ_CUR_IDX_M, QP1C_BYTES_40_SQ_CUR_IDX_S, 0); @@ -2751,10 +2764,10 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; __le32 doorbell[2] = {0}; - int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; - u64 *mtts = NULL; + u64 sq_ba = 0; + u64 rq_ba = 0; int port; u8 port_num; u8 *dmac; @@ -2765,12 +2778,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -ENOMEM;
/* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (mtts == NULL) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - }
/* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -2843,7 +2852,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -2925,11 +2933,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_bit(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, 1); @@ -2951,7 +2959,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - le32_to_cpu(hr_qp->sq_signal_bits)); + hr_qp->sq_signal_bits);
port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; @@ -3028,14 +3036,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, - mtts[rq_pa_start] >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); @@ -3097,8 +3103,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_156_SL_S, rdma_ah_get_sl(&attr->ah_attr)); hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - } else if (cur_state == IB_QPS_RTR && - new_state == IB_QPS_RTS) { + } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { /* If exist optional param, return error */ if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || @@ -3110,12 +3115,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; }
- context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba));
roce_set_field(context->qpc_bytes_124, QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, @@ -3258,12 +3263,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
- context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_bit(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); roce_set_field(context->qpc_bytes_188, @@ -3288,8 +3293,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
/* SW pass context to HW */ - ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, - to_hns_roce_state(cur_state), + ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state), to_hns_roce_state(new_state), context, hr_qp); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f2c4304399239..07932c1e029ab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -149,7 +149,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; - shift = qp->hr_buf->page_shift; + shift = qp->mtr.kmem->page_shift;
/* * Check whether wr->num_sge sges are in the same page. If not, we @@ -3898,18 +3898,10 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, + to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift));
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S, 0); @@ -4369,31 +4361,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, } }
-static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, int mtt_cnt, - u32 page_size) -{ - struct device *dev = hr_dev->dev; - - if (hr_qp->rq.wqe_cnt < 1) - return true; - - if (mtt_cnt < 1) { - dev_err(dev, "qp(0x%lx) rqwqe buf ba find failed\n", - hr_qp->qpn); - return false; - } - - if (mtt_cnt < MTT_MIN_COUNT && - (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { - dev_err(dev, "qp(0x%lx) next rqwqe buf ba find failed\n", - hr_qp->qpn); - return false; - } - - return true; -} - static int modify_qp_init_to_rtr(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, struct hns_roce_v2_qp_context *context, @@ -4407,7 +4374,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t dma_handle_3; dma_addr_t dma_handle_2; u64 wqe_sge_ba; - u32 page_size; u8 port_num; u64 *mtts_3; u64 *mtts_2; @@ -4416,13 +4382,13 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int port;
/* Search qp buf's mtts */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->rq.offset / page_size, mtts, - MTT_MIN_COUNT, &wqe_sge_ba); - if (!ibqp->srq) - if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) - return -EINVAL; + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, + mtts, ARRAY_SIZE(mtts), &wqe_sge_ba); + if (hr_qp->rq.wqe_cnt && count < 1) { + dev_err(dev, "failed to find RQ WQE, QPN = 0x%lx.\n", + hr_qp->qpn); + return -EINVAL; + }
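Editorial note (not part of the patch): hns_roce_mtr_find() now takes a byte offset into the WQE buffer (here hr_qp->rq.offset) instead of a pre-divided page index; the conversion to an MTT start index happens inside the helper via the configured buffer page shift. A standalone sketch of that conversion, with an assumed 4 KB buffer page and a made-up RQ offset:

#include <stdio.h>

int main(void)
{
        unsigned int buf_pg_shift = 12;   /* assumed: 4 KB buffer pages */
        unsigned int rq_offset = 24576;   /* assumed RQ byte offset */

        /* 24576 >> 12 == 6: the index the driver now derives internally */
        printf("mtt start index = %u\n", rq_offset >> buf_pg_shift);
        return 0;
}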
/* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -4462,17 +4428,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, - hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_sq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num, + hr_qp->sq.wqe_cnt)); roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, - (ibqp->qp_type == IB_QPT_GSI || - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - hr_dev->caps.wqe_sge_hop_num : 0); + to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num, + hr_qp->sge.sge_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); @@ -4480,8 +4445,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, - hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_rq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num, + hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); @@ -4489,7 +4454,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -4497,7 +4462,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); @@ -4648,23 +4613,22 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; - u32 page_size; int count;
- /* Search qp buf's mtts */ - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); + /* search qp buf's mtts */ + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset, + &sq_cur_blk, 1, NULL); if (count < 1) { - dev_err(dev, "qp(0x%lx) buf pa find failed\n", hr_qp->qpn); + dev_err(dev, "failed to find QP(0x%lx) SQ buf.\n", hr_qp->qpn); return -EINVAL; }
if (hr_qp->sge.sge_cnt > 0) { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset / page_size, - &sge_cur_blk, 1, NULL); + hr_qp->sge.offset, &sge_cur_blk, + 1, NULL); if (count < 1) { - dev_err(dev, "qp(0x%lx) sge pa find failed\n", + dev_err(dev, "failed to find QP(0x%lx) SGE buf.\n", hr_qp->qpn); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index bef0ceb24b72d..f81fb0359afd6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1601,122 +1601,475 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) return 0; }
-void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift) +static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { - hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); - mtr->buf_pg_shift = buf_pg_shift; -} - -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr) -{ - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -} -EXPORT_SYMBOL_GPL(hns_roce_mtr_cleanup); - -static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr, dma_addr_t *bufs, - struct hns_roce_buf_region *r) -{ - int offset; - int count; - int npage; - u64 *mtts; - int end; + int count, npage; + int offset, end; + __le64 *mtts; + u64 addr; int i;
- offset = r->offset; - end = offset + r->count; + offset = region->offset; + end = offset + region->count; npage = 0; - while (offset < end) { + while (offset < end && npage < max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS;
- /* Save page addr, low 12 bits : 0 */ - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; + addr = to_hr_hw_page_addr(pages[npage]); else - mtts[i] = bufs[npage]; + addr = pages[npage];
+ mtts[i] = cpu_to_le64(addr); npage++; } offset += count; }
- /* Memory barrier */ - mb(); + return npage; +} + +static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) +{ + int i; + + for (i = 0; i < attr->region_count; i++) + if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && + attr->region[i].hopnum > 0) + return true; + + /* because the mtr only one root base address, when hopnum is 0 means + * root base address equals the first buffer address, thus all alloced + * memory must in a continuous space accessed by direct mode. + */ + return false; +} + +static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) +{ + size_t size = 0; + int i; + + for (i = 0; i < attr->region_count; i++) + size += attr->region[i].size; + + return size; +} + +/* + * check the given pages in continuous address space + * Returns 0 on success, or the error page num. + */ +static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, + unsigned int page_shift) +{ + size_t page_size = 1 << page_shift; + int i; + + for (i = 1; i < page_count; i++) + if (pages[i] - pages[i - 1] != page_size) + return i;
return 0; }
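Editorial note (not part of the patch): direct (level-0) mode only works when the whole buffer is physically contiguous, which is exactly what mtr_check_direct_pages() above enforces: every page must start exactly one page after its predecessor. A standalone sketch of the same rule with made-up addresses; the third page deliberately breaks the run.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t pages[] = { 0x100000, 0x101000, 0x103000 }; /* made-up DMA addresses */
        unsigned int page_shift = 12;                        /* assumed 4 KB pages */
        uint64_t page_size = 1ULL << page_shift;
        int i;

        for (i = 1; i < 3; i++) {
                if (pages[i] - pages[i - 1] != page_size) {
                        printf("not contiguous at page %d\n", i);
                        return 1;
                }
        }
        printf("contiguous\n");
        return 0;
}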
-int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt) +static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { - struct hns_roce_buf_region *r; + /* release user buffers */ + if (mtr->umem) { + ib_umem_release(mtr->umem); + mtr->umem = NULL; + } + + /* release kernel buffers */ + if (mtr->kmem) { + hns_roce_buf_free(hr_dev, mtr->kmem); + mtr->kmem = NULL; + } +} + +static struct ib_umem * +mtr_get_umem(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, size_t buf_size, + struct ib_ucontext *ucontext, unsigned long user_addr) +{ + return ib_umem_get(ucontext, user_addr, buf_size, + buf_attr->user_access, + buf_attr->user_dmasync); +} + +static struct hns_roce_buf * +mtr_get_kmem(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int pg_shift, size_t buf_size, + bool is_direct) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_buf *hr_buf; + + hr_buf = hns_roce_buf_alloc(hr_dev, buf_size, pg_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR_OR_NULL(hr_buf)) { + dev_err(dev, "Failed to alloc kmem, ret %ld\n", + PTR_ERR(hr_buf)); + return NULL; + } + + return hr_buf; +} + +static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + struct ib_ucontext *ucontext, unsigned long user_addr) +{ + struct device *dev = hr_dev->dev; + size_t total_size; + + total_size = mtr_bufs_size(buf_attr); + if (ucontext) { + mtr->kmem = NULL; + mtr->umem = mtr_get_umem(hr_dev, mtr, buf_attr, total_size, + ucontext, user_addr); + if (IS_ERR_OR_NULL(mtr->umem)) { + dev_err(dev, "Failed to get umem, ret %ld\n", + PTR_ERR(mtr->umem)); + return -ENOMEM; + } + } else { + mtr->umem = NULL; + mtr->kmem = mtr_get_kmem(hr_dev, mtr, buf_attr, + buf_attr->page_shift, total_size, + mtr->hem_cfg.is_direct); + if (!mtr->kmem) { + dev_err(dev, "Failed to alloc kmem\n"); + return -ENOMEM; + } + } + + return 0; +} + +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) +{ + struct device *dev = hr_dev->dev; + dma_addr_t *pages; + int npage; int ret; - int i;
- ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt); - if (ret) - return ret; + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (mtr->umem) + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, + mtr->umem, page_shift); + else + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, + mtr->kmem, page_shift); + + if (npage != page_count) { + dev_err(dev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + }
-	for (i = 0; i < region_cnt; i++) {
-		r = &regions[i];
-		ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r);
+	if (mtr->hem_cfg.is_direct && npage > 1) {
+		ret = mtr_check_direct_pages(pages, npage, page_shift);
 		if (ret) {
-			dev_err(hr_dev->dev,
-				"write mtr[%d/%d] err %d,offset=%d.\n",
-				i, region_cnt, ret, r->offset);
-			goto err_write;
+			dev_err(dev, "failed to check %s page: %d / %d.\n",
+				mtr->umem ? "umtr" : "kmtr", ret, npage);
+			ret = -ENOBUFS;
+			goto err_alloc_list;
 		}
 	}
- return 0; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + dev_err(dev, "failed to map mtr pages, ret = %d.\n", ret);
-err_write: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +err_alloc_list: + /* drop tmp array */ + kvfree(pages);
return ret; }
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, unsigned int page_cnt) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_buf_region *r; + unsigned int i, mapped_cnt; + int ret; + + /* + * Only use the first page address as root ba when hopnum is 0, this + * is because the addresses of all pages are consecutive in this case. + */ + if (mtr->hem_cfg.is_direct) { + mtr->hem_cfg.root_ba = pages[0]; + return 0; + } + + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { + r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + + if (r->offset + r->count > page_cnt) { + ret = -EINVAL; + dev_err(dev, + "failed to check mtr%u count %u + %u > %u\n", + i, r->offset, r->count, page_cnt); + return ret; + } + + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { + dev_err(dev, "failed to map mtr%u offset %u, ret = %d.\n", + i, r->offset, ret); + return ret; + } + mapped_cnt += ret; + ret = 0; + } + + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + dev_err(dev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; +} +EXPORT_SYMBOL_GPL(hns_roce_mtr_map); + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { - u64 *mtts = mtt_buf; - int mtt_count; + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int mtt_count, left; + int start_index; int total = 0; - u64 *addr; - int npage; - int left; + __le64 *mtts; + u32 npage; + u64 addr;
- if (mtts == NULL || mtt_max < 1) + if (!mtt_buf || mtt_max < 1) goto done;
+ /* no mtt memory in direct mode, so just return the buffer address */ + if (cfg->is_direct) { + start_index = offset >> HNS_HW_PAGE_SHIFT; + for (mtt_count = 0; mtt_count < cfg->region_count && + total < mtt_max; mtt_count++) { + npage = cfg->region[mtt_count].offset; + if (npage < start_index) + continue; + + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtt_buf[total] = to_hr_hw_page_addr(addr); + else + mtt_buf[total] = addr; + + total++; + } + + goto done; + } + + start_index = offset >> cfg->buf_pg_shift; left = mtt_max; while (left > 0) { mtt_count = 0; - addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset + total, + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + start_index + total, &mtt_count, NULL); - if (!addr || !mtt_count) + if (!mtts || !mtt_count) goto done;
npage = min(mtt_count, left); - memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); left -= npage; - total += npage; + for (mtt_count = 0; mtt_count < npage; mtt_count++) + mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); }
done: if (base_addr) - *base_addr = mtr->hem_list.root_ba; + *base_addr = cfg->root_ba;
return total; } EXPORT_SYMBOL_GPL(hns_roce_mtr_find); + +static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *attr, + struct hns_roce_hem_cfg *cfg, + unsigned int *buf_page_shift, int unalinged_size) +{ + struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; + unsigned int page_shift; + size_t buf_size; + + /* if disable mtt, all pages must in a continuous address range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); + if (cfg->is_direct) { + /* When HEM buffer use level-0 addressing, the page size is + * equal the whole buffer size, and we split whole buffer as + * small pages which is used to check whether the adjacent units + * are in the continuous space and the size is fixed as 4K for + * the hns ROCEE required. + */ + page_shift = HNS_HW_PAGE_SHIFT; + /* The ROCEE requires the page size is 4K * 2^N. */ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; + } else { + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; + } + + /* Convert buffer size to page index and page count for each region and + * the buffer's offset need append to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && + region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { + r = &cfg->region[region_cnt]; + r->offset = page_cnt; + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); + r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; + page_cnt += r->count; + r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, + r->count); + } + + cfg->region_count = region_cnt; + *buf_page_shift = page_shift; + + return page_cnt; +} + +static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + +/** + * hns_roce_mtr_create - Create hns memory translate region. + * + * @mtr: memory translate region + * @buf_attr: buffer attribute for creating mtr + * @ba_page_shift: page shift for multi-hop base address table + * @ucontext: user space context, if it's NULL, means kernel space + * @user_addr: userspace virtual address to start at + */ +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + unsigned int ba_page_shift, + struct ib_ucontext *ucontext, unsigned long user_addr) +{ + struct device *dev = hr_dev->dev; + unsigned int buf_page_shift = 0; + int buf_page_cnt; + int ret; + + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + ucontext ? 
user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + dev_err(dev, "failed to init mtr cfg, count %d shift %u.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; + } + + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + dev_err(dev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; + } + + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configure. + */ + if (buf_attr->mtt_only) { + mtr->umem = NULL; + mtr->kmem = NULL; + return 0; + } + + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, ucontext, user_addr); + if (ret) { + dev_err(dev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; + } + + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + dev_err(dev, "failed to map mtr bufs, ret = %d.\n", ret); + else + return 0; + + mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); + return ret; +} +EXPORT_SYMBOL_GPL(hns_roce_mtr_create); + +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release multi-hop addressing resource */ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + /* free buffers */ + mtr_free_bufs(hr_dev, mtr); +} +EXPORT_SYMBOL_GPL(hns_roce_mtr_destroy); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 62fe471a81ca7..9a4118b2fb78f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -366,41 +366,35 @@ static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, return max_sge; }
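Editorial note (not part of the patch): before the QP sizing changes below, a sketch of how the reworked mtr interface above is intended to be used for a kernel-owned buffer. It uses only functions introduced in this patch; the attribute values are placeholders, not taken from the driver.

/* Sketch: create an mtr, look up one aligned block, then tear it down. */
static int example_mtr_lifecycle(struct hns_roce_dev *hr_dev,
                                 struct hns_roce_mtr *mtr)
{
        struct hns_roce_buf_attr attr = {};
        u64 blk;
        int ret;

        attr.page_shift = 12;           /* placeholder buffer page shift */
        attr.region_count = 1;
        attr.region[0].size = 8192;     /* placeholder region size */
        attr.region[0].hopnum = 1;      /* placeholder hop number */
        attr.mtt_only = false;          /* let the mtr own the buffer */

        /* NULL ucontext means a kernel-space buffer */
        ret = hns_roce_mtr_create(hr_dev, mtr, &attr, 12, NULL, 0);
        if (ret)
                return ret;

        /* look up the first aligned block by byte offset */
        if (hns_roce_mtr_find(hr_dev, mtr, 0, &blk, 1, NULL) < 1)
                ret = -ENOBUFS;

        hns_roce_mtr_destroy(hr_dev, mtr);
        return ret;
}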
-static int set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, int is_user, int has_rq, - struct hns_roce_qp *hr_qp) +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, int has_rq, bool user) { - u32 max_sge = proc_rq_sge(hr_dev, hr_qp, is_user); + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); struct device *dev = hr_dev->dev; - u32 max_cnt; + u32 cnt;
/* If srq exist, set zero for relative number of rq */ if (!has_rq) { hr_qp->rq.wqe_cnt = 0; hr_qp->rq.max_gs = 0; + hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; + return 0; }
- if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || + /* Check the validity of QP support capacity */ + if (!cap->max_recv_sge || cap->max_recv_wr > hr_dev->caps.max_wqes || cap->max_recv_sge > max_sge) { - dev_err(dev, "RQ config error, depth=%u, sge=%d\n", - cap->max_recv_wr, cap->max_recv_sge); - return -EINVAL; - } - - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - - hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "rq depth %u too large\n", - cap->max_recv_wr); + dev_err(dev, "RQ config error, depth=%u, sge=%u\n", + cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; }
- max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt) + hr_qp->rq.rsv_sge; + cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes)); + hr_qp->rq.max_gs = + roundup_pow_of_two(cap->max_recv_sge + hr_qp->rq.rsv_sge);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -408,12 +402,59 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * hr_qp->rq.max_gs);
- cap->max_recv_wr = hr_qp->rq.wqe_cnt; + hr_qp->rq.wqe_cnt = cnt; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE && + hr_qp->ibqp.qp_type != IB_QPT_UD && + hr_qp->ibqp.qp_type != IB_QPT_GSI) + hr_qp->rq_inl_buf.wqe_cnt = cnt; + else + hr_qp->rq_inl_buf.wqe_cnt = 0; + + cap->max_recv_wr = cnt; cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0; }
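Editorial note (not part of the patch): a worked example of the RQ sizing done in set_rq_size() above, under assumed caps (min_wqes = 64, one reserved SGE). A request for 100 RQ WQEs with 3 SGEs is rounded up to 128 WQEs, max_gs becomes roundup_pow_of_two(3 + 1) = 4, and the reserved SGE is subtracted again when the result is reported back through cap->max_recv_sge.

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int v)
{
        unsigned int r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned int max_recv_wr = 100, max_recv_sge = 3; /* requested by the user */
        unsigned int min_wqes = 64, rsv_sge = 1;          /* assumed device values */
        unsigned int cnt, max_gs;

        cnt = roundup_pow_of_two(max_recv_wr > min_wqes ? max_recv_wr : min_wqes);
        max_gs = roundup_pow_of_two(max_recv_sge + rsv_sge);
        printf("wqe_cnt=%u max_gs=%u reported recv_sge=%u\n",
               cnt, max_gs, max_gs - rsv_sge);
        return 0;
}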
+static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) +{ + /* GSI/UD QP only has extended sge */ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return qp->sq.max_gs; + + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; + + return 0; +} + +static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) +{ + u32 total_sge_cnt; + u32 wqe_sge_cnt; + + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; + return; + } + + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + if (wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + hr_qp->sge.sge_cnt = max(total_sge_cnt, + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + } +} + static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_ib_create_qp *ucmd) @@ -424,15 +465,13 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - dev_err(hr_dev->dev, - "Check SQ size error! Log sq stride 0x%x\n", - ucmd->log_sq_stride); + dev_err(hr_dev->dev, "failed to check SQ stride size\n"); return -EINVAL; }
if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - dev_err(hr_dev->dev, "SQ sge error! Max send sge is %d, Max sq sge is %d\n", - cap->max_send_sge, hr_dev->caps.max_sq_sg); + dev_err(hr_dev->dev, "failed to check SQ SGE size %u\n", + cap->max_send_sge); return -EINVAL; }
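Editorial note (not part of the patch): a worked example of the extended-SGE sizing in set_ext_sge_param() above, with assumed constants (16-byte SGEs, 4 KB HW pages, 2 SGEs carried inside the WQE itself). An RC SQ with 128 WQEs and max_gs = 4 needs 2 extended SGEs per WQE, and the total is clamped to at least one HW page worth of SGEs.

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int v)
{
        unsigned int r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int main(void)
{
        unsigned int sge_size = 16, hw_page = 4096, sge_in_wqe = 2; /* assumed */
        unsigned int sq_wqe_cnt = 128, max_gs = 4;                  /* example QP */
        unsigned int ext_per_wqe = max_gs > sge_in_wqe ? max_gs - sge_in_wqe : 0;
        unsigned int total = roundup_pow_of_two(sq_wqe_cnt * ext_per_wqe);
        unsigned int floor = hw_page / sge_size;

        printf("sge_cnt = %u\n", total > floor ? total : floor);
        return 0;
}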
@@ -443,213 +482,77 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { - u32 ex_sge_num; - u32 page_size; - u32 buf_size; - u32 max_cnt; + struct device *dev = hr_dev->dev; + u32 cnt = 0; int ret;
- if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || - hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) || + cnt > hr_dev->caps.max_wqes) return -EINVAL;
ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { - dev_err(hr_dev->dev, "Sanity check sq(0x%lx) size fail\n", - hr_qp->qpn); + dev_err(dev, "failed to check user SQ size\n"); return ret; }
- hr_qp->sq.wqe_shift = ucmd->log_sq_stride; - - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(hr_dev->dev, - "while setting sq(0x%lx) size, sq.wqe_cnt too large\n", - hr_qp->qpn); - return -EINVAL; - } - - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->caps.max_sq_sg <= HNS_ROCE_MAX_SGE_NUM) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; - - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - - if ((hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) && - (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A)) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(hr_dev->dev, - "SQ(0x%lx) extended sge cnt error! sge cnt is %d, max extend sg is %d.\n", - hr_qp->qpn, hr_qp->sge.sge_cnt, - hr_dev->caps.max_extend_sg); - return -EINVAL; - } - } + set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
- hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - ex_sge_num = hr_qp->sge.sge_cnt; - - /* Get buf size, SQ and RQ are aligned to page_szie */ - if (hr_dev->caps.max_sq_sg <= HNS_ROCE_MAX_SGE_NUM) { - hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - - hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - } else { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - buf_size = ALIGN((hr_qp->sge.sge_cnt << HNS_ROCE_SGE_SHIFT), - page_size); - hr_qp->sge.sge_cnt = ex_sge_num ? - max(buf_size / (1 << hr_qp->sge.sge_shift), - ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), page_size); - - hr_qp->sq.offset = 0; - if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( - (hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), - page_size); - } else { - hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( - (hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - } - } + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; + hr_qp->sq.wqe_cnt = cnt;
return 0; }
-static int split_wqe_buf_region(struct hns_roce_dev *hr_dev,
-				struct hns_roce_qp *hr_qp,
-				struct hns_roce_buf_region *regions,
-				int region_max, int page_shift)
+static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
+			    struct hns_roce_qp *hr_qp,
+			    struct hns_roce_buf_attr *buf_attr)
 {
-	int page_size = 1 << page_shift;
-	bool is_extend_sge;
-	int region_cnt = 0;
 	int buf_size;
-	int buf_cnt;
-
-	if (hr_qp->buff_size < 1 || region_max < 1)
-		return region_cnt;
-
-	if (hr_qp->sge.sge_cnt > 0)
-		is_extend_sge = true;
-	else
-		is_extend_sge = false;
-
-	/* sq region */
-	if (is_extend_sge)
-		buf_size = hr_qp->sge.offset - hr_qp->sq.offset;
-	else
-		buf_size = hr_qp->rq.offset - hr_qp->sq.offset;
-
-	if (buf_size > 0 && region_cnt < region_max) {
-		buf_cnt = DIV_ROUND_UP(buf_size, page_size);
-		hns_roce_init_buf_region(&regions[region_cnt],
-					 hr_dev->caps.wqe_sq_hop_num,
-					 hr_qp->sq.offset / page_size,
-					 buf_cnt);
-		region_cnt++;
-	}
-
-	/* sge region */
-	if (is_extend_sge) {
-		buf_size = hr_qp->rq.offset - hr_qp->sge.offset;
-		if (buf_size > 0 && region_cnt < region_max) {
-			buf_cnt = DIV_ROUND_UP(buf_size, page_size);
-			hns_roce_init_buf_region(&regions[region_cnt],
-						 hr_dev->caps.wqe_sge_hop_num,
-						 hr_qp->sge.offset / page_size,
-						 buf_cnt);
-			region_cnt++;
-		}
-	}
-
-	/* rq region */
-	buf_size = hr_qp->buff_size - hr_qp->rq.offset;
-	if (buf_size > 0) {
-		buf_cnt = DIV_ROUND_UP(buf_size, page_size);
-		hns_roce_init_buf_region(&regions[region_cnt],
-					 hr_dev->caps.wqe_rq_hop_num,
-					 hr_qp->rq.offset / page_size,
-					 buf_cnt);
-		region_cnt++;
-	}
-
-	return region_cnt;
-}
-
-static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev,
-				  struct hns_roce_buf_region *regions,
-				  int region_cnt)
-{
-	int bt_pg_shift;
-	int ba_num;
-	int ret;
+	int idx = 0;
- bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz; - - /* all root ba entries must in one bt page */ - do { - ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN; - ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt, - ba_num); - if (ret <= ba_num) - break; - - bt_pg_shift++; - } while (ret > ba_num); - - return bt_pg_shift - PAGE_SHIFT; -} - -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) -{ - struct device *dev = hr_dev->dev; + hr_qp->buff_size = 0;
- if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - } + /* SQ WQE */ + hr_qp->sq.offset = 0; + buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt, + hr_qp->sq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* extend SGE WQE in SQ */ + hr_qp->sge.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* RQ WQE */ + hr_qp->rq.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt, + hr_qp->rq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + if (hr_qp->buff_size < 1) + return -EINVAL;
- /* ud sqwqe's sge use extend sge */ - if (hr_dev->caps.max_sq_sg > HNS_ROCE_SGE_IN_WQE && - hr_qp->ibqp.qp_type == IB_QPT_GSI) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - hr_qp->sq.max_gs); - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - } + buf_attr->region_count = idx;
- if ((hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) && - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(dev, "The extended sge cnt error! sge_cnt is %d, max extend sg is %d.\n", - hr_qp->sge.sge_cnt, hr_dev->caps.max_extend_sg); - return -EINVAL; - } - } + buf_attr->mtt_only = false; + buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
return 0; } @@ -658,67 +561,28 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; - u32 buf_size; - u32 page_size; - u32 max_cnt; - int size; - int ret; + u32 cnt;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || - cap->max_send_sge > hr_dev->caps.max_sq_sg || - cap->max_inline_data > hr_dev->caps.max_sq_inline) { - dev_err(dev, "SQ WR or sge or inline data error!\n"); + cap->max_send_sge > hr_dev->caps.max_sq_sg) { + dev_err(dev, "failed to check SQ WR or SGE num, ret = %d.\n", + -EINVAL); return -EINVAL; }
- hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); - - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); - - hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "sq.wqe_cnt(0x%x) too large for setting kernel sq size.\n", - (u32)hr_qp->sq.wqe_cnt); + cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); + if (cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "failed to check WQE num %u\n", cnt); return -EINVAL; }
- /* Get data_seg numbers */ - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->caps.max_sq_sg <= HNS_ROCE_MAX_SGE_NUM) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; - - ret = set_extend_sge_param(hr_dev, hr_qp); - if (ret) { - dev_err(dev, "set extend sge parameters failed(%d)\n", ret); - return ret; - } - - /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - hr_qp->sq.offset = 0; - size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, - page_size); - - if (hr_dev->caps.max_sq_sg > HNS_ROCE_MAX_SGE_NUM && - hr_qp->sge.sge_cnt) { - buf_size = ALIGN((hr_qp->sge.sge_cnt << HNS_ROCE_SGE_SHIFT), - page_size); - hr_qp->sge.sge_cnt = max(buf_size / (1 << hr_qp->sge.sge_shift), - (u32)hr_qp->sge.sge_cnt); - hr_qp->sge.offset = size; - size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift, page_size); - } + hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); + hr_qp->sq.wqe_cnt = cnt;
- hr_qp->rq.offset = size; - size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift), - page_size); - hr_qp->buff_size = size; + set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
- /* Get wr and sge number which send */ - cap->max_send_wr = hr_qp->sq.wqe_cnt; + /* sync the parameters of kernel QP to user's configuration */ + cap->max_send_wr = cnt; cap->max_send_sge = hr_qp->sq.max_gs;
/* We don't support inline sends for kernel QPs (yet) */ @@ -794,6 +658,28 @@ static void hns_roce_free_recv_inline_buffer(struct hns_roce_qp *hr_qp) kfree(hr_qp->rq_inl_buf.wqe_list); }
+static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct hns_roce_buf_attr *buf_attr, + struct ib_uobject *uobject, unsigned long addr) +{ + struct device *dev = hr_dev->dev; + int ret; + + ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, buf_attr, + PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz, + uobject ? uobject->context : NULL, addr); + if (ret) + dev_err(dev, "failed to create WQE mtr, ret = %d.\n", ret); + + return ret; +} + +static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_uobject *uobject) +{ + hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr); +} + static void hns_roce_add_cq_to_qp(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_cq *send_cq, struct ib_cq *recv_cq) @@ -819,68 +705,12 @@ static void hns_roce_add_cq_to_qp(struct hns_roce_dev *hr_dev, } }
-static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, - u32 page_shift, bool is_user) -{ - dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL }; - struct hns_roce_buf_region *r; - int region_count; - int buf_count; - int ret; - int i; - - region_count = split_wqe_buf_region(hr_dev, hr_qp, hr_qp->regions, - ARRAY_SIZE(hr_qp->regions), page_shift); - - /* alloc a tmp list to store WQE buffers address */ - ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, region_count); - if (ret) { - dev_err(hr_dev->dev, "Failed to alloc WQE buffer list\n"); - return ret; - } - - for (i = 0; i < region_count; i++) { - r = &hr_qp->regions[i]; - if (is_user) - buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i], - r->count, r->offset, hr_qp->umem, - page_shift); - else - buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i], - r->count, r->offset, hr_qp->hr_buf); - - if (buf_count != r->count) { - dev_err(hr_dev->dev, "Failed to get %s WQE buf, expect %d = %d.\n", - is_user ? "user" : "kernel", - r->count, buf_count); - ret = -ENOBUFS; - goto done; - } - } - - hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions, - region_count); - hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift, - page_shift); - ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, hr_qp->regions, - region_count); - if (ret) - dev_err(hr_dev->dev, "Failed to attatch WQE's mtr\n"); - - goto done; - - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); -done: - hns_roce_free_buf_list(buf_list, region_count); - - return ret; -} - static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct ib_qp_init_attr *init_attr, struct ib_uobject *uobject, unsigned long addr) { - u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; + struct hns_roce_buf_attr buf_attr = {}; + struct device *dev = hr_dev->dev; bool is_rq_buf_inline; int ret;
@@ -894,28 +724,17 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, } }
- if (uobject) { - hr_qp->umem = ib_umem_get(uobject->context, addr, - hr_qp->buff_size, 0, 0); - if (IS_ERR(hr_qp->umem)) { - ret = PTR_ERR(hr_qp->umem); - goto err_inline; - } - } else { - struct hns_roce_buf *kmem; - - kmem = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, page_shift, - 0); - if (IS_ERR(hr_qp->umem)) { - ret = PTR_ERR(hr_qp->umem); - goto err_inline; - } - hr_qp->hr_buf = kmem; + ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr); + if (ret) { + dev_err(dev, "failed to set WQE attr, ret = %d.\n", ret); + goto err_inline; }
- ret = map_wqe_buf(hr_dev, hr_qp, page_shift, !!uobject); - if (ret) - goto err_alloc; + ret = alloc_wqe_buf(hr_dev, hr_qp, &buf_attr, uobject, addr); + if (ret) { + dev_err(dev, "failed to alloc WQE buf, ret = %d.\n", ret); + goto err_inline; + }
return 0;
@@ -923,30 +742,14 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, if (is_rq_buf_inline) hns_roce_free_recv_inline_buffer(hr_qp);
-err_alloc: - if (uobject) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } else { - hns_roce_buf_free(hr_dev, hr_qp->hr_buf); - hr_qp->hr_buf = NULL; - } - - dev_err(hr_dev->dev, "Failed to alloc WQE buffer, ret %d.\n", ret); - return ret; }
-static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +static void free_qp_buf(struct hns_roce_qp *hr_qp, struct ib_pd *ib_pd) { - hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr); - if (hr_qp->umem) { - ib_umem_release(hr_qp->umem); - hr_qp->umem = NULL; - } else { - hns_roce_buf_free(hr_dev, hr_qp->hr_buf); - hr_qp->hr_buf = NULL; - } + struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device); + + free_wqe_buf(hr_dev, hr_qp, ib_pd->uobject);
if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) && hr_qp->rq.wqe_cnt) @@ -967,8 +770,8 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, else hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
- ret = set_rq_size(hr_dev, &init_attr->cap, !!udata, - hns_roce_qp_has_rq(init_attr), hr_qp); + ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, + hns_roce_qp_has_rq(init_attr), !!udata); if (ret) { dev_err(hr_dev->dev, "Failed to set user RQ size\n"); return ret; @@ -1171,7 +974,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, hns_roce_release_range_qp(hr_dev, qpn, 1);
err_buf: - free_qp_buf(hr_dev, hr_qp); + free_qp_buf(hr_qp, ib_pd);
if (ib_pd->uobject) { if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) && @@ -1215,7 +1018,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) if (hr_qp->ibqp.qp_type != IB_QPT_GSI) hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
- free_qp_buf(hr_dev, hr_qp); + free_qp_buf(hr_qp, hr_qp->ibqp.pd);
if (hr_qp->ibqp.pd->uobject) { struct hns_roce_ucontext *context = @@ -1539,7 +1342,7 @@ EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs);
static void *get_wqe(struct hns_roce_qp *hr_qp, int offset) { - return hns_roce_buf_offset(hr_qp->hr_buf, offset); + return hns_roce_buf_offset(hr_qp->mtr.kmem, offset); }
void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n) @@ -1556,8 +1359,7 @@ EXPORT_SYMBOL_GPL(get_send_wqe);
void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n) { - return hns_roce_buf_offset(hr_qp->hr_buf, hr_qp->sge.offset + - (n << hr_qp->sge.sge_shift)); + return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift)); } EXPORT_SYMBOL_GPL(get_send_extend_sge);
diff --git a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c index bb8c4d7d2449a..160e0dfd71551 100644 --- a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c +++ b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c @@ -145,7 +145,7 @@ void rdfx_cp_sq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp, memcpy(dfx_qp_buf, dfx_hns_wqe_sge, 2 * sizeof(struct hns_roce_v2_wqe_data_seg)); dfx_qp_buf = hns_roce_buf_offset(rdfx_qp->buf, qp->sge.offset); - dfx_hns_wqe_sge = hns_roce_buf_offset(qp->hr_buf, qp->sge.offset); + dfx_hns_wqe_sge = hns_roce_buf_offset(qp->mtr.kmem, qp->sge.offset); rdfx_change_sq_buf(wr, atomic_en, dfx_qp_buf, dfx_hns_wqe_sge, sq, hr_dev, qp); }