From: Luoyouming <luoyouming@huawei.com>
driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5USIG
----------------------------------------------------------
Modify the sge num calculation algorithm used when creating a QP. The
sge num now takes the larger of the two values calculated from the
max_send_sge parameter and the max_inline_data parameter set by the
user. In SQ inline mode, the following two cases use extended sge:
1) UD: payload larger than 8 bytes
2) RC/XRC: payload larger than 32 bytes
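
For illustration, a minimal stand-alone sketch of the new calculation
(user-space pseudo-helpers, not the driver code itself; the constants
HNS_ROCE_SGE_SIZE = 16 and HNS_ROCE_SGE_IN_WQE = 2 match the driver
headers, the helper names are made up):

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define HNS_ROCE_SGE_SIZE	16	/* bytes per sge entry */
  #define HNS_ROCE_SGE_IN_WQE	2	/* standard sge held in an RC/XRC WQE */

  /* Round v up to the next power of two (returns 1 for v == 0). */
  static uint32_t roundup_pow2(uint32_t v)
  {
  	uint32_t r = 1;

  	while (r < v)
  		r <<= 1;
  	return r;
  }

  /* Extended sge num: the larger of the value derived from max_send_sge
   * and the value derived from max_inline_data, rounded up to a power
   * of two.
   */
  static uint32_t calc_ext_sge_num(bool is_ud_or_gsi, uint32_t max_send_sge,
  				 uint32_t max_inline_data)
  {
  	uint32_t std_sge = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
  	uint32_t min_sge = is_ud_or_gsi ? 1 : 0;
  	uint32_t from_sge = max_send_sge > std_sge ?
  			    max_send_sge - std_sge : min_sge;
  	uint32_t from_inl = roundup_pow2(max_inline_data) / HNS_ROCE_SGE_SIZE;

  	/* RC/XRC keep up to 2 sge (32 bytes) of inline data in the WQE
  	 * itself, so small inline payloads need no extended sge.
  	 */
  	if (!is_ud_or_gsi && from_inl <= HNS_ROCE_SGE_IN_WQE)
  		from_inl = 0;

  	from_sge = from_sge > from_inl ? from_sge : from_inl;
  	return from_sge ? roundup_pow2(from_sge) : 0;
  }

  int main(void)
  {
  	/* RC with 64 bytes of inline data needs 4 extended sge. */
  	printf("RC : %u\n", calc_ext_sge_num(false, 2, 64));
  	printf("UD : %u\n", calc_ext_sge_num(true, 1, 64));
  	return 0;
  }
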
Fixes: 05201e01be93 ("RDMA/hns: Refactor process of setting extended sge")
Fixes: 30b707886aeb ("RDMA/hns: Support inline data in extented sge space for RC")
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
Reviewed-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |   2 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  12 +-
 drivers/infiniband/hw/hns/hns_roce_main.c   |  16 +++
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 134 ++++++++++++++++----
 include/uapi/rdma/hns-abi.h                 |  11 ++
 5 files changed, 140 insertions(+), 35 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1386a7720d2d..47f44012f44a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -202,6 +202,7 @@ struct hns_roce_ucontext {
 	struct list_head	page_list;
 	struct mutex		page_mutex;
 	struct hns_user_mmap_entry *db_mmap_entry;
+	u32			config;
 };

 struct hns_roce_pd {
@@ -329,6 +330,7 @@ struct hns_roce_wq {
 	spinlock_t	lock;
 	u32		wqe_cnt;  /* WQE num */
 	u32		max_gs;
+	u32		ext_sge_cnt;
 	u32		rsv_sge;
 	u32		offset;
 	u32		wqe_shift;  /* WQE size */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index c10ee1edce00..779d89662d43 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -188,14 +188,6 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
 	hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
 }

-static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
-{
-	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
-		return 0;
-
-	return HNS_ROCE_SGE_IN_WQE;
-}
-
 static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
 				 const struct ib_send_wr *wr,
 				 unsigned int *sge_idx, u32 msg_len)
@@ -203,14 +195,12 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
 	struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
 	unsigned int left_len_in_pg;
 	unsigned int idx = *sge_idx;
-	unsigned int std_sge_num;
 	unsigned int i = 0;
 	unsigned int len;
 	void *addr;
 	void *dseg;

-	std_sge_num = get_std_sge_num(qp);
-	if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) {
+	if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
 		ibdev_err(ibdev,
 			  "no enough extended sge space for inline data.\n");
 		return -EINVAL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index b4ce05d12f8c..ff6f0e6f2ac4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -358,6 +358,7 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
 	struct hns_roce_ib_alloc_ucontext_resp resp = {};
 	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+	struct hns_roce_ib_alloc_ucontext ucmd = {};

 	if (!hr_dev->active)
 		return -EAGAIN;
@@ -365,6 +366,21 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	resp.qp_tab_size = hr_dev->caps.num_qps;
 	resp.srq_tab_size = hr_dev->caps.num_srqs;

+	if (udata->inlen == sizeof(struct hns_roce_ib_alloc_ucontext)) {
+		ret = ib_copy_from_udata(&ucmd, udata,
+					 min(udata->inlen, sizeof(ucmd)));
+		if (ret)
+			return ret;
+
+		if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+			context->config = ucmd.config & HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE;
+
+		if (context->config & HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE) {
+			resp.config = HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE;
+			resp.max_inline_data = hr_dev->caps.max_sq_inline;
+		}
+	}
+
 	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
 	if (ret)
 		goto error_fail_uar_alloc;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 43530a7c8304..99119c97fd96 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -482,38 +482,118 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
 	return 0;
 }

-static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+			       struct ib_qp_cap *cap)
 {
-	/* GSI/UD QP only has extended sge */
-	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
-		return qp->sq.max_gs;
+	if (cap->max_inline_data) {
+		cap->max_inline_data = roundup_pow_of_two(
+					cap->max_inline_data);
+		return min(cap->max_inline_data,
+			   hr_dev->caps.max_sq_inline);
+	}
-	if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
-		return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+	return cap->max_inline_data;
+}
-	return 0;
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+			       struct ib_qp_cap *cap, u32 config)
+{
+	bool is_ud_or_gsi_type = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+				  hr_qp->ibqp.qp_type == IB_QPT_UD);
+	u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+	if (config & HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE) {
+		if (!is_ud_or_gsi_type)
+			sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+		cap->max_inline_data = max(cap->max_inline_data,
+					   sge_num * HNS_ROCE_SGE_SIZE);
+	}
+
+	hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+/**
+ * Calculated sge num according to attr's max_send_sge
+ */
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi_type,
+					 u32 max_send_sge)
+{
+	unsigned int std_sge_num;
+	unsigned int min_sge;
+
+	std_sge_num = is_ud_or_gsi_type ? 0 : HNS_ROCE_SGE_IN_WQE;
+	min_sge = is_ud_or_gsi_type ? 1 : 0;
+	return max_send_sge > std_sge_num ? max(min_sge,
+		(max_send_sge - std_sge_num)) : min_sge;
+}
+
+/**
+ * Calculated sge num according to attr's max_inline_data
+ */
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi_type,
+						  u32 max_inline_data)
+{
+	unsigned int inline_sge = 0;
+
+	inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+
+	/*
+	 * if max_inline_data less than
+	 * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+	 * In addition to ud's mode, no need to extend sge.
+	 */
+	if ((!is_ud_or_gsi_type) && (inline_sge <= HNS_ROCE_SGE_IN_WQE))
+		inline_sge = 0;
+
+	return inline_sge;
 }

 static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
-			      struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
+			      struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap,
+			      u32 config)
 {
+	bool is_ud_or_gsi_type = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+				  hr_qp->ibqp.qp_type == IB_QPT_UD);
+	unsigned int std_sge_num;
+	u32 inline_ext_sge = 0;
+	u32 ext_wqe_sge_cnt;
 	u32 total_sge_cnt;
-	u32 wqe_sge_cnt;
+
+	cap->max_inline_data = get_max_inline_data(hr_dev, cap);

 	hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+	std_sge_num = is_ud_or_gsi_type ? 0 : HNS_ROCE_SGE_IN_WQE;
+	ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi_type,
+							cap->max_send_sge);

-	hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+	if (config & HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE) {
+		inline_ext_sge = max(ext_wqe_sge_cnt,
+				get_sge_num_from_max_inl_data(
+				is_ud_or_gsi_type, cap->max_inline_data));
+		hr_qp->sq.ext_sge_cnt = !!(inline_ext_sge) ?
+					roundup_pow_of_two(inline_ext_sge) : 0;
-	wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
+		hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+		ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+	} else {
+		hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+		hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+	}

 	/* If the number of extended sge is not zero, they MUST use the
 	 * space of HNS_HW_PAGE_SIZE at least.
 	 */
-	if (wqe_sge_cnt) {
-		total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
+	if (ext_wqe_sge_cnt) {
+		total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
 		hr_qp->sge.sge_cnt = max(total_sge_cnt,
 					 (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
 	}
+
+	update_inline_data(hr_qp, cap, config);
 }

 static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@@ -541,7 +621,7 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
 static int set_user_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
 			    struct hns_roce_qp *hr_qp,
-			    struct hns_roce_ib_create_qp *ucmd)
+			    struct hns_roce_ib_create_qp *ucmd, u32 config)
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	u32 cnt = 0;
@@ -558,10 +638,11 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
 		return ret;
 	}

-	set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
+	set_ext_sge_param(hr_dev, cnt, hr_qp, cap, config);

 	hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
 	hr_qp->sq.wqe_cnt = cnt;
+	cap->max_send_sge = hr_qp->sq.max_gs;

 	return 0;
 }
@@ -618,7 +699,8 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
 }

 static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
-			      struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp)
+			      struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp,
+			      u32 config)
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	u32 cnt;
@@ -639,7 +721,7 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev,
 	hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
 	hr_qp->sq.wqe_cnt = cnt;

-	set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
+	set_ext_sge_param(hr_dev, cnt, hr_qp, cap, config);

 	/* sync the parameters of kernel QP to user's configuration */
 	cap->max_send_wr = cnt;
@@ -991,15 +1073,12 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			struct hns_roce_ib_create_qp *ucmd)
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_ucontext *uctx;
+	u32 config = 0;
 	int ret;

 	hr_qp->ibqp.qp_type = init_attr->qp_type;

-	if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
-		init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
-
-	hr_qp->max_inline_data = init_attr->cap.max_inline_data;
-
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 		hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
 	else
@@ -1022,7 +1101,12 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		return ret;
 	}

-	ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
+	uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+					 ibucontext);
+	config = uctx->config;
+	ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd,
+			       config);
+
 	if (ret)
 		ibdev_err(ibdev,
 			  "Failed to set user SQ size, ret = %d\n", ret);
@@ -1038,7 +1122,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 		return -EINVAL;
 	}

-	ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		config = HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE;
+	ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp, config);
 	if (ret)
 		ibdev_err(ibdev,
 			  "Failed to set kernel SQ size, ret = %d\n", ret);
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index abfd36e27f5e..499ccc84aa39 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -85,13 +85,24 @@ struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 dwqe_mmap_key;
 };

+enum {
+	HNS_ROCE_UCONTEXT_EXSGE_CALC_MODE = 1 << 0,
+};
+
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 	__u32	cqe_size;
 	__u32	srq_tab_size;
+	__u32	max_inline_data;
+	__u32	config;
 	__u32	reserved;
 };

+struct hns_roce_ib_alloc_ucontext {
+	__u32 config;
+	__u32 reserved;
+};
+
 struct hns_roce_ib_alloc_pd_resp {
 	__u32 pdn;
 };