driver inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I98HIN
--------------------------------------------------------------------------
Currently, the driver always allocates 4K pages for the user-space WQE buffer, even on a system with a 64K page size. As a result, HW reads WQEs at a 4K granularity even on a 64K system. Since up to 1024 bytes of inline data are supported, only four inline WQEs fit into each 4K page, so with SQ inline in use HW has to switch pages every 4 WQEs. Each page switch costs about 400ns, which amortizes to an average delay of 100ns per packet.

To improve performance, allow the user-mode driver to use a more flexible WQE buffer page size allocation strategy: the user-mode driver can configure the WQE buffer with any page size from 4K up to the system PAGE_SIZE.

This feature has to be used together with a matching user-mode driver. To maintain compatibility, if the user-mode driver does not support this feature, the kernel continues to use a fixed 4K page size.
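
For clarity, the page-shift selection this feature enables can be summarized by the following self-contained sketch. It is illustrative only: the constant values and the helper names (pick_qp_page_shift(), page_shift_is_valid()) are made up for the example and are not part of the driver; only the overall logic mirrors the changes in the diff below.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel-side values; not the real ones. */
#define HNS_HW_PAGE_SHIFT		12	/* smallest supported WQE buffer page: 4K */
#define SYS_PAGE_SHIFT			16	/* example system PAGE_SHIFT on a 64K kernel */
#define HNS_ROCE_UCTX_DYN_QP_PGSZ	(1U << 0)	/* bit value chosen for the example */

/*
 * Hypothetical helper: pick the WQE buffer page shift for a new QP.
 * If the ucontext negotiated HNS_ROCE_UCTX_DYN_QP_PGSZ, the shift
 * requested by the user-mode driver is used; otherwise the kernel
 * keeps the legacy fixed 4K page size.
 */
static uint8_t pick_qp_page_shift(uint32_t uctx_config, uint8_t requested_shift)
{
	if (uctx_config & HNS_ROCE_UCTX_DYN_QP_PGSZ)
		return requested_shift;
	return HNS_HW_PAGE_SHIFT;
}

/* The kernel rejects shifts outside [HNS_HW_PAGE_SHIFT, PAGE_SHIFT]. */
static bool page_shift_is_valid(uint8_t page_shift)
{
	return page_shift >= HNS_HW_PAGE_SHIFT && page_shift <= SYS_PAGE_SHIFT;
}

int main(void)
{
	/* Old user-mode driver: flag not negotiated, falls back to 4K. */
	printf("legacy shift:  %u\n", pick_qp_page_shift(0, 16));

	/* New user-mode driver: requests 64K pages on a 64K system. */
	uint8_t shift = pick_qp_page_shift(HNS_ROCE_UCTX_DYN_QP_PGSZ, 16);
	printf("dynamic shift: %u (valid: %d)\n", shift, page_shift_is_valid(shift));

	return 0;
}

So a new user-mode driver on a 64K system can request a 64K (shift 16) WQE buffer page, while an old user-mode driver keeps the legacy 4K layout.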
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_main.c |  5 ++++
 drivers/infiniband/hw/hns/hns_roce_qp.c   | 32 ++++++++++++++---------
 2 files changed, 25 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index c3d057222..260052fcb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -473,6 +473,11 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
 		resp.congest_type = hr_dev->caps.cong_cap;
 
+	if (ucmd.config & HNS_ROCE_UCTX_DYN_QP_PGSZ) {
+		context->config |= HNS_ROCE_UCTX_DYN_QP_PGSZ;
+		resp.config |= HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ;
+	}
+
 	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
 	if (ret)
 		goto error_out;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 0b7064b0a..33c1a5cef 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -641,18 +641,21 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
 }
 
 static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
-			    struct hns_roce_qp *hr_qp,
+			    struct hns_roce_qp *hr_qp, u8 page_shift,
 			    struct hns_roce_buf_attr *buf_attr)
 {
+	unsigned int page_size = BIT(page_shift);
 	int buf_size;
 	int idx = 0;
 
 	hr_qp->buff_size = 0;
 
+	if (page_shift > PAGE_SHIFT || page_shift < HNS_HW_PAGE_SHIFT)
+		return -EOPNOTSUPP;
+
 	/* SQ WQE */
 	hr_qp->sq.offset = 0;
-	buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt,
-					  hr_qp->sq.wqe_shift);
+	buf_size = ALIGN(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift, page_size);
 	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
 		buf_attr->region[idx].size = buf_size;
 		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
@@ -662,8 +665,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
 
 	/* extend SGE WQE in SQ */
 	hr_qp->sge.offset = hr_qp->buff_size;
-	buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt,
-					  hr_qp->sge.sge_shift);
+	buf_size = ALIGN(hr_qp->sge.sge_cnt << hr_qp->sge.sge_shift, page_size);
 	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
 		buf_attr->region[idx].size = buf_size;
 		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num;
@@ -673,8 +675,7 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
 
 	/* RQ WQE */
 	hr_qp->rq.offset = hr_qp->buff_size;
-	buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt,
-					  hr_qp->rq.wqe_shift);
+	buf_size = ALIGN(hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift, page_size);
 	if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) {
 		buf_attr->region[idx].size = buf_size;
 		buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
@@ -685,8 +686,8 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev,
 	if (hr_qp->buff_size < 1)
 		return -EINVAL;
 
-	buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
 	buf_attr->region_count = idx;
+	buf_attr->page_shift = page_shift;
 
 	return 0;
 }
@@ -742,20 +743,27 @@ static int hns_roce_qp_has_rq(struct ib_qp_init_attr *attr)
 
 static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			struct ib_qp_init_attr *init_attr,
-			struct ib_udata *udata, unsigned long addr)
+			struct ib_udata *udata,
+			struct hns_roce_ib_create_qp *ucmd)
 {
+	struct hns_roce_ucontext *uctx = rdma_udata_to_drv_context(udata,
+		struct hns_roce_ucontext, ibucontext);
 	struct ib_device *ibdev = &hr_dev->ib_dev;
 	struct hns_roce_buf_attr buf_attr = {};
+	u8 page_shift = HNS_HW_PAGE_SHIFT;
 	int ret;
 
-	ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
+	if (uctx && (uctx->config & HNS_ROCE_UCTX_DYN_QP_PGSZ))
+		page_shift = ucmd->pageshift;
+
+	ret = set_wqe_buf_attr(hr_dev, hr_qp, page_shift, &buf_attr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to split WQE buf, ret = %d.\n", ret);
 		goto err_inline;
 	}
 	ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, &buf_attr,
 				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
-				  udata, addr);
+				  udata, ucmd->buf_addr);
 	if (ret) {
 		ibdev_err(ibdev, "failed to create WQE mtr, ret = %d.\n", ret);
 		goto err_inline;
@@ -1151,7 +1159,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
 		}
 	}
 
-	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, ucmd.buf_addr);
+	ret = alloc_qp_buf(hr_dev, hr_qp, init_attr, udata, &ucmd);
 	if (ret) {
 		ibdev_err(ibdev, "failed to alloc QP buffer, ret = %d.\n", ret);
 		goto err_buf;