This series refactors memory related codes, including MR, MTR(Memory Translate Region) and MPT(Memory Protection Table).
Changes since v1: * Add static keyword for hns_roce_mr_free(). * Link: https://patchwork.kernel.org/project/linux-rdma/cover/1611236882-41498-1-git...
Lang Cheng (2): RDMA/hns: Optimize the MR registration process RDMA/hns: Use new interface to set MPT related fields
Xi Wang (1): RDMA/hns: Refactor the MTR creation flow
drivers/infiniband/hw/hns/hns_roce_common.h | 22 ++ drivers/infiniband/hw/hns/hns_roce_cq.c | 1 - drivers/infiniband/hw/hns/hns_roce_device.h | 5 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 77 +++--- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 39 +++ drivers/infiniband/hw/hns/hns_roce_mr.c | 401 +++++++++++----------------- drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - drivers/infiniband/hw/hns/hns_roce_srq.c | 2 - 8 files changed, 257 insertions(+), 291 deletions(-)
From: Xi Wang wangxi11@huawei.com
Split the hns_roce_mtr_create() into serval small functions, remove unused member in 'struct hns_roce_buf_attr' and delete unnecessary MTR page count check flow to make the MTR creation related codes clearer.
Signed-off-by: Xi Wang wangxi11@huawei.com Signed-off-by: Weihang Li liweihang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_cq.c | 1 - drivers/infiniband/hw/hns/hns_roce_device.h | 1 - drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 1 - drivers/infiniband/hw/hns/hns_roce_mr.c | 291 ++++++++++++---------------- drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - drivers/infiniband/hw/hns/hns_roce_srq.c | 2 - 6 files changed, 125 insertions(+), 172 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index ffb7f7e..74fc494 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -206,7 +206,6 @@ static int alloc_cq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq, buf_attr.region[0].size = hr_cq->cq_depth * hr_cq->cqe_size; buf_attr.region[0].hopnum = hr_dev->caps.cqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true;
ret = hns_roce_mtr_create(hr_dev, &hr_cq->mtr, &buf_attr, hr_dev->caps.cqe_ba_pg_sz + HNS_HW_PAGE_SHIFT, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index c46b330..ffed82d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -335,7 +335,6 @@ struct hns_roce_buf_attr { } region[HNS_ROCE_MAX_BT_REGION]; unsigned int region_count; /* valid region count */ unsigned int page_shift; /* buffer page shift */ - bool fixed_page; /* decide page shift is fixed-size or maximum size */ unsigned int user_access; /* umem access flag */ bool mtt_only; /* only alloc buffer-required MTT memory */ }; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 4c06889..110354b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -5948,7 +5948,6 @@ static int alloc_eq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_eq *eq) buf_attr.region[0].size = eq->entries * eq->eqe_size; buf_attr.region[0].hopnum = eq->hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true;
err = hns_roce_mtr_create(hr_dev, &eq->mtr, &buf_attr, hr_dev->caps.eqe_ba_pg_sz + diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1fbfa3a..45ceeab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -124,7 +124,6 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, buf_attr.region[0].size = length; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true; buf_attr.user_access = access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast; @@ -729,25 +728,15 @@ static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) }
static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - struct hns_roce_buf_attr *buf_attr, bool is_direct, + struct hns_roce_buf_attr *buf_attr, struct ib_udata *udata, unsigned long user_addr) { struct ib_device *ibdev = &hr_dev->ib_dev; - unsigned int best_pg_shift; - int all_pg_count = 0; size_t total_size; - int ret;
total_size = mtr_bufs_size(buf_attr); - if (total_size < 1) { - ibdev_err(ibdev, "failed to check mtr size\n."); - return -EINVAL; - }
if (udata) { - unsigned long pgsz_bitmap; - unsigned long page_size; - mtr->kmem = NULL; mtr->umem = ib_umem_get(ibdev, user_addr, total_size, buf_attr->user_access); @@ -756,76 +745,67 @@ static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, PTR_ERR(mtr->umem)); return -ENOMEM; } - if (buf_attr->fixed_page) - pgsz_bitmap = 1 << buf_attr->page_shift; - else - pgsz_bitmap = GENMASK(buf_attr->page_shift, PAGE_SHIFT); - - page_size = ib_umem_find_best_pgsz(mtr->umem, pgsz_bitmap, - user_addr); - if (!page_size) - return -EINVAL; - best_pg_shift = order_base_2(page_size); - all_pg_count = ib_umem_num_dma_blocks(mtr->umem, page_size); - ret = 0; } else { mtr->umem = NULL; - mtr->kmem = - hns_roce_buf_alloc(hr_dev, total_size, - buf_attr->page_shift, - is_direct ? HNS_ROCE_BUF_DIRECT : 0); + mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size, + buf_attr->page_shift, + mtr->hem_cfg.is_direct ? + HNS_ROCE_BUF_DIRECT : 0); if (IS_ERR(mtr->kmem)) { ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n", PTR_ERR(mtr->kmem)); return PTR_ERR(mtr->kmem); } - - best_pg_shift = buf_attr->page_shift; - all_pg_count = mtr->kmem->npages; }
- /* must bigger than minimum hardware page shift */ - if (best_pg_shift < HNS_HW_PAGE_SHIFT || all_pg_count < 1) { - ret = -EINVAL; - ibdev_err(ibdev, - "failed to check mtr, page shift = %u count = %d.\n", - best_pg_shift, all_pg_count); - goto err_alloc_mem; - } - - mtr->hem_cfg.buf_pg_shift = best_pg_shift; - mtr->hem_cfg.buf_pg_count = all_pg_count; - return 0; -err_alloc_mem: - mtr_free_bufs(hr_dev, mtr); - return ret; }
-static int mtr_get_pages(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, int count, unsigned int page_shift) +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) { struct ib_device *ibdev = &hr_dev->ib_dev; + dma_addr_t *pages; int npage; - int err; + int ret; + + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM;
if (mtr->umem) - npage = hns_roce_get_umem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, 0, mtr->umem, page_shift); else - npage = hns_roce_get_kmem_bufs(hr_dev, pages, count, 0, + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, 0, mtr->kmem);
+ if (npage != page_count) { + ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + } + if (mtr->hem_cfg.is_direct && npage > 1) { - err = mtr_check_direct_pages(pages, npage, page_shift); - if (err) { - ibdev_err(ibdev, "Failed to check %s direct page-%d\n", - mtr->umem ? "user" : "kernel", err); - npage = err; + ret = mtr_check_direct_pages(pages, npage, page_shift); + if (ret) { + ibdev_err(ibdev, "failed to check %s mtr, idx = %d.\n", + mtr->umem ? "user" : "kernel", ret); + ret = -ENOBUFS; + goto err_alloc_list; } }
- return npage; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); + +err_alloc_list: + kvfree(pages); + + return ret; }
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, @@ -928,65 +908,88 @@ int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, struct hns_roce_buf_attr *attr, struct hns_roce_hem_cfg *cfg, - unsigned int *buf_page_shift) + unsigned int *buf_page_shift, int unalinged_size) { struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; unsigned int page_shift; - int page_cnt = 0; size_t buf_size; - int region_cnt;
+ /* If mtt is disabled, all pages must be within a continuous range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); if (cfg->is_direct) { - buf_size = cfg->buf_pg_count << cfg->buf_pg_shift; - page_cnt = DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE); - /* - * When HEM buffer use level-0 addressing, the page size equals - * the buffer size, and the the page size = 4K * 2^N. + /* When HEM buffer uses 0-level addressing, the page size is + * equal to the whole buffer size, and we split the buffer into + * small pages which is used to check whether the adjacent + * units are in the continuous space and its size is fixed to + * 4K based on hns ROCEE's requirement. */ - cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + order_base_2(page_cnt); - if (attr->region_count > 1) { - cfg->buf_pg_count = page_cnt; - page_shift = HNS_HW_PAGE_SHIFT; - } else { - cfg->buf_pg_count = 1; - page_shift = cfg->buf_pg_shift; - if (buf_size != 1 << page_shift) { - ibdev_err(&hr_dev->ib_dev, - "failed to check direct size %zu shift %d.\n", - buf_size, page_shift); - return -EINVAL; - } - } + page_shift = HNS_HW_PAGE_SHIFT; + + /* The ROCEE requires the page size to be 4K * 2 ^ N. */ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; } else { - page_shift = cfg->buf_pg_shift; + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; }
- /* convert buffer size to page index and page count */ - for (page_cnt = 0, region_cnt = 0; page_cnt < cfg->buf_pg_count && - region_cnt < attr->region_count && + /* Convert buffer size to page index and page count for each region and + * the buffer's offset needs to be appended to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { r = &cfg->region[region_cnt]; r->offset = page_cnt; - buf_size = hr_hw_page_align(attr->region[region_cnt].size); + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; page_cnt += r->count; r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, r->count); }
- if (region_cnt < 1) { - ibdev_err(&hr_dev->ib_dev, - "failed to check mtr region count, pages = %d.\n", - cfg->buf_pg_count); - return -ENOBUFS; - } - cfg->region_count = region_cnt; *buf_page_shift = page_shift;
return page_cnt; }
+static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + /** * hns_roce_mtr_create - Create hns memory translate region. * @@ -1002,95 +1005,51 @@ int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, unsigned int ba_page_shift, struct ib_udata *udata, unsigned long user_addr) { - struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; struct ib_device *ibdev = &hr_dev->ib_dev; unsigned int buf_page_shift = 0; - dma_addr_t *pages = NULL; - int all_pg_cnt; - int get_pg_cnt; - int ret = 0; - - /* if disable mtt, all pages must in a continuous address range */ - cfg->is_direct = !mtr_has_mtt(buf_attr); - - /* if buffer only need mtt, just init the hem cfg */ - if (buf_attr->mtt_only) { - cfg->buf_pg_shift = buf_attr->page_shift; - cfg->buf_pg_count = mtr_bufs_size(buf_attr) >> - buf_attr->page_shift; - mtr->umem = NULL; - mtr->kmem = NULL; - } else { - ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, cfg->is_direct, - udata, user_addr); - if (ret) { - ibdev_err(ibdev, - "failed to alloc mtr bufs, ret = %d.\n", ret); - return ret; - } - } + int buf_page_cnt; + int ret;
- all_pg_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, cfg, &buf_page_shift); - if (all_pg_cnt < 1) { - ret = -ENOBUFS; - ibdev_err(ibdev, "failed to init mtr buf cfg.\n"); - goto err_alloc_bufs; + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + udata ? user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %d.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; }
- hns_roce_hem_list_init(&mtr->hem_list); - if (!cfg->is_direct) { - ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, - cfg->region, cfg->region_count, - ba_page_shift); - if (ret) { - ibdev_err(ibdev, "failed to request mtr hem, ret = %d.\n", - ret); - goto err_alloc_bufs; - } - cfg->root_ba = mtr->hem_list.root_ba; - cfg->ba_pg_shift = ba_page_shift; - } else { - cfg->ba_pg_shift = cfg->buf_pg_shift; + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; }
- /* no buffer to map */ - if (buf_attr->mtt_only) + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configuration. + */ + if (buf_attr->mtt_only) { + mtr->umem = NULL; + mtr->kmem = NULL; return 0; - - /* alloc a tmp array to store buffer's dma address */ - pages = kvcalloc(all_pg_cnt, sizeof(dma_addr_t), GFP_KERNEL); - if (!pages) { - ret = -ENOMEM; - ibdev_err(ibdev, "failed to alloc mtr page list %d.\n", - all_pg_cnt); - goto err_alloc_hem_list; - } - - get_pg_cnt = mtr_get_pages(hr_dev, mtr, pages, all_pg_cnt, - buf_page_shift); - if (get_pg_cnt != all_pg_cnt) { - ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", - get_pg_cnt, all_pg_cnt); - ret = -ENOBUFS; - goto err_alloc_page_list; }
- /* write buffer's dma address to BA table */ - ret = hns_roce_mtr_map(hr_dev, mtr, pages, all_pg_cnt); + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr); if (ret) { - ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret); - goto err_alloc_page_list; + ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; }
- /* drop tmp array */ - kvfree(pages); - return 0; -err_alloc_page_list: - kvfree(pages); -err_alloc_hem_list: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -err_alloc_bufs: + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret); + else + return 0; + mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); return ret; }
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d8e2fe5..9988ca9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -599,7 +599,6 @@ static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, return -EINVAL;
buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz; - buf_attr->fixed_page = true; buf_attr->region_count = idx;
return 0; diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c4ae57e..9403828 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -194,7 +194,6 @@ static int alloc_srq_buf(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->wqe_shift); buf_attr.region[0].hopnum = hr_dev->caps.srqwqe_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true;
err = hns_roce_mtr_create(hr_dev, &srq->buf_mtr, &buf_attr, hr_dev->caps.srqwqe_ba_pg_sz + @@ -226,7 +225,6 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, srq->idx_que.entry_shift); buf_attr.region[0].hopnum = hr_dev->caps.idx_hop_num; buf_attr.region_count = 1; - buf_attr.fixed_page = true;
err = hns_roce_mtr_create(hr_dev, &idx_que->mtr, &buf_attr, hr_dev->caps.idx_ba_pg_sz + HNS_HW_PAGE_SHIFT,
From: Lang Cheng chenglang@huawei.com
When creating or re-registering an MR, storing the PDN, access flag and IOVA information ASAP can simplify the number of parameters passed into the subsequent process.
Reported-by: kernel test robot lkp@intel.com Signed-off-by: Lang Cheng chenglang@huawei.com Signed-off-by: Weihang Li liweihang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_device.h | 4 +- drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 22 ++---- drivers/infiniband/hw/hns/hns_roce_mr.c | 110 ++++++++++------------------ 3 files changed, 47 insertions(+), 89 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ffed82d..f62851f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -914,8 +914,7 @@ struct hns_roce_hw { int (*write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr, unsigned long mtpt_idx); int (*rereg_write_mtpt)(struct hns_roce_dev *hr_dev, - struct hns_roce_mr *mr, int flags, u32 pdn, - int mr_access_flags, u64 iova, u64 size, + struct hns_roce_mr *mr, int flags, void *mb_buf); int (*frmr_write_mtpt)(struct hns_roce_dev *hr_dev, void *mb_buf, struct hns_roce_mr *mr); @@ -1284,7 +1283,6 @@ u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq); #endif /* _HNS_ROCE_DEVICE_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 110354b..16ef631 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2927,20 +2927,17 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, int flags, - u32 pdn, int mr_access_flags, u64 iova, - u64 size, void *mb_buf) + void *mb_buf) { struct hns_roce_v2_mpt_entry *mpt_entry = mb_buf; + u32 mr_access_flags = mr->access; int ret = 0;
roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID);
- if (flags & IB_MR_REREG_PD) { - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, pdn); - mr->pd = pdn; - } + roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, + V2_MPT_BYTE_4_PD_S, mr->pd);
if (flags & IB_MR_REREG_ACCESS) { roce_set_bit(mpt_entry->byte_8_mw_cnt_en, @@ -2958,13 +2955,10 @@ static int hns_roce_v2_rereg_write_mtpt(struct hns_roce_dev *hr_dev, }
if (flags & IB_MR_REREG_TRANS) { - mpt_entry->va_l = cpu_to_le32(lower_32_bits(iova)); - mpt_entry->va_h = cpu_to_le32(upper_32_bits(iova)); - mpt_entry->len_l = cpu_to_le32(lower_32_bits(size)); - mpt_entry->len_h = cpu_to_le32(upper_32_bits(size)); - - mr->iova = iova; - mr->size = size; + mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); + mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova)); + mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); + mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size));
ret = set_mtpt_pbl(hr_dev, mpt_entry, mr); } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 45ceeab..5a2a557 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -66,8 +66,7 @@ int hns_roce_hw_destroy_mpt(struct hns_roce_dev *hr_dev, HNS_ROCE_CMD_TIMEOUT_MSECS); }
-static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - u32 pd, u64 iova, u64 size, u32 access) +static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) { struct ib_device *ibdev = &hr_dev->ib_dev; unsigned long obj = 0; @@ -82,11 +81,6 @@ static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, return -ENOMEM; }
- mr->iova = iova; /* MR va starting addr */ - mr->size = size; /* MR addr range */ - mr->pd = pd; /* MR num */ - mr->access = access; /* MR access permit */ - mr->enabled = 0; /* MR active status */ mr->key = hw_index_to_key(obj); /* MR key */
err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table, obj); @@ -110,8 +104,7 @@ static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr) }
static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, - size_t length, struct ib_udata *udata, u64 start, - int access) + struct ib_udata *udata, u64 start) { struct ib_device *ibdev = &hr_dev->ib_dev; bool is_fast = mr->type == MR_TYPE_FRMR; @@ -121,10 +114,10 @@ static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr, mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num; buf_attr.page_shift = is_fast ? PAGE_SHIFT : hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT; - buf_attr.region[0].size = length; + buf_attr.region[0].size = mr->size; buf_attr.region[0].hopnum = mr->pbl_hop_num; buf_attr.region_count = 1; - buf_attr.user_access = access; + buf_attr.user_access = mr->access; /* fast MR's buffer is alloced before mapping, not at creation */ buf_attr.mtt_only = is_fast;
@@ -196,9 +189,6 @@ static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev, }
mr->enabled = 1; - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return 0;
err_page: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -236,14 +226,16 @@ struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc) return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_DMA; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = acc;
/* Allocate memory region key */ hns_roce_hem_list_init(&mr->pbl_mtr.hem_list); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, 0, acc); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free;
- ret = hns_roce_mr_enable(to_hr_dev(pd->device), mr); + ret = hns_roce_mr_enable(hr_dev, mr); if (ret) goto err_mr;
@@ -270,13 +262,17 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, if (!mr) return ERR_PTR(-ENOMEM);
+ mr->iova = virt_addr; + mr->size = length; + mr->pd = to_hr_pd(pd)->pdn; + mr->access = access_flags; mr->type = MR_TYPE_MR; - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, virt_addr, length, - access_flags); + + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_alloc_mr;
- ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, access_flags); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); if (ret) goto err_alloc_key;
@@ -298,35 +294,6 @@ struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, return ERR_PTR(ret); }
-static int rereg_mr_trans(struct ib_mr *ibmr, int flags, - u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct hns_roce_cmd_mailbox *mailbox, - u32 pdn, struct ib_udata *udata) -{ - struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device); - struct ib_device *ibdev = &hr_dev->ib_dev; - struct hns_roce_mr *mr = to_hr_mr(ibmr); - int ret; - - free_mr_pbl(hr_dev, mr); - ret = alloc_mr_pbl(hr_dev, mr, length, udata, start, mr_access_flags); - if (ret) { - ibdev_err(ibdev, "failed to create mr PBL, ret = %d.\n", ret); - return ret; - } - - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) { - ibdev_err(ibdev, "failed to write mtpt, ret = %d.\n", ret); - free_mr_pbl(hr_dev, mr); - } - - return ret; -} - struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_pd *pd, @@ -337,7 +304,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, struct hns_roce_mr *mr = to_hr_mr(ibmr); struct hns_roce_cmd_mailbox *mailbox; unsigned long mtpt_idx; - u32 pdn = 0; int ret;
if (!mr->enabled) @@ -359,23 +325,29 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);
mr->enabled = 0; + mr->iova = virt_addr; + mr->size = length;
if (flags & IB_MR_REREG_PD) - pdn = to_hr_pd(pd)->pdn; + mr->pd = to_hr_pd(pd)->pdn; + + if (flags & IB_MR_REREG_ACCESS) + mr->access = mr_access_flags;
if (flags & IB_MR_REREG_TRANS) { - ret = rereg_mr_trans(ibmr, flags, - start, length, - virt_addr, mr_access_flags, - mailbox, pdn, udata); - if (ret) - goto free_cmd_mbox; - } else { - ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, pdn, - mr_access_flags, virt_addr, - length, mailbox->buf); - if (ret) + free_mr_pbl(hr_dev, mr); + ret = alloc_mr_pbl(hr_dev, mr, udata, start); + if (ret) { + ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n", + ret); goto free_cmd_mbox; + } + } + + ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf); + if (ret) { + ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret); + goto free_cmd_mbox; }
ret = hns_roce_hw_create_mpt(hr_dev, mailbox, mtpt_idx); @@ -385,12 +357,6 @@ struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start, }
mr->enabled = 1; - if (flags & IB_MR_REREG_ACCESS) - mr->access = mr_access_flags; - - hns_roce_free_cmd_mailbox(hr_dev, mailbox); - - return NULL;
free_cmd_mbox: hns_roce_free_cmd_mailbox(hr_dev, mailbox); @@ -420,7 +386,6 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, struct hns_roce_dev *hr_dev = to_hr_dev(pd->device); struct device *dev = hr_dev->dev; struct hns_roce_mr *mr; - u64 length; int ret;
if (mr_type != IB_MR_TYPE_MEM_REG) @@ -437,14 +402,15 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return ERR_PTR(-ENOMEM);
mr->type = MR_TYPE_FRMR; + mr->pd = to_hr_pd(pd)->pdn; + mr->size = max_num_sg * (1 << PAGE_SHIFT);
/* Allocate memory region key */ - length = max_num_sg * (1 << PAGE_SHIFT); - ret = alloc_mr_key(hr_dev, mr, to_hr_pd(pd)->pdn, 0, length, 0); + ret = alloc_mr_key(hr_dev, mr); if (ret) goto err_free;
- ret = alloc_mr_pbl(hr_dev, mr, length, NULL, 0, 0); + ret = alloc_mr_pbl(hr_dev, mr, NULL, 0); if (ret) goto err_key;
@@ -453,7 +419,7 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, goto err_pbl;
mr->ibmr.rkey = mr->ibmr.lkey = mr->key; - mr->ibmr.length = length; + mr->ibmr.length = mr->size;
return &mr->ibmr;
From: Lang Cheng chenglang@huawei.com
Achieve hr_reg_write() to simply the codes to fill fields.
Signed-off-by: Lang Cheng chenglang@huawei.com Signed-off-by: Weihang Li liweihang@huawei.com --- drivers/infiniband/hw/hns/hns_roce_common.h | 22 ++++++++++++ drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 54 +++++++++++++---------------- drivers/infiniband/hw/hns/hns_roce_hw_v2.h | 39 +++++++++++++++++++++ 3 files changed, 85 insertions(+), 30 deletions(-)
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h index 5afee04..3ca6e88 100644 --- a/drivers/infiniband/hw/hns/hns_roce_common.h +++ b/drivers/infiniband/hw/hns/hns_roce_common.h @@ -32,6 +32,7 @@
#ifndef _HNS_ROCE_COMMON_H #define _HNS_ROCE_COMMON_H +#include <linux/bitfield.h>
#define roce_write(dev, reg, val) writel((val), (dev)->reg_base + (reg)) #define roce_read(dev, reg) readl((dev)->reg_base + (reg)) @@ -65,6 +66,27 @@
#define hr_reg_enable(ptr, field) _hr_reg_enable(ptr, field)
+#define _hr_reg_clear(ptr, field_type, field_h, field_l) \ + ({ \ + const field_type *_ptr = ptr; \ + *((__le32 *)_ptr + (field_h) / 32) &= \ + cpu_to_le32( \ + ~GENMASK((field_h) % 32, (field_l) % 32)) + \ + BUILD_BUG_ON_ZERO(((field_h) / 32) != \ + ((field_l) / 32)); \ + }) + +#define hr_reg_clear(ptr, field) _hr_reg_clear(ptr, field) + +#define _hr_reg_write(ptr, field_type, field_h, field_l, val) \ + ({ \ + _hr_reg_clear(ptr, field_type, field_h, field_l); \ + *((__le32 *)ptr + (field_h) / 32) |= cpu_to_le32(FIELD_PREP( \ + GENMASK((field_h) % 32, (field_l) % 32), val)); \ + }) + +#define hr_reg_write(ptr, field, val) _hr_reg_write(ptr, field, val) + #define ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S 3 #define ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S 4
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 16ef631..a5bbfb1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -2880,36 +2880,20 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry = mb_buf; memset(mpt_entry, 0, sizeof(*mpt_entry));
- roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_MPT_ST_M, - V2_MPT_BYTE_4_MPT_ST_S, V2_MPT_ST_VALID); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PBL_HOP_NUM_M, - V2_MPT_BYTE_4_PBL_HOP_NUM_S, mr->pbl_hop_num == - HNS_ROCE_HOP_NUM_0 ? 0 : mr->pbl_hop_num); - roce_set_field(mpt_entry->byte_4_pd_hop_st, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_M, - V2_MPT_BYTE_4_PBL_BA_PG_SZ_S, - to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); - roce_set_field(mpt_entry->byte_4_pd_hop_st, V2_MPT_BYTE_4_PD_M, - V2_MPT_BYTE_4_PD_S, mr->pd); - - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RA_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_R_INV_EN_S, 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_L_INV_EN_S, 1); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_BIND_EN_S, - (mr->access & IB_ACCESS_MW_BIND ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_ATOMIC_EN_S, - mr->access & IB_ACCESS_REMOTE_ATOMIC ? 1 : 0); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RR_EN_S, - (mr->access & IB_ACCESS_REMOTE_READ ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_RW_EN_S, - (mr->access & IB_ACCESS_REMOTE_WRITE ? 1 : 0)); - roce_set_bit(mpt_entry->byte_8_mw_cnt_en, V2_MPT_BYTE_8_LW_EN_S, - (mr->access & IB_ACCESS_LOCAL_WRITE ? 1 : 0)); - - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_PA_S, - mr->type == MR_TYPE_MR ? 0 : 1); - roce_set_bit(mpt_entry->byte_12_mw_pa, V2_MPT_BYTE_12_INNER_PA_VLD_S, - 1); + hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID); + hr_reg_write(mpt_entry, MPT_PD, mr->pd); + hr_reg_enable(mpt_entry, MPT_L_INV_EN); + + hr_reg_write(mpt_entry, MPT_BIND_EN, + !!(mr->access & IB_ACCESS_MW_BIND)); + hr_reg_write(mpt_entry, MPT_ATOMIC_EN, + !!(mr->access & IB_ACCESS_REMOTE_ATOMIC)); + hr_reg_write(mpt_entry, MPT_RR_EN, + !!(mr->access & IB_ACCESS_REMOTE_READ)); + hr_reg_write(mpt_entry, MPT_RW_EN, + !!(mr->access & IB_ACCESS_REMOTE_WRITE)); + hr_reg_write(mpt_entry, MPT_LW_EN, + !!((mr->access & IB_ACCESS_LOCAL_WRITE)));
mpt_entry->len_l = cpu_to_le32(lower_32_bits(mr->size)); mpt_entry->len_h = cpu_to_le32(upper_32_bits(mr->size)); @@ -2917,9 +2901,19 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev, mpt_entry->va_l = cpu_to_le32(lower_32_bits(mr->iova)); mpt_entry->va_h = cpu_to_le32(upper_32_bits(mr->iova));
+ if (mr->type != MR_TYPE_MR) + hr_reg_enable(mpt_entry, MPT_PA); + if (mr->type == MR_TYPE_DMA) return 0;
+ if (mr->pbl_hop_num != HNS_ROCE_HOP_NUM_0) + hr_reg_write(mpt_entry, MPT_PBL_HOP_NUM, mr->pbl_hop_num); + + hr_reg_write(mpt_entry, MPT_PBL_BA_PG_SZ, + to_hr_hw_page_shift(mr->pbl_mtr.hem_cfg.ba_pg_shift)); + hr_reg_enable(mpt_entry, MPT_INNER_PA_VLD); + ret = set_mtpt_pbl(hr_dev, mpt_entry, mr);
return ret; diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index bdaccf8..69bc072 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -993,6 +993,45 @@ struct hns_roce_v2_mpt_entry { __le32 byte_64_buf_pa1; };
+#define MPT_FIELD_LOC(h, l) FIELD_LOC(struct hns_roce_v2_mpt_entry, h, l) + +#define MPT_ST MPT_FIELD_LOC(1, 0) +#define MPT_PBL_HOP_NUM MPT_FIELD_LOC(3, 2) +#define MPT_PBL_BA_PG_SZ MPT_FIELD_LOC(7, 4) +#define MPT_PD MPT_FIELD_LOC(31, 8) +#define MPT_RA_EN MPT_FIELD_LOC(32, 32) +#define MPT_R_INV_EN MPT_FIELD_LOC(33, 33) +#define MPT_L_INV_EN MPT_FIELD_LOC(34, 34) +#define MPT_BIND_EN MPT_FIELD_LOC(35, 35) +#define MPT_ATOMIC_EN MPT_FIELD_LOC(36, 36) +#define MPT_RR_EN MPT_FIELD_LOC(37, 37) +#define MPT_RW_EN MPT_FIELD_LOC(38, 38) +#define MPT_LW_EN MPT_FIELD_LOC(39, 39) +#define MPT_MW_CNT MPT_FIELD_LOC(63, 40) +#define MPT_FRE MPT_FIELD_LOC(64, 64) +#define MPT_PA MPT_FIELD_LOC(65, 65) +#define MPT_ZBVA MPT_FIELD_LOC(66, 66) +#define MPT_SHARE MPT_FIELD_LOC(67, 67) +#define MPT_MR_MW MPT_FIELD_LOC(68, 68) +#define MPT_BPD MPT_FIELD_LOC(69, 69) +#define MPT_BQP MPT_FIELD_LOC(70, 70) +#define MPT_INNER_PA_VLD MPT_FIELD_LOC(71, 71) +#define MPT_MW_BIND_QPN MPT_FIELD_LOC(95, 72) +#define MPT_BOUND_LKEY MPT_FIELD_LOC(127, 96) +#define MPT_LEN MPT_FIELD_LOC(191, 128) +#define MPT_LKEY MPT_FIELD_LOC(223, 192) +#define MPT_VA MPT_FIELD_LOC(287, 224) +#define MPT_PBL_SIZE MPT_FIELD_LOC(319, 288) +#define MPT_PBL_BA MPT_FIELD_LOC(380, 320) +#define MPT_BLK_MODE MPT_FIELD_LOC(381, 381) +#define MPT_RSV0 MPT_FIELD_LOC(383, 382) +#define MPT_PA0 MPT_FIELD_LOC(441, 384) +#define MPT_BOUND_VA MPT_FIELD_LOC(447, 442) +#define MPT_PA1 MPT_FIELD_LOC(505, 448) +#define MPT_PERSIST_EN MPT_FIELD_LOC(506, 506) +#define MPT_RSV2 MPT_FIELD_LOC(507, 507) +#define MPT_PBL_BUF_PG_SZ MPT_FIELD_LOC(511, 508) + #define V2_MPT_BYTE_4_MPT_ST_S 0 #define V2_MPT_BYTE_4_MPT_ST_M GENMASK(1, 0)
On Sat, Jan 23, 2021 at 05:47:59PM +0800, Weihang Li wrote:
This series refactors memory related codes, including MR, MTR(Memory Translate Region) and MPT(Memory Protection Table).
Changes since v1:
- Add static keyword for hns_roce_mr_free().
- Link: https://patchwork.kernel.org/project/linux-rdma/cover/1611236882-41498-1-git...
Lang Cheng (2): RDMA/hns: Optimize the MR registration process RDMA/hns: Use new interface to set MPT related fields
Xi Wang (1): RDMA/hns: Refactor the MTR creation flow
Applied to for-next, thanks
Jason