From: Xi Wang <wangxi11@huawei.com>
driver inclusion
category: bugfix
bugzilla: NA
CVE: NA
Wrap the public logic into three functions: hns_roce_mtr_create(), hns_roce_mtr_destroy() and hns_roce_mtr_map(), so that hopnum values ranging from 0 to 3 are supported. In addition, make the mtr interfaces easier to use.

Signed-off-by: Xi Wang <wangxi11@huawei.com>
Signed-off-by: Shunfeng Yang <yangshunfeng2@huawei.com>
Reviewed-by: chunzhi hu <huchunzhi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 drivers/infiniband/hw/hns/hns_roce_alloc.c  |  93 +--
 drivers/infiniband/hw/hns/hns_roce_device.h | 108 +++-
 drivers/infiniband/hw/hns/hns_roce_hem.c    |  33 +-
 drivers/infiniband/hw/hns/hns_roce_hem.h    |   5 +-
 drivers/infiniband/hw/hns/hns_roce_hw_v1.c  |  88 +--
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  90 +--
 drivers/infiniband/hw/hns/hns_roce_mr.c     | 477 ++++++++++++++--
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 538 ++++++------
 .../hw/hns/roce-customer/rdfx_entry.c       |   2 +-
 9 files changed, 776 insertions(+), 658 deletions(-)
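Note for reviewers (not applied by git am): the sketch below shows how a queue owner is expected to use the consolidated interface after this change, modeled on the alloc_wqe_buf()/set_wqe_buf_attr() path added to hns_roce_qp.c in this patch. The helper name example_alloc_buf and the single-region layout are illustrative assumptions only, not code from this series.

static int example_alloc_buf(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mtr *mtr, size_t buf_size,
			     struct ib_ucontext *uctx, unsigned long addr)
{
	struct hns_roce_buf_attr buf_attr = {};
	int ret;

	/* describe one region; any hopnum from 0 to 3 comes from the caps */
	buf_attr.region[0].size = buf_size;
	buf_attr.region[0].hopnum = hr_dev->caps.wqe_sq_hop_num;
	buf_attr.region_count = 1;
	buf_attr.page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
	buf_attr.mtt_only = false;
	/* a real user-space buffer would also set buf_attr.user_access */

	/*
	 * Allocates the buffer (umem when uctx is set, kmem otherwise),
	 * builds the multi-hop BA tables and writes the MTT in one call.
	 */
	ret = hns_roce_mtr_create(hr_dev, mtr, &buf_attr,
				  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
				  uctx, addr);
	if (ret)
		return ret;

	/* ... use the buffer; look up pages later with hns_roce_mtr_find() ... */

	/* teardown releases both the BA tables and the buffer */
	hns_roce_mtr_destroy(hr_dev, mtr);

	return 0;
}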
diff --git a/drivers/infiniband/hw/hns/hns_roce_alloc.c b/drivers/infiniband/hw/hns/hns_roce_alloc.c index 1ad0cab94491f..595e8e6663e5b 100644 --- a/drivers/infiniband/hw/hns/hns_roce_alloc.c +++ b/drivers/infiniband/hw/hns/hns_roce_alloc.c @@ -255,102 +255,49 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, EXPORT_SYMBOL_GPL(hns_roce_buf_alloc);
int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct hns_roce_buf *buf) + int buf_cnt, struct hns_roce_buf *buf, + unsigned int page_shift) { - int i, end; - int total; - - end = start + buf_cnt; - if (end > buf->npages) { - dev_err(hr_dev->dev, - "failed to check kmem bufs, end %d + %d total %u!\n", - start, buf_cnt, buf->npages); + unsigned int offset, max_size; + int total = 0; + int i; + + if (page_shift > buf->trunk_shift) { + dev_err(hr_dev->dev, "failed to check kmem buf shift %u > %u\n", + page_shift, buf->trunk_shift); return -EINVAL; }
- total = 0; - for (i = start; i < end; i++) - bufs[total++] = hns_roce_buf_page(buf, i); + offset = 0; + max_size = buf->ntrunks << buf->trunk_shift; + for (i = 0; i < buf_cnt && offset < max_size; i++) { + bufs[total++] = hns_roce_buf_dma_addr(buf, offset); + offset += (1 << page_shift); + }
return total; } EXPORT_SYMBOL_GPL(hns_roce_get_kmem_bufs);
int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct ib_umem *umem, + int buf_cnt, struct ib_umem *umem, unsigned int page_shift) { struct ib_block_iter biter; int total = 0; - int idx = 0; - u64 addr; - - if (page_shift < HNS_HW_PAGE_SHIFT) { - dev_err(hr_dev->dev, "failed to check umem page shift %u!\n", - page_shift); - return -EINVAL; - }
/* convert system page cnt to hw page cnt */ rdma_for_each_block(umem->sg_head.sgl, &biter, umem->nmap, 1 << page_shift) { - addr = rdma_block_iter_dma_address(&biter); - if (idx >= start) { - bufs[total++] = addr; - if (total >= buf_cnt) - goto done; - } - idx++; + bufs[total++] = rdma_block_iter_dma_address(&biter); + if (total >= buf_cnt) + goto done; }
done: return total; } - -void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt) -{ - if (hopnum == HNS_ROCE_HOP_NUM_0) - region->hopnum = 0; - else - region->hopnum = hopnum; - - region->offset = offset; - region->count = buf_cnt; -} - -void hns_roce_free_buf_list(dma_addr_t **bufs, int region_cnt) -{ - int i; - - for (i = 0; i < region_cnt; i++) { - if (bufs[i]) { - kfree(bufs[i]); - bufs[i] = NULL; - } - } -} - -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int region_cnt) -{ - struct hns_roce_buf_region *r; - int i; - - for (i = 0; i < region_cnt; i++) { - r = &regions[i]; - bufs[i] = kcalloc(r->count, sizeof(dma_addr_t), GFP_KERNEL); - if (!bufs[i]) - goto err_alloc; - } - - return 0; - -err_alloc: - hns_roce_free_buf_list(bufs, i); - - return -ENOMEM; -} +EXPORT_SYMBOL_GPL(hns_roce_get_umem_bufs);
void hns_roce_cleanup_bitmap(struct hns_roce_dev *hr_dev) { diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ff76c0bcd1a68..6e1ec2d77ce2a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -415,7 +415,7 @@ struct hns_roce_mtt { };
struct hns_roce_buf_region { - int offset; /* page offset */ + u32 offset; /* page offset */ u32 count; /* page count */ int hopnum; /* addressing hop num */ }; @@ -428,13 +428,36 @@ struct hns_roce_hem_list { struct list_head mid_bt[HNS_ROCE_MAX_BT_REGION][HNS_ROCE_MAX_BT_LEVEL]; struct list_head btm_bt; /* link all bottom bt in @mid_bt */ dma_addr_t root_ba; /* pointer to the root ba table */ - int bt_pg_shift; +}; + +struct hns_roce_buf_attr { + struct { + size_t size; /* region size */ + int hopnum; /* multi-hop addressing hop num */ + } region[HNS_ROCE_MAX_BT_REGION]; + unsigned int region_count; /* valid region count */ + unsigned int page_shift; /* buffer page shift */ + unsigned int user_access; /* umem access flag */ + int user_dmasync; /* umem dma sync flag */ + bool mtt_only; /* only alloc buffer-required MTT memory */ +}; + +struct hns_roce_hem_cfg { + dma_addr_t root_ba; /* root BA table's address */ + bool is_direct; /* addressing without BA table */ + unsigned int ba_pg_shift; /* BA table page shift */ + unsigned int buf_pg_shift; /* buffer page shift */ + unsigned int buf_pg_count; /* buffer page count */ + struct hns_roce_buf_region region[HNS_ROCE_MAX_BT_REGION]; + unsigned int region_count; };
/* memory translate region */ struct hns_roce_mtr { - struct hns_roce_hem_list hem_list; - int buf_pg_shift; + struct hns_roce_hem_list hem_list; /* multi-hop addressing resource */ + struct ib_umem *umem; /* user space buffer */ + struct hns_roce_buf *kmem; /* kernel space buffer */ + struct hns_roce_hem_cfg hem_cfg; /* config for hardware addressing */ };
struct hns_roce_mw { @@ -773,7 +796,6 @@ enum hns_roce_qp_dfx_cnt {
struct hns_roce_qp { struct ib_qp ibqp; - struct hns_roce_buf *hr_buf; struct hns_roce_wq rq; struct hns_roce_db rdb; struct hns_roce_db sdb; @@ -784,16 +806,9 @@ struct hns_roce_qp { u32 sq_next_wqe; struct hns_roce_wq sq;
- struct ib_umem *umem; - struct hns_roce_mtt mtt; struct hns_roce_mtr mtr; - - /* this define must less than HNS_ROCE_MAX_BT_REGION */ -#define HNS_ROCE_WQE_REGION_MAX 3 - struct hns_roce_buf_region regions[HNS_ROCE_WQE_REGION_MAX]; - int wqe_bt_pg_shift; - u32 buff_size; + struct mutex mutex; u16 xrcdn; u8 port; @@ -1343,6 +1358,45 @@ static inline dma_addr_t hns_roce_buf_page(struct hns_roce_buf *buf, u32 idx) return hns_roce_buf_dma_addr(buf, idx << buf->page_shift); }
+#define hr_hw_page_align(x) ALIGN(x, 1 << HNS_HW_PAGE_SHIFT) + +static inline u64 to_hr_hw_page_addr(u64 addr) +{ + return addr >> HNS_HW_PAGE_SHIFT; +} + +static inline u32 to_hr_hw_page_shift(u32 page_shift) +{ + return page_shift - HNS_HW_PAGE_SHIFT; +} + +static inline u32 to_hr_hem_hopnum(u32 hopnum, u32 count) +{ + if (count > 0) + return hopnum == HNS_ROCE_HOP_NUM_0 ? 0 : hopnum; + + return 0; +} + +static inline u32 to_hr_hem_entries_size(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift); +} + +static inline u32 to_hr_hem_entries_count(u32 count, u32 buf_shift) +{ + return hr_hw_page_align(count << buf_shift) >> buf_shift; +} + +static inline u32 to_hr_hem_entries_shift(u32 count, u32 buf_shift) +{ + if (!count) + return 0; + + return ilog2(buf_shift ? + to_hr_hem_entries_count(count, buf_shift) : count); +} + static inline u8 to_rdma_port_num(u8 phy_port_num) { return phy_port_num + 1; @@ -1373,18 +1427,19 @@ void hns_roce_mtt_cleanup(struct hns_roce_dev *hr_dev, int hns_roce_buf_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct hns_roce_buf *buf);
-void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift); -int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt); -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr); - /* hns roce hw need current block and next block addr from mtt */ #define MTT_MIN_COUNT 2 int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr); +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + unsigned int ba_page_shift, + struct ib_ucontext *ucontext, + unsigned long user_addr); +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, + struct hns_roce_mtr *mtr); +int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, unsigned int page_cnt);
int hns_roce_init_pd_table(struct hns_roce_dev *hr_dev); int hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev); @@ -1459,16 +1514,11 @@ struct hns_roce_buf *hns_roce_buf_alloc(struct hns_roce_dev *hr_dev, u32 size, int hns_roce_ib_umem_write_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtt *mtt, struct ib_umem *umem);
-void hns_roce_init_buf_region(struct hns_roce_buf_region *region, int hopnum, - int offset, int buf_cnt); -int hns_roce_alloc_buf_list(struct hns_roce_buf_region *regions, - dma_addr_t **bufs, int count); -void hns_roce_free_buf_list(dma_addr_t **bufs, int count); - int hns_roce_get_kmem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct hns_roce_buf *buf); + int buf_cnt, struct hns_roce_buf *buf, + unsigned int page_shift); int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, - int buf_cnt, int start, struct ib_umem *umem, + int buf_cnt, struct ib_umem *umem, unsigned int page_shift);
struct ib_srq *hns_roce_create_srq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 1911470ef26c6..a810fde3e356d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1177,9 +1177,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL;
if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1351,7 +1350,7 @@ static int hem_list_alloc_mid_bt(struct hns_roce_dev *hr_dev, }
if (offset < r->offset) { - dev_err(hr_dev->dev, "invalid offset %d,min %d!\n", + dev_err(hr_dev->dev, "invalid offset %d,min %u!\n", offset, r->offset); return -EINVAL; } @@ -1415,8 +1414,8 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, const struct hns_roce_buf_region *regions, int region_cnt) { - struct roce_hem_item *hem, *temp_hem, *root_hem; struct list_head temp_list[HNS_ROCE_MAX_BT_REGION]; + struct roce_hem_item *hem, *temp_hem, *root_hem; const struct hns_roce_buf_region *r; struct list_head temp_root; struct list_head temp_btm; @@ -1438,12 +1437,16 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM;
+ if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ r = &regions[region_cnt - 1]; root_hem = hem_list_alloc_item(hr_dev, offset, r->offset + r->count - 1, - ba_num, true, 0); + ba_num, true, 0); if (!root_hem) return -ENOMEM; list_add(&root_hem->list, &temp_root); @@ -1487,9 +1490,9 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, } /* if exist mid bt, link L1 to L0 */ list_for_each_entry_safe(hem, temp_hem, - &hem_list->mid_bt[i][1], list) { - offset = ((hem->start - r->offset) / step) * - BA_BYTE_LEN; + &hem_list->mid_bt[i][1], list) { + offset = (hem->start - r->offset) / step * + BA_BYTE_LEN; hem_list_link_bt(hr_dev, cpu_base + offset, hem->dma_addr); total++; @@ -1517,12 +1520,12 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt) + int region_cnt, unsigned int bt_pg_shift) { const struct hns_roce_buf_region *r; int ofs, end; - int ret = 0; int unit; + int ret; int i;
if (region_cnt > HNS_ROCE_MAX_BT_REGION) { @@ -1531,7 +1534,7 @@ int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, return -EINVAL; }
- unit = (1 << hem_list->bt_pg_shift) / BA_BYTE_LEN; + unit = (1 << bt_pg_shift) / BA_BYTE_LEN; for (i = 0; i < region_cnt; i++) { r = &regions[i]; if (!r->count) @@ -1578,8 +1581,7 @@ void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, hem_list->root_ba = 0; }
-void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order) +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list) { int i, j;
@@ -1588,8 +1590,6 @@ void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, for (i = 0; i < HNS_ROCE_MAX_BT_REGION; i++) for (j = 0; j < HNS_ROCE_MAX_BT_LEVEL; j++) INIT_LIST_HEAD(&hem_list->mid_bt[i][j]); - - hem_list->bt_pg_shift = bt_page_order; }
void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, @@ -1620,4 +1620,3 @@ void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev,
return cpu_base; } - diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.h b/drivers/infiniband/hw/hns/hns_roce_hem.h index 292ead0eb2d5b..8033c123175b2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.h +++ b/drivers/infiniband/hw/hns/hns_roce_hem.h @@ -133,14 +133,13 @@ int hns_roce_calc_hem_mhop(struct hns_roce_dev *hr_dev, struct hns_roce_hem_mhop *mhop); bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type);
-void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list, - int bt_page_order); +void hns_roce_hem_list_init(struct hns_roce_hem_list *hem_list); int hns_roce_hem_list_calc_root_ba(const struct hns_roce_buf_region *regions, int region_cnt, int unit); int hns_roce_hem_list_request(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list, const struct hns_roce_buf_region *regions, - int region_cnt); + int region_cnt, unsigned int bt_pg_shift); void hns_roce_hem_list_release(struct hns_roce_dev *hr_dev, struct hns_roce_hem_list *hem_list); void *hns_roce_hem_list_find_mtt(struct hns_roce_dev *hr_dev, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index 4cf732ff150ed..d1b21f71ee296 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -2513,7 +2513,6 @@ static void hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev, }
static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, - struct hns_roce_mtt *mtt, enum hns_roce_qp_state cur_state, enum hns_roce_qp_state new_state, struct hns_roce_qp_context *context, @@ -2560,7 +2559,7 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev,
struct hns_roce_cmd_mailbox *mailbox; struct device *dev = &hr_dev->pdev->dev; - int ret = 0; + int ret;
if (cur_state >= HNS_ROCE_QP_NUM_STATE || new_state >= HNS_ROCE_QP_NUM_STATE || @@ -2594,6 +2593,27 @@ static int hns_roce_v1_qp_modify(struct hns_roce_dev *hr_dev, return ret; }
+static int find_wqe_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + u64 *sq_ba, u64 *rq_ba, dma_addr_t *bt_ba) +{ + int count; + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, sq_ba, 1, bt_ba); + if (count < 1) { + dev_err(hr_dev->dev, "Failed to find SQ ba\n"); + return -ENOBUFS; + } + + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, rq_ba, + 1, NULL); + if (!count) { + dev_err(hr_dev->dev, "Failed to find RQ ba\n"); + return -ENOBUFS; + } + + return 0; +} + static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, enum ib_qp_state cur_state, enum ib_qp_state new_state) @@ -2601,25 +2621,20 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct hns_roce_sqp_context *context; - struct device *dev = &hr_dev->pdev->dev; dma_addr_t dma_handle = 0; u32 __iomem *addr; - int rq_pa_start; + u64 sq_ba = 0; + u64 rq_ba = 0; __le32 tmp; u32 reg_val; - u64 *mtts;
context = kzalloc(sizeof(*context), GFP_KERNEL); if (!context) return -ENOMEM;
/* Search QP buf's MTTs */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (!mtts) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - }
if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { roce_set_field(context->qp1c_bytes_4, @@ -2633,11 +2648,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M, QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qp1c_bytes_12, QP1C_BYTES_12_SQ_RQ_BT_H_M, QP1C_BYTES_12_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle));
roce_set_field(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_HEAD_M, QP1C_BYTES_16_RQ_HEAD_S, hr_qp->rq.head); @@ -2645,7 +2660,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SIGNALING_TYPE_S, - le32_to_cpu(hr_qp->sq_signal_bits)); + hr_qp->sq_signal_bits); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S, 1); roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S, @@ -2658,14 +2673,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qp1c_bytes_20, QP1C_BYTES_20_PKEY_IDX_M, QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M, QP1C_BYTES_28_CUR_RQ_WQE_BA_H_S, - (mtts[rq_pa_start]) >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qp1c_bytes_28, QP1C_BYTES_28_RQ_CUR_IDX_M, QP1C_BYTES_28_RQ_CUR_IDX_S, 0); @@ -2679,12 +2692,12 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP1C_BYTES_32_TX_CQ_NUM_S, to_hr_cq(ibqp->send_cq)->cqn);
- context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]); + context->cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M, QP1C_BYTES_40_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_field(context->qp1c_bytes_40, QP1C_BYTES_40_SQ_CUR_IDX_M, QP1C_BYTES_40_SQ_CUR_IDX_S, 0); @@ -2751,10 +2764,10 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, dma_addr_t dma_handle_2 = 0; dma_addr_t dma_handle = 0; __le32 doorbell[2] = {0}; - int rq_pa_start = 0; u64 *mtts_2 = NULL; int ret = -EINVAL; - u64 *mtts = NULL; + u64 sq_ba = 0; + u64 rq_ba = 0; int port; u8 port_num; u8 *dmac; @@ -2765,12 +2778,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, return -ENOMEM;
/* Search qp buf's mtts */ - mtts = hns_roce_table_find(hr_dev, &hr_dev->mr_table.mtt_table, - hr_qp->mtt.first_seg, &dma_handle); - if (mtts == NULL) { - dev_err(dev, "qp buf pa find failed\n"); + if (find_wqe_mtt(hr_dev, hr_qp, &sq_ba, &rq_ba, &dma_handle)) goto out; - }
/* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -2843,7 +2852,6 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, roce_set_field(context->qpc_bytes_16, QP_CONTEXT_QPC_BYTES_16_QP_NUM_M, QP_CONTEXT_QPC_BYTES_16_QP_NUM_S, hr_qp->qpn); - } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_INIT) { roce_set_field(context->qpc_bytes_4, QP_CONTEXT_QPC_BYTES_4_TRANSPORT_SERVICE_TYPE_M, @@ -2925,11 +2933,11 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle)); + context->sq_rq_bt_l = cpu_to_le32(dma_handle); roce_set_field(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M, QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S, - ((u32)(dma_handle >> 32))); + upper_32_bits(dma_handle)); roce_set_bit(context->qpc_bytes_24, QP_CONTEXT_QPC_BYTE_24_REMOTE_ENABLE_E2E_CREDITS_S, 1); @@ -2951,7 +2959,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, 1); roce_set_bit(context->qpc_bytes_32, QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S, - le32_to_cpu(hr_qp->sq_signal_bits)); + hr_qp->sq_signal_bits);
port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) : hr_qp->port; @@ -3028,14 +3036,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_M, QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
- rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE; - context->cur_rq_wqe_ba_l = - cpu_to_le32((u32)(mtts[rq_pa_start])); + context->cur_rq_wqe_ba_l = cpu_to_le32(rq_ba);
roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_S, - mtts[rq_pa_start] >> 32); + upper_32_bits(rq_ba)); roce_set_field(context->qpc_bytes_76, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_M, QP_CONTEXT_QPC_BYTES_76_RX_REQ_MSN_S, 0); @@ -3097,8 +3103,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_156_SL_S, rdma_ah_get_sl(&attr->ah_attr)); hr_qp->sl = rdma_ah_get_sl(&attr->ah_attr); - } else if (cur_state == IB_QPS_RTR && - new_state == IB_QPS_RTS) { + } else if (cur_state == IB_QPS_RTR && new_state == IB_QPS_RTS) { /* If exist optional param, return error */ if ((attr_mask & IB_QP_ALT_PATH) || (attr_mask & IB_QP_ACCESS_FLAGS) || @@ -3110,12 +3115,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, goto out; }
- context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->rx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qpc_bytes_120, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba));
roce_set_field(context->qpc_bytes_124, QP_CONTEXT_QPC_BYTES_124_RX_ACK_MSN_M, @@ -3258,12 +3263,12 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M, QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
- context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0])); + context->tx_cur_sq_wqe_ba_l = cpu_to_le32(sq_ba);
roce_set_field(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M, QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_S, - (mtts[0]) >> 32); + upper_32_bits(sq_ba)); roce_set_bit(context->qpc_bytes_188, QP_CONTEXT_QPC_BYTES_188_PKT_RETRY_FLG_S, 0); roce_set_field(context->qpc_bytes_188, @@ -3288,8 +3293,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr, QP_CONTEXT_QPC_BYTES_144_QP_STATE_S, new_state);
/* SW pass context to HW */ - ret = hns_roce_v1_qp_modify(hr_dev, &hr_qp->mtt, - to_hns_roce_state(cur_state), + ret = hns_roce_v1_qp_modify(hr_dev, to_hns_roce_state(cur_state), to_hns_roce_state(new_state), context, hr_qp); if (ret) { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index f2c4304399239..07932c1e029ab 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -149,7 +149,7 @@ static void set_extend_sge(struct hns_roce_qp *qp, struct ib_send_wr *wr, num_in_wqe = HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE; extend_sge_num = valid_num_sge - num_in_wqe; sg = wr->sg_list + num_in_wqe; - shift = qp->hr_buf->page_shift; + shift = qp->mtr.kmem->page_shift;
/* * Check whether wr->num_sge sges are in the same page. If not, we @@ -3898,18 +3898,10 @@ static void set_qpc_wqe_cnt(struct hns_roce_qp *hr_qp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - if (hr_qp->ibqp.qp_type == IB_QPT_GSI) - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - ilog2((unsigned int)hr_qp->sge.sge_cnt)); - else - roce_set_field(context->byte_4_sqpn_tst, - V2_QPC_BYTE_4_SGE_SHIFT_M, - V2_QPC_BYTE_4_SGE_SHIFT_S, - hr_qp->sq.max_gs > - HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE ? - ilog2((unsigned int)hr_qp->sge.sge_cnt) : 0); + roce_set_field(context->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, + V2_QPC_BYTE_4_SGE_SHIFT_S, + to_hr_hem_entries_shift(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift));
roce_set_field(qpc_mask->byte_4_sqpn_tst, V2_QPC_BYTE_4_SGE_SHIFT_M, V2_QPC_BYTE_4_SGE_SHIFT_S, 0); @@ -4369,31 +4361,6 @@ static void modify_qp_init_to_init(struct ib_qp *ibqp, } }
-static bool check_wqe_rq_mtt_count(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, int mtt_cnt, - u32 page_size) -{ - struct device *dev = hr_dev->dev; - - if (hr_qp->rq.wqe_cnt < 1) - return true; - - if (mtt_cnt < 1) { - dev_err(dev, "qp(0x%lx) rqwqe buf ba find failed\n", - hr_qp->qpn); - return false; - } - - if (mtt_cnt < MTT_MIN_COUNT && - (hr_qp->rq.offset + page_size) < hr_qp->buff_size) { - dev_err(dev, "qp(0x%lx) next rqwqe buf ba find failed\n", - hr_qp->qpn); - return false; - } - - return true; -} - static int modify_qp_init_to_rtr(struct ib_qp *ibqp, const struct ib_qp_attr *attr, int attr_mask, struct hns_roce_v2_qp_context *context, @@ -4407,7 +4374,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t dma_handle_3; dma_addr_t dma_handle_2; u64 wqe_sge_ba; - u32 page_size; u8 port_num; u64 *mtts_3; u64 *mtts_2; @@ -4416,13 +4382,13 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, int port;
/* Search qp buf's mtts */ - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->rq.offset / page_size, mtts, - MTT_MIN_COUNT, &wqe_sge_ba); - if (!ibqp->srq) - if (!check_wqe_rq_mtt_count(hr_dev, hr_qp, count, page_size)) - return -EINVAL; + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->rq.offset, + mtts, ARRAY_SIZE(mtts), &wqe_sge_ba); + if (hr_qp->rq.wqe_cnt && count < 1) { + dev_err(dev, "failed to find RQ WQE, QPN = 0x%lx.\n", + hr_qp->qpn); + return -EINVAL; + }
/* Search IRRL's mtts */ mtts_2 = hns_roce_table_find(hr_dev, &hr_dev->qp_table.irrl_table, @@ -4462,17 +4428,16 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp,
roce_set_field(context->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, - hr_dev->caps.wqe_sq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_sq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_sq_hop_num, + hr_qp->sq.wqe_cnt)); roce_set_field(qpc_mask->byte_12_sq_hop, V2_QPC_BYTE_12_SQ_HOP_NUM_M, V2_QPC_BYTE_12_SQ_HOP_NUM_S, 0);
roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, - (ibqp->qp_type == IB_QPT_GSI || - hr_qp->sq.max_gs > HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE) ? - hr_dev->caps.wqe_sge_hop_num : 0); + to_hr_hem_hopnum(hr_dev->caps.wqe_sge_hop_num, + hr_qp->sge.sge_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_SGE_HOP_NUM_M, V2_QPC_BYTE_20_SGE_HOP_NUM_S, 0); @@ -4480,8 +4445,8 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, - hr_dev->caps.wqe_rq_hop_num == HNS_ROCE_HOP_NUM_0 ? - 0 : hr_dev->caps.wqe_rq_hop_num); + to_hr_hem_hopnum(hr_dev->caps.wqe_rq_hop_num, + hr_qp->rq.wqe_cnt)); roce_set_field(qpc_mask->byte_20_smac_sgid_idx, V2_QPC_BYTE_20_RQ_HOP_NUM_M, V2_QPC_BYTE_20_RQ_HOP_NUM_S, 0); @@ -4489,7 +4454,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, - hr_qp->wqe_bt_pg_shift + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.ba_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BA_PG_SZ_S, 0); @@ -4497,7 +4462,7 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, roce_set_field(context->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, - hr_dev->caps.mtt_buf_pg_sz + PG_SHIFT_OFFSET); + to_hr_hw_page_shift(hr_qp->mtr.hem_cfg.buf_pg_shift)); roce_set_field(qpc_mask->byte_16_buf_ba_pg_sz, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_M, V2_QPC_BYTE_16_WQE_SGE_BUF_PG_SZ_S, 0); @@ -4648,23 +4613,22 @@ static int modify_qp_rtr_to_rts(struct ib_qp *ibqp, struct device *dev = hr_dev->dev; u64 sge_cur_blk = 0; u64 sq_cur_blk = 0; - u32 page_size; int count;
- /* Search qp buf's mtts */ - count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, 0, &sq_cur_blk, 1, NULL); + /* search qp buf's mtts */ + count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, hr_qp->sq.offset, + &sq_cur_blk, 1, NULL); if (count < 1) { - dev_err(dev, "qp(0x%lx) buf pa find failed\n", hr_qp->qpn); + dev_err(dev, "failed to find QP(0x%lx) SQ buf.\n", hr_qp->qpn); return -EINVAL; }
if (hr_qp->sge.sge_cnt > 0) { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); count = hns_roce_mtr_find(hr_dev, &hr_qp->mtr, - hr_qp->sge.offset / page_size, - &sge_cur_blk, 1, NULL); + hr_qp->sge.offset, &sge_cur_blk, + 1, NULL); if (count < 1) { - dev_err(dev, "qp(0x%lx) sge pa find failed\n", + dev_err(dev, "failed to find QP(0x%lx) SGE buf.\n", hr_qp->qpn); return -EINVAL; } diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index bef0ceb24b72d..f81fb0359afd6 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -1601,122 +1601,475 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) return 0; }
-void hns_roce_mtr_init(struct hns_roce_mtr *mtr, int bt_pg_shift, - int buf_pg_shift) +static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { - hns_roce_hem_list_init(&mtr->hem_list, bt_pg_shift); - mtr->buf_pg_shift = buf_pg_shift; -} - -void hns_roce_mtr_cleanup(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr) -{ - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); -} -EXPORT_SYMBOL_GPL(hns_roce_mtr_cleanup); - -static int hns_roce_write_mtr(struct hns_roce_dev *hr_dev, - struct hns_roce_mtr *mtr, dma_addr_t *bufs, - struct hns_roce_buf_region *r) -{ - int offset; - int count; - int npage; - u64 *mtts; - int end; + int count, npage; + int offset, end; + __le64 *mtts; + u64 addr; int i;
- offset = r->offset; - end = offset + r->count; + offset = region->offset; + end = offset + region->count; npage = 0; - while (offset < end) { + while (offset < end && npage < max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS;
- /* Save page addr, low 12 bits : 0 */ - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) - mtts[i] = bufs[npage] >> PAGE_ADDR_SHIFT; + addr = to_hr_hw_page_addr(pages[npage]); else - mtts[i] = bufs[npage]; + addr = pages[npage];
+ mtts[i] = cpu_to_le64(addr); npage++; } offset += count; }
- /* Memory barrier */ - mb(); + return npage; +} + +static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) +{ + int i; + + for (i = 0; i < attr->region_count; i++) + if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 && + attr->region[i].hopnum > 0) + return true; + + /* because the mtr only one root base address, when hopnum is 0 means + * root base address equals the first buffer address, thus all alloced + * memory must in a continuous space accessed by direct mode. + */ + return false; +} + +static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr) +{ + size_t size = 0; + int i; + + for (i = 0; i < attr->region_count; i++) + size += attr->region[i].size; + + return size; +} + +/* + * check the given pages in continuous address space + * Returns 0 on success, or the error page num. + */ +static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count, + unsigned int page_shift) +{ + size_t page_size = 1 << page_shift; + int i; + + for (i = 1; i < page_count; i++) + if (pages[i] - pages[i - 1] != page_size) + return i;
return 0; }
-int hns_roce_mtr_attach(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t **bufs, struct hns_roce_buf_region *regions, - int region_cnt) +static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) { - struct hns_roce_buf_region *r; + /* release user buffers */ + if (mtr->umem) { + ib_umem_release(mtr->umem); + mtr->umem = NULL; + } + + /* release kernel buffers */ + if (mtr->kmem) { + hns_roce_buf_free(hr_dev, mtr->kmem); + mtr->kmem = NULL; + } +} + +static struct ib_umem * +mtr_get_umem(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, size_t buf_size, + struct ib_ucontext *ucontext, unsigned long user_addr) +{ + return ib_umem_get(ucontext, user_addr, buf_size, + buf_attr->user_access, + buf_attr->user_dmasync); +} + +static struct hns_roce_buf * +mtr_get_kmem(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, int pg_shift, size_t buf_size, + bool is_direct) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_buf *hr_buf; + + hr_buf = hns_roce_buf_alloc(hr_dev, buf_size, pg_shift, + is_direct ? HNS_ROCE_BUF_DIRECT : 0); + if (IS_ERR_OR_NULL(hr_buf)) { + dev_err(dev, "Failed to alloc kmem, ret %ld\n", + PTR_ERR(hr_buf)); + return NULL; + } + + return hr_buf; +} + +static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + struct ib_ucontext *ucontext, unsigned long user_addr) +{ + struct device *dev = hr_dev->dev; + size_t total_size; + + total_size = mtr_bufs_size(buf_attr); + if (ucontext) { + mtr->kmem = NULL; + mtr->umem = mtr_get_umem(hr_dev, mtr, buf_attr, total_size, + ucontext, user_addr); + if (IS_ERR_OR_NULL(mtr->umem)) { + dev_err(dev, "Failed to get umem, ret %ld\n", + PTR_ERR(mtr->umem)); + return -ENOMEM; + } + } else { + mtr->umem = NULL; + mtr->kmem = mtr_get_kmem(hr_dev, mtr, buf_attr, + buf_attr->page_shift, total_size, + mtr->hem_cfg.is_direct); + if (!mtr->kmem) { + dev_err(dev, "Failed to alloc kmem\n"); + return -ENOMEM; + } + } + + return 0; +} + +static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + int page_count, unsigned int page_shift) +{ + struct device *dev = hr_dev->dev; + dma_addr_t *pages; + int npage; int ret; - int i;
- ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, regions, - region_cnt); - if (ret) - return ret; + /* alloc a tmp array to store buffer's dma address */ + pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + if (mtr->umem) + npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count, + mtr->umem, page_shift); + else + npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count, + mtr->kmem, page_shift); + + if (npage != page_count) { + dev_err(dev, "failed to get mtr page %d != %d.\n", npage, + page_count); + ret = -ENOBUFS; + goto err_alloc_list; + }
- for (i = 0; i < region_cnt; i++) { - r = &regions[i]; - ret = hns_roce_write_mtr(hr_dev, mtr, bufs[i], r); + if (mtr->hem_cfg.is_direct && npage > 1) { + ret = mtr_check_direct_pages(pages, npage, page_shift); if (ret) { - dev_err(hr_dev->dev, - "write mtr[%d/%d] err %d,offset=%d.\n", - i, region_cnt, ret, r->offset); - goto err_write; + dev_err(dev, "failed to check %s page: %d / %d.\n", + mtr->umem ? "umtr" : "kmtr", ret, npage); + ret = -ENOBUFS; + goto err_alloc_list; } }
- return 0; + ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count); + if (ret) + dev_err(dev, "failed to map mtr pages, ret = %d.\n", ret);
-err_write: - hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +err_alloc_list: + /* drop tmp array */ + kvfree(pages);
return ret; }
+int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + dma_addr_t *pages, unsigned int page_cnt) +{ + struct device *dev = hr_dev->dev; + struct hns_roce_buf_region *r; + unsigned int i, mapped_cnt; + int ret; + + /* + * Only use the first page address as root ba when hopnum is 0, this + * is because the addresses of all pages are consecutive in this case. + */ + if (mtr->hem_cfg.is_direct) { + mtr->hem_cfg.root_ba = pages[0]; + return 0; + } + + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { + r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + + if (r->offset + r->count > page_cnt) { + ret = -EINVAL; + dev_err(dev, + "failed to check mtr%u count %u + %u > %u\n", + i, r->offset, r->count, page_cnt); + return ret; + } + + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { + dev_err(dev, "failed to map mtr%u offset %u, ret = %d.\n", + i, r->offset, ret); + return ret; + } + mapped_cnt += ret; + ret = 0; + } + + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + dev_err(dev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; +} +EXPORT_SYMBOL_GPL(hns_roce_mtr_map); + int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, int offset, u64 *mtt_buf, int mtt_max, u64 *base_addr) { - u64 *mtts = mtt_buf; - int mtt_count; + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int mtt_count, left; + int start_index; int total = 0; - u64 *addr; - int npage; - int left; + __le64 *mtts; + u32 npage; + u64 addr;
- if (mtts == NULL || mtt_max < 1) + if (!mtt_buf || mtt_max < 1) goto done;
+ /* no mtt memory in direct mode, so just return the buffer address */ + if (cfg->is_direct) { + start_index = offset >> HNS_HW_PAGE_SHIFT; + for (mtt_count = 0; mtt_count < cfg->region_count && + total < mtt_max; mtt_count++) { + npage = cfg->region[mtt_count].offset; + if (npage < start_index) + continue; + + addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT); + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) + mtt_buf[total] = to_hr_hw_page_addr(addr); + else + mtt_buf[total] = addr; + + total++; + } + + goto done; + } + + start_index = offset >> cfg->buf_pg_shift; left = mtt_max; while (left > 0) { mtt_count = 0; - addr = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, - offset + total, + mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, + start_index + total, &mtt_count, NULL); - if (!addr || !mtt_count) + if (!mtts || !mtt_count) goto done;
npage = min(mtt_count, left); - memcpy(&mtts[total], addr, BA_BYTE_LEN * npage); left -= npage; - total += npage; + for (mtt_count = 0; mtt_count < npage; mtt_count++) + mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]); }
done: if (base_addr) - *base_addr = mtr->hem_list.root_ba; + *base_addr = cfg->root_ba;
return total; } EXPORT_SYMBOL_GPL(hns_roce_mtr_find); + +static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev, + struct hns_roce_buf_attr *attr, + struct hns_roce_hem_cfg *cfg, + unsigned int *buf_page_shift, int unalinged_size) +{ + struct hns_roce_buf_region *r; + int first_region_padding; + int page_cnt, region_cnt; + unsigned int page_shift; + size_t buf_size; + + /* if disable mtt, all pages must in a continuous address range */ + cfg->is_direct = !mtr_has_mtt(attr); + buf_size = mtr_bufs_size(attr); + if (cfg->is_direct) { + /* When HEM buffer use level-0 addressing, the page size is + * equal the whole buffer size, and we split whole buffer as + * small pages which is used to check whether the adjacent units + * are in the continuous space and the size is fixed as 4K for + * the hns ROCEE required. + */ + page_shift = HNS_HW_PAGE_SHIFT; + /* The ROCEE requires the page size is 4K * 2^N. */ + cfg->buf_pg_count = 1; + cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT + + order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE)); + first_region_padding = 0; + } else { + page_shift = attr->page_shift; + cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unalinged_size, + 1 << page_shift); + cfg->buf_pg_shift = page_shift; + first_region_padding = unalinged_size; + } + + /* Convert buffer size to page index and page count for each region and + * the buffer's offset need append to the first region. + */ + for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count && + region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) { + r = &cfg->region[region_cnt]; + r->offset = page_cnt; + buf_size = hr_hw_page_align(attr->region[region_cnt].size + + first_region_padding); + r->count = DIV_ROUND_UP(buf_size, 1 << page_shift); + first_region_padding = 0; + page_cnt += r->count; + r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum, + r->count); + } + + cfg->region_count = region_cnt; + *buf_page_shift = page_shift; + + return page_cnt; +} + +static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + unsigned int ba_page_shift) +{ + struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg; + int ret; + + hns_roce_hem_list_init(&mtr->hem_list); + if (!cfg->is_direct) { + ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list, + cfg->region, cfg->region_count, + ba_page_shift); + if (ret) + return ret; + cfg->root_ba = mtr->hem_list.root_ba; + cfg->ba_pg_shift = ba_page_shift; + } else { + cfg->ba_pg_shift = cfg->buf_pg_shift; + } + + return 0; +} + +static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); +} + +/** + * hns_roce_mtr_create - Create hns memory translate region. + * + * @mtr: memory translate region + * @buf_attr: buffer attribute for creating mtr + * @ba_page_shift: page shift for multi-hop base address table + * @ucontext: user space context, if it's NULL, means kernel space + * @user_addr: userspace virtual address to start at + */ +int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, + struct hns_roce_buf_attr *buf_attr, + unsigned int ba_page_shift, + struct ib_ucontext *ucontext, unsigned long user_addr) +{ + struct device *dev = hr_dev->dev; + unsigned int buf_page_shift = 0; + int buf_page_cnt; + int ret; + + buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg, + &buf_page_shift, + ucontext ? 
user_addr & ~PAGE_MASK : 0); + if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) { + dev_err(dev, "failed to init mtr cfg, count %d shift %u.\n", + buf_page_cnt, buf_page_shift); + return -EINVAL; + } + + ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift); + if (ret) { + dev_err(dev, "failed to alloc mtr mtt, ret = %d.\n", ret); + return ret; + } + + /* The caller has its own buffer list and invokes the hns_roce_mtr_map() + * to finish the MTT configure. + */ + if (buf_attr->mtt_only) { + mtr->umem = NULL; + mtr->kmem = NULL; + return 0; + } + + ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, ucontext, user_addr); + if (ret) { + dev_err(dev, "failed to alloc mtr bufs, ret = %d.\n", ret); + goto err_alloc_mtt; + } + + /* Write buffer's dma address to MTT */ + ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift); + if (ret) + dev_err(dev, "failed to map mtr bufs, ret = %d.\n", ret); + else + return 0; + + mtr_free_bufs(hr_dev, mtr); +err_alloc_mtt: + mtr_free_mtt(hr_dev, mtr); + return ret; +} +EXPORT_SYMBOL_GPL(hns_roce_mtr_create); + +void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr) +{ + /* release multi-hop addressing resource */ + hns_roce_hem_list_release(hr_dev, &mtr->hem_list); + + /* free buffers */ + mtr_free_bufs(hr_dev, mtr); +} +EXPORT_SYMBOL_GPL(hns_roce_mtr_destroy); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 62fe471a81ca7..9a4118b2fb78f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -366,41 +366,35 @@ static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, return max_sge; }
-static int set_rq_size(struct hns_roce_dev *hr_dev, - struct ib_qp_cap *cap, int is_user, int has_rq, - struct hns_roce_qp *hr_qp) +static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, + struct hns_roce_qp *hr_qp, int has_rq, bool user) { - u32 max_sge = proc_rq_sge(hr_dev, hr_qp, is_user); + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); struct device *dev = hr_dev->dev; - u32 max_cnt; + u32 cnt;
/* If srq exist, set zero for relative number of rq */ if (!has_rq) { hr_qp->rq.wqe_cnt = 0; hr_qp->rq.max_gs = 0; + hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = 0; cap->max_recv_sge = 0; + return 0; }
- if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || + /* Check the validity of QP support capacity */ + if (!cap->max_recv_sge || cap->max_recv_wr > hr_dev->caps.max_wqes || cap->max_recv_sge > max_sge) { - dev_err(dev, "RQ config error, depth=%u, sge=%d\n", - cap->max_recv_wr, cap->max_recv_sge); - return -EINVAL; - } - - max_cnt = max(cap->max_recv_wr, hr_dev->caps.min_wqes); - - hr_qp->rq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->rq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "rq depth %u too large\n", - cap->max_recv_wr); + dev_err(dev, "RQ config error, depth=%u, sge=%u\n", + cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; }
- max_cnt = max(1U, cap->max_recv_sge); - hr_qp->rq.max_gs = roundup_pow_of_two(max_cnt) + hr_qp->rq.rsv_sge; + cnt = roundup_pow_of_two(max(cap->max_recv_wr, hr_dev->caps.min_wqes)); + hr_qp->rq.max_gs = + roundup_pow_of_two(cap->max_recv_sge + hr_qp->rq.rsv_sge);
if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -408,12 +402,59 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz * hr_qp->rq.max_gs);
- cap->max_recv_wr = hr_qp->rq.wqe_cnt; + hr_qp->rq.wqe_cnt = cnt; + if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE && + hr_qp->ibqp.qp_type != IB_QPT_UD && + hr_qp->ibqp.qp_type != IB_QPT_GSI) + hr_qp->rq_inl_buf.wqe_cnt = cnt; + else + hr_qp->rq_inl_buf.wqe_cnt = 0; + + cap->max_recv_wr = cnt; cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge;
return 0; }
+static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) +{ + /* GSI/UD QP only has extended sge */ + if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) + return qp->sq.max_gs; + + if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) + return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; + + return 0; +} + +static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, + struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) +{ + u32 total_sge_cnt; + u32 wqe_sge_cnt; + + hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + + if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) { + hr_qp->sq.max_gs = HNS_ROCE_SGE_IN_WQE; + return; + } + + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + + wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + + /* If the number of extended sge is not zero, they MUST use the + * space of HNS_HW_PAGE_SIZE at least. + */ + if (wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + hr_qp->sge.sge_cnt = max(total_sge_cnt, + (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); + } +} + static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_ib_create_qp *ucmd) @@ -424,15 +465,13 @@ static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, /* Sanity check SQ size before proceeding */ if (ucmd->log_sq_stride > max_sq_stride || ucmd->log_sq_stride < HNS_ROCE_IB_MIN_SQ_STRIDE) { - dev_err(hr_dev->dev, - "Check SQ size error! Log sq stride 0x%x\n", - ucmd->log_sq_stride); + dev_err(hr_dev->dev, "failed to check SQ stride size\n"); return -EINVAL; }
if (cap->max_send_sge > hr_dev->caps.max_sq_sg) { - dev_err(hr_dev->dev, "SQ sge error! Max send sge is %d, Max sq sge is %d\n", - cap->max_send_sge, hr_dev->caps.max_sq_sg); + dev_err(hr_dev->dev, "failed to check SQ SGE size %u\n", + cap->max_send_sge); return -EINVAL; }
@@ -443,213 +482,77 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { - u32 ex_sge_num; - u32 page_size; - u32 buf_size; - u32 max_cnt; + struct device *dev = hr_dev->dev; + u32 cnt = 0; int ret;
- if (check_shl_overflow(1, ucmd->log_sq_bb_count, &hr_qp->sq.wqe_cnt) || - hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) + if (check_shl_overflow(1, ucmd->log_sq_bb_count, &cnt) || + cnt > hr_dev->caps.max_wqes) return -EINVAL;
ret = check_sq_size_with_integrity(hr_dev, cap, ucmd); if (ret) { - dev_err(hr_dev->dev, "Sanity check sq(0x%lx) size fail\n", - hr_qp->qpn); + dev_err(dev, "failed to check user SQ size\n"); return ret; }
- hr_qp->sq.wqe_shift = ucmd->log_sq_stride; - - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(hr_dev->dev, - "while setting sq(0x%lx) size, sq.wqe_cnt too large\n", - hr_qp->qpn); - return -EINVAL; - } - - max_cnt = max(1U, cap->max_send_sge); - if (hr_dev->caps.max_sq_sg <= HNS_ROCE_MAX_SGE_NUM) - hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt); - else - hr_qp->sq.max_gs = max_cnt; - - if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - - if ((hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) && - (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A)) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(hr_dev->dev, - "SQ(0x%lx) extended sge cnt error! sge cnt is %d, max extend sg is %d.\n", - hr_qp->qpn, hr_qp->sge.sge_cnt, - hr_dev->caps.max_extend_sg); - return -EINVAL; - } - } + set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
- hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - ex_sge_num = hr_qp->sge.sge_cnt; - - /* Get buf size, SQ and RQ are aligned to page_szie */ - if (hr_dev->caps.max_sq_sg <= HNS_ROCE_MAX_SGE_NUM) { - hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), PAGE_SIZE) + - HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - - hr_qp->sq.offset = 0; - hr_qp->rq.offset = HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), PAGE_SIZE); - } else { - page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT); - buf_size = ALIGN((hr_qp->sge.sge_cnt << HNS_ROCE_SGE_SHIFT), - page_size); - hr_qp->sge.sge_cnt = ex_sge_num ? - max(buf_size / (1 << hr_qp->sge.sge_shift), - ex_sge_num) : 0; - hr_qp->buff_size = HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << - hr_qp->rq.wqe_shift), page_size) + - HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), page_size) + - HNS_ROCE_ALIGN_UP((hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), page_size); - - hr_qp->sq.offset = 0; - if (ex_sge_num) { - hr_qp->sge.offset = HNS_ROCE_ALIGN_UP( - (hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - hr_qp->rq.offset = hr_qp->sge.offset + - HNS_ROCE_ALIGN_UP((hr_qp->sge.sge_cnt << - hr_qp->sge.sge_shift), - page_size); - } else { - hr_qp->rq.offset = HNS_ROCE_ALIGN_UP( - (hr_qp->sq.wqe_cnt << - hr_qp->sq.wqe_shift), - page_size); - } - } + hr_qp->sq.wqe_shift = ucmd->log_sq_stride; + hr_qp->sq.wqe_cnt = cnt;
return 0; }
-static int split_wqe_buf_region(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp, - struct hns_roce_buf_region *regions, - int region_max, int page_shift) +static int set_wqe_buf_attr(struct hns_roce_dev *hr_dev, + struct hns_roce_qp *hr_qp, + struct hns_roce_buf_attr *buf_attr) { - int page_size = 1 << page_shift; - bool is_extend_sge; - int region_cnt = 0; int buf_size; - int buf_cnt; - - if (hr_qp->buff_size < 1 || region_max < 1) - return region_cnt; - - if (hr_qp->sge.sge_cnt > 0) - is_extend_sge = true; - else - is_extend_sge = false; - - /* sq region */ - if (is_extend_sge) - buf_size = hr_qp->sge.offset - hr_qp->sq.offset; - else - buf_size = hr_qp->rq.offset - hr_qp->sq.offset; - - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sq_hop_num, - hr_qp->sq.offset / page_size, - buf_cnt); - region_cnt++; - } - - /* sge region */ - if (is_extend_sge) { - buf_size = hr_qp->rq.offset - hr_qp->sge.offset; - if (buf_size > 0 && region_cnt < region_max) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_sge_hop_num, - hr_qp->sge.offset / page_size, - buf_cnt); - region_cnt++; - } - } - - /* rq region */ - buf_size = hr_qp->buff_size - hr_qp->rq.offset; - if (buf_size > 0) { - buf_cnt = DIV_ROUND_UP(buf_size, page_size); - hns_roce_init_buf_region(®ions[region_cnt], - hr_dev->caps.wqe_rq_hop_num, - hr_qp->rq.offset / page_size, - buf_cnt); - region_cnt++; - } - - return region_cnt; -} - -static int calc_wqe_bt_page_shift(struct hns_roce_dev *hr_dev, - struct hns_roce_buf_region *regions, - int region_cnt) -{ - int bt_pg_shift; - int ba_num; - int ret; + int idx = 0;
- bt_pg_shift = PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz; - - /* all root ba entries must in one bt page */ - do { - ba_num = (1 << bt_pg_shift) / BA_BYTE_LEN; - ret = hns_roce_hem_list_calc_root_ba(regions, region_cnt, - ba_num); - if (ret <= ba_num) - break; - - bt_pg_shift++; - } while (ret > ba_num); - - return bt_pg_shift - PAGE_SHIFT; -} - -static int set_extend_sge_param(struct hns_roce_dev *hr_dev, - struct hns_roce_qp *hr_qp) -{ - struct device *dev = hr_dev->dev; + hr_qp->buff_size = 0;
- if (hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - (hr_qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE)); - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - } + /* SQ WQE */ + hr_qp->sq.offset = 0; + buf_size = to_hr_hem_entries_size(hr_qp->sq.wqe_cnt, + hr_qp->sq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* extend SGE WQE in SQ */ + hr_qp->sge.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->sge.sge_cnt, + hr_qp->sge.sge_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_sge_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + /* RQ WQE */ + hr_qp->rq.offset = hr_qp->buff_size; + buf_size = to_hr_hem_entries_size(hr_qp->rq.wqe_cnt, + hr_qp->rq.wqe_shift); + if (buf_size > 0 && idx < ARRAY_SIZE(buf_attr->region)) { + buf_attr->region[idx].size = buf_size; + buf_attr->region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num; + idx++; + hr_qp->buff_size += buf_size; + } + + if (hr_qp->buff_size < 1) + return -EINVAL;
- /* ud sqwqe's sge use extend sge */ - if (hr_dev->caps.max_sq_sg > HNS_ROCE_SGE_IN_WQE && - hr_qp->ibqp.qp_type == IB_QPT_GSI) { - hr_qp->sge.sge_cnt = roundup_pow_of_two(hr_qp->sq.wqe_cnt * - hr_qp->sq.max_gs); - hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; - } + buf_attr->region_count = idx;
- if ((hr_qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) && - hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08_A) { - if (hr_qp->sge.sge_cnt > hr_dev->caps.max_extend_sg) { - dev_err(dev, "The extended sge cnt error! sge_cnt is %d, max extend sg is %d.\n", - hr_qp->sge.sge_cnt, hr_dev->caps.max_extend_sg); - return -EINVAL; - } - } + buf_attr->mtt_only = false; + buf_attr->page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
return 0; } @@ -658,67 +561,28 @@ static int set_kernel_sq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, struct hns_roce_qp *hr_qp) { struct device *dev = hr_dev->dev; - u32 buf_size; - u32 page_size; - u32 max_cnt; - int size; - int ret; + u32 cnt;
if (!cap->max_send_wr || cap->max_send_wr > hr_dev->caps.max_wqes || - cap->max_send_sge > hr_dev->caps.max_sq_sg || - cap->max_inline_data > hr_dev->caps.max_sq_inline) { - dev_err(dev, "SQ WR or sge or inline data error!\n"); + cap->max_send_sge > hr_dev->caps.max_sq_sg) { + dev_err(dev, "failed to check SQ WR or SGE num, ret = %d.\n", + -EINVAL); return -EINVAL; }
- hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz); - - max_cnt = max(cap->max_send_wr, hr_dev->caps.min_wqes); - - hr_qp->sq.wqe_cnt = roundup_pow_of_two(max_cnt); - if ((u32)hr_qp->sq.wqe_cnt > hr_dev->caps.max_wqes) { - dev_err(dev, "sq.wqe_cnt(0x%x) too large for setting kernel sq size.\n", - (u32)hr_qp->sq.wqe_cnt); + cnt = roundup_pow_of_two(max(cap->max_send_wr, hr_dev->caps.min_wqes)); + if (cnt > hr_dev->caps.max_wqes) { + dev_err(dev, "failed to check WQE num %u\n", cnt); return -EINVAL; }
-        /* Get data_seg numbers */
-        max_cnt = max(1U, cap->max_send_sge);
-        if (hr_dev->caps.max_sq_sg <= HNS_ROCE_MAX_SGE_NUM)
-                hr_qp->sq.max_gs = roundup_pow_of_two(max_cnt);
-        else
-                hr_qp->sq.max_gs = max_cnt;
-
-        ret = set_extend_sge_param(hr_dev, hr_qp);
-        if (ret) {
-                dev_err(dev, "set extend sge parameters failed(%d)\n", ret);
-                return ret;
-        }
-
-        /* Get buf size, SQ and RQ are aligned to PAGE_SIZE */
-        page_size = 1 << (hr_dev->caps.mtt_buf_pg_sz + PAGE_SHIFT);
-        hr_qp->sq.offset = 0;
-        size = HNS_ROCE_ALIGN_UP(hr_qp->sq.wqe_cnt << hr_qp->sq.wqe_shift,
-                                 page_size);
-
-        if (hr_dev->caps.max_sq_sg > HNS_ROCE_MAX_SGE_NUM &&
-            hr_qp->sge.sge_cnt) {
-                buf_size = ALIGN((hr_qp->sge.sge_cnt << HNS_ROCE_SGE_SHIFT),
-                                 page_size);
-                hr_qp->sge.sge_cnt = max(buf_size / (1 << hr_qp->sge.sge_shift),
-                                         (u32)hr_qp->sge.sge_cnt);
-                hr_qp->sge.offset = size;
-                size += HNS_ROCE_ALIGN_UP(hr_qp->sge.sge_cnt <<
-                                          hr_qp->sge.sge_shift, page_size);
-        }
+        hr_qp->sq.wqe_shift = ilog2(hr_dev->caps.max_sq_desc_sz);
+        hr_qp->sq.wqe_cnt = cnt;
-        hr_qp->rq.offset = size;
-        size += HNS_ROCE_ALIGN_UP((hr_qp->rq.wqe_cnt << hr_qp->rq.wqe_shift),
-                                  page_size);
-        hr_qp->buff_size = size;
+        set_ext_sge_param(hr_dev, cnt, hr_qp, cap);
-        /* Get wr and sge number which send */
-        cap->max_send_wr = hr_qp->sq.wqe_cnt;
+        /* sync the parameters of kernel QP to user's configuration */
+        cap->max_send_wr = cnt;
         cap->max_send_sge = hr_qp->sq.max_gs;
         /* We don't support inline sends for kernel QPs (yet) */
@@ -794,6 +658,28 @@ static void hns_roce_free_recv_inline_buffer(struct hns_roce_qp *hr_qp)
         kfree(hr_qp->rq_inl_buf.wqe_list);
 }
+static int alloc_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                         struct hns_roce_buf_attr *buf_attr,
+                         struct ib_uobject *uobject, unsigned long addr)
+{
+        struct device *dev = hr_dev->dev;
+        int ret;
+
+        ret = hns_roce_mtr_create(hr_dev, &hr_qp->mtr, buf_attr,
+                                  PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
+                                  uobject ? uobject->context : NULL, addr);
+        if (ret)
+                dev_err(dev, "failed to create WQE mtr, ret = %d.\n", ret);
+
+        return ret;
+}
+
+static void free_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
+                         struct ib_uobject *uobject)
+{
+        hns_roce_mtr_destroy(hr_dev, &hr_qp->mtr);
+}
+
 static void hns_roce_add_cq_to_qp(struct hns_roce_dev *hr_dev,
                                   struct hns_roce_qp *hr_qp,
                                   struct ib_cq *send_cq, struct ib_cq *recv_cq)
@@ -819,68 +705,12 @@ static void hns_roce_add_cq_to_qp(struct hns_roce_dev *hr_dev,
         }
 }
-static int map_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
-                       u32 page_shift, bool is_user)
-{
-        dma_addr_t *buf_list[ARRAY_SIZE(hr_qp->regions)] = { NULL };
-        struct hns_roce_buf_region *r;
-        int region_count;
-        int buf_count;
-        int ret;
-        int i;
-
-        region_count = split_wqe_buf_region(hr_dev, hr_qp, hr_qp->regions,
-                                            ARRAY_SIZE(hr_qp->regions), page_shift);
-
-        /* alloc a tmp list to store WQE buffers address */
-        ret = hns_roce_alloc_buf_list(hr_qp->regions, buf_list, region_count);
-        if (ret) {
-                dev_err(hr_dev->dev, "Failed to alloc WQE buffer list\n");
-                return ret;
-        }
-
-        for (i = 0; i < region_count; i++) {
-                r = &hr_qp->regions[i];
-                if (is_user)
-                        buf_count = hns_roce_get_umem_bufs(hr_dev, buf_list[i],
-                                        r->count, r->offset, hr_qp->umem,
-                                        page_shift);
-                else
-                        buf_count = hns_roce_get_kmem_bufs(hr_dev, buf_list[i],
-                                        r->count, r->offset, hr_qp->hr_buf);
-
-                if (buf_count != r->count) {
-                        dev_err(hr_dev->dev, "Failed to get %s WQE buf, expect %d = %d.\n",
-                                is_user ? "user" : "kernel",
-                                r->count, buf_count);
-                        ret = -ENOBUFS;
-                        goto done;
-                }
-        }
-
-        hr_qp->wqe_bt_pg_shift = calc_wqe_bt_page_shift(hr_dev, hr_qp->regions,
-                                                        region_count);
-        hns_roce_mtr_init(&hr_qp->mtr, PAGE_SHIFT + hr_qp->wqe_bt_pg_shift,
-                          page_shift);
-        ret = hns_roce_mtr_attach(hr_dev, &hr_qp->mtr, buf_list, hr_qp->regions,
-                                  region_count);
-        if (ret)
-                dev_err(hr_dev->dev, "Failed to attatch WQE's mtr\n");
-
-        goto done;
-
-        hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
-done:
-        hns_roce_free_buf_list(buf_list, region_count);
-
-        return ret;
-}
-
 static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
                         struct ib_qp_init_attr *init_attr,
                         struct ib_uobject *uobject, unsigned long addr)
 {
-        u32 page_shift = PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;
+        struct hns_roce_buf_attr buf_attr = {};
+        struct device *dev = hr_dev->dev;
         bool is_rq_buf_inline;
         int ret;
@@ -894,28 +724,17 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
                 }
         }
-        if (uobject) {
-                hr_qp->umem = ib_umem_get(uobject->context, addr,
-                                          hr_qp->buff_size, 0, 0);
-                if (IS_ERR(hr_qp->umem)) {
-                        ret = PTR_ERR(hr_qp->umem);
-                        goto err_inline;
-                }
-        } else {
-                struct hns_roce_buf *kmem;
-
-                kmem = hns_roce_buf_alloc(hr_dev, hr_qp->buff_size, page_shift,
-                                          0);
-                if (IS_ERR(hr_qp->umem)) {
-                        ret = PTR_ERR(hr_qp->umem);
-                        goto err_inline;
-                }
-                hr_qp->hr_buf = kmem;
+        ret = set_wqe_buf_attr(hr_dev, hr_qp, &buf_attr);
+        if (ret) {
+                dev_err(dev, "failed to set WQE attr, ret = %d.\n", ret);
+                goto err_inline;
         }
-        ret = map_wqe_buf(hr_dev, hr_qp, page_shift, !!uobject);
-        if (ret)
-                goto err_alloc;
+        ret = alloc_wqe_buf(hr_dev, hr_qp, &buf_attr, uobject, addr);
+        if (ret) {
+                dev_err(dev, "failed to alloc WQE buf, ret = %d.\n", ret);
+                goto err_inline;
+        }
         return 0;
@@ -923,30 +742,14 @@ static int alloc_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
         if (is_rq_buf_inline)
                 hns_roce_free_recv_inline_buffer(hr_qp);
-err_alloc:
-        if (uobject) {
-                ib_umem_release(hr_qp->umem);
-                hr_qp->umem = NULL;
-        } else {
-                hns_roce_buf_free(hr_dev, hr_qp->hr_buf);
-                hr_qp->hr_buf = NULL;
-        }
-
-        dev_err(hr_dev->dev, "Failed to alloc WQE buffer, ret %d.\n", ret);
-
         return ret;
 }
-static void free_qp_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
+static void free_qp_buf(struct hns_roce_qp *hr_qp, struct ib_pd *ib_pd)
 {
-        hns_roce_mtr_cleanup(hr_dev, &hr_qp->mtr);
-        if (hr_qp->umem) {
-                ib_umem_release(hr_qp->umem);
-                hr_qp->umem = NULL;
-        } else {
-                hns_roce_buf_free(hr_dev, hr_qp->hr_buf);
-                hr_qp->hr_buf = NULL;
-        }
+        struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+        free_wqe_buf(hr_dev, hr_qp, ib_pd->uobject);
         if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RQ_INLINE) &&
             hr_qp->rq.wqe_cnt)
@@ -967,8 +770,8 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
         else
                 hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR;
-        ret = set_rq_size(hr_dev, &init_attr->cap, !!udata,
-                          hns_roce_qp_has_rq(init_attr), hr_qp);
+        ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp,
+                          hns_roce_qp_has_rq(init_attr), !!udata);
         if (ret) {
                 dev_err(hr_dev->dev, "Failed to set user RQ size\n");
                 return ret;
@@ -1171,7 +974,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev,
                 hns_roce_release_range_qp(hr_dev, qpn, 1);
 err_buf:
-        free_qp_buf(hr_dev, hr_qp);
+        free_qp_buf(hr_qp, ib_pd);
         if (ib_pd->uobject) {
                 if ((hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_RECORD_DB) &&
@@ -1215,7 +1018,7 @@ void hns_roce_qp_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp)
         if (hr_qp->ibqp.qp_type != IB_QPT_GSI)
                 hns_roce_release_range_qp(hr_dev, hr_qp->qpn, 1);
-        free_qp_buf(hr_dev, hr_qp);
+        free_qp_buf(hr_qp, hr_qp->ibqp.pd);
         if (hr_qp->ibqp.pd->uobject) {
                 struct hns_roce_ucontext *context =
@@ -1539,7 +1342,7 @@ EXPORT_SYMBOL_GPL(hns_roce_unlock_cqs);
 static void *get_wqe(struct hns_roce_qp *hr_qp, int offset)
 {
-        return hns_roce_buf_offset(hr_qp->hr_buf, offset);
+        return hns_roce_buf_offset(hr_qp->mtr.kmem, offset);
 }
 void *get_recv_wqe(struct hns_roce_qp *hr_qp, int n)
@@ -1556,8 +1359,7 @@ EXPORT_SYMBOL_GPL(get_send_wqe);
 void *get_send_extend_sge(struct hns_roce_qp *hr_qp, int n)
 {
-        return hns_roce_buf_offset(hr_qp->hr_buf, hr_qp->sge.offset +
-                                   (n << hr_qp->sge.sge_shift));
+        return get_wqe(hr_qp, hr_qp->sge.offset + (n << hr_qp->sge.sge_shift));
 }
 EXPORT_SYMBOL_GPL(get_send_extend_sge);
diff --git a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c
index bb8c4d7d2449a..160e0dfd71551 100644
--- a/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c
+++ b/drivers/infiniband/hw/hns/roce-customer/rdfx_entry.c
@@ -145,7 +145,7 @@ void rdfx_cp_sq_wqe_buf(struct hns_roce_dev *hr_dev, struct hns_roce_qp *qp,
         memcpy(dfx_qp_buf, dfx_hns_wqe_sge,
                2 * sizeof(struct hns_roce_v2_wqe_data_seg));
         dfx_qp_buf = hns_roce_buf_offset(rdfx_qp->buf, qp->sge.offset);
-        dfx_hns_wqe_sge = hns_roce_buf_offset(qp->hr_buf, qp->sge.offset);
+        dfx_hns_wqe_sge = hns_roce_buf_offset(qp->mtr.kmem, qp->sge.offset);
         rdfx_change_sq_buf(wr, atomic_en, dfx_qp_buf, dfx_hns_wqe_sge, sq,
                            hr_dev, qp);
 }
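Note for reviewers (illustrative only, not part of the diff): the sketch below shows the calling pattern that set_wqe_buf_attr() and alloc_wqe_buf()/free_wqe_buf() above follow with the reworked interface. It reuses the hns_roce_buf_attr fields and the hns_roce_mtr_create()/hns_roce_mtr_destroy() signatures exactly as they appear in this patch; the example_* function names are invented for this note and do not exist in the driver.

/*
 * Illustrative sketch only -- not part of this patch. Describe the buffer
 * as regions (size + hopnum), then let hns_roce_mtr_create() allocate or
 * pin the memory and map it in one call.
 */
static int example_alloc_wqe_like_buf(struct hns_roce_dev *hr_dev,
                                      struct hns_roce_mtr *mtr,
                                      u32 sq_size, u32 rq_size,
                                      struct ib_uobject *uobject,
                                      unsigned long addr)
{
        struct hns_roce_buf_attr buf_attr = {};
        int idx = 0;

        /* region 0: SQ WQE buffer, addressed with wqe_sq_hop_num */
        buf_attr.region[idx].size = sq_size;
        buf_attr.region[idx].hopnum = hr_dev->caps.wqe_sq_hop_num;
        idx++;

        /* region 1: RQ WQE buffer, laid out right after the SQ region */
        buf_attr.region[idx].size = rq_size;
        buf_attr.region[idx].hopnum = hr_dev->caps.wqe_rq_hop_num;
        idx++;

        buf_attr.region_count = idx;
        buf_attr.mtt_only = false;
        buf_attr.page_shift = HNS_HW_PAGE_SHIFT + hr_dev->caps.mtt_buf_pg_sz;

        /*
         * A user-space buffer (uobject != NULL) is pinned from 'addr';
         * a kernel buffer is allocated internally and kept in mtr->kmem,
         * which is why get_wqe() above now reads hr_qp->mtr.kmem.
         */
        return hns_roce_mtr_create(hr_dev, mtr, &buf_attr,
                                   PAGE_SHIFT + hr_dev->caps.mtt_ba_pg_sz,
                                   uobject ? uobject->context : NULL, addr);
}

static void example_free_wqe_like_buf(struct hns_roce_dev *hr_dev,
                                      struct hns_roce_mtr *mtr)
{
        /* releases the mapping and whichever buffer mtr_create() took over */
        hns_roce_mtr_destroy(hr_dev, mtr);
}

Because each region carries its own hopnum (0 to 3, per the cover text), the SQ, extended SGE and RQ buffers can use different hop counts while sharing a single mtr, which is what removes the per-caller region splitting and buffer-list bookkeeping deleted by this patch.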