driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
If a uQP works in DCA mode, the WQE buffer is split into blocks and stored in a list. The blocks are allocated from the DCA memory pool before WRs are posted, and they are released once the QP's CI equals its PI after the CQ is polled.
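For reference, the address lookup that this page list enables can be sketched as follows (illustrative only; the helper name dca_offset_to_addr is made up here, while the real lookup lives in the new get_wqe() helper in hns_roce_u_hw_v2.c):

    /*
     * Sketch: resolve a WQE offset against the DCA page list.
     * dca_wqe.bufs holds one pointer per HW page and dca_wqe.shift is the
     * page shift, so an offset splits into a page index plus an in-page
     * offset.
     */
    static void *dca_offset_to_addr(struct hns_roce_qp *qp, unsigned int offset)
    {
            unsigned int page_idx = offset >> qp->dca_wqe.shift;
            unsigned int page_off = offset & ((1 << qp->dca_wqe.shift) - 1);

            return qp->dca_wqe.bufs[page_idx] + page_off;
    }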
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.h       |  26 ++++-
 providers/hns/hns_roce_u_buf.c   | 173 ++++++++++++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.c | 125 +++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.h |   2 +
 providers/hns/hns_roce_u_verbs.c |  32 ++++--
 5 files changed, 345 insertions(+), 13 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index e3fa24d..ba646d3 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -365,11 +365,18 @@ struct hns_roce_sge_ex {
         unsigned int sge_shift;
 };
+struct hns_roce_dca_buf {
+        void **bufs;
+        unsigned int max_cnt;
+        unsigned int shift;
+};
+
 struct hns_roce_qp {
         struct verbs_qp verbs_qp;
         struct hns_roce_buf buf;
+        struct hns_roce_dca_buf dca_wqe;
         int max_inline_data;
-        int buf_size;
+        unsigned int buf_size;
         unsigned int sq_signal_bits;
         struct hns_roce_wq sq;
         struct hns_roce_wq rq;
@@ -423,11 +430,22 @@ struct hns_roce_u_hw {
         struct verbs_context_ops hw_ops;
 };
+struct hns_roce_dca_attach_attr {
+        uint32_t sq_offset;
+        uint32_t sge_offset;
+        uint32_t rq_offset;
+};
+
+struct hns_roce_dca_detach_attr {
+        uint32_t sq_index;
+};
+
 /*
  * The entries's buffer should be aligned to a multiple of the hardware's
  * minimum page size.
  */
 #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE)
+#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
 static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift)
 {
@@ -603,9 +621,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                            struct hns_roce_dca_attach_attr *attr,
+                            uint32_t size, struct hns_roce_dca_buf *buf);
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                             struct hns_roce_dca_detach_attr *attr);
 void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx);
 void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c
index c0f86e9..3d41b89 100644
--- a/providers/hns/hns_roce_u_buf.c
+++ b/providers/hns/hns_roce_u_buf.c
@@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
         return ret;
 }
+
+struct hns_dca_mem_query_resp {
+        uint64_t key;
+        uint32_t offset;
+        uint32_t page_count;
+};
+
+static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                         uint32_t index, struct hns_dca_mem_query_resp *resp)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_QUERY, 5);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index);
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY,
+                      &resp->key, sizeof(resp->key));
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET,
+                      &resp->offset, sizeof(resp->offset));
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT,
+                      &resp->page_count, sizeof(resp->page_count));
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_err(&ctx->ibv_ctx,
+                          "failed to query DCA mem-%u, ret = %d.\n",
+                          handle, ret);
+
+        return ret;
+}
+
+void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                             struct hns_roce_dca_detach_attr *attr)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_DETACH, 4);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX,
+                            attr->sq_index);
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_warn(&ctx->ibv_ctx,
+                           "failed to detach DCA mem-%u, ret = %d.\n",
+                           handle, ret);
+}
+
+struct hns_dca_mem_attach_resp {
+#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0)
+        uint32_t alloc_flags;
+        uint32_t alloc_pages;
+};
+
+static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                          struct hns_roce_dca_attach_attr *attr,
+                          struct hns_dca_mem_attach_resp *resp)
+{
+        int ret;
+
+        DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM,
+                               HNS_IB_METHOD_DCA_MEM_ATTACH, 6);
+        fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET,
+                            attr->sq_offset);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET,
+                            attr->sge_offset);
+        fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET,
+                            attr->rq_offset);
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS,
+                      &resp->alloc_flags, sizeof(resp->alloc_flags));
+        fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES,
+                      &resp->alloc_pages, sizeof(resp->alloc_pages));
+        ret = execute_ioctl(&ctx->ibv_ctx.context, cmd);
+        if (ret)
+                verbs_err(&ctx->ibv_ctx,
+                          "failed to attach DCA mem-%u, ret = %d.\n",
+                          handle, ret);
+
+        return ret;
+}
+
 static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx,
                                 uint32_t alloc_size)
 {
@@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx)
         return enable;
 }
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
+static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
 {
         struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
         struct hns_roce_dca_mem *mem;
@@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx)
                 dca_mem_cnt--;
         }
 }
+
+static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf,
+                             uint32_t page_index, int page_count)
+{
+        void **pages = &buf->bufs[page_index];
+        int page_size = 1 << buf->shift;
+        int i;
+
+        for (i = 0; i < page_count; i++) {
+                pages[i] = addr;
+                addr += page_size;
+        }
+}
+
+static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle,
+                         struct hns_roce_dca_buf *buf, uint32_t page_count)
+{
+        struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx;
+        struct hns_dca_mem_query_resp resp = {};
+        struct hns_roce_dca_mem *mem;
+        uint32_t idx = 0;
+        int ret;
+
+        while (idx < page_count && idx < buf->max_cnt) {
+                resp.page_count = 0;
+                ret = query_dca_mem(ctx, handle, idx, &resp);
+                if (ret)
+                        return -ENOMEM;
+                if (resp.page_count < 1)
+                        break;
+
+                pthread_spin_lock(&dca_ctx->lock);
+                mem = key_to_dca_mem(dca_ctx, resp.key);
+                if (mem && resp.offset < mem->buf.length) {
+                        config_dca_pages(dca_mem_addr(mem, resp.offset),
+                                         buf, idx, resp.page_count);
+                } else {
+                        pthread_spin_unlock(&dca_ctx->lock);
+                        break;
+                }
+                pthread_spin_unlock(&dca_ctx->lock);
+
+                idx += resp.page_count;
+        }
+
+        return (idx >= page_count) ? 0 : -ENOMEM;
+}
+
+#define DCA_EXPAND_MEM_TRY_TIMES 3
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
+                            struct hns_roce_dca_attach_attr *attr,
+                            uint32_t size, struct hns_roce_dca_buf *buf)
+{
+        uint32_t buf_pages = size >> buf->shift;
+        struct hns_dca_mem_attach_resp resp = {};
+        bool is_new_buf = true;
+        int try_times = 0;
+        int ret = 0;
+
+        do {
+                resp.alloc_pages = 0;
+                ret = attach_dca_mem(ctx, handle, attr, &resp);
+                if (ret)
+                        break;
+
+                if (resp.alloc_pages >= buf_pages) {
+                        is_new_buf = !!(resp.alloc_flags &
+                                        HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER);
+                        break;
+                }
+
+                ret = add_dca_mem(ctx, size);
+                if (ret)
+                        break;
+        } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES);
+
+        if (ret || resp.alloc_pages < buf_pages) {
+                verbs_err(&ctx->ibv_ctx,
+                          "failed to attach, size %u count %u != %u, ret = %d.\n",
+                          size, buf_pages, resp.alloc_pages, ret);
+                return -ENOMEM;
+        }
+
+        /* No need to config the user address if the DCA config is unchanged */
+        if (!is_new_buf && buf->bufs[0])
+                return 0;
+
+        return setup_dca_buf(ctx, handle, buf, buf_pages);
+}
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 0a100b8..7a93456 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq)
         return get_sw_cqe_v2(cq, cq->cons_index);
 }
+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp)
+{
+        return !!qp->dca_wqe.bufs;
+}
+
+static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset)
+{
+        if (likely(qp->buf.buf))
+                return qp->buf.buf + offset;
+        else if (unlikely(check_qp_dca_enable(qp)))
+                return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] +
+                       (offset & ((1 << qp->dca_wqe.shift) - 1));
+        else
+                return NULL;
+}
+
 static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n)
 {
-        return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift);
+        return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift));
 }
 static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n)
 {
-        return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift);
+        return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift));
 }
 static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n)
 {
-        return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift);
+        return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift));
 }
 static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n)
@@ -580,6 +596,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc,
         wc->opcode = wc_send_op_map[opcode];
 }
+static bool check_dca_attach_enable(struct hns_roce_qp *qp)
+{
+        return check_qp_dca_enable(qp) &&
+               (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH);
+}
+
+static bool check_dca_detach_enable(struct hns_roce_qp *qp)
+{
+        return check_qp_dca_enable(qp) &&
+               (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH);
+}
+
+static int dca_attach_qp_buf(struct hns_roce_context *ctx,
+                             struct hns_roce_qp *qp)
+{
+        struct hns_roce_dca_attach_attr attr = {};
+        uint32_t idx;
+        int ret;
+
+        hns_roce_spin_lock(&qp->sq.hr_lock);
+        hns_roce_spin_lock(&qp->rq.hr_lock);
+
+        if (qp->sq.wqe_cnt > 0) {
+                idx = qp->sq.head & (qp->sq.wqe_cnt - 1);
+                attr.sq_offset = idx << qp->sq.wqe_shift;
+        }
+
+        if (qp->ex_sge.sge_cnt > 0) {
+                idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1);
+                attr.sge_offset = idx << qp->ex_sge.sge_shift;
+        }
+
+        if (qp->rq.wqe_cnt > 0) {
+                idx = qp->rq.head & (qp->rq.wqe_cnt - 1);
+                attr.rq_offset = idx << qp->rq.wqe_shift;
+        }
+
+
+        ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr,
+                                      qp->buf_size, &qp->dca_wqe);
+
+        hns_roce_spin_unlock(&qp->rq.hr_lock);
+        hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+        return ret;
+}
+
+static void dca_detach_qp_buf(struct hns_roce_context *ctx,
+                              struct hns_roce_qp *qp)
+{
+        struct hns_roce_dca_detach_attr attr;
+        bool is_empty;
+
+        hns_roce_spin_lock(&qp->sq.hr_lock);
+        hns_roce_spin_lock(&qp->rq.hr_lock);
+
+        is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail;
+        if (is_empty && qp->sq.wqe_cnt > 0)
+                attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1);
+
+        hns_roce_spin_unlock(&qp->rq.hr_lock);
+        hns_roce_spin_unlock(&qp->sq.hr_lock);
+
+        if (is_empty && qp->sq.wqe_cnt > 0)
+                hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr);
+}
+
 static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx,
                         struct hns_roce_cq *cq)
 {
@@ -919,6 +1002,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
         for (npolled = 0; npolled < ne; ++npolled) {
                 err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
+                if (qp && check_dca_detach_enable(qp))
+                        dca_detach_qp_buf(ctx, qp);
+
                 if (err != V2_CQ_OK)
                         break;
         }
@@ -970,7 +1056,7 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
         if (unlikely(ibvqp->state == IBV_QPS_RESET ||
                      ibvqp->state == IBV_QPS_INIT ||
-                     ibvqp->state == IBV_QPS_RTR)){
+                     ibvqp->state == IBV_QPS_RTR)) {
                 verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context),
                           "unsupported qp state, state = %d.\n", ibvqp->state);
                 return EINVAL;
@@ -980,6 +1066,14 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
                 return EIO;
         }
+        if (check_dca_attach_enable(qp)) {
+                ret = dca_attach_qp_buf(ctx, qp);
+                if (ret)
+                        verbs_err_datapath(&ctx->ibv_ctx,
+                                           "failed to attach QP-%u send, ret = %d.\n",
+                                           qp->verbs_qp.qp.qp_num, ret);
+        }
+
         return ret;
 }
@@ -1347,6 +1441,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr,
         return 0;
 }
+static inline void fill_rc_dca_fields(uint32_t qp_num,
+                                      struct hns_roce_rc_sq_wqe *wqe)
+{
+        hr_reg_write(wqe, RCWQE_SQPN_L, qp_num);
+        hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH);
+}
+
 static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe,
                             const struct ibv_send_wr *wr)
 {
@@ -1454,6 +1555,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr,
                 return ret;
 wqe_valid:
+        if (check_qp_dca_enable(qp))
+                fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe);
+
         enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
         return 0;
@@ -1563,6 +1667,14 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
                 return EIO;
         }
+        if (check_dca_attach_enable(qp)) {
+                ret = dca_attach_qp_buf(ctx, qp);
+                if (ret)
+                        verbs_err_datapath(&ctx->ibv_ctx,
+                                           "failed to attach QP-%u recv, ret = %d.\n",
+                                           qp->verbs_qp.qp.qp_num, ret);
+        }
+
         return ret;
 }
@@ -1758,6 +1870,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                                    int attr_mask)
 {
+        struct hns_roce_context *ctx = to_hr_ctx(qp->context);
         struct hns_roce_modify_qp_ex_resp resp_ex = {};
         struct hns_roce_modify_qp_ex cmd_ex = {};
         struct hns_roce_qp *hr_qp = to_hr_qp(qp);
@@ -1804,6 +1917,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
                 hns_roce_init_qp_indices(to_hr_qp(qp));
         }
+        /* Try to shrink the DCA mem */
+        if (ctx->dca_ctx.mem_cnt > 0)
+                hns_roce_shrink_dca_mem(ctx);
+
         record_qp_attr(qp, attr, attr_mask);
         return ret;
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h
index 1a7b828..50a920f 100644
--- a/providers/hns/hns_roce_u_hw_v2.h
+++ b/providers/hns/hns_roce_u_hw_v2.h
@@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe {
 #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259)
 #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
+#define RCWQE_SQPN_L_WIDTH 2
+
 struct hns_roce_v2_wqe_data_seg {
         __le32 len;
         __le32 lkey;
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 69bcc13..248d862 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -1311,6 +1311,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev,
         return 0;
 }
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type)
+{
+        if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND))
+                return true;
+
+        return false;
+}
+
 static void qp_free_wqe(struct hns_roce_qp *qp)
 {
         free_recv_rinl_buf(&qp->rq_rinl_buf);
@@ -1322,8 +1330,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp)
         hns_roce_free_buf(&qp->buf);
 }
-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
-                        struct hns_roce_context *ctx)
+static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr,
+                        struct hns_roce_qp *qp, struct hns_roce_context *ctx)
 {
         struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
@@ -1341,12 +1349,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp,
         }
         if (qp->rq_rinl_buf.wqe_cnt) {
-                if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf))
+                if (alloc_recv_rinl_buf(attr->cap.max_recv_sge,
+                                        &qp->rq_rinl_buf))
                         goto err_alloc;
         }
-        if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift))
-                goto err_alloc;
+        if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) {
+                /* when DCA is enabled, use a buffer list to store page addr */
+                qp->buf.buf = NULL;
+                qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size);
+                qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT;
+                qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *));
+                if (!qp->dca_wqe.bufs)
+                        goto err_alloc;
+        } else {
+                if (hns_roce_alloc_buf(&qp->buf, qp->buf_size,
+                                       HNS_HW_PAGE_SIZE))
+                        goto err_alloc;
+        }
         return 0;
@@ -1636,7 +1656,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr,
 {
         int ret;
-        ret = qp_alloc_wqe(&attr->cap, qp, ctx);
+        ret = qp_alloc_wqe(attr, qp, ctx);
         if (ret)
                 return ret;