driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
To commit ?? ("RDMA/hns: Fixes concurrent ressetting and post_recv in DCA mode").
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 kernel-headers/rdma/hns-abi.h | 73 ++++++++++++++++++++++++++++++++++-
 1 file changed, 72 insertions(+), 1 deletion(-)
diff --git a/kernel-headers/rdma/hns-abi.h b/kernel-headers/rdma/hns-abi.h index 1d51612..8a8f2e4 100644 --- a/kernel-headers/rdma/hns-abi.h +++ b/kernel-headers/rdma/hns-abi.h @@ -102,7 +102,9 @@ enum hns_roce_qp_cap_flags { HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, + HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH = 1 << 4, HNS_ROCE_QP_CAP_DIRECT_WQE = 1 << 5, + HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH = 1 << 6, };
struct hns_roce_ib_create_qp_resp { @@ -114,12 +116,15 @@ struct hns_roce_ib_modify_qp_resp { __u8 tc_mode; __u8 priority; __u8 reserved[6]; + __u32 dcan; + __u32 rsv2; };
enum { HNS_ROCE_EXSGE_FLAGS = 1 << 0, HNS_ROCE_RQ_INLINE_FLAGS = 1 << 1, HNS_ROCE_CQE_INLINE_FLAGS = 1 << 2, + HNS_ROCE_UCTX_CONFIG_DCA = 1 << 3, HNS_ROCE_UCTX_DYN_QP_PGSZ = 1 << 4, };
@@ -127,6 +132,7 @@ enum { HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0, HNS_ROCE_RSP_RQ_INLINE_FLAGS = 1 << 1, HNS_ROCE_RSP_CQE_INLINE_FLAGS = 1 << 2, + HNS_ROCE_UCTX_RSP_DCA_FLAGS = HNS_ROCE_UCTX_CONFIG_DCA, HNS_ROCE_UCTX_RSP_DYN_QP_PGSZ = HNS_ROCE_UCTX_DYN_QP_PGSZ, };
@@ -139,12 +145,20 @@ struct hns_roce_ib_alloc_ucontext_resp { __u32 max_inline_data; __u8 congest_type; __u8 reserved0[7]; - __aligned_u64 rsv_for_dca[2]; + __u32 dca_qps; + __u32 dca_mmap_size; + __aligned_u64 dca_mmap_key; __aligned_u64 reset_mmap_key; };
+enum hns_roce_uctx_comp_mask { + HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS = 1 << 0, +}; + struct hns_roce_ib_alloc_ucontext { __u32 config; + __u32 comp; /* use hns_roce_uctx_comp_mask */ + __u32 dca_max_qps; __u32 reserved; };
@@ -158,4 +172,61 @@ struct hns_roce_ib_create_ah_resp { __u8 tc_mode; };
+#define UVERBS_ID_NS_MASK 0xF000 +#define UVERBS_ID_NS_SHIFT 12 + +enum hns_ib_objects { + HNS_IB_OBJECT_DCA_MEM = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum hns_ib_dca_mem_methods { + HNS_IB_METHOD_DCA_MEM_REG = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_METHOD_DCA_MEM_DEREG, + HNS_IB_METHOD_DCA_MEM_SHRINK, + HNS_IB_METHOD_DCA_MEM_ATTACH, + HNS_IB_METHOD_DCA_MEM_DETACH, + HNS_IB_METHOD_DCA_MEM_QUERY, +}; + +enum hns_ib_dca_mem_reg_attrs { + HNS_IB_ATTR_DCA_MEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_REG_FLAGS, + HNS_IB_ATTR_DCA_MEM_REG_LEN, + HNS_IB_ATTR_DCA_MEM_REG_ADDR, + HNS_IB_ATTR_DCA_MEM_REG_KEY, +}; + +enum hns_ib_dca_mem_dereg_attrs { + HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT), +}; + +enum hns_ib_dca_mem_shrink_attrs { + HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, + HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, + HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, +}; + +enum hns_ib_dca_mem_attach_attrs { + HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, + HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, + HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, + HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, + HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, +}; + +enum hns_ib_dca_mem_detach_attrs { + HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, +}; + +enum hns_ib_dca_mem_query_attrs { + HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT), + HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, + HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, +}; + #endif /* HNS_ABI_USER_H */
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
HIP09 introduces the DCA (Dynamic Context Attachment) feature, which allows many RC QPs to share WQE buffers from a memory pool. This reduces memory consumption when many QPs are inactive.
Two functions are defined for adding buffers to and removing buffers from the memory pool by calling the ib commands implemented in the hns kernel-space driver.
If a QP enables the DCA feature, the WQE buffer is attached to the memory pool when the user starts to post WRs and is detached when all CQEs have been polled.
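For illustration, a minimal sketch of how a caller could grow the pool (the wrapper name reserve_dca_space() is hypothetical; hns_roce_add_dca_mem() is the function added below):

/* Hypothetical caller: make sure the DCA pool can back a WQE buffer of the
 * given size.  hns_roce_add_dca_mem() rounds the request up to a multiple
 * of dca_ctx->unit_size (16 pages, i.e. 64KB with 4KB pages, by default)
 * before allocating the block and registering it with the kernel through
 * HNS_IB_METHOD_DCA_MEM_REG.
 */
static int reserve_dca_space(struct hns_roce_context *ctx, uint32_t size)
{
        if (!ctx->dca_ctx.unit_size) /* pool cannot be expanded */
                return -ENOMEM;

        return hns_roce_add_dca_mem(ctx, size);
}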
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.c     |  61 +++++++++++++-
 providers/hns/hns_roce_u.h     |  21 ++++-
 providers/hns/hns_roce_u_buf.c | 147 +++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+), 3 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 810b650..2272431 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -100,6 +100,53 @@ static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift) return count_shift > size_shift ? count_shift - size_shift : 0; }
+static int hns_roce_mmap(struct hns_roce_device *hr_dev, + struct hns_roce_context *context, int cmd_fd) +{ + int page_size = hr_dev->page_size; + + context->uar = mmap(NULL, page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); + if (context->uar == MAP_FAILED) + return -ENOMEM; + + return 0; +} + +static int init_dca_context(struct hns_roce_context *ctx, int page_size) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + int ret; + + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) + return 0; + + list_head_init(&dca_ctx->mem_list); + ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); + if (ret) + return ret; + + dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; + dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; + dca_ctx->mem_cnt = 0; + + return 0; +} + +static void uninit_dca_context(struct hns_roce_context *ctx) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + + if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) + return; + + pthread_spin_lock(&dca_ctx->lock); + hns_roce_cleanup_dca_mem(ctx); + pthread_spin_unlock(&dca_ctx->lock); + + pthread_spin_destroy(&dca_ctx->lock); +} + static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, struct hns_roce_alloc_ucontext_resp *resp, int page_size) @@ -185,7 +232,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, return NULL;
cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | - HNS_ROCE_CQE_INLINE_FLAGS; + HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) goto err_free; @@ -198,9 +245,15 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (context->uar == MAP_FAILED) goto err_free;
+ if (init_dca_context(context, hr_dev->page_size)) + goto err_free; + if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) goto reset_free;
+ if (hns_roce_mmap(hr_dev, context, cmd_fd)) + goto uar_free; + pthread_mutex_init(&context->qp_table_mutex, NULL); pthread_mutex_init(&context->srq_table_mutex, NULL); pthread_spin_init(&context->uar_lock, PTHREAD_PROCESS_PRIVATE); @@ -210,8 +263,11 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev,
return &context->ibv_ctx;
+uar_free: + if (context->reset_state) + munmap(context->reset_state, hr_dev->page_size); reset_free: - munmap(context->uar, hr_dev->page_size); + uninit_dca_context(context); err_free: verbs_uninit_context(&context->ibv_ctx); free(context); @@ -226,6 +282,7 @@ static void hns_roce_free_context(struct ibv_context *ibctx) munmap(context->uar, hr_dev->page_size); if (context->reset_state) munmap(context->reset_state, hr_dev->page_size); + uninit_dca_context(context); verbs_uninit_context(&context->ibv_ctx); free(context); } diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 024932a..90b2205 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -147,6 +147,10 @@
#define hr_reg_read(ptr, field) _hr_reg_read(ptr, field)
+enum { + HNS_ROCE_CAP_FLAG_DCA_MODE = BIT(15), +}; + #define HNS_ROCE_QP_TABLE_BITS 8 #define HNS_ROCE_QP_TABLE_SIZE BIT(HNS_ROCE_QP_TABLE_BITS)
@@ -201,6 +205,18 @@ struct hns_roce_spinlock { int need_lock; };
+#define HNS_DCA_MAX_MEM_SIZE ~0UL +#define HNS_DCA_DEFAULT_UNIT_PAGES 16 + +struct hns_roce_dca_ctx { + struct list_head mem_list; + pthread_spinlock_t lock; + int mem_cnt; + unsigned int unit_size; + uint64_t max_size; + uint64_t curr_size; +}; + struct hns_roce_v2_reset_state { uint32_t is_reset; uint32_t hw_ready; @@ -239,7 +255,7 @@ struct hns_roce_context { unsigned int cqe_size; uint32_t config; unsigned int max_inline_data; - + struct hns_roce_dca_ctx dca_ctx; bool use_new_reset_flag; bool reseted; }; @@ -586,6 +602,9 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); +int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size); + void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
bool is_hns_dev(struct ibv_device *device); diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c index 471dd9c..02c43ae 100644 --- a/providers/hns/hns_roce_u_buf.c +++ b/providers/hns/hns_roce_u_buf.c @@ -60,3 +60,150 @@ void hns_roce_free_buf(struct hns_roce_buf *buf)
munmap(buf->buf, buf->length); } + +struct hns_roce_dca_mem { + uint32_t handle; + struct list_node entry; + struct hns_roce_buf buf; + struct hns_roce_context *ctx; +}; + +static void free_dca_mem(struct hns_roce_context *ctx, + struct hns_roce_dca_mem *mem) +{ + hns_roce_free_buf(&mem->buf); + free(mem); +} + +static struct hns_roce_dca_mem *alloc_dca_mem(uint32_t size) +{ + struct hns_roce_dca_mem *mem = NULL; + int ret; + + mem = malloc(sizeof(struct hns_roce_dca_mem)); + if (!mem) { + errno = ENOMEM; + return NULL; + } + + ret = hns_roce_alloc_buf(&mem->buf, size, HNS_HW_PAGE_SIZE); + if (ret) { + errno = ENOMEM; + free(mem); + return NULL; + } + + return mem; +} + +static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) +{ + return (uintptr_t)dca_mem; +} + +static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) +{ + return dca_mem->buf.buf + offset; +} + +static int register_dca_mem(struct hns_roce_context *ctx, uint64_t key, + void *addr, uint32_t size, uint32_t *handle) +{ + struct ib_uverbs_attr *attr; + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, + HNS_IB_METHOD_DCA_MEM_REG, 4); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_REG_LEN, size); + fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_ADDR, + ioctl_ptr_to_u64(addr)); + fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_REG_KEY, key); + attr = fill_attr_out_obj(cmd, HNS_IB_ATTR_DCA_MEM_REG_HANDLE); + + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) { + verbs_err(&ctx->ibv_ctx, "failed to reg DCA mem, ret = %d.\n", + ret); + return ret; + } + + *handle = read_attr_obj(HNS_IB_ATTR_DCA_MEM_REG_HANDLE, attr); + + return 0; +} + +static void deregister_dca_mem(struct hns_roce_context *ctx, uint32_t handle) +{ + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, + HNS_IB_METHOD_DCA_MEM_DEREG, 1); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DEREG_HANDLE, handle); + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) + verbs_warn(&ctx->ibv_ctx, + "failed to dereg DCA mem-%u, ret = %d.\n", + handle, ret); +} + +void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_roce_dca_mem *mem; + struct hns_roce_dca_mem *tmp; + + list_for_each_safe(&dca_ctx->mem_list, mem, tmp, entry) + deregister_dca_mem(ctx, mem->handle); +} + +static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, + uint32_t alloc_size) +{ + bool enable; + + pthread_spin_lock(&ctx->lock); + + if (ctx->unit_size == 0) /* Pool size can't be increased */ + enable = false; + else if (ctx->max_size == HNS_DCA_MAX_MEM_SIZE) /* Pool size no limit */ + enable = true; + else /* Pool size doesn't exceed max size */ + enable = (ctx->curr_size + alloc_size) < ctx->max_size; + + pthread_spin_unlock(&ctx->lock); + + return enable; +} + +int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_roce_dca_mem *mem; + int ret; + + if (!add_dca_mem_enabled(&ctx->dca_ctx, size)) + return -ENOMEM; + + /* Step 1: Alloc DCA mem address */ + mem = alloc_dca_mem( + DIV_ROUND_UP(size, dca_ctx->unit_size) * dca_ctx->unit_size); + if (!mem) + return -ENOMEM; + + /* Step 2: Register DCA mem uobject to pin user address */ + ret = register_dca_mem(ctx, dca_mem_to_key(mem), dca_mem_addr(mem, 0), + mem->buf.length, &mem->handle); + if (ret) { + free_dca_mem(ctx, mem); + return ret; + } + + /* Step 3: Add DCA mem node to pool */ + pthread_spin_lock(&dca_ctx->lock); 
+ list_add_tail(&dca_ctx->mem_list, &mem->entry); + dca_ctx->mem_cnt++; + dca_ctx->curr_size += mem->buf.length; + pthread_spin_unlock(&dca_ctx->lock); + + return 0; +}
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
The QP's WQE buffer may be detached after the QP is modified or a CQE is polled, and the DCA mem object's state may become clean once no QP is using it. So shrink the clean DCA mem out of the memory pool and destroy its buffer to reduce memory consumption.
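As a usage note, the hw_v2 change below invokes the shrink path after polling a CQ and after destroying a QP; a condensed sketch of that call site (the wrapper name is hypothetical):

/* Hypothetical wrapper mirroring the call sites added in hns_roce_u_hw_v2.c:
 * give clean DCA memory back to the kernel once no QP is using it, e.g.
 * after poll_cq() or destroy_qp().  The mem_cnt check skips the shrink
 * attempt entirely when the pool holds no registered blocks.
 */
static void try_shrink_dca_pool(struct hns_roce_context *ctx)
{
        if (ctx->dca_ctx.mem_cnt > 0)
                hns_roce_shrink_dca_mem(ctx);
}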
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.h       |   2 +
 providers/hns/hns_roce_u_buf.c   | 103 +++++++++++++++++++++++++++++++
 providers/hns/hns_roce_u_hw_v2.c |   7 +++
 3 files changed, 112 insertions(+)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 90b2205..e3fa24d 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -214,6 +214,7 @@ struct hns_roce_dca_ctx { int mem_cnt; unsigned int unit_size; uint64_t max_size; + uint64_t min_size; uint64_t curr_size; };
@@ -602,6 +603,7 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c index 02c43ae..c0f86e9 100644 --- a/providers/hns/hns_roce_u_buf.c +++ b/providers/hns/hns_roce_u_buf.c @@ -101,6 +101,20 @@ static inline uint64_t dca_mem_to_key(struct hns_roce_dca_mem *dca_mem) return (uintptr_t)dca_mem; }
+static struct hns_roce_dca_mem *key_to_dca_mem(struct hns_roce_dca_ctx *ctx, + uint64_t key) +{ + struct hns_roce_dca_mem *mem; + struct hns_roce_dca_mem *tmp; + + list_for_each_safe(&ctx->mem_list, mem, tmp, entry) { + if (dca_mem_to_key(mem) == key) + return mem; + } + + return NULL; +} + static inline void *dca_mem_addr(struct hns_roce_dca_mem *dca_mem, int offset) { return dca_mem->buf.buf + offset; @@ -156,6 +170,32 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx) deregister_dca_mem(ctx, mem->handle); }
+struct hns_dca_mem_shrink_resp { + uint32_t free_mems; + uint64_t free_key; +}; + +static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint64_t size, struct hns_dca_mem_shrink_resp *resp) +{ + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, + HNS_IB_METHOD_DCA_MEM_SHRINK, 4); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_HANDLE, handle); + fill_attr_in_uint64(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_RESERVED_SIZE, size); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_KEY, + &resp->free_key, sizeof(resp->free_key)); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_SHRINK_OUT_FREE_MEMS, + &resp->free_mems, sizeof(resp->free_mems)); + + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) + verbs_err(&ctx->ibv_ctx, "failed to shrink DCA mem, ret = %d.\n", + ret); + + return ret; +} static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, uint32_t alloc_size) { @@ -175,6 +215,17 @@ static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, return enable; }
+static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) +{ + bool enable; + + pthread_spin_lock(&ctx->lock); + enable = ctx->mem_cnt > 0 && ctx->min_size < ctx->max_size; + pthread_spin_unlock(&ctx->lock); + + return enable; +} + int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) { struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; @@ -207,3 +258,55 @@ int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size)
return 0; } + +void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_dca_mem_shrink_resp resp = {}; + struct hns_roce_dca_mem *mem; + int dca_mem_cnt; + uint32_t handle; + int ret; + + pthread_spin_lock(&dca_ctx->lock); + dca_mem_cnt = ctx->dca_ctx.mem_cnt; + pthread_spin_unlock(&dca_ctx->lock); + while (dca_mem_cnt > 0 && shrink_dca_mem_enabled(dca_ctx)) { + resp.free_mems = 0; + /* Step 1: Use any DCA mem uobject to shrink pool */ + pthread_spin_lock(&dca_ctx->lock); + mem = list_tail(&dca_ctx->mem_list, + struct hns_roce_dca_mem, entry); + handle = mem ? mem->handle : 0; + pthread_spin_unlock(&dca_ctx->lock); + if (!mem) + break; + + ret = shrink_dca_mem(ctx, handle, dca_ctx->min_size, &resp); + if (ret || likely(resp.free_mems < 1)) + break; + + /* Step 2: Remove shrunk DCA mem node from pool */ + pthread_spin_lock(&dca_ctx->lock); + mem = key_to_dca_mem(dca_ctx, resp.free_key); + if (mem) { + list_del(&mem->entry); + dca_ctx->mem_cnt--; + dca_ctx->curr_size -= mem->buf.length; + } + + handle = mem ? mem->handle : 0; + pthread_spin_unlock(&dca_ctx->lock); + if (!mem) + break; + + /* Step 3: Destroy DCA mem uobject */ + deregister_dca_mem(ctx, handle); + free_dca_mem(ctx, mem); + /* No any free memory after deregister 1 DCA mem */ + if (resp.free_mems <= 1) + break; + + dca_mem_cnt--; + } +} diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 9016978..0a100b8 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -932,6 +932,10 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
hns_roce_spin_unlock(&cq->hr_lock);
+ /* Try to shrink the DCA mem */ + if (ctx->dca_ctx.mem_cnt > 0) + hns_roce_shrink_dca_mem(ctx); + return err == V2_CQ_POLL_ERR ? err : npolled; }
@@ -1883,6 +1887,9 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
free(qp);
+ if (ctx->dca_ctx.mem_cnt > 0) + hns_roce_shrink_dca_mem(ctx); + return ret; }
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
If a uQP works in DCA mode, the WQE buffer is split into many blocks and stored in a list. The blocks are allocated from the DCA memory pool before posting WRs and are dropped when the QP's CI is equal to its PI after polling the CQ.
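For clarity, a worked example of the offset-to-page translation that get_wqe() performs in this patch (the numbers are illustrative only):

/* With dca_wqe.shift == HNS_HW_PAGE_SHIFT (12), a WQE offset is split into
 * a page index and an in-page offset instead of indexing one contiguous
 * buffer:
 *
 *     page   = offset >> 12;               e.g. 0x5040 >> 12   = 5
 *     in_off = offset & ((1 << 12) - 1);   e.g. 0x5040 & 0xfff = 0x40
 *     wqe    = qp->dca_wqe.bufs[page] + in_off;
 *
 * so each 4KB block of the WQE buffer may live anywhere in the DCA pool.
 */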
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.h       |  26 ++++-
 providers/hns/hns_roce_u_buf.c   | 173 ++++++++++++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.c | 125 +++++++++++++++++++++-
 providers/hns/hns_roce_u_hw_v2.h |   2 +
 providers/hns/hns_roce_u_verbs.c |  32 ++++--
 5 files changed, 345 insertions(+), 13 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index e3fa24d..ba646d3 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -365,11 +365,18 @@ struct hns_roce_sge_ex { unsigned int sge_shift; };
+struct hns_roce_dca_buf { + void **bufs; + unsigned int max_cnt; + unsigned int shift; +}; + struct hns_roce_qp { struct verbs_qp verbs_qp; struct hns_roce_buf buf; + struct hns_roce_dca_buf dca_wqe; int max_inline_data; - int buf_size; + unsigned int buf_size; unsigned int sq_signal_bits; struct hns_roce_wq sq; struct hns_roce_wq rq; @@ -423,11 +430,22 @@ struct hns_roce_u_hw { struct verbs_context_ops hw_ops; };
+struct hns_roce_dca_attach_attr { + uint32_t sq_offset; + uint32_t sge_offset; + uint32_t rq_offset; +}; + +struct hns_roce_dca_detach_attr { + uint32_t sq_index; +}; + /* * The entries's buffer should be aligned to a multiple of the hardware's * minimum page size. */ #define hr_hw_page_align(x) align(x, HNS_HW_PAGE_SIZE) +#define hr_hw_page_count(x) (hr_hw_page_align(x) / HNS_HW_PAGE_SIZE)
static inline unsigned int to_hr_hem_entries_size(int count, int buf_shift) { @@ -603,9 +621,13 @@ void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
+int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_attach_attr *attr, + uint32_t size, struct hns_roce_dca_buf *buf); +void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_detach_attr *attr); void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx); -int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size);
void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c index c0f86e9..3d41b89 100644 --- a/providers/hns/hns_roce_u_buf.c +++ b/providers/hns/hns_roce_u_buf.c @@ -196,6 +196,88 @@ static int shrink_dca_mem(struct hns_roce_context *ctx, uint32_t handle,
return ret; } + +struct hns_dca_mem_query_resp { + uint64_t key; + uint32_t offset; + uint32_t page_count; +}; + +static int query_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + uint32_t index, struct hns_dca_mem_query_resp *resp) +{ + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, + HNS_IB_METHOD_DCA_MEM_QUERY, 5); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_HANDLE, handle); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_PAGE_INDEX, index); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_KEY, + &resp->key, sizeof(resp->key)); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_OFFSET, + &resp->offset, sizeof(resp->offset)); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_QUERY_OUT_PAGE_COUNT, + &resp->page_count, sizeof(resp->page_count)); + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) + verbs_err(&ctx->ibv_ctx, + "failed to query DCA mem-%u, ret = %d.\n", + handle, ret); + + return ret; +} + +void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_detach_attr *attr) +{ + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, + HNS_IB_METHOD_DCA_MEM_DETACH, 4); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_HANDLE, handle); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_DETACH_SQ_INDEX, + attr->sq_index); + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) + verbs_warn(&ctx->ibv_ctx, + "failed to detach DCA mem-%u, ret = %d.\n", + handle, ret); +} + +struct hns_dca_mem_attach_resp { +#define HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER BIT(0) + uint32_t alloc_flags; + uint32_t alloc_pages; +}; + +static int attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_attach_attr *attr, + struct hns_dca_mem_attach_resp *resp) +{ + int ret; + + DECLARE_COMMAND_BUFFER(cmd, HNS_IB_OBJECT_DCA_MEM, + HNS_IB_METHOD_DCA_MEM_ATTACH, 6); + fill_attr_in_obj(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_HANDLE, handle); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SQ_OFFSET, + attr->sq_offset); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_SGE_OFFSET, + attr->sge_offset); + fill_attr_in_uint32(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_RQ_OFFSET, + attr->rq_offset); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_FLAGS, + &resp->alloc_flags, sizeof(resp->alloc_flags)); + fill_attr_out(cmd, HNS_IB_ATTR_DCA_MEM_ATTACH_OUT_ALLOC_PAGES, + &resp->alloc_pages, sizeof(resp->alloc_pages)); + ret = execute_ioctl(&ctx->ibv_ctx.context, cmd); + if (ret) + verbs_err(&ctx->ibv_ctx, + "failed to attach DCA mem-%u, ret = %d.\n", + handle, ret); + + return ret; +} + static bool add_dca_mem_enabled(struct hns_roce_dca_ctx *ctx, uint32_t alloc_size) { @@ -226,7 +308,7 @@ static bool shrink_dca_mem_enabled(struct hns_roce_dca_ctx *ctx) return enable; }
-int hns_roce_add_dca_mem(struct hns_roce_context *ctx, uint32_t size) +static int add_dca_mem(struct hns_roce_context *ctx, uint32_t size) { struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; struct hns_roce_dca_mem *mem; @@ -310,3 +392,92 @@ void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx) dca_mem_cnt--; } } + +static void config_dca_pages(void *addr, struct hns_roce_dca_buf *buf, + uint32_t page_index, int page_count) +{ + void **pages = &buf->bufs[page_index]; + int page_size = 1 << buf->shift; + int i; + + for (i = 0; i < page_count; i++) { + pages[i] = addr; + addr += page_size; + } +} + +static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_buf *buf, uint32_t page_count) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + struct hns_dca_mem_query_resp resp = {}; + struct hns_roce_dca_mem *mem; + uint32_t idx = 0; + int ret; + + while (idx < page_count && idx < buf->max_cnt) { + resp.page_count = 0; + ret = query_dca_mem(ctx, handle, idx, &resp); + if (ret) + return -ENOMEM; + if (resp.page_count < 1) + break; + + pthread_spin_lock(&dca_ctx->lock); + mem = key_to_dca_mem(dca_ctx, resp.key); + if (mem && resp.offset < mem->buf.length) { + config_dca_pages(dca_mem_addr(mem, resp.offset), + buf, idx, resp.page_count); + } else { + pthread_spin_unlock(&dca_ctx->lock); + break; + } + pthread_spin_unlock(&dca_ctx->lock); + + idx += resp.page_count; + } + + return (idx >= page_count) ? 0 : -ENOMEM; +} + +#define DCA_EXPAND_MEM_TRY_TIMES 3 +int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, + struct hns_roce_dca_attach_attr *attr, + uint32_t size, struct hns_roce_dca_buf *buf) +{ + uint32_t buf_pages = size >> buf->shift; + struct hns_dca_mem_attach_resp resp = {}; + bool is_new_buf = true; + int try_times = 0; + int ret = 0; + + do { + resp.alloc_pages = 0; + ret = attach_dca_mem(ctx, handle, attr, &resp); + if (ret) + break; + + if (resp.alloc_pages >= buf_pages) { + is_new_buf = !!(resp.alloc_flags & + HNS_DCA_ATTACH_OUT_FLAGS_NEW_BUFFER); + break; + } + + ret = add_dca_mem(ctx, size); + if (ret) + break; + } while (try_times++ < DCA_EXPAND_MEM_TRY_TIMES); + + if (ret || resp.alloc_pages < buf_pages) { + verbs_err(&ctx->ibv_ctx, + "failed to attach, size %u count %u != %u, ret = %d.\n", + size, buf_pages, resp.alloc_pages, ret); + return -ENOMEM; + } + + /* No need config user address if DCA config not changed */ + if (!is_new_buf && buf->bufs[0]) + return 0; + + return setup_dca_buf(ctx, handle, buf, buf_pages); +} diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 0a100b8..7a93456 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -199,19 +199,35 @@ static struct hns_roce_v2_cqe *next_cqe_sw_v2(struct hns_roce_cq *cq) return get_sw_cqe_v2(cq, cq->cons_index); }
+static inline bool check_qp_dca_enable(struct hns_roce_qp *qp) +{ + return !!qp->dca_wqe.bufs; +} + +static inline void *get_wqe(struct hns_roce_qp *qp, unsigned int offset) +{ + if (likely(qp->buf.buf)) + return qp->buf.buf + offset; + else if (unlikely(check_qp_dca_enable(qp))) + return qp->dca_wqe.bufs[offset >> qp->dca_wqe.shift] + + (offset & ((1 << qp->dca_wqe.shift) - 1)); + else + return NULL; +} + static void *get_recv_wqe_v2(struct hns_roce_qp *qp, unsigned int n) { - return qp->buf.buf + qp->rq.offset + (n << qp->rq.wqe_shift); + return get_wqe(qp, qp->rq.offset + (n << qp->rq.wqe_shift)); }
static void *get_send_wqe(struct hns_roce_qp *qp, unsigned int n) { - return qp->buf.buf + qp->sq.offset + (n << qp->sq.wqe_shift); + return get_wqe(qp, qp->sq.offset + (n << qp->sq.wqe_shift)); }
static void *get_send_sge_ex(struct hns_roce_qp *qp, unsigned int n) { - return qp->buf.buf + qp->ex_sge.offset + (n << qp->ex_sge.sge_shift); + return get_wqe(qp, qp->ex_sge.offset + (n << qp->ex_sge.sge_shift)); }
static void *get_srq_wqe(struct hns_roce_srq *srq, unsigned int n) @@ -580,6 +596,73 @@ static void parse_cqe_for_req(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc, wc->opcode = wc_send_op_map[opcode]; }
+static bool check_dca_attach_enable(struct hns_roce_qp *qp) +{ + return check_qp_dca_enable(qp) && + (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_ATTACH); +} + +static bool check_dca_detach_enable(struct hns_roce_qp *qp) +{ + return check_qp_dca_enable(qp) && + (qp->flags & HNS_ROCE_QP_CAP_DYNAMIC_CTX_DETACH); +} + +static int dca_attach_qp_buf(struct hns_roce_context *ctx, + struct hns_roce_qp *qp) +{ + struct hns_roce_dca_attach_attr attr = {}; + uint32_t idx; + int ret; + + hns_roce_spin_lock(&qp->sq.hr_lock); + hns_roce_spin_lock(&qp->rq.hr_lock); + + if (qp->sq.wqe_cnt > 0) { + idx = qp->sq.head & (qp->sq.wqe_cnt - 1); + attr.sq_offset = idx << qp->sq.wqe_shift; + } + + if (qp->ex_sge.sge_cnt > 0) { + idx = qp->next_sge & (qp->ex_sge.sge_cnt - 1); + attr.sge_offset = idx << qp->ex_sge.sge_shift; + } + + if (qp->rq.wqe_cnt > 0) { + idx = qp->rq.head & (qp->rq.wqe_cnt - 1); + attr.rq_offset = idx << qp->rq.wqe_shift; + } + + + ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, + qp->buf_size, &qp->dca_wqe); + + hns_roce_spin_unlock(&qp->rq.hr_lock); + hns_roce_spin_unlock(&qp->sq.hr_lock); + + return ret; +} + +static void dca_detach_qp_buf(struct hns_roce_context *ctx, + struct hns_roce_qp *qp) +{ + struct hns_roce_dca_detach_attr attr; + bool is_empty; + + hns_roce_spin_lock(&qp->sq.hr_lock); + hns_roce_spin_lock(&qp->rq.hr_lock); + + is_empty = qp->sq.head == qp->sq.tail && qp->rq.head == qp->rq.tail; + if (is_empty && qp->sq.wqe_cnt > 0) + attr.sq_index = qp->sq.head & (qp->sq.wqe_cnt - 1); + + hns_roce_spin_unlock(&qp->rq.hr_lock); + hns_roce_spin_unlock(&qp->sq.hr_lock); + + if (is_empty && qp->sq.wqe_cnt > 0) + hns_roce_detach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr); +} + static void cqe_proc_sq(struct hns_roce_qp *hr_qp, uint32_t wqe_idx, struct hns_roce_cq *cq) { @@ -919,6 +1002,9 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
for (npolled = 0; npolled < ne; ++npolled) { err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); + if (qp && check_dca_detach_enable(qp)) + dca_detach_qp_buf(ctx, qp); + if (err != V2_CQ_OK) break; } @@ -970,7 +1056,7 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
if (unlikely(ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT || - ibvqp->state == IBV_QPS_RTR)){ + ibvqp->state == IBV_QPS_RTR)) { verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), "unsupported qp state, state = %d.\n", ibvqp->state); return EINVAL; @@ -980,6 +1066,14 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx) return EIO; }
+ if (check_dca_attach_enable(qp)) { + ret = dca_attach_qp_buf(ctx, qp); + if (ret) + verbs_err_datapath(&ctx->ibv_ctx, + "failed to attach QP-%u send, ret = %d.\n", + qp->verbs_qp.qp.qp_num, ret); + } + return ret; }
@@ -1347,6 +1441,13 @@ static int set_rc_inl(struct hns_roce_qp *qp, const struct ibv_send_wr *wr, return 0; }
+static inline void fill_rc_dca_fields(uint32_t qp_num, + struct hns_roce_rc_sq_wqe *wqe) +{ + hr_reg_write(wqe, RCWQE_SQPN_L, qp_num); + hr_reg_write(wqe, RCWQE_SQPN_H, qp_num >> RCWQE_SQPN_L_WIDTH); +} + static void set_bind_mw_seg(struct hns_roce_rc_sq_wqe *wqe, const struct ibv_send_wr *wr) { @@ -1454,6 +1555,9 @@ static int set_rc_wqe(void *wqe, struct hns_roce_qp *qp, struct ibv_send_wr *wr, return ret;
wqe_valid: + if (check_qp_dca_enable(qp)) + fill_rc_dca_fields(qp->verbs_qp.qp.qp_num, rc_sq_wqe); + enable_wqe(qp, rc_sq_wqe, qp->sq.head + nreq);
return 0; @@ -1563,6 +1667,14 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) return EIO; }
+ if (check_dca_attach_enable(qp)) { + ret = dca_attach_qp_buf(ctx, qp); + if (ret) + verbs_err_datapath(&ctx->ibv_ctx, + "failed to attach QP-%u recv, ret = %d.\n", + qp->verbs_qp.qp.qp_num, ret); + } + return ret; }
@@ -1758,6 +1870,7 @@ static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr, static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask) { + struct hns_roce_context *ctx = to_hr_ctx(qp->context); struct hns_roce_modify_qp_ex_resp resp_ex = {}; struct hns_roce_modify_qp_ex cmd_ex = {}; struct hns_roce_qp *hr_qp = to_hr_qp(qp); @@ -1804,6 +1917,10 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, hns_roce_init_qp_indices(to_hr_qp(qp)); }
+ /* Try to shrink the DCA mem */ + if (ctx->dca_ctx.mem_cnt > 0) + hns_roce_shrink_dca_mem(ctx); + record_qp_attr(qp, attr, attr_mask);
return ret; diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h index 1a7b828..50a920f 100644 --- a/providers/hns/hns_roce_u_hw_v2.h +++ b/providers/hns/hns_roce_u_hw_v2.h @@ -237,6 +237,8 @@ struct hns_roce_rc_sq_wqe { #define RCWQE_MW_RR_EN RCWQE_FIELD_LOC(259, 259) #define RCWQE_MW_RW_EN RCWQE_FIELD_LOC(260, 260)
+#define RCWQE_SQPN_L_WIDTH 2 + struct hns_roce_v2_wqe_data_seg { __le32 len; __le32 lkey; diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 69bcc13..248d862 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -1311,6 +1311,14 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, return 0; }
+static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type) +{ + if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND)) + return true; + + return false; +} + static void qp_free_wqe(struct hns_roce_qp *qp) { free_recv_rinl_buf(&qp->rq_rinl_buf); @@ -1322,8 +1330,8 @@ static void qp_free_wqe(struct hns_roce_qp *qp) hns_roce_free_buf(&qp->buf); }
-static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, - struct hns_roce_context *ctx) +static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp, struct hns_roce_context *ctx) { struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
@@ -1341,12 +1349,24 @@ static int qp_alloc_wqe(struct ibv_qp_cap *cap, struct hns_roce_qp *qp, }
if (qp->rq_rinl_buf.wqe_cnt) { - if (alloc_recv_rinl_buf(cap->max_recv_sge, &qp->rq_rinl_buf)) + if (alloc_recv_rinl_buf(attr->cap.max_recv_sge, + &qp->rq_rinl_buf)) goto err_alloc; }
- if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, 1 << qp->pageshift)) - goto err_alloc; + if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) { + /* when DCA is enabled, use a buffer list to store page addr */ + qp->buf.buf = NULL; + qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); + qp->dca_wqe.shift = HNS_HW_PAGE_SHIFT; + qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); + if (!qp->dca_wqe.bufs) + goto err_alloc; + } else { + if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, + HNS_HW_PAGE_SIZE)) + goto err_alloc; + }
return 0;
@@ -1636,7 +1656,7 @@ static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, { int ret;
- ret = qp_alloc_wqe(&attr->cap, qp, ctx); + ret = qp_alloc_wqe(attr, qp, ctx); if (ret) return ret;
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
The user DCA needs to check the QP's attach state before filling the WQE buffer, based on the response of the uverbs command 'HNS_IB_METHOD_DCA_MEM_ATTACH', but this wastes too much time on system calls. So use a table shared between the user driver and the kernel driver to sync the DCA status.
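For reference, a sketch of the layout set up by mmap_dca() in this patch (explanatory comment only; the sizes come from the kernel's ucontext response):

/* The mmap()ed status area is split into two halves: the first half holds
 * the per-QP attach-status bits and the second half holds the per-QP sync
 * (lock) bits, i.e. 2 * HNS_DCA_BITS_PER_STATUS bits per QP:
 *
 *     dca_ctx->buf_status  = addr;
 *     dca_ctx->sync_status = addr + size / 2;
 *     dca_ctx->max_qps     = min(resp->dca_qps,
 *                                size * 8 / (2 * HNS_DCA_BITS_PER_STATUS));
 */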
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++---
 providers/hns/hns_roce_u.h | 10 ++++++++
 2 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 2272431..56ff201 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -113,9 +113,33 @@ static int hns_roce_mmap(struct hns_roce_device *hr_dev, return 0; }
-static int init_dca_context(struct hns_roce_context *ctx, int page_size) +static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + int page_size, size_t size, uint64_t mmap_key) { struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + void *addr; + + addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, + mmap_key); + if (addr == MAP_FAILED) { + verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n"); + return -EINVAL; + } + + dca_ctx->buf_status = addr; + dca_ctx->sync_status = addr + size / 2; + + return 0; +} + +static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, + int page_size) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + uint64_t mmap_key = resp->dca_mmap_key; + int mmap_size = resp->dca_mmap_size; + int max_qps = resp->dca_qps; int ret;
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) @@ -130,6 +154,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size) dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; dca_ctx->mem_cnt = 0;
+ if (mmap_key) { + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; + + if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) { + dca_ctx->status_size = mmap_size; + dca_ctx->max_qps = min_t(int, max_qps, + mmap_size * 8 / bits_per_qp); + } + } + return 0; }
@@ -143,6 +177,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx) pthread_spin_lock(&dca_ctx->lock); hns_roce_cleanup_dca_mem(ctx); pthread_spin_unlock(&dca_ctx->lock); + if (dca_ctx->buf_status) + munmap(dca_ctx->buf_status, dca_ctx->status_size);
pthread_spin_destroy(&dca_ctx->lock); } @@ -217,6 +253,14 @@ static int set_context_attr(struct hns_roce_device *hr_dev, return 0; }
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) +{ + cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | + HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; + cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; +} + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) @@ -231,8 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (!context) return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | - HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + ucontext_set_cmd(&cmd, hr_dev->page_size); if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) goto err_free; @@ -245,7 +288,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (context->uar == MAP_FAILED) goto err_free;
- if (init_dca_context(context, hr_dev->page_size)) + if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) goto err_free;
if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index ba646d3..e808ff3 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -35,6 +35,7 @@
#include <stddef.h> #include <endian.h> +#include <stdatomic.h> #include <util/compiler.h>
#include <infiniband/driver.h> @@ -44,6 +45,7 @@ #include <ccan/array_size.h> #include <util/bitmap.h> #include <ccan/container_of.h> +#include <ccan/minmax.h> #include <linux/if_ether.h> #include "hns_roce_u_abi.h"
@@ -52,6 +54,8 @@
#define PFX "hns: "
+typedef _Atomic(uint64_t) atomic_bitmap_t; + /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) @@ -216,6 +220,12 @@ struct hns_roce_dca_ctx { uint64_t max_size; uint64_t min_size; uint64_t curr_size; + +#define HNS_DCA_BITS_PER_STATUS 1 + unsigned int max_qps; + unsigned int status_size; + atomic_bitmap_t *buf_status; + atomic_bitmap_t *sync_status; };
struct hns_roce_v2_reset_state {
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
Use the DCA number (dcan) from the response of modify_qp() to index the DCA status bits in the shared memory. If the number is valid, the user DCA can get the DCA status by testing the corresponding bit in the shared memory for each QP; otherwise it invokes the verb 'HNS_IB_METHOD_DCA_MEM_ATTACH' to check the DCA status.
Each QP has 2 bits in the shared memory: one bit is used as a lock for DCA status changes made by the kernel driver or the user driver, and the other bit indicates the DCA attach status.
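A minimal sketch of how the two bits are used, built on the helpers and macros added in this patch (ctx below stands for the struct hns_roce_dca_ctx; dcan is the DCA number returned by modify_qp()):

/* Per-QP view of the shared table:
 *
 *     busy = test_and_set_bit_lock(ctx->sync_status, DCAN_TO_SYNC_BIT(dcan));
 *            // true: the kernel or another thread holds the lock bit
 *            // false: the lock bit is now ours
 *     attached = atomic_test_bit(ctx->buf_status, DCAN_TO_STAT_BIT(dcan));
 *            // attach-status bit: skip HNS_IB_METHOD_DCA_MEM_ATTACH if set
 *     clear_bit_unlock(ctx->sync_status, DCAN_TO_SYNC_BIT(dcan));
 *            // release the lock bit once posting is done
 *
 * With HNS_DCA_BITS_PER_STATUS == 1, both macros reduce to dcan itself
 * within their respective halves of the table.
 */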
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.h       | 31 +++++++++++++++++++++++
 providers/hns/hns_roce_u_buf.c   | 42 ++++++++++++++++++++++++++++++++
 providers/hns/hns_roce_u_hw_v2.c | 21 +++++++++++++++-
 3 files changed, 93 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index e808ff3..5bddb00 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -379,6 +379,7 @@ struct hns_roce_dca_buf { void **bufs; unsigned int max_cnt; unsigned int shift; + unsigned int dcan; };
struct hns_roce_qp { @@ -444,6 +445,7 @@ struct hns_roce_dca_attach_attr { uint32_t sq_offset; uint32_t sge_offset; uint32_t rq_offset; + bool force; };
struct hns_roce_dca_detach_attr { @@ -556,6 +558,32 @@ static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock) return 0; }
+#define HNS_ROCE_BIT_MASK(nr) (1UL << ((nr) % 64)) +#define HNS_ROCE_BIT_WORD(nr) ((nr) / 64) + +static inline bool atomic_test_bit(atomic_bitmap_t *p, uint32_t nr) +{ + p += HNS_ROCE_BIT_WORD(nr); + return !!(atomic_load(p) & HNS_ROCE_BIT_MASK(nr)); +} + +static inline bool test_and_set_bit_lock(atomic_bitmap_t *p, uint32_t nr) +{ + uint64_t mask = HNS_ROCE_BIT_MASK(nr); + + p += HNS_ROCE_BIT_WORD(nr); + if (atomic_load(p) & mask) + return true; + + return (atomic_fetch_or(p, mask) & mask) != 0; +} + +static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) +{ + p += HNS_ROCE_BIT_WORD(nr); + atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); +} + int hns_roce_u_query_device(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr, size_t attr_size); @@ -636,6 +664,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, uint32_t size, struct hns_roce_dca_buf *buf); void hns_roce_detach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, struct hns_roce_dca_detach_attr *attr); +bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); +void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan); + void hns_roce_shrink_dca_mem(struct hns_roce_context *ctx); void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c index 3d41b89..08c0fbc 100644 --- a/providers/hns/hns_roce_u_buf.c +++ b/providers/hns/hns_roce_u_buf.c @@ -440,6 +440,45 @@ static int setup_dca_buf(struct hns_roce_context *ctx, uint32_t handle, return (idx >= page_count) ? 0 : -ENOMEM; }
+#define DCAN_TO_SYNC_BIT(n) ((n) * HNS_DCA_BITS_PER_STATUS) +#define DCAN_TO_STAT_BIT(n) DCAN_TO_SYNC_BIT(n) + +#define MAX_DCA_TRY_LOCK_TIMES 10 +bool hns_roce_dca_start_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan) +{ + atomic_bitmap_t *st = ctx->sync_status; + int try_times = 0; + + if (!st || dcan >= ctx->max_qps) + return true; + + while (test_and_set_bit_lock(st, DCAN_TO_SYNC_BIT(dcan))) + if (try_times++ > MAX_DCA_TRY_LOCK_TIMES) + return false; + + return true; +} + +void hns_roce_dca_stop_post(struct hns_roce_dca_ctx *ctx, uint32_t dcan) +{ + atomic_bitmap_t *st = ctx->sync_status; + + if (!st || dcan >= ctx->max_qps) + return; + + clear_bit_unlock(st, DCAN_TO_SYNC_BIT(dcan)); +} + +static bool check_dca_is_attached(struct hns_roce_dca_ctx *ctx, uint32_t dcan) +{ + atomic_bitmap_t *st = ctx->buf_status; + + if (!st || dcan >= ctx->max_qps) + return false; + + return atomic_test_bit(st, DCAN_TO_STAT_BIT(dcan)); +} + #define DCA_EXPAND_MEM_TRY_TIMES 3 int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, struct hns_roce_dca_attach_attr *attr, @@ -451,6 +490,9 @@ int hns_roce_attach_dca_mem(struct hns_roce_context *ctx, uint32_t handle, int try_times = 0; int ret = 0;
+ if (!attr->force && check_dca_is_attached(&ctx->dca_ctx, buf->dcan)) + return 0; + do { resp.alloc_pages = 0; ret = attach_dca_mem(ctx, handle, attr, &resp); diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 7a93456..15d9108 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -612,6 +612,7 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx, struct hns_roce_qp *qp) { struct hns_roce_dca_attach_attr attr = {}; + bool enable_detach; uint32_t idx; int ret;
@@ -633,9 +634,16 @@ static int dca_attach_qp_buf(struct hns_roce_context *ctx, attr.rq_offset = idx << qp->rq.wqe_shift; }
+ enable_detach = check_dca_detach_enable(qp); + if (enable_detach && + !hns_roce_dca_start_post(&ctx->dca_ctx, qp->dca_wqe.dcan)) + /* Force attach if failed to sync dca status */ + attr.force = true;
ret = hns_roce_attach_dca_mem(ctx, qp->verbs_qp.qp.handle, &attr, - qp->buf_size, &qp->dca_wqe); + qp->buf_size, &qp->dca_wqe); + if (ret && enable_detach) + hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan);
hns_roce_spin_unlock(&qp->rq.hr_lock); hns_roce_spin_unlock(&qp->sq.hr_lock); @@ -1643,6 +1651,9 @@ out:
hns_roce_spin_unlock(&qp->sq.hr_lock);
+ if (check_dca_detach_enable(qp)) + hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); + if (ibvqp->state == IBV_QPS_ERR) { attr.qp_state = IBV_QPS_ERR;
@@ -1784,6 +1795,9 @@ out:
hns_roce_spin_unlock(&qp->rq.hr_lock);
+ if (check_dca_detach_enable(qp)) + hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); + if (ibvqp->state == IBV_QPS_ERR) { attr.qp_state = IBV_QPS_ERR; hns_roce_u_v2_modify_qp(ibvqp, &attr, IBV_QP_STATE); @@ -1902,6 +1916,7 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, if (attr->qp_state == IBV_QPS_RTR) { hr_qp->tc_mode = resp_ex.drv_payload.tc_mode; hr_qp->priority = resp_ex.drv_payload.priority; + hr_qp->dca_wqe.dcan = resp_ex.drv_payload.dcan; } }
@@ -2951,6 +2966,10 @@ static int wr_complete(struct ibv_qp_ex *ibv_qp)
out: hns_roce_spin_unlock(&qp->sq.hr_lock); + + if (check_dca_detach_enable(qp)) + hns_roce_dca_stop_post(&ctx->dca_ctx, qp->dca_wqe.dcan); + if (ibv_qp->qp_base.state == IBV_QPS_ERR) { attr.qp_state = IBV_QPS_ERR; hns_roce_u_v2_modify_qp(&ibv_qp->qp_base, &attr, IBV_QP_STATE);
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
Add two direct verbs to configure DCA:
1. hnsdv_open_device() is used to configure the DCA memory pool.
2. hnsdv_create_qp() is used to create a DCA QP.
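A minimal usage sketch of the new API (all attribute values below are illustrative only, not recommendations):

/* Example only: open the device with a DCA pool and create one DCA QP. */
struct hnsdv_context_attr ctx_attr = {
        .flags = HNSDV_CONTEXT_FLAGS_DCA,
        .comp_mask = HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE |
                     HNSDV_CONTEXT_MASK_DCA_MAX_SIZE,
        .dca_unit_size = 64 * 1024,       /* pool grows in 64KB units */
        .dca_max_size = 16 * 1024 * 1024, /* cap the pool at 16MB */
};
struct ibv_context *ctx = hnsdv_open_device(device, &ctx_attr);

struct hnsdv_qp_init_attr dca_attr = {
        .comp_mask = HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS,
        .create_flags = HNSDV_QP_CREATE_ENABLE_DCA_MODE,
};
struct ibv_qp_init_attr_ex init_attr_ex = {
        .qp_type = IBV_QPT_RC, /* DCA applies to RC and XRC_SEND QPs */
        /* .pd, .send_cq, .recv_cq, .cap, .comp_mask, ... omitted here */
};
struct ibv_qp *qp = hnsdv_create_qp(ctx, &init_attr_ex, &dca_attr);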
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 debian/control                   |  2 +-
 providers/hns/hns_roce_u.c       | 80 ++++++++++++++++++++++++++++----
 providers/hns/hns_roce_u.h       |  4 +-
 providers/hns/hns_roce_u_buf.c   |  3 ++
 providers/hns/hns_roce_u_verbs.c | 39 ++++++++++++++--
 providers/hns/hnsdv.h            | 29 +++++++++++-
 providers/hns/libhns.map         |  1 +
 7 files changed, 140 insertions(+), 18 deletions(-)
diff --git a/debian/control b/debian/control index 160824f..2a55372 100644 --- a/debian/control +++ b/debian/control @@ -87,7 +87,7 @@ Description: User space provider drivers for libibverbs - efa: Amazon Elastic Fabric Adapter - erdma: Alibaba Elastic RDMA (iWarp) Adapter - hfi1verbs: Intel Omni-Path HFI - - hns: HiSilicon Hip06 SoC + - hns: HiSilicon Hip08+ SoC - ipathverbs: QLogic InfiniPath HCAs - irdma: Intel Ethernet Connection RDMA - mana: Microsoft Azure Network Adapter diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 56ff201..93a0312 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -132,8 +132,55 @@ static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, return 0; }
+struct ibv_context *hnsdv_open_device(struct ibv_device *device, + struct hnsdv_context_attr *attr) +{ + if (!is_hns_dev(device)) { + errno = EOPNOTSUPP; + return NULL; + } + + return verbs_open_device(device, attr); +} + +static void set_dca_pool_param(struct hns_roce_context *ctx, + struct hnsdv_context_attr *attr, int page_size) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + + if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE) + dca_ctx->unit_size = align(attr->dca_unit_size, page_size); + else + dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; + + /* The memory pool cannot be expanded, only init the DCA context. */ + if (dca_ctx->unit_size == 0) + return; + + /* If not set, the memory pool can be expanded unlimitedly. */ + if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MAX_SIZE) + dca_ctx->max_size = DIV_ROUND_UP(attr->dca_max_size, + dca_ctx->unit_size) * + dca_ctx->unit_size; + else + dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; + + /* If not set, the memory pool cannot be shrunk. */ + if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_MIN_SIZE) + dca_ctx->min_size = DIV_ROUND_UP(attr->dca_min_size, + dca_ctx->unit_size) * + dca_ctx->unit_size; + else + dca_ctx->min_size = HNS_DCA_MAX_MEM_SIZE; + + verbs_debug(&ctx->ibv_ctx, + "Support DCA, unit %u, max %lu, min %lu Bytes.\n", + dca_ctx->unit_size, dca_ctx->max_size, dca_ctx->min_size); +} + static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, struct hns_roce_alloc_ucontext_resp *resp, + struct hnsdv_context_attr *attr, int page_size) { struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; @@ -145,14 +192,18 @@ static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) return 0;
+ dca_ctx->unit_size = 0; + dca_ctx->mem_cnt = 0; + list_head_init(&dca_ctx->mem_list); ret = pthread_spin_init(&dca_ctx->lock, PTHREAD_PROCESS_PRIVATE); if (ret) return ret;
- dca_ctx->unit_size = page_size * HNS_DCA_DEFAULT_UNIT_PAGES; - dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; - dca_ctx->mem_cnt = 0; + if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) + return 0; + + set_dca_pool_param(ctx, attr, page_size);
if (mmap_key) { const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; @@ -253,18 +304,28 @@ static int set_context_attr(struct hns_roce_device *hr_dev, return 0; }
-static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) +static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, + struct hnsdv_context_attr *attr) { cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | - HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; - cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; - cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; + HNS_ROCE_CQE_INLINE_FLAGS; + + if (!attr || !(attr->flags & HNSDV_CONTEXT_FLAGS_DCA)) + return; + + cmd->config |= HNS_ROCE_UCTX_CONFIG_DCA; + + if (attr->comp_mask & HNSDV_CONTEXT_MASK_DCA_PRIME_QPS) { + cmd->comp |= HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; + cmd->dca_max_qps = attr->dca_prime_qps; + } }
static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) { + struct hnsdv_context_attr *ctx_attr = private_data; struct hns_roce_device *hr_dev = to_hr_dev(ibdev); struct hns_roce_alloc_ucontext_resp resp = {}; struct hns_roce_alloc_ucontext cmd = {}; @@ -275,7 +336,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (!context) return NULL;
- ucontext_set_cmd(&cmd, hr_dev->page_size); + ucontext_set_cmd(&cmd, ctx_attr); if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) goto err_free; @@ -288,7 +349,8 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (context->uar == MAP_FAILED) goto err_free;
- if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) + if (init_dca_context(context, cmd_fd, + &resp, ctx_attr, hr_dev->page_size)) goto err_free;
if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index 5bddb00..691bf61 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -584,6 +584,8 @@ static inline void clear_bit_unlock(atomic_bitmap_t *p, uint32_t nr) atomic_fetch_and(p, ~HNS_ROCE_BIT_MASK(nr)); }
+bool is_hns_dev(struct ibv_device *device); + int hns_roce_u_query_device(struct ibv_context *context, const struct ibv_query_device_ex_input *input, struct ibv_device_attr_ex *attr, size_t attr_size); @@ -672,8 +674,6 @@ void hns_roce_cleanup_dca_mem(struct hns_roce_context *ctx);
void hns_roce_init_qp_indices(struct hns_roce_qp *qp);
-bool is_hns_dev(struct ibv_device *device); - extern const struct hns_roce_u_hw hns_roce_u_hw_v2;
#endif /* _HNS_ROCE_U_H */ diff --git a/providers/hns/hns_roce_u_buf.c b/providers/hns/hns_roce_u_buf.c index 08c0fbc..780683e 100644 --- a/providers/hns/hns_roce_u_buf.c +++ b/providers/hns/hns_roce_u_buf.c @@ -56,6 +56,9 @@ int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
void hns_roce_free_buf(struct hns_roce_buf *buf) { + if (!buf->buf) + return; + ibv_dofork_range(buf->buf, buf->length);
munmap(buf->buf, buf->length); diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 248d862..8964d53 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -1072,6 +1072,15 @@ enum { IBV_QP_INIT_ATTR_SEND_OPS_FLAGS, };
+enum { + SEND_OPS_FLAG_MASK = + IBV_QP_EX_WITH_RDMA_WRITE | IBV_QP_EX_WITH_RDMA_WRITE_WITH_IMM | + IBV_QP_EX_WITH_SEND | IBV_QP_EX_WITH_SEND_WITH_IMM | + IBV_QP_EX_WITH_RDMA_READ | IBV_QP_EX_WITH_ATOMIC_CMP_AND_SWP | + IBV_QP_EX_WITH_ATOMIC_FETCH_AND_ADD | IBV_QP_EX_WITH_LOCAL_INV | + IBV_QP_EX_WITH_SEND_WITH_INV, +}; + static int check_qp_create_mask(struct hns_roce_context *ctx, struct ibv_qp_init_attr_ex *attr) { @@ -1080,6 +1089,10 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) return EOPNOTSUPP;
+ if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS && + !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK)) + return -EOPNOTSUPP; + switch (attr->qp_type) { case IBV_QPT_UD: if (hr_dev->hw_version == HNS_ROCE_HW_VER2) @@ -1311,9 +1324,21 @@ static int calc_qp_buff_size(struct hns_roce_device *hr_dev, return 0; }
-static inline bool check_qp_support_dca(bool pool_en, enum ibv_qp_type qp_type) +static inline bool check_qp_support_dca(struct hns_roce_dca_ctx *dca_ctx, + struct ibv_qp_init_attr_ex *attr, + struct hnsdv_qp_init_attr *hns_attr) { - if (pool_en && (qp_type == IBV_QPT_RC || qp_type == IBV_QPT_XRC_SEND)) + /* DCA pool disable */ + if (!dca_ctx->unit_size) + return false; + + /* Unsupport type */ + if (attr->qp_type != IBV_QPT_RC && attr->qp_type != IBV_QPT_XRC_SEND) + return false; + + if (hns_attr && + (hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS) && + (hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_DCA_MODE)) return true;
return false; @@ -1331,6 +1356,7 @@ static void qp_free_wqe(struct hns_roce_qp *qp) }
static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, + struct hnsdv_qp_init_attr *hns_attr, struct hns_roce_qp *qp, struct hns_roce_context *ctx) { struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); @@ -1354,7 +1380,8 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, goto err_alloc; }
- if (check_qp_support_dca(ctx->dca_ctx.max_size != 0, attr->qp_type)) { + if (check_qp_support_dca(&ctx->dca_ctx, attr, hns_attr) && + ctx->dca_ctx.max_size > 0) { /* when DCA is enabled, use a buffer list to store page addr */ qp->buf.buf = NULL; qp->dca_wqe.max_cnt = hr_hw_page_count(qp->buf_size); @@ -1362,6 +1389,7 @@ static int qp_alloc_wqe(struct ibv_qp_init_attr_ex *attr, qp->dca_wqe.bufs = calloc(qp->dca_wqe.max_cnt, sizeof(void *)); if (!qp->dca_wqe.bufs) goto err_alloc; + verbs_debug(&ctx->ibv_ctx, "alloc DCA buf.\n"); } else { if (hns_roce_alloc_buf(&qp->buf, qp->buf_size, HNS_HW_PAGE_SIZE)) @@ -1651,12 +1679,13 @@ void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx) }
static int hns_roce_alloc_qp_buf(struct ibv_qp_init_attr_ex *attr, + struct hnsdv_qp_init_attr *hns_attr, struct hns_roce_qp *qp, struct hns_roce_context *ctx) { int ret;
- ret = qp_alloc_wqe(attr, qp, ctx); + ret = qp_alloc_wqe(attr, hns_attr, qp, ctx); if (ret) return ret;
@@ -1731,7 +1760,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, if (ret) goto err_spinlock;
- ret = hns_roce_alloc_qp_buf(attr, qp, context); + ret = hns_roce_alloc_qp_buf(attr, hns_attr, qp, context); if (ret) goto err_buf;
diff --git a/providers/hns/hnsdv.h b/providers/hns/hnsdv.h index 451b26e..68bf001 100644 --- a/providers/hns/hnsdv.h +++ b/providers/hns/hnsdv.h @@ -22,17 +22,42 @@ enum hnsdv_qp_congest_ctrl_type { HNSDV_QP_CREATE_ENABLE_DIP = 1 << 3, };
+enum hnsdv_qp_create_flags { + HNSDV_QP_CREATE_ENABLE_DCA_MODE = 1 << 0, +}; + +enum hnsdv_context_comp_mask { + HNSDV_CONTEXT_MASK_DCA_PRIME_QPS = 1 << 0, + HNSDV_CONTEXT_MASK_DCA_UNIT_SIZE = 1 << 1, + HNSDV_CONTEXT_MASK_DCA_MAX_SIZE = 1 << 2, + HNSDV_CONTEXT_MASK_DCA_MIN_SIZE = 1 << 3, +}; + enum hnsdv_qp_init_attr_mask { + HNSDV_QP_INIT_ATTR_MASK_QP_CREATE_FLAGS = 1 << 0, HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE = 1 << 1, };
+struct hnsdv_context_attr { + uint64_t flags; /* Use enum hnsdv_context_attr_flags */ + uint64_t comp_mask; /* Use enum hnsdv_context_comp_mask */ + uint32_t dca_prime_qps; + uint32_t dca_unit_size; + uint64_t dca_max_size; + uint64_t dca_min_size; +}; + struct hnsdv_qp_init_attr { uint64_t comp_mask; /* Use enum hnsdv_qp_init_attr_mask */ - uint32_t create_flags; + uint32_t create_flags; /* Use enum hnsdv_qp_create_flags */ uint8_t congest_type; /* Use enum hnsdv_qp_congest_ctrl_type */ uint8_t reserved[3]; };
+enum hnsdv_context_attr_flags { + HNSDV_CONTEXT_FLAGS_DCA = 1 << 0, +}; + enum hnsdv_query_context_comp_mask { HNSDV_CONTEXT_MASK_CONGEST_TYPE = 1 << 0, }; @@ -50,6 +75,8 @@ int hnsdv_query_device(struct ibv_context *ctx_in, struct ibv_qp *hnsdv_create_qp(struct ibv_context *context, struct ibv_qp_init_attr_ex *qp_attr, struct hnsdv_qp_init_attr *hns_qp_attr); +struct ibv_context *hnsdv_open_device(struct ibv_device *device, + struct hnsdv_context_attr *attr);
#ifdef __cplusplus } diff --git a/providers/hns/libhns.map b/providers/hns/libhns.map index e9bf417..a955346 100644 --- a/providers/hns/libhns.map +++ b/providers/hns/libhns.map @@ -5,5 +5,6 @@ HNS_1.0 { hnsdv_is_supported; hnsdv_create_qp; hnsdv_query_device; + hnsdv_open_device; local: *; };