driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I9C2AQ
------------------------------------------------------------------
The user-space DCA code needs to check a QP's attach state before filling its WQE buffer. Doing this through the response of the uverbs method 'HNS_IB_METHOD_DCA_MEM_ATTACH' wastes too much time on system calls, so use a table shared between the user driver and the kernel driver to sync the DCA status instead.
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Reviewed-by: Yangyang Li <liyangyang20@huawei.com>
---
 providers/hns/hns_roce_u.c | 51 +++++++++++++++++++++++++++++++++++---
 providers/hns/hns_roce_u.h | 10 ++++++++
 2 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 2272431..56ff201 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -113,9 +113,33 @@ static int hns_roce_mmap(struct hns_roce_device *hr_dev, return 0; }
-static int init_dca_context(struct hns_roce_context *ctx, int page_size) +static int mmap_dca(struct hns_roce_context *ctx, int cmd_fd, + int page_size, size_t size, uint64_t mmap_key) { struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + void *addr; + + addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, cmd_fd, + mmap_key); + if (addr == MAP_FAILED) { + verbs_err(&ctx->ibv_ctx, "failed to mmap() dca prime qp.\n"); + return -EINVAL; + } + + dca_ctx->buf_status = addr; + dca_ctx->sync_status = addr + size / 2; + + return 0; +} + +static int init_dca_context(struct hns_roce_context *ctx, int cmd_fd, + struct hns_roce_alloc_ucontext_resp *resp, + int page_size) +{ + struct hns_roce_dca_ctx *dca_ctx = &ctx->dca_ctx; + uint64_t mmap_key = resp->dca_mmap_key; + int mmap_size = resp->dca_mmap_size; + int max_qps = resp->dca_qps; int ret;
if (!(ctx->config & HNS_ROCE_UCTX_RSP_DCA_FLAGS)) @@ -130,6 +154,16 @@ static int init_dca_context(struct hns_roce_context *ctx, int page_size) dca_ctx->max_size = HNS_DCA_MAX_MEM_SIZE; dca_ctx->mem_cnt = 0;
+ if (mmap_key) { + const unsigned int bits_per_qp = 2 * HNS_DCA_BITS_PER_STATUS; + + if (!mmap_dca(ctx, cmd_fd, page_size, mmap_size, mmap_key)) { + dca_ctx->status_size = mmap_size; + dca_ctx->max_qps = min_t(int, max_qps, + mmap_size * 8 / bits_per_qp); + } + } + return 0; }
@@ -143,6 +177,8 @@ static void uninit_dca_context(struct hns_roce_context *ctx) pthread_spin_lock(&dca_ctx->lock); hns_roce_cleanup_dca_mem(ctx); pthread_spin_unlock(&dca_ctx->lock); + if (dca_ctx->buf_status) + munmap(dca_ctx->buf_status, dca_ctx->status_size);
pthread_spin_destroy(&dca_ctx->lock); } @@ -217,6 +253,14 @@ static int set_context_attr(struct hns_roce_device *hr_dev, return 0; }
+static void ucontext_set_cmd(struct hns_roce_alloc_ucontext *cmd, int page_size) +{ + cmd->config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | + HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + cmd->comp = HNS_ROCE_ALLOC_UCTX_COMP_DCA_MAX_QPS; + cmd->dca_max_qps = page_size * 8 / 2 * HNS_DCA_BITS_PER_STATUS; +} + static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, int cmd_fd, void *private_data) @@ -231,8 +275,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (!context) return NULL;
- cmd.config |= HNS_ROCE_EXSGE_FLAGS | HNS_ROCE_RQ_INLINE_FLAGS | - HNS_ROCE_CQE_INLINE_FLAGS | HNS_ROCE_UCTX_CONFIG_DCA; + ucontext_set_cmd(&cmd, hr_dev->page_size); if (ibv_cmd_get_context(&context->ibv_ctx, &cmd.ibv_cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp))) goto err_free; @@ -245,7 +288,7 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, if (context->uar == MAP_FAILED) goto err_free;
- if (init_dca_context(context, hr_dev->page_size)) + if (init_dca_context(context, cmd_fd, &resp, hr_dev->page_size)) goto err_free;
if (init_reset_context(context, cmd_fd, &resp, hr_dev->page_size)) diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index ba646d3..e808ff3 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -35,6 +35,7 @@
#include <stddef.h> #include <endian.h> +#include <stdatomic.h> #include <util/compiler.h>
#include <infiniband/driver.h> @@ -44,6 +45,7 @@ #include <ccan/array_size.h> #include <util/bitmap.h> #include <ccan/container_of.h> +#include <ccan/minmax.h> #include <linux/if_ether.h> #include "hns_roce_u_abi.h"
@@ -52,6 +54,8 @@
#define PFX "hns: "
+typedef _Atomic(uint64_t) atomic_bitmap_t; + /* The minimum page size is 4K for hardware */ #define HNS_HW_PAGE_SHIFT 12 #define HNS_HW_PAGE_SIZE (1 << HNS_HW_PAGE_SHIFT) @@ -216,6 +220,12 @@ struct hns_roce_dca_ctx { uint64_t max_size; uint64_t min_size; uint64_t curr_size; + +#define HNS_DCA_BITS_PER_STATUS 1 + unsigned int max_qps; + unsigned int status_size; + atomic_bitmap_t *buf_status; + atomic_bitmap_t *sync_status; };
struct hns_roce_v2_reset_state {