From: zzry <1245464216@qq.com>
Add support for thread domain (TD) and parent domain (PAD). Extend the
original hns_roce_pd struct into hns_roce_pad, which embeds it together
with a pointer to the new hns_roce_td struct. When a parent domain holds
a thread domain, the associated data path is put into lock-free mode to
improve performance.
---
 providers/hns/hns_roce_u.c       |   5 +-
 providers/hns/hns_roce_u.h       |  69 +++++++++++++-
 providers/hns/hns_roce_u_verbs.c | 156 ++++++++++++++++++++++++++++---
 3 files changed, 215 insertions(+), 15 deletions(-)
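For reviewers less familiar with the verbs flow, the sketch below shows
roughly how an application would drive the new callbacks through the
generic libibverbs entry points. It is illustration only, not part of
the patch; alloc_lockfree_pad is an invented name and error handling is
abbreviated.

#include <infiniband/verbs.h>

/* Allocate a PD and a TD, then combine them in a parent domain;
 * QPs/CQs created on the returned parent domain take the lock-free
 * data path added by this patch. */
static struct ibv_pd *alloc_lockfree_pad(struct ibv_context *ctx)
{
        struct ibv_td_init_attr td_attr = {};
        struct ibv_parent_domain_init_attr pad_attr = {};
        struct ibv_pd *pd;
        struct ibv_td *td;

        pd = ibv_alloc_pd(ctx);
        if (!pd)
                return NULL;

        /* The TD is the application's promise that objects bound to
         * it are never used from several threads concurrently. */
        td = ibv_alloc_td(ctx, &td_attr);
        if (!td) {
                ibv_dealloc_pd(pd);
                return NULL;
        }

        pad_attr.pd = pd;
        pad_attr.td = td;
        return ibv_alloc_parent_domain(ctx, &pad_attr);
}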
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c
index 266e73e..e3c72bb 100644
--- a/providers/hns/hns_roce_u.c
+++ b/providers/hns/hns_roce_u.c
@@ -67,7 +67,7 @@ static const struct verbs_context_ops hns_common_ops = {
         .create_qp = hns_roce_u_create_qp,
         .create_qp_ex = hns_roce_u_create_qp_ex,
         .dealloc_mw = hns_roce_u_dealloc_mw,
-        .dealloc_pd = hns_roce_u_free_pd,
+        .dealloc_pd = hns_roce_u_dealloc_pd,
         .dereg_mr = hns_roce_u_dereg_mr,
         .destroy_cq = hns_roce_u_destroy_cq,
         .modify_cq = hns_roce_u_modify_cq,
@@ -88,6 +88,9 @@ static const struct verbs_context_ops hns_common_ops = {
         .close_xrcd = hns_roce_u_close_xrcd,
         .open_qp = hns_roce_u_open_qp,
         .get_srq_num = hns_roce_u_get_srq_num,
+        .alloc_td = hns_roce_u_alloc_td,
+        .dealloc_td = hns_roce_u_dealloc_td,
+        .alloc_parent_domain = hns_roce_u_alloc_pad,
 };
 
 static uint32_t calc_table_shift(uint32_t entry_count, uint32_t size_shift)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index afb68fe..338f162 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -190,6 +190,11 @@ struct hns_roce_db_page {
         unsigned long *bitmap;
 };
 
+struct hns_roce_spinlock {
+        pthread_spinlock_t lock;
+        int need_lock;
+};
+
 struct hns_roce_context {
         struct verbs_context ibv_ctx;
         void *uar;
@@ -224,9 +229,21 @@ struct hns_roce_context {
         unsigned int max_inline_data;
 };
 
+struct hns_roce_td {
+        struct ibv_td ibv_td;
+        atomic_int refcount;
+};
+
 struct hns_roce_pd {
         struct ibv_pd ibv_pd;
         unsigned int pdn;
+        atomic_int refcount;
+        struct hns_roce_pd *protection_domain;
+};
+
+struct hns_roce_pad {
+        struct hns_roce_pd pd;
+        struct hns_roce_td *td;
 };
 
 struct hns_roce_cq {
@@ -398,9 +415,35 @@ static inline struct hns_roce_context *to_hr_ctx(struct ibv_context *ibv_ctx)
         return container_of(ibv_ctx, struct hns_roce_context, ibv_ctx.context);
 }
 
+static inline struct hns_roce_td *to_hr_td(struct ibv_td *ibv_td)
+{
+        return container_of(ibv_td, struct hns_roce_td, ibv_td);
+}
+
+/* to_hr_pd always returns the real hns_roce_pd obj. */
 static inline struct hns_roce_pd *to_hr_pd(struct ibv_pd *ibv_pd)
 {
-        return container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
+        struct hns_roce_pd *pd =
+                container_of(ibv_pd, struct hns_roce_pd, ibv_pd);
+
+        if (pd->protection_domain)
+                return pd->protection_domain;
+
+        return pd;
+}
+
+static inline struct hns_roce_pad *to_hr_pad(struct ibv_pd *ibv_pd)
+{
+        struct hns_roce_pad *pad =
+                ibv_pd ?
+                container_of(ibv_pd, struct hns_roce_pad, pd.ibv_pd) :
+                NULL;
+
+        if (pad && pad->pd.protection_domain)
+                return pad;
+
+        /* Otherwise ibv_pd isn't a parent_domain */
+        return NULL;
 }
 
 static inline struct hns_roce_cq *to_hr_cq(struct ibv_cq *ibv_cq)
@@ -423,14 +466,35 @@ static inline struct hns_roce_ah *to_hr_ah(struct ibv_ah *ibv_ah)
         return container_of(ibv_ah, struct hns_roce_ah, ibv_ah);
 }
 
+static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock)
+{
+        if (hr_lock->need_lock)
+                return pthread_spin_lock(&hr_lock->lock);
+
+        return 0;
+}
+
+static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
+{
+        if (hr_lock->need_lock)
+                return pthread_spin_unlock(&hr_lock->lock);
+
+        return 0;
+}
+
 int hns_roce_u_query_device(struct ibv_context *context,
                             const struct ibv_query_device_ex_input *input,
                             struct ibv_device_attr_ex *attr, size_t attr_size);
 int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
                           struct ibv_port_attr *attr);
 
+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
+                                   struct ibv_td_init_attr *attr);
+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td);
+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
+                                    struct ibv_parent_domain_init_attr *attr);
 struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context);
-int hns_roce_u_free_pd(struct ibv_pd *pd);
+int hns_roce_u_dealloc_pd(struct ibv_pd *pd);
 
 struct ibv_mr *hns_roce_u_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
                                  uint64_t hca_va, int access);
@@ -489,6 +553,7 @@ int hns_roce_u_close_xrcd(struct ibv_xrcd *ibv_xrcd);
 int hns_roce_alloc_buf(struct hns_roce_buf *buf, unsigned int size,
                        int page_size);
 void hns_roce_free_buf(struct hns_roce_buf *buf);
+void hns_roce_qp_spinlock_destroy(struct hns_roce_qp *qp);
 
 void hns_roce_free_qp_buf(struct hns_roce_qp *qp, struct hns_roce_context *ctx);
 
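The header additions above carry the whole locking model: each data-path
lock moves behind hns_roce_spinlock, and need_lock is fixed when the
owning object is created. A hypothetical caller would look like the
sketch below (illustration only, not part of the patch; post_one_wqe is
an invented name, and the hns_roce_spinlock definitions from
hns_roce_u.h above are assumed):

static int post_one_wqe(struct hns_roce_spinlock *sq_lock)
{
        int ret;

        /* Falls through without locking when the owning parent domain
         * carries a TD (need_lock == 0); takes the pthread spinlock
         * otherwise. */
        ret = hns_roce_spin_lock(sq_lock);
        if (ret)
                return ret;

        /* ... build the WQE and ring the doorbell ... */

        return hns_roce_spin_unlock(sq_lock);
}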
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index 34f7ee4..89114f3 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -33,6 +33,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <math.h>
 #include <errno.h>
 #include <pthread.h>
 #include <sys/mman.h>
@@ -42,6 +43,37 @@
 #include "hns_roce_u_db.h"
 #include "hns_roce_u_hw_v2.h"
 
+static bool hns_roce_whether_need_lock(struct ibv_pd *pd)
+{
+        struct hns_roce_pad *pad;
+
+        pad = to_hr_pad(pd);
+        if (pad && pad->td)
+                return false;
+
+        return true;
+}
+
+static int hns_roce_spinlock_init(struct hns_roce_spinlock *hr_lock,
+                                  bool need_lock)
+{
+        hr_lock->need_lock = need_lock;
+
+        if (need_lock)
+                return pthread_spin_init(&hr_lock->lock,
+                                         PTHREAD_PROCESS_PRIVATE);
+
+        return 0;
+}
+
+static int hns_roce_spinlock_destroy(struct hns_roce_spinlock *hr_lock)
+{
+        if (hr_lock->need_lock)
+                return pthread_spin_destroy(&hr_lock->lock);
+
+        return 0;
+}
+
 void hns_roce_init_qp_indices(struct hns_roce_qp *qp)
 {
         qp->sq.head = 0;
@@ -85,38 +117,138 @@ int hns_roce_u_query_port(struct ibv_context *context, uint8_t port,
         return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
 }
 
+struct ibv_td *hns_roce_u_alloc_td(struct ibv_context *context,
+                                   struct ibv_td_init_attr *attr)
+{
+        struct hns_roce_td *td;
+
+        if (attr->comp_mask) {
+                errno = EOPNOTSUPP;
+                return NULL;
+        }
+
+        td = calloc(1, sizeof(*td));
+        if (!td) {
+                errno = ENOMEM;
+                return NULL;
+        }
+
+        td->ibv_td.context = context;
+        atomic_init(&td->refcount, 1);
+
+        return &td->ibv_td;
+}
+
+int hns_roce_u_dealloc_td(struct ibv_td *ibv_td)
+{
+        struct hns_roce_td *td;
+
+        td = to_hr_td(ibv_td);
+        if (atomic_load(&td->refcount) > 1)
+                return EBUSY;
+
+        free(td);
+
+        return 0;
+}
+
 struct ibv_pd *hns_roce_u_alloc_pd(struct ibv_context *context)
 {
+        struct hns_roce_alloc_pd_resp resp = {};
         struct ibv_alloc_pd cmd;
         struct hns_roce_pd *pd;
-        struct hns_roce_alloc_pd_resp resp = {};
-
-        pd = malloc(sizeof(*pd));
-        if (!pd)
-                return NULL;
-        if (ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
-                             &resp.ibv_resp, sizeof(resp))) {
-                free(pd);
+
+        pd = calloc(1, sizeof(*pd));
+        if (!pd) {
+                errno = ENOMEM;
                 return NULL;
         }
 
+        errno = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd, sizeof(cmd),
+                                 &resp.ibv_resp, sizeof(resp));
+        if (errno)
+                goto err;
+
+        atomic_init(&pd->refcount, 1);
         pd->pdn = resp.pdn;
 
         return &pd->ibv_pd;
+
+err:
+        free(pd);
+        return NULL;
+}
+
+struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context,
+                                    struct ibv_parent_domain_init_attr *attr)
+{
+        struct hns_roce_pad *pad;
+
+        if (ibv_check_alloc_parent_domain(attr))
+                return NULL;
+
+        if (attr->comp_mask) {
+                errno = EOPNOTSUPP;
+                return NULL;
+        }
+
+        pad = calloc(1, sizeof(*pad));
+        if (!pad) {
+                errno = ENOMEM;
+                return NULL;
+        }
+
+        if (attr->td) {
+                pad->td = to_hr_td(attr->td);
+                atomic_fetch_add(&pad->td->refcount, 1);
+        }
+
+        pad->pd.protection_domain = to_hr_pd(attr->pd);
+        atomic_fetch_add(&pad->pd.protection_domain->refcount, 1);
+
+        atomic_init(&pad->pd.refcount, 1);
+        ibv_initialize_parent_domain(&pad->pd.ibv_pd,
+                                     &pad->pd.protection_domain->ibv_pd);
+
+        return &pad->pd.ibv_pd;
+}
+
+static void hns_roce_free_pad(struct hns_roce_pad *pad)
+{
+        atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1);
+
+        if (pad->td)
+                atomic_fetch_sub(&pad->td->refcount, 1);
+
+        free(pad);
 }
 
-int hns_roce_u_free_pd(struct ibv_pd *pd)
+static int hns_roce_free_pd(struct hns_roce_pd *pd)
 {
         int ret;
 
-        ret = ibv_cmd_dealloc_pd(pd);
+        if (atomic_load(&pd->refcount) > 1)
+                return EBUSY;
+
+        ret = ibv_cmd_dealloc_pd(&pd->ibv_pd);
         if (ret)
                 return ret;
 
-        free(to_hr_pd(pd));
+        free(pd);
+        return 0;
+}
 
-        return ret;
+int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd)
+{
+        struct hns_roce_pad *pad = to_hr_pad(ibv_pd);
+        struct hns_roce_pd *pd = to_hr_pd(ibv_pd);
+
+        if (pad) {
+                hns_roce_free_pad(pad);
+                return 0;
+        }
+
+        return hns_roce_free_pd(pd);
 }
 
 struct ibv_xrcd *hns_roce_u_open_xrcd(struct ibv_context *context,
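One consequence of the refcounting above is that teardown must be
child-first: the PAD holds a reference on both its PD and its TD, so
deallocating either while the PAD is alive returns EBUSY. Continuing
the allocation sketch from the cover note (illustrative only; 'pad',
'td' and 'pd' are the objects allocated there):

        ibv_dealloc_pd(pad);    /* dispatched to hns_roce_free_pad(),
                                 * drops the PD and TD references */
        ibv_dealloc_td(td);     /* refcount back to 1, succeeds now */
        ibv_dealloc_pd(pd);     /* plain PD, handled by hns_roce_free_pd() */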