From: zzry <1245464216@qq.com>
Drop CQ locks when the CQ is associated with a PAD that holds a TD.
---
 providers/hns/hns_roce_u.h       |  3 +-
 providers/hns/hns_roce_u_hw_v2.c | 46 +++++++++++++-------------
 providers/hns/hns_roce_u_verbs.c | 56 ++++++++++++++++++++++++++++----
 3 files changed, 74 insertions(+), 31 deletions(-)
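Note for reviewers (not part of the patch): the hns_roce_spinlock type and the
hns_roce_spin_lock()/hns_roce_spin_unlock()/hns_roce_spinlock_init()/
hns_roce_spinlock_destroy()/hns_roce_whether_need_lock() helpers used below are
introduced by an earlier patch in this series. A minimal sketch of how the
wrapper is assumed to behave (the exact upstream definition is not shown in
this patch): the pthread spinlock is only taken when need_lock is set, i.e.
when the CQ is not attached to a PAD that owns a TD.

#include <pthread.h>

/* Assumed layout: a plain spinlock plus a flag deciding whether to use it. */
struct hns_roce_spinlock {
	pthread_spinlock_t lock;
	int need_lock;
};

static inline int hns_roce_spin_lock(struct hns_roce_spinlock *hr_lock)
{
	/* Skip the atomic when the owner guarantees single-threaded use. */
	if (hr_lock->need_lock)
		return pthread_spin_lock(&hr_lock->lock);

	return 0;
}

static inline int hns_roce_spin_unlock(struct hns_roce_spinlock *hr_lock)
{
	if (hr_lock->need_lock)
		return pthread_spin_unlock(&hr_lock->lock);

	return 0;
}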
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h
index f414da9..e79b1ec 100644
--- a/providers/hns/hns_roce_u.h
+++ b/providers/hns/hns_roce_u.h
@@ -249,7 +249,7 @@ struct hns_roce_pad {
 struct hns_roce_cq {
 	struct verbs_cq verbs_cq;
 	struct hns_roce_buf buf;
-	pthread_spinlock_t lock;
+	struct hns_roce_spinlock hr_lock;
 	unsigned int cqn;
 	unsigned int cq_depth;
 	unsigned int cons_index;
@@ -259,6 +259,7 @@ struct hns_roce_cq {
 	unsigned long flags;
 	unsigned int cqe_size;
 	struct hns_roce_v2_cqe *cqe;
+	struct ibv_pd *parent_domain;
 };
 
 struct hns_roce_idx_que {
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 8b4af93..8b0c904 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -268,9 +268,9 @@ static int hns_roce_v2_wq_overflow(struct hns_roce_wq *wq, unsigned int nreq,
 	if (cur + nreq < wq->max_post)
 		return 0;
 
-	pthread_spin_lock(&cq->lock);
+	hns_roce_spin_lock(&cq->hr_lock);
 	cur = wq->head - wq->tail;
-	pthread_spin_unlock(&cq->lock);
+	hns_roce_spin_unlock(&cq->hr_lock);
 
 	return cur + nreq >= wq->max_post;
 }
@@ -724,7 +724,7 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
 	int err = V2_CQ_OK;
 	int npolled;
 
-	pthread_spin_lock(&cq->lock);
+	hns_roce_spin_lock(&cq->hr_lock);
 
 	for (npolled = 0; npolled < ne; ++npolled) {
 		err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled);
@@ -739,7 +739,7 @@
 			update_cq_db(ctx, cq);
 	}
 
-	pthread_spin_unlock(&cq->lock);
+	hns_roce_spin_unlock(&cq->hr_lock);
 
 	return err == V2_CQ_POLL_ERR ? err : npolled;
 }
@@ -1510,9 +1510,9 @@ static void __hns_roce_v2_cq_clean(struct hns_roce_cq *cq, uint32_t qpn,
 static void hns_roce_v2_cq_clean(struct hns_roce_cq *cq, unsigned int qpn,
 				 struct hns_roce_srq *srq)
 {
-	pthread_spin_lock(&cq->lock);
+	hns_roce_spin_lock(&cq->hr_lock);
 	__hns_roce_v2_cq_clean(cq, qpn, srq);
-	pthread_spin_unlock(&cq->lock);
+	hns_roce_spin_unlock(&cq->hr_lock);
 }
 
 static void record_qp_attr(struct ibv_qp *qp, struct ibv_qp_attr *attr,
@@ -1588,18 +1588,18 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp)
 
 	if (send_cq && recv_cq) {
 		if (send_cq == recv_cq) {
-			pthread_spin_lock(&send_cq->lock);
+			hns_roce_spin_lock(&send_cq->hr_lock);
 		} else if (send_cq->cqn < recv_cq->cqn) {
-			pthread_spin_lock(&send_cq->lock);
-			pthread_spin_lock(&recv_cq->lock);
+			hns_roce_spin_lock(&send_cq->hr_lock);
+			hns_roce_spin_lock(&recv_cq->hr_lock);
 		} else {
-			pthread_spin_lock(&recv_cq->lock);
-			pthread_spin_lock(&send_cq->lock);
+			hns_roce_spin_lock(&recv_cq->hr_lock);
+			hns_roce_spin_lock(&send_cq->hr_lock);
 		}
 	} else if (send_cq) {
-		pthread_spin_lock(&send_cq->lock);
+		hns_roce_spin_lock(&send_cq->hr_lock);
 	} else if (recv_cq) {
-		pthread_spin_lock(&recv_cq->lock);
+		hns_roce_spin_lock(&recv_cq->hr_lock);
 	}
 }
 
@@ -1610,18 +1610,18 @@ static void hns_roce_unlock_cqs(struct ibv_qp *qp)
 
 	if (send_cq && recv_cq) {
 		if (send_cq == recv_cq) {
-			pthread_spin_unlock(&send_cq->lock);
+			hns_roce_spin_unlock(&send_cq->hr_lock);
 		} else if (send_cq->cqn < recv_cq->cqn) {
-			pthread_spin_unlock(&recv_cq->lock);
-			pthread_spin_unlock(&send_cq->lock);
+			hns_roce_spin_unlock(&recv_cq->hr_lock);
+			hns_roce_spin_unlock(&send_cq->hr_lock);
 		} else {
-			pthread_spin_unlock(&send_cq->lock);
-			pthread_spin_unlock(&recv_cq->lock);
+			hns_roce_spin_unlock(&send_cq->hr_lock);
+			hns_roce_spin_unlock(&recv_cq->hr_lock);
 		}
 	} else if (send_cq) {
-		pthread_spin_unlock(&send_cq->lock);
+		hns_roce_spin_unlock(&send_cq->hr_lock);
 	} else if (recv_cq) {
-		pthread_spin_unlock(&recv_cq->lock);
+		hns_roce_spin_unlock(&recv_cq->hr_lock);
 	}
 }
 
@@ -1799,11 +1799,11 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
 	if (attr->comp_mask)
 		return EINVAL;
 
-	pthread_spin_lock(&cq->lock);
+	hns_roce_spin_lock(&cq->hr_lock);
 
 	err = hns_roce_poll_one(ctx, &qp, cq, NULL);
 	if (err != V2_CQ_OK)
-		pthread_spin_unlock(&cq->lock);
+		hns_roce_spin_unlock(&cq->hr_lock);
 
 	return err;
 }
@@ -1837,7 +1837,7 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current)
 	else
 		update_cq_db(ctx, cq);
 
-	pthread_spin_unlock(&cq->lock);
+	hns_roce_spin_unlock(&cq->hr_lock);
 }
 
 static enum ibv_wc_opcode wc_read_opcode(struct ibv_cq_ex *current)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c
index eb52ff9..cc37f44 100644
--- a/providers/hns/hns_roce_u_verbs.c
+++ b/providers/hns/hns_roce_u_verbs.c
@@ -407,6 +407,11 @@ int hns_roce_u_dealloc_mw(struct ibv_mw *mw)
 	return 0;
 }
 
+enum {
+	CREATE_CQ_SUPPORTED_COMP_MASK = IBV_CQ_INIT_ATTR_MASK_FLAGS |
+					IBV_CQ_INIT_ATTR_MASK_PD,
+};
+
 enum {
 	CREATE_CQ_SUPPORTED_WC_FLAGS = IBV_WC_STANDARD_FLAGS |
 				       IBV_WC_EX_WITH_CVLAN,
@@ -415,21 +420,47 @@
 static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr,
 				 struct hns_roce_context *context)
 {
+	struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain);
+
 	if (!attr->cqe || attr->cqe > context->max_cqe)
 		return EINVAL;
 
-	if (attr->comp_mask)
+	if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) {
+		verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n",
+			  attr->comp_mask);
 		return EOPNOTSUPP;
+	}
 
 	if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS))
 		return EOPNOTSUPP;
 
+	if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) {
+		if (!pad) {
+			verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n");
+			return EINVAL;
+		}
+		atomic_fetch_add(&pad->pd.refcount, 1);
+	}
+
 	attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM,
 			  roundup_pow_of_two(attr->cqe));
 
 	return 0;
 }
 
+static int hns_roce_cq_spinlock_init(struct ibv_context *context,
+				     struct hns_roce_cq *cq,
+				     struct ibv_cq_init_attr_ex *attr)
+{
+	bool need_lock;
+
+	need_lock = hns_roce_whether_need_lock(attr->parent_domain);
+	if (!need_lock)
+		verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");
+
+	return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
+}
+
 static int hns_roce_alloc_cq_buf(struct hns_roce_cq *cq)
 {
 	int buf_size = hr_hw_page_align(cq->cq_depth * cq->cqe_size);
@@ -486,7 +517,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
 		goto err;
 	}
 
-	ret = pthread_spin_init(&cq->lock, PTHREAD_PROCESS_PRIVATE);
+	if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
+		cq->parent_domain = attr->parent_domain;
+
+	ret = hns_roce_cq_spinlock_init(context, cq, attr);
 	if (ret)
 		goto err_lock;
 
@@ -517,8 +551,9 @@ err_cmd:
 	hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB);
 err_db:
 	hns_roce_free_buf(&cq->buf);
-err_lock:
 err_buf:
+	hns_roce_spinlock_destroy(&cq->hr_lock);
+err_lock:
 	free(cq);
 err:
 	if (ret < 0)
@@ -569,16 +604,23 @@ int hns_roce_u_modify_cq(struct ibv_cq *cq, struct ibv_modify_cq_attr *attr)
 
 int hns_roce_u_destroy_cq(struct ibv_cq *cq)
 {
+	struct hns_roce_cq *hr_cq = to_hr_cq(cq);
+	struct hns_roce_pad *pad = to_hr_pad(hr_cq->parent_domain);
 	int ret;
 
 	ret = ibv_cmd_destroy_cq(cq);
 	if (ret)
 		return ret;
 
-	hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db,
-			 HNS_ROCE_CQ_TYPE_DB);
-	hns_roce_free_buf(&to_hr_cq(cq)->buf);
-	free(to_hr_cq(cq));
+	hns_roce_free_db(to_hr_ctx(cq->context), hr_cq->db, HNS_ROCE_CQ_TYPE_DB);
+	hns_roce_free_buf(&hr_cq->buf);
+
+	hns_roce_spinlock_destroy(&hr_cq->hr_lock);
+
+	if (pad)
+		atomic_fetch_sub(&pad->pd.refcount, 1);
+
+	free(hr_cq);
 
 	return ret;
 }
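
Caller-side usage sketch (not part of the patch, values are illustrative;
ctx and pd are assumed to come from ibv_open_device()/ibv_alloc_pd()): a CQ
created with IBV_CQ_INIT_ATTR_MASK_PD on a parent domain whose thread domain
was allocated by the application is the case that skips the CQ spinlock.

	/* Illustrative only: error handling trimmed. */
	struct ibv_td_init_attr td_attr = { .comp_mask = 0 };
	struct ibv_td *td = ibv_alloc_td(ctx, &td_attr);

	struct ibv_parent_domain_init_attr pad_attr = {
		.pd = pd,	/* ordinary PD allocated earlier */
		.td = td,	/* TD marks the PAD as single-threaded */
	};
	struct ibv_pd *pad = ibv_alloc_parent_domain(ctx, &pad_attr);

	struct ibv_cq_init_attr_ex cq_attr = {
		.cqe = 64,
		.comp_mask = IBV_CQ_INIT_ATTR_MASK_PD,
		.parent_domain = pad,
	};
	struct ibv_cq_ex *cq = ibv_create_cq_ex(ctx, &cq_attr);

Since verify_cq_create_attr() takes a reference on the PAD and
hns_roce_u_destroy_cq() drops it, the PAD must outlive the CQ.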