[PATCH v1.1 rdma-core 0/5] Bugfixes and one debug improvement

The first commit adds some debug logs for lock-free mode so that we can intuitively observe whether the lock-free mode is configured. The following three commits fix some errors. The issue of the last commit was found during the testing of the previous ones, where I created a XRC SRQ in lock-free mode but failed to destroy it because of the refcnt check added in the second commit. The failure was because the PAD was acquired through ibv_srq->pd in destroy_srq(), while ibv_srq->pd wasn't assigned when the SRQ was created by ibv_create_srq_ex(). Junxian Huang (5): libhns: Add debug logs for lock-free mode libhns: Fix ret not assigned in create_srq() libhns: Fix pad refcnt leaking in error flow of create_qp/cq/srq libhns: Fix freeing pad without checking refcnt verbs: Assign ibv_srq->pd when creating SRQ libibverbs/cmd_srq.c | 1 + providers/hns/hns_roce_u_verbs.c | 51 +++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 14 deletions(-) -- 2.33.0

Currently there is no way to observe whether the lock-free mode is configured from the driver's perspective. Add some debug logs for this. Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> --- providers/hns/hns_roce_u_verbs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 6fc8ece32..ed8a9c538 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -550,6 +550,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, cq->arm_sn = 1; + if (!cq->hr_lock.need_lock) + verbs_debug(verbs_get_ctx(context), + "set CQ(0x%x) to lock-free mode.\n", cq->cqn); + return &cq->verbs_cq.cq_ex; err_cmd: @@ -872,6 +876,10 @@ static struct ibv_srq *create_srq(struct ibv_context *context, init_attr->attr.max_sge = min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge); + if (!srq->hr_lock.need_lock) + verbs_debug(verbs_get_ctx(context), + "set SRQ(0x%x) to lock-free mode.\n", srq->srqn); + return &srq->verbs_srq.srq; err_destroy_srq: @@ -1627,6 +1635,11 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, qp_setup_config(attr, qp, context); + if (!qp->sq.hr_lock.need_lock) + verbs_debug(verbs_get_ctx(ibv_ctx), + "set QP(0x%x) to lock-free mode.\n", + qp->verbs_qp.qp.qp_num); + return &qp->verbs_qp.qp; err_dwqe: -- 2.33.0

Fix the problem that ret may not be assigned in the error flow of create_srq(). Fixes: b38bae4b5b9e ("libhns: Add support for lock-free SRQ") Fixes: b914c76318f5 ("libhns: Refactor the process of create_srq") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> --- providers/hns/hns_roce_u_verbs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index ed8a9c538..b7d5f8d8e 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -853,16 +853,20 @@ static struct ibv_srq *create_srq(struct ibv_context *context, if (pad) atomic_fetch_add(&pad->pd.refcount, 1); - if (hns_roce_srq_spinlock_init(srq, init_attr)) + ret = hns_roce_srq_spinlock_init(srq, init_attr); + if (ret) goto err_free_srq; set_srq_param(context, srq, init_attr); - if (alloc_srq_buf(srq)) + ret = alloc_srq_buf(srq); + if (ret) goto err_destroy_lock; srq->rdb = hns_roce_alloc_db(hr_ctx, HNS_ROCE_SRQ_TYPE_DB); - if (!srq->rdb) + if (!srq->rdb) { + ret = ENOMEM; goto err_srq_buf; + } ret = exec_srq_create_cmd(context, srq, init_attr); if (ret) -- 2.33.0

Decrease pad refcnt by 1 in error flow of create qp/cq/srq. Fixes: 8c865c315c34 ("libhns: Add support for lock-free CQ") Fixes: 179f015e090d ("libhns: Add support for lock-free QP") Fixes: b38bae4b5b9e ("libhns: Add support for lock-free SRQ") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> --- providers/hns/hns_roce_u_verbs.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index b7d5f8d8e..62d42c98e 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -435,12 +435,9 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, return EOPNOTSUPP; } - if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { - if (!pad) { - verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); - return EINVAL; - } - atomic_fetch_add(&pad->pd.refcount, 1); + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD && !pad) { + verbs_err(&context->ibv_ctx, "failed to check the pad of cq.\n"); + return EINVAL; } attr->cqe = max_t(uint32_t, HNS_ROCE_MIN_CQE_NUM, @@ -510,6 +507,7 @@ static int exec_cq_create_cmd(struct ibv_context *context, static struct ibv_cq_ex *create_cq(struct ibv_context *context, struct ibv_cq_init_attr_ex *attr) { + struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); struct hns_roce_context *hr_ctx = to_hr_ctx(context); struct hns_roce_cq *cq; int ret; @@ -524,8 +522,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, goto err; } - if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { cq->parent_domain = attr->parent_domain; + atomic_fetch_add(&pad->pd.refcount, 1); + } ret = hns_roce_cq_spinlock_init(cq, attr); if (ret) @@ -563,6 +563,8 @@ err_db: err_buf: hns_roce_spinlock_destroy(&cq->hr_lock); err_lock: + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) + atomic_fetch_sub(&pad->pd.refcount, 1); free(cq); err: if (ret < 0) @@ -899,6 +901,8 @@ err_destroy_lock: hns_roce_spinlock_destroy(&srq->hr_lock); err_free_srq: + if (pad) + atomic_fetch_sub(&pad->pd.refcount, 1); free(srq); err: @@ -1655,6 +1659,8 @@ err_cmd: err_buf: hns_roce_qp_spinlock_destroy(qp); err_spinlock: + if (pad) + atomic_fetch_sub(&pad->pd.refcount, 1); free(qp); err: if (ret < 0) -- 2.33.0

Currently pad refcnt will be added when creating qp/cq/srq, but it is not checked when freeing pad. Add a check to prevent freeing pad when it is still used by any qp/cq/srq. Fixes: ae35032532fb ("libhns: Add support for thread domain and parent domain") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> --- providers/hns/hns_roce_u_verbs.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 62d42c98e..05d5b2d34 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -208,14 +208,18 @@ struct ibv_pd *hns_roce_u_alloc_pad(struct ibv_context *context, return &pad->pd.ibv_pd; } -static void hns_roce_free_pad(struct hns_roce_pad *pad) +static int hns_roce_free_pad(struct hns_roce_pad *pad) { + if (atomic_load(&pad->pd.refcount) > 1) + return EBUSY; + atomic_fetch_sub(&pad->pd.protection_domain->refcount, 1); if (pad->td) atomic_fetch_sub(&pad->td->refcount, 1); free(pad); + return 0; } static int hns_roce_free_pd(struct hns_roce_pd *pd) @@ -238,10 +242,8 @@ int hns_roce_u_dealloc_pd(struct ibv_pd *ibv_pd) struct hns_roce_pad *pad = to_hr_pad(ibv_pd); struct hns_roce_pd *pd = to_hr_pd(ibv_pd); - if (pad) { - hns_roce_free_pad(pad); - return 0; - } + if (pad) + return hns_roce_free_pad(pad); return hns_roce_free_pd(pd); } -- 2.33.0

Some providers need to access ibv_srq->pd during SRQ destruction, but it may not be assigned currently when using ibv_create_srq_ex(). This may lead to some SRQ-related resource leaks. Assign ibv_srq->pd when creating SRQ to ensure pd can be obtained correctly. Fixes: 40c1365b2198 ("Add support for XRC SRQs") Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> --- libibverbs/cmd_srq.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libibverbs/cmd_srq.c b/libibverbs/cmd_srq.c index dfaaa6aa2..259ea0d10 100644 --- a/libibverbs/cmd_srq.c +++ b/libibverbs/cmd_srq.c @@ -63,6 +63,7 @@ static int ibv_icmd_create_srq(struct ibv_pd *pd, struct verbs_srq *vsrq, struct verbs_xrcd *vxrcd = NULL; enum ibv_srq_type srq_type; + srq->pd = pd; srq->context = pd->context; pthread_mutex_init(&srq->mutex, NULL); pthread_cond_init(&srq->cond, NULL); -- 2.33.0
participants (1)
-
Junxian Huang