From: Juan Zhou <zhoujuan51@h-partners.com>
Chengchang Tang (5):
  libhns: Support reporting wc as software mode
  libhns: return error when post send in reset state
  libhns: separate the initialization steps of lock
  libhns: assign doorbell to zero when allocate it
  libhns: Fix missing reset notification.
 providers/hns/hns_roce_u.c       |   4 +
 providers/hns/hns_roce_u.h       |  14 ++
 providers/hns/hns_roce_u_db.c    |   2 +
 providers/hns/hns_roce_u_hw_v2.c | 272 +++++++++++++++++++++++++++----
 providers/hns/hns_roce_u_hw_v2.h |   2 +
 providers/hns/hns_roce_u_verbs.c | 147 ++++++++++++++---
 6 files changed, 387 insertions(+), 54 deletions(-)
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
When the HW is in the resetting stage, we cannot poll back all the expected work completions, because the HW stops generating CQEs.

This patch allows the driver to compose the expected WCs in software instead of relying on the HW during the resetting stage. Once the hardware has finished resetting, the CQ can be polled from hardware again.
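For readability, the core of the new software-completion path, condensed from the flattened hunks below (the SRQ variants and the extended-CQ branch are omitted here):

    /* Compose a flush-error WC in software when the HW cannot. */
    static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc,
                                  uint64_t wr_id, uint32_t qp_num)
    {
            wc->wr_id = wr_id;
            wc->status = IBV_WC_WR_FLUSH_ERR;
            wc->vendor_err = 0;
            wc->qp_num = qp_num;
    }

    /* In hns_roce_u_v2_poll_cq(): fall back to software completions
     * while the device is resetting. */
    if (unlikely(hns_roce_reseted(ctx))) {
            npolled = hns_roce_poll_swc(cq, ne, wc);
            hns_roce_spin_unlock(&cq->hr_lock);
            return npolled;
    }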
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 providers/hns/hns_roce_u.h       |  12 ++
 providers/hns/hns_roce_u_hw_v2.c | 216 +++++++++++++++++++++++++++++--
 providers/hns/hns_roce_u_hw_v2.h |   2 +
 providers/hns/hns_roce_u_verbs.c |  91 +++++++++++++
 4 files changed, 309 insertions(+), 12 deletions(-)
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index e3012e1..b3f21ba 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -277,6 +277,8 @@ struct hns_roce_context { unsigned int max_inline_data;
struct hns_roce_dca_ctx dca_ctx; + + bool reseted; };
struct hns_roce_td { @@ -309,6 +311,11 @@ struct hns_roce_cq { unsigned long flags; unsigned int cqe_size; struct hns_roce_v2_cqe *cqe; + struct list_head list_sq; + struct list_head list_rq; + struct list_head list_srq; + struct list_head list_xrc_srq; + struct hns_roce_v2_cqe *sw_cqe; };
struct hns_roce_idx_que { @@ -344,6 +351,7 @@ struct hns_roce_srq { unsigned int wqe_shift; unsigned int *db; unsigned short counter; + struct list_node xrc_srcq_node; };
struct hns_roce_wq { @@ -413,6 +421,10 @@ struct hns_roce_qp { unsigned int rb_sq_head; /* roll back sq head */ struct hns_roce_sge_info sge_info;
+ struct list_node rcq_node; + struct list_node scq_node; + struct list_node srcq_node; + /* Just for UD. If not enabled, 'sl' in ibv_wc * will be filled with 'port_type' in cqe. */ diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 9238fe5..4e92397 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -843,6 +843,180 @@ static int hns_roce_poll_one(struct hns_roce_context *ctx, return hns_roce_flush_cqe(*cur_qp, status); }
+static void hns_roce_fill_swc(struct hns_roce_cq *cq, struct ibv_wc *wc, + uint64_t wr_id, uint32_t qp_num) +{ + if (!wc) { + cq->verbs_cq.cq_ex.status = IBV_WC_WR_FLUSH_ERR; + cq->verbs_cq.cq_ex.wr_id = wr_id; + hr_reg_write(cq->sw_cqe, CQE_LCL_QPN, qp_num); + return; + } + + wc->wr_id = wr_id; + wc->status = IBV_WC_WR_FLUSH_ERR; + wc->vendor_err = 0; + wc->qp_num = qp_num; +} + +static int hns_roce_get_wq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp, + struct ibv_wc *wc, bool is_sq) +{ + struct hns_roce_wq *wq = is_sq ? &qp->sq : &qp->rq; + unsigned int left_wr; + uint64_t wr_id; + + left_wr = wq->head - wq->tail; + if (left_wr == 0) { + if (is_sq) + list_del_init(&qp->scq_node); + else + list_del_init(&qp->rcq_node); + + return -ENOENT; + } + + wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + hns_roce_fill_swc(cq, wc, wr_id, qp->verbs_qp.qp.qp_num); + wq->tail++; + return V2_CQ_OK; +} + +static int hns_roce_gen_sq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) +{ + struct hns_roce_qp *next, *qp = NULL; + + list_for_each_safe(&cq->list_sq, qp, next, scq_node) { + if (hns_roce_get_wq_swc(cq, qp, wc, true) == -ENOENT) + continue; + + return V2_CQ_OK; + } + + return !wc ? -ENOENT : V2_CQ_EMPTY; +} + +static int hns_roce_gen_rq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) +{ + struct hns_roce_qp *next, *qp = NULL; + + list_for_each_safe(&cq->list_rq, qp, next, rcq_node) { + if (hns_roce_get_wq_swc(cq, qp, wc, false) == -ENOENT) + continue; + + return V2_CQ_OK; + } + + return !wc ? -ENOENT : V2_CQ_EMPTY; +} + +static int hns_roce_get_srq_swc(struct hns_roce_cq *cq, struct hns_roce_qp *qp, + struct hns_roce_srq *srq, struct ibv_wc *wc) +{ + unsigned int left_wr; + uint64_t wr_id; + + hns_roce_spin_lock(&srq->hr_lock); + left_wr = srq->idx_que.head - srq->idx_que.tail; + if (left_wr == 0) { + if (qp) + list_del_init(&qp->srcq_node); + else + list_del_init(&srq->xrc_srcq_node); + + hns_roce_spin_unlock(&srq->hr_lock); + return -ENOENT; + } + + wr_id = srq->wrid[srq->idx_que.tail & (srq->wqe_cnt - 1)]; + hns_roce_fill_swc(cq, wc, wr_id, srq->srqn); + srq->idx_que.tail++; + hns_roce_spin_unlock(&srq->hr_lock); + + return V2_CQ_OK; +} + +static int hns_roce_gen_common_srq_swc(struct hns_roce_cq *cq, + struct ibv_wc *wc) +{ + struct hns_roce_qp *next, *qp = NULL; + struct hns_roce_srq *srq; + + list_for_each_safe(&cq->list_srq, qp, next, srcq_node) { + srq = to_hr_srq(qp->verbs_qp.qp.srq); + if (hns_roce_get_srq_swc(cq, qp, srq, wc) == -ENOENT) + continue; + + return V2_CQ_OK; + } + + return !wc ? -ENOENT : V2_CQ_EMPTY; +} + +static int hns_roce_gen_xrc_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) +{ + struct hns_roce_srq *next, *srq = NULL; + + list_for_each_safe(&cq->list_xrc_srq, srq, next, xrc_srcq_node) { + if (hns_roce_get_srq_swc(cq, NULL, srq, wc) == -ENOENT) + continue; + + return V2_CQ_OK; + } + + return !wc ? 
-ENOENT : V2_CQ_EMPTY; +} + +static int hns_roce_gen_srq_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) +{ + int err; + + err = hns_roce_gen_common_srq_swc(cq, wc); + if (err == V2_CQ_OK) + return err; + + return hns_roce_gen_xrc_srq_swc(cq, wc); +} + +static int hns_roce_poll_one_swc(struct hns_roce_cq *cq, struct ibv_wc *wc) +{ + int err; + + err = hns_roce_gen_sq_swc(cq, wc); + if (err == V2_CQ_OK) + return err; + + err = hns_roce_gen_rq_swc(cq, wc); + if (err == V2_CQ_OK) + return err; + + return hns_roce_gen_srq_swc(cq, wc); +} + +static int hns_roce_poll_swc(struct hns_roce_cq *cq, int ne, struct ibv_wc *wc) +{ + int npolled; + int err; + + for (npolled = 0; npolled < ne; npolled++) { + err = hns_roce_poll_one_swc(cq, wc + npolled); + if (err == V2_CQ_EMPTY) + break; + } + + return npolled; +} + +static bool hns_roce_reseted(struct hns_roce_context *ctx) +{ + struct hns_roce_v2_reset_state *state = ctx->reset_state; + + if (state && state->is_reset) + ctx->reseted = true; + + return ctx->reseted; +} + static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne, struct ibv_wc *wc) { @@ -854,6 +1028,12 @@ static int hns_roce_u_v2_poll_cq(struct ibv_cq *ibvcq, int ne,
hns_roce_spin_lock(&cq->hr_lock);
+ if (unlikely(hns_roce_reseted(ctx))) { + npolled = hns_roce_poll_swc(cq, ne, wc); + hns_roce_spin_unlock(&cq->hr_lock); + return npolled; + } + for (npolled = 0; npolled < ne; ++npolled) { err = hns_roce_poll_one(ctx, &qp, cq, wc + npolled); if (qp && check_dca_detach_enable(qp)) @@ -1773,11 +1953,8 @@ static int hns_roce_u_v2_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, return ret; }
-static void hns_roce_lock_cqs(struct ibv_qp *qp) +void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) { - struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); - struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); - if (send_cq && recv_cq) { if (send_cq == recv_cq) { hns_roce_spin_lock(&send_cq->hr_lock); @@ -1795,11 +1972,8 @@ static void hns_roce_lock_cqs(struct ibv_qp *qp) } }
-static void hns_roce_unlock_cqs(struct ibv_qp *qp) +void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq) { - struct hns_roce_cq *send_cq = to_hr_cq(qp->send_cq); - struct hns_roce_cq *recv_cq = to_hr_cq(qp->recv_cq); - if (send_cq && recv_cq) { if (send_cq == recv_cq) { hns_roce_spin_unlock(&send_cq->hr_lock); @@ -1832,17 +2006,22 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp)
hns_roce_v2_clear_qp(ctx, qp);
- hns_roce_lock_cqs(ibqp); + hns_roce_lock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
- if (ibqp->recv_cq) + if (ibqp->recv_cq) { __hns_roce_v2_cq_clean(to_hr_cq(ibqp->recv_cq), ibqp->qp_num, ibqp->srq ? to_hr_srq(ibqp->srq) : NULL); + list_del(&qp->srcq_node); + list_del(&qp->rcq_node); + }
- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) + if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) { __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, NULL); + list_del(&qp->scq_node); + }
- hns_roce_unlock_cqs(ibqp); + hns_roce_unlock_cqs(to_hr_cq(ibqp->send_cq), to_hr_cq(ibqp->recv_cq));
hns_roce_free_qp_buf(qp, ctx);
@@ -1988,10 +2167,16 @@ static int wc_start_poll_cq(struct ibv_cq_ex *current,
hns_roce_spin_lock(&cq->hr_lock);
+ if (unlikely(hns_roce_reseted(ctx))) { + err = hns_roce_poll_one_swc(cq, NULL); + goto start_poll_done; + } + err = hns_roce_poll_one(ctx, &qp, cq, NULL); if (qp && check_dca_detach_enable(qp)) dca_detach_qp_buf(ctx, qp);
+start_poll_done: if (err != V2_CQ_OK) hns_roce_spin_unlock(&cq->hr_lock);
@@ -2005,6 +2190,9 @@ static int wc_next_poll_cq(struct ibv_cq_ex *current) struct hns_roce_qp *qp = NULL; int err;
+ if (unlikely(hns_roce_reseted(ctx))) + return hns_roce_poll_one_swc(cq, NULL); + err = hns_roce_poll_one(ctx, &qp, cq, NULL); if (qp && check_dca_detach_enable(qp)) dca_detach_qp_buf(ctx, qp); @@ -2024,11 +2212,15 @@ static void wc_end_poll_cq(struct ibv_cq_ex *current) struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); struct hns_roce_context *ctx = to_hr_ctx(current->context);
+ if (unlikely(hns_roce_reseted(ctx))) + goto end_poll_done; + if (cq->flags & HNS_ROCE_CQ_FLAG_RECORD_DB) *cq->db = cq->cons_index & RECORD_DB_CI_MASK; else update_cq_db(ctx, cq);
+end_poll_done: hns_roce_spin_unlock(&cq->hr_lock); }
diff --git a/providers/hns/hns_roce_u_hw_v2.h b/providers/hns/hns_roce_u_hw_v2.h index d628d76..50a920f 100644 --- a/providers/hns/hns_roce_u_hw_v2.h +++ b/providers/hns/hns_roce_u_hw_v2.h @@ -346,5 +346,7 @@ void hns_roce_v2_clear_qp(struct hns_roce_context *ctx, struct hns_roce_qp *qp); void hns_roce_attach_cq_ex_ops(struct ibv_cq_ex *cq_ex, uint64_t wc_flags); int hns_roce_attach_qp_ex_ops(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp); +void hns_roce_lock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq); +void hns_roce_unlock_cqs(struct hns_roce_cq *send_cq, struct hns_roce_cq *recv_cq);
#endif /* _HNS_ROCE_U_HW_V2_H */ diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 4b641ea..8fb415b 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -519,6 +519,32 @@ static int exec_cq_create_cmd(struct ibv_context *context, return 0; }
+static int hns_roce_init_cq_swc(struct hns_roce_cq *cq, + struct ibv_cq_init_attr_ex *attr) +{ + list_head_init(&cq->list_sq); + list_head_init(&cq->list_rq); + list_head_init(&cq->list_srq); + list_head_init(&cq->list_xrc_srq); + + if (!(attr->wc_flags & CREATE_CQ_SUPPORTED_WC_FLAGS)) + return 0; + + cq->sw_cqe = calloc(1, sizeof(struct hns_roce_v2_cqe)); + if (!cq->sw_cqe) + return -ENOMEM; + + return 0; +} + +static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq) +{ + if (cq->sw_cqe) { + free(cq->sw_cqe); + cq->sw_cqe = NULL; + } +} + static struct ibv_cq_ex *create_cq(struct ibv_context *context, struct ibv_cq_init_attr_ex *attr) { @@ -564,6 +590,10 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context,
*cq->db = 0;
+ ret = hns_roce_init_cq_swc(cq, attr); + if (ret) + goto err_swc; + ret = exec_cq_create_cmd(context, cq, attr); if (ret) goto err_cmd; @@ -573,6 +603,8 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, return &cq->verbs_cq.cq_ex;
err_cmd: + hns_roce_uninit_cq_swc(cq); +err_swc: hns_roce_free_db(hr_ctx, cq->db, HNS_ROCE_CQ_TYPE_DB); err_db: hns_roce_free_buf(&cq->buf); @@ -632,6 +664,8 @@ int hns_roce_u_destroy_cq(struct ibv_cq *cq) if (ret) return ret;
+ hns_roce_uninit_cq_swc(to_hr_cq(cq)); + hns_roce_free_db(to_hr_ctx(cq->context), to_hr_cq(cq)->db, HNS_ROCE_CQ_TYPE_DB); hns_roce_free_buf(&to_hr_cq(cq)->buf); @@ -839,6 +873,22 @@ static int exec_srq_create_cmd(struct ibv_context *context, return 0; }
+static void init_srq_cq_list(struct hns_roce_srq *srq, + struct ibv_srq_init_attr_ex *init_attr) +{ + struct hns_roce_cq *srq_cq; + + list_node_init(&srq->xrc_srcq_node); + + if (!init_attr->cq) + return; + + srq_cq = to_hr_cq(init_attr->cq); + hns_roce_spin_lock(&srq_cq->hr_lock); + list_add_tail(&srq_cq->list_xrc_srq, &srq->xrc_srcq_node); + hns_roce_spin_unlock(&srq_cq->hr_lock); +} + static struct ibv_srq *create_srq(struct ibv_context *context, struct ibv_srq_init_attr_ex *init_attr) { @@ -886,6 +936,8 @@ static struct ibv_srq *create_srq(struct ibv_context *context, init_attr->attr.max_sge = min(init_attr->attr.max_sge - srq->rsv_sge, hr_ctx->max_srq_sge);
+ init_srq_cq_list(srq, init_attr); + return &srq->verbs_srq.srq;
err_destroy_srq: @@ -958,12 +1010,26 @@ int hns_roce_u_query_srq(struct ibv_srq *srq, struct ibv_srq_attr *srq_attr) return ret; }
+static void del_srq_from_cq_list(struct hns_roce_srq *srq) +{ + struct hns_roce_cq *srq_cq = to_hr_cq(srq->verbs_srq.cq); + + if (!srq_cq) + return; + + hns_roce_spin_lock(&srq_cq->hr_lock); + list_del(&srq->xrc_srcq_node); + hns_roce_spin_unlock(&srq_cq->hr_lock); +} + int hns_roce_u_destroy_srq(struct ibv_srq *ibv_srq) { struct hns_roce_context *ctx = to_hr_ctx(ibv_srq->context); struct hns_roce_srq *srq = to_hr_srq(ibv_srq); int ret;
+ del_srq_from_cq_list(srq); + ret = ibv_cmd_destroy_srq(ibv_srq); if (ret) return ret; @@ -1648,6 +1714,30 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, return 0; }
+static void add_qp_to_cq_list(struct ibv_qp_init_attr_ex *attr, + struct hns_roce_qp *qp) +{ + struct hns_roce_cq *send_cq, *recv_cq; + + send_cq = attr->send_cq ? to_hr_cq(attr->send_cq) : NULL; + recv_cq = attr->recv_cq ? to_hr_cq(attr->recv_cq) : NULL; + + list_node_init(&qp->scq_node); + list_node_init(&qp->rcq_node); + list_node_init(&qp->srcq_node); + + hns_roce_lock_cqs(send_cq, recv_cq); + if (send_cq) + list_add_tail(&send_cq->list_sq, &qp->scq_node); + if (recv_cq) { + if (attr->srq) + list_add_tail(&recv_cq->list_srq, &qp->srcq_node); + else + list_add_tail(&recv_cq->list_rq, &qp->rcq_node); + } + hns_roce_unlock_cqs(send_cq, recv_cq); +} + static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, struct ibv_qp_init_attr_ex *attr, struct hnsdv_qp_init_attr *hns_attr) @@ -1697,6 +1787,7 @@ static struct ibv_qp *create_qp(struct ibv_context *ibv_ctx, }
qp_setup_config(attr, qp, context); + add_qp_to_cq_list(attr, qp);
if (hns_attr && hns_attr->create_flags & HNSDV_QP_CREATE_ENABLE_UD_SL) qp->enable_ud_sl = true;
driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
If the device has been reset, the original traffic cannot continue. The current design nevertheless allows users to keep issuing I/O, which is pointless; the user should perceive the exception and restore the service as soon as possible.

The kernel-mode driver already returns an error directly when the device has been reset, and this patch unifies the behavior of kernel mode and user mode.
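A minimal caller-side sketch (not part of this patch; handle_device_reset() is a hypothetical recovery helper) of how an application is expected to react once posting fails during a reset:

    struct ibv_send_wr *bad_wr = NULL;
    int ret = ibv_post_send(qp, &wr, &bad_wr);
    if (ret) {
            /* With this series the hns provider rejects the post with EIO
             * once the device has been reset, instead of accepting work
             * that can never complete. */
            handle_device_reset();  /* hypothetical recovery path */
    }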
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 providers/hns/hns_roce_u_hw_v2.c | 53 +++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 18 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 4e92397..29b6268 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -1090,8 +1090,15 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
if (unlikely(ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT || - ibvqp->state == IBV_QPS_RTR)) + ibvqp->state == IBV_QPS_RTR)) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "unsupported qp state, state = %d.\n", ibvqp->state); return -EINVAL; + } else if (unlikely(hns_roce_reseted(ctx))) { + verbs_err_datapath(&ctx->ibv_ctx, + "failed to send, device has been reseted!\n"); + return -EIO; + }
if (check_dca_attach_enable(qp)) { ret = dca_attach_qp_buf(ctx, qp); @@ -1691,8 +1698,15 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) struct ibv_qp *ibvqp = &qp->verbs_qp.qp; int ret = 0;
- if (ibvqp->state == IBV_QPS_RESET) + if (ibvqp->state == IBV_QPS_RESET) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "unsupported qp state, state = %d.\n", ibvqp->state); return -EINVAL; + } else if (unlikely(hns_roce_reseted(ctx))) { + verbs_err_datapath(&ctx->ibv_ctx, + "fail to recv, device has been reseted!\n"); + return -EIO; + }
if (check_dca_attach_enable(qp)) { ret = dca_attach_qp_buf(ctx, qp); @@ -2099,6 +2113,16 @@ static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq) hr_reg_write(db, DB_PI, srq->idx_que.head); }
+static int check_srq_recv(struct hns_roce_context *ctx) +{ + if (hns_roce_reseted(ctx)) { + verbs_err_datapath(&ctx->ibv_ctx, + "srq failed to recv, device has been reseted!\n"); + return -EIO; + } + return 0; +} + static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) @@ -2110,6 +2134,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, int ret = 0; void *wqe;
+ ret = check_srq_recv(ctx); + if (ret) { + *bad_wr = wr; + return ret; + } + hns_roce_spin_lock(&srq->hr_lock);
max_sge = srq->max_gs - srq->rsv_sge; @@ -2934,27 +2964,14 @@ static void wr_start(struct ibv_qp_ex *ibv_qp) { struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context); struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); - enum ibv_qp_state state = ibv_qp->qp_base.state; int ret;
- if (state == IBV_QPS_RESET || - state == IBV_QPS_INIT || - state == IBV_QPS_RTR) { - qp->err = EINVAL; + ret = check_qp_send(qp, ctx); + if (ret) { + qp->err = ret; return; }
- if (check_qp_dca_enable(qp)) { - ret = dca_attach_qp_buf(ctx, qp); - if (ret) { - verbs_err_datapath(&ctx->ibv_ctx, - "failed to attach QP-%u send, ret = %d.\n", - qp->verbs_qp.qp.qp_num, ret); - qp->err = ret; - return; - } - } - hns_roce_spin_lock(&qp->sq.hr_lock); qp->sge_info.start_idx = qp->next_sge; qp->rb_sq_head = qp->sq.head;
driver inclusion
category: cleanup
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
Separate the lock initialization steps out of create_cq() and create_srq(), as is already done in create_qp(), to unify all the create-style flows.
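The CQ variant of the factored-out helper, reflowed from the hunk below for readability:

    static int hns_roce_cq_spinlock_init(struct ibv_context *context,
                                         struct hns_roce_cq *cq,
                                         struct ibv_cq_init_attr_ex *attr)
    {
            struct hns_roce_pad *pad = NULL;
            int need_lock;

            if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD)
                    pad = to_hr_pad(attr->parent_domain);

            need_lock = hns_roce_whether_need_lock(pad ? &pad->pd.ibv_pd : NULL);
            if (!need_lock)
                    verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n");

            return hns_roce_spinlock_init(&cq->hr_lock, need_lock);
    }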
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 providers/hns/hns_roce_u_verbs.c | 48 +++++++++++++++++++++-----------
 1 file changed, 32 insertions(+), 16 deletions(-)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 8fb415b..e7a7388 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -545,13 +545,28 @@ static void hns_roce_uninit_cq_swc(struct hns_roce_cq *cq) } }
+static int hns_roce_cq_spinlock_init(struct ibv_context *context, + struct hns_roce_cq *cq, + struct ibv_cq_init_attr_ex *attr) +{ + struct hns_roce_pad *pad = NULL; + int need_lock; + + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) + pad = to_hr_pad(attr->parent_domain); + + need_lock = hns_roce_whether_need_lock(pad ? &pad->pd.ibv_pd : NULL); + if (!need_lock) + verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n"); + + return hns_roce_spinlock_init(&cq->hr_lock, need_lock); +} + static struct ibv_cq_ex *create_cq(struct ibv_context *context, struct ibv_cq_init_attr_ex *attr) { struct hns_roce_context *hr_ctx = to_hr_ctx(context); - struct hns_roce_pad *pad = NULL; struct hns_roce_cq *cq; - int need_lock; int ret;
ret = verify_cq_create_attr(attr, hr_ctx); @@ -564,14 +579,7 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, goto err; }
- if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) - pad = to_hr_pad(attr->parent_domain); - - need_lock = hns_roce_whether_need_lock(pad ? &pad->pd.ibv_pd : NULL); - if (!need_lock) - verbs_info(verbs_get_ctx(context), "configure cq as no lock.\n"); - - ret = hns_roce_spinlock_init(&cq->hr_lock, need_lock); + ret = hns_roce_cq_spinlock_init(context, cq, attr); if (ret) goto err_lock;
@@ -889,12 +897,24 @@ static void init_srq_cq_list(struct hns_roce_srq *srq, hns_roce_spin_unlock(&srq_cq->hr_lock); }
+static int hns_roce_srq_spinlock_init(struct ibv_context *context, + struct hns_roce_srq *srq, + struct ibv_srq_init_attr_ex *attr) +{ + int need_lock; + + need_lock = hns_roce_whether_need_lock(attr->pd); + if (!need_lock) + verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n"); + + return hns_roce_spinlock_init(&srq->hr_lock, need_lock); +} + static struct ibv_srq *create_srq(struct ibv_context *context, struct ibv_srq_init_attr_ex *init_attr) { struct hns_roce_context *hr_ctx = to_hr_ctx(context); struct hns_roce_srq *srq; - int need_lock; int ret;
ret = verify_srq_create_attr(hr_ctx, init_attr); @@ -907,11 +927,7 @@ static struct ibv_srq *create_srq(struct ibv_context *context, goto err; }
- need_lock = hns_roce_whether_need_lock(init_attr->pd); - if (!need_lock) - verbs_info(verbs_get_ctx(context), "configure srq as no lock.\n"); - - if (hns_roce_spinlock_init(&srq->hr_lock, need_lock)) + if (hns_roce_srq_spinlock_init(context, srq, init_attr)) goto err_free_srq;
set_srq_param(context, srq, init_attr);
driver inclusion
category: cleanup
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
Clear the doorbell when allocating it, to avoid having to clear it in every function that uses hns_roce_alloc_db().
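After this change the allocator itself hands back a zeroed doorbell (reflowed from the hunk below), so the "*db = 0;" statements in the callers can be dropped:

    /* tail of hns_roce_alloc_db() */
    out:
            pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex);

            if (db)
                    *((unsigned int *)db) = 0;

            return db;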
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 providers/hns/hns_roce_u_db.c    | 2 ++
 providers/hns/hns_roce_u_verbs.c | 8 --------
 2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/providers/hns/hns_roce_u_db.c b/providers/hns/hns_roce_u_db.c index f5acac2..73a71de 100644 --- a/providers/hns/hns_roce_u_db.c +++ b/providers/hns/hns_roce_u_db.c @@ -115,6 +115,8 @@ found:
out: pthread_mutex_unlock((pthread_mutex_t *)&ctx->db_list_mutex); + if (db) + *((unsigned int *)db) = 0;
return db; } diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index e7a7388..7b58dd0 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -596,8 +596,6 @@ static struct ibv_cq_ex *create_cq(struct ibv_context *context, goto err_db; }
- *cq->db = 0; - ret = hns_roce_init_cq_swc(cq, attr); if (ret) goto err_swc; @@ -938,8 +936,6 @@ static struct ibv_srq *create_srq(struct ibv_context *context, if (!srq->db) goto err_srq_buf;
- *srq->db = 0; - ret = exec_srq_create_cmd(context, srq, init_attr); if (ret) goto err_srq_db; @@ -1591,8 +1587,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp, qp->sdb = hns_roce_alloc_db(ctx, HNS_ROCE_QP_TYPE_DB); if (!qp->sdb) return -ENOMEM; - - *qp->sdb = 0; }
if (attr->cap.max_recv_sge) { @@ -1604,8 +1598,6 @@ static int qp_alloc_db(struct ibv_qp_init_attr_ex *attr, struct hns_roce_qp *qp,
return -ENOMEM; } - - *qp->rdb = 0; }
return 0;
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83L7U
----------------------------------------------------------
Currently, the userspace driver gets the reset notification by reading a shared variable which is set to non-zero during reset. However, if the user does not call the driver's I/O interface during the reset, the notification is missed, because this variable is cleared once the reset completes.

This patch uses a new reset flag to record whether the driver has been reset at any time. The flag is non-zero by default and permanently becomes 0 once a reset occurs: during the reset, the kernel-space driver assigns 0 to this variable, and after the reset, the variable is remapped to a page of all zeros. The userspace driver can therefore tell whether a reset has happened by checking whether this variable is 0.
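The resulting check, assembled from the hunks below: the old is_reset path is kept for kernels that do not expose the new flag, while hw_ready reports the permanent state.

    static bool hns_roce_reseted(struct hns_roce_context *ctx)
    {
            struct hns_roce_v2_reset_state *state = ctx->reset_state;

            if (ctx->use_new_reset_flag)
                    return !state->hw_ready;

            if (state && state->is_reset)
                    ctx->reseted = true;

            return ctx->reseted;
    }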
Fixes: 34f2ad8085c2 ("libhns: Add reset stop flow mechanism")
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
---
 providers/hns/hns_roce_u.c       | 4 ++++
 providers/hns/hns_roce_u.h       | 2 ++
 providers/hns/hns_roce_u_hw_v2.c | 3 +++
 3 files changed, 9 insertions(+)
diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c index 87f9ed8..0660081 100644 --- a/providers/hns/hns_roce_u.c +++ b/providers/hns/hns_roce_u.c @@ -226,6 +226,7 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, int page_size) { uint64_t reset_mmap_key = resp->reset_mmap_key; + struct hns_roce_v2_reset_state *state;
/* The reset mmap key is 0, which means it is not supported. */ if (reset_mmap_key == 0) @@ -236,6 +237,9 @@ static int init_reset_context(struct hns_roce_context *ctx, int cmd_fd, if (ctx->reset_state == MAP_FAILED) return -ENOMEM;
+ state = ctx->reset_state; + ctx->use_new_reset_flag = state->hw_ready; + return 0; }
diff --git a/providers/hns/hns_roce_u.h b/providers/hns/hns_roce_u.h index b3f21ba..5501d8e 100644 --- a/providers/hns/hns_roce_u.h +++ b/providers/hns/hns_roce_u.h @@ -235,6 +235,7 @@ struct hns_roce_dca_ctx {
struct hns_roce_v2_reset_state { uint32_t is_reset; + uint32_t hw_ready; };
struct hns_roce_cmd_flag { @@ -278,6 +279,7 @@ struct hns_roce_context {
struct hns_roce_dca_ctx dca_ctx;
+ bool use_new_reset_flag; bool reseted; };
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 29b6268..ac40d5d 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -1011,6 +1011,9 @@ static bool hns_roce_reseted(struct hns_roce_context *ctx) { struct hns_roce_v2_reset_state *state = ctx->reset_state;
+ if (ctx->use_new_reset_flag) + return !state->hw_ready; + if (state && state->is_reset) ctx->reseted = true;