driver inclusion
category: feature
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I83BP0
----------------------------------------------------------
If the device has been reset, the original business will not be able to continue. The current design allows users to continue issuing IO, but such a design is meaningless: the user should perceive the exception and restore the business as soon as possible.
The current kernel mode directly returns an error when the device has been reset, and this patch unifies the behavior of kernel mode and user mode.
Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- providers/hns/hns_roce_u_hw_v2.c | 53 +++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 18 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 4e92397..29b6268 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -1090,8 +1090,15 @@ static int check_qp_send(struct hns_roce_qp *qp, struct hns_roce_context *ctx)
if (unlikely(ibvqp->state == IBV_QPS_RESET || ibvqp->state == IBV_QPS_INIT || - ibvqp->state == IBV_QPS_RTR)) + ibvqp->state == IBV_QPS_RTR)) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "unsupported qp state, state = %d.\n", ibvqp->state); return -EINVAL; + } else if (unlikely(hns_roce_reseted(ctx))) { + verbs_err_datapath(&ctx->ibv_ctx, + "failed to send, device has been reseted!\n"); + return -EIO; + }
if (check_dca_attach_enable(qp)) { ret = dca_attach_qp_buf(ctx, qp); @@ -1691,8 +1698,15 @@ static int check_qp_recv(struct hns_roce_qp *qp, struct hns_roce_context *ctx) struct ibv_qp *ibvqp = &qp->verbs_qp.qp; int ret = 0;
- if (ibvqp->state == IBV_QPS_RESET) + if (ibvqp->state == IBV_QPS_RESET) { + verbs_err(verbs_get_ctx(qp->verbs_qp.qp.context), + "unsupported qp state, state = %d.\n", ibvqp->state); return -EINVAL; + } else if (unlikely(hns_roce_reseted(ctx))) { + verbs_err_datapath(&ctx->ibv_ctx, + "fail to recv, device has been reseted!\n"); + return -EIO; + }
if (check_dca_attach_enable(qp)) { ret = dca_attach_qp_buf(ctx, qp); @@ -2099,6 +2113,16 @@ static void update_srq_db(struct hns_roce_db *db, struct hns_roce_srq *srq) hr_reg_write(db, DB_PI, srq->idx_que.head); }
+static int check_srq_recv(struct hns_roce_context *ctx) +{ + if (hns_roce_reseted(ctx)) { + verbs_err_datapath(&ctx->ibv_ctx, + "srq failed to recv, device has been reseted!\n"); + return -EIO; + } + return 0; +} + static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, struct ibv_recv_wr *wr, struct ibv_recv_wr **bad_wr) @@ -2110,6 +2134,12 @@ static int hns_roce_u_v2_post_srq_recv(struct ibv_srq *ib_srq, int ret = 0; void *wqe;
+ ret = check_srq_recv(ctx); + if (ret) { + *bad_wr = wr; + return ret; + } + hns_roce_spin_lock(&srq->hr_lock);
max_sge = srq->max_gs - srq->rsv_sge; @@ -2934,27 +2964,14 @@ static void wr_start(struct ibv_qp_ex *ibv_qp) { struct hns_roce_context *ctx = to_hr_ctx(ibv_qp->qp_base.context); struct hns_roce_qp *qp = to_hr_qp(&ibv_qp->qp_base); - enum ibv_qp_state state = ibv_qp->qp_base.state; int ret;
- if (state == IBV_QPS_RESET || - state == IBV_QPS_INIT || - state == IBV_QPS_RTR) { - qp->err = EINVAL; + ret = check_qp_send(qp, ctx); + if (ret) { + qp->err = ret; return; }
- if (check_qp_dca_enable(qp)) { - ret = dca_attach_qp_buf(ctx, qp); - if (ret) { - verbs_err_datapath(&ctx->ibv_ctx, - "failed to attach QP-%u send, ret = %d.\n", - qp->verbs_qp.qp.qp_num, ret); - qp->err = ret; - return; - } - } - hns_roce_spin_lock(&qp->sq.hr_lock); qp->sge_info.start_idx = qp->next_sge; qp->rb_sq_head = qp->sq.head;