From: Juan Zhou zhoujuan51@h-partners.com
Chengchang Tang (1): libhns: Add a judgment to the congestion control algorithm
Haoyue Xu (1): libhns: Fix arrangement in SL to adapt MPI APP
Junxian Huang (1): libhns: Fix incorrect post-send with direct wqe of wr-list in user space
Luoyouming (1): libhns: Fix the owner bit error of sq in new io
libibverbs/man/ibv_poll_cq.3 | 5 +++++ libibverbs/verbs.h | 15 ++++++++++++++- providers/hns/hns_roce_u_hw_v2.c | 33 ++++++++++++++++++++++++++------ providers/hns/hns_roce_u_verbs.c | 3 +++ 4 files changed, 49 insertions(+), 7 deletions(-)
-- 2.30.0
From: Haoyue Xu xuhaoyue1@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A5YM
---------------------------------------------------------------
The sl arrangement sequence is inconsistent with the MPI application, and it needs to be adapted.
libhns: Fix sl and Pktype for responder UD
for responder UD over RoCE sl represents RoCE packet type as: Bits[2:0]: L3_Header_Type, as defined below - 0x0 : GRH - (RoCE v1.0) - 0x1 : IPv6 - (RoCE v1.5/v2.0) - 0x2 : IPv4 - (RoCE v1.5/v2.0)
Merged-with: b82e2e4d3ed1 ("libhns: Fix sl and Pktype for responder UD") Merged-with: a190e00b47b4 ("libhns: Fix arrangement in SL to adapt MPI APP") Signed-off-by: Haoyue Xu xuhaoyue1@hisilicon.com
Add the pktype, because: According to Annex17_RoCEv2 (A17.4.5.1): For UD, the Completion Queue Entry (CQE) includes remote address information (InfiniBand Specification Vol. 1 Rev 1.2.1 Section 11.4.2.1). For RoCEv2, the remote address information comprises the source L2 Address and a flag that indicates if the received frame is an IPv4, IPv6 or RoCE packet. It is also following the UCX patch of https://github.com/openucx/ucx/commit/ed28845b88633e65d64fce8ec880060aa61bd5...
Signed-off-by: Haoyue Xu xuhaoyue1@hisilicon.com --- libibverbs/man/ibv_poll_cq.3 | 5 +++++ libibverbs/verbs.h | 15 ++++++++++++++- providers/hns/hns_roce_u_hw_v2.c | 20 +++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/libibverbs/man/ibv_poll_cq.3 b/libibverbs/man/ibv_poll_cq.3 index 76dda96..2928e69 100644 --- a/libibverbs/man/ibv_poll_cq.3 +++ b/libibverbs/man/ibv_poll_cq.3 @@ -42,7 +42,12 @@ uint32_t src_qp; /* Source QP number (remote QP number) o unsigned int wc_flags; /* Flags of the completed WR */ uint16_t pkey_index; /* P_Key index (valid only for GSI QPs) */ uint16_t slid; /* Source LID */ +union { +.in +8 uint8_t sl; /* Service Level */ +uint8_t pktype; /* Packet Type */ +.in -8 +}; uint8_t dlid_path_bits; /* DLID path bits (not applicable for multicast messages) */ .in -8 }; diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 713cce6..6c8bbc3 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -582,6 +582,12 @@ enum ibv_wc_flags { IBV_WC_TM_DATA_VALID = 1 << 6, };
+enum IBV_PKTYPE_ROCE { + IBV_PKTYPE_ROCE_V1, + IBV_PKTYPE_ROCE_V2_IPV6, + IBV_PKTYPE_ROCE_V2_IPV4, +}; + struct ibv_wc { uint64_t wr_id; enum ibv_wc_status status; @@ -600,7 +606,14 @@ struct ibv_wc { unsigned int wc_flags; uint16_t pkey_index; uint16_t slid; - uint8_t sl; + /* SL for IB or packet type + * (GRH/IPv4/IPv6) for RoCE in the CQE + */ + union { + uint8_t sl; + /* use enum IBV_PKTYPE_ROCE*/ + uint8_t pktype; + }; uint8_t dlid_path_bits; };
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index a49b50d..61c7c00 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -521,9 +521,17 @@ static void handle_recv_rq_inl(struct hns_roce_v2_cqe *cqe, handle_recv_inl_data(cqe, &(cur_qp->rq_rinl_buf), wr_num, wqe_buf); }
+static const uint8_t sl_for_ud[] = { + IBV_PKTYPE_ROCE_V1, + IBV_PKTYPE_ROCE_V2_IPV6, + IBV_PKTYPE_ROCE_V2_IPV4 +}; + static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) { - wc->sl = hr_reg_read(cqe, CQE_SL); + uint8_t port_type = hr_reg_read(cqe, CQE_PORT_TYPE); + + wc->pktype = sl_for_ud[port_type]; wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN); wc->slid = 0; wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0; @@ -2130,7 +2138,17 @@ static uint32_t wc_read_slid(struct ibv_cq_ex *current) static uint8_t wc_read_sl(struct ibv_cq_ex *current) { struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); + struct hns_roce_context *ctx = to_hr_ctx(current->context); + struct hns_roce_qp *hr_qp; + uint8_t port_type; + uint32_t qpn;
+ qpn = hr_reg_read(cq->cqe, CQE_LCL_QPN); + hr_qp = hns_roce_v2_find_qp(ctx, qpn); + if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) { + port_type = hr_reg_read(cq->cqe, CQE_PORT_TYPE); + return sl_for_ud[port_type]; + } return (uint8_t)hr_reg_read(cq->cqe, CQE_SL); }
From: Luoyouming luoyouming@huawei.com
driver inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A5Y5
---------------------------------------------------------------
The code does not use the head position of the sq to set the owner bit, but instead uses the head value after adding 1, which causes an owner bit error. When the wqe queue has not actually been flipped, the hardware judges from the owner bit that it has flipped, resulting in failure to obtain the wqe: the send cannot proceed and no cqe is generated. This patch sets the owner bit ahead of time, before the head value is increased.
Fixes: 36446a56eea5 ("libhns: Extended QP supports the new post send mechanism") Signed-off-by: Luoyouming luoyouming@huawei.com --- providers/hns/hns_roce_u_hw_v2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index 61c7c00..ad97eb8 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -2221,6 +2221,9 @@ init_rc_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
qp->sq.wrid[wqe_idx] = wr_id; qp->cur_wqe = wqe; + + enable_wqe(qp, wqe, qp->sq.head); + qp->sq.head++;
return wqe; @@ -2242,9 +2245,6 @@ static void wr_set_sge_rc(struct ibv_qp_ex *ibv_qp, uint32_t lkey, wqe->msg_len = htole32(length); hr_reg_write(wqe, RCWQE_LEN0, length); hr_reg_write(wqe, RCWQE_SGE_NUM, !!length); - /* ignore ex sge start index */ - - enable_wqe(qp, wqe, qp->sq.head); }
static void set_sgl_rc(struct hns_roce_v2_wqe_data_seg *dseg, @@ -2554,6 +2554,9 @@ init_ud_wqe(struct hns_roce_qp *qp, uint64_t wr_id, unsigned int opcode)
qp->sq.wrid[wqe_idx] = wr_id; qp->cur_wqe = wqe; + + enable_wqe(qp, wqe, qp->sq.head); + qp->sq.head++;
return wqe; @@ -2623,7 +2626,6 @@ static void wr_set_sge_ud(struct ibv_qp_ex *ibv_qp, uint32_t lkey, dseg->len = htole32(length);
qp->sge_info.start_idx++; - enable_wqe(qp, wqe, qp->sq.head); }
static void wr_set_sge_list_ud(struct ibv_qp_ex *ibv_qp, size_t num_sge,
From: Junxian Huang huangjunxian6@hisilicon.com
driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I7A2SA
---------------------------------------------------------------
Currently, direct wqe is not supported for wr-list. RoCE driver excludes direct wqe for wr-list by judging whether the number of wr is 1.
For a wr-list where the second wr is a length-error atomic wr, the post-send driver handles the first wr and adds 1 to the wr number counter first. While handling the second wr, the driver finds the length error and terminates the wr handling process, leaving the counter at 1. This causes the driver to mistakenly judge that there is only 1 wr and thus enter the direct wqe process, carrying the current length-error atomic wqe.
This patch fixes the error by adding a judgment of whether the current wr is a bad wr. If so, the normal doorbell process is used instead of direct wqe, even though the wr number is 1.
Fixes: 159933c37450 ("libhns: Add support for direct wqe") Signed-off-by: Junxian Huang huangjunxian6@hisilicon.com --- providers/hns/hns_roce_u_hw_v2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index ad97eb8..8421606 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -1483,7 +1483,8 @@ out:
udma_to_device_barrier();
- if (nreq == 1 && (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + if (nreq == 1 && !ret && + (qp->flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) hns_roce_write_dwqe(qp, wqe); else hns_roce_update_sq_db(ctx, qp);
driver inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A7HI
---------------------------------------------------------------
The congestion control algorithm is used only when the comp_mask flag HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE is set.
A check on comp_mask is added to prevent invalid parameter errors caused by unconfigured congestion control algorithm types.
Fixes: 7623f24781f1 ("libhns: Support congestion control algorithm configuration") Signed-off-by: Chengchang Tang tangchengchang@huawei.com --- providers/hns/hns_roce_u_verbs.c | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c index 6c6120c..fa27fc1 100644 --- a/providers/hns/hns_roce_u_verbs.c +++ b/providers/hns/hns_roce_u_verbs.c @@ -988,6 +988,9 @@ static int check_qp_congest_type(struct hns_roce_context *ctx, { struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device);
+ if (!(hns_attr->comp_mask & HNSDV_QP_INIT_ATTR_MASK_QP_CONGEST_TYPE)) + return 0; + if (!check_comp_mask(hns_attr->congest_type, hr_dev->congest_type)) { verbs_err(&ctx->ibv_ctx, "unsupported congest type 0x%x.\n", hns_attr->congest_type);