From: Haoyue Xu <xuhaoyue1@hisilicon.com>
driver inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/rdma-core/issues/I7A5YM
---------------------------------------------------------------
The sl arrangement sequence is inconsistent with the MPI application, and it needs to be adapted.
libhns: Fix sl and Pktype for responder UD
For responder UD over RoCE, sl represents the RoCE packet type as:
Bits[2:0]: L3_Header_Type, as defined below
- 0x0 : GRH - (RoCE v1.0)
- 0x1 : IPv6 - (RoCE v1.5/v2.0)
- 0x2 : IPv4 - (RoCE v1.5/v2.0)
Merged-with: b82e2e4d3ed1 ("libhns: Fix sl and Pktype for responder UD")
Merged-with: a190e00b47b4 ("libhns: Fix arrangement in SL to adapt MPI APP")
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Add the pktype field because, according to Annex17_RoCEv2 (A17.4.5.1): For UD, the Completion Queue Entry (CQE) includes remote address information (InfiniBand Specification Vol. 1 Rev 1.2.1 Section 11.4.2.1). For RoCEv2, the remote address information comprises the source L2 Address and a flag that indicates whether the received frame is an IPv4, IPv6 or RoCE packet. This change also follows the UCX patch at https://github.com/openucx/ucx/commit/ed28845b88633e65d64fce8ec880060aa61bd5...
Signed-off-by: Haoyue Xu xuhaoyue1@hisilicon.com --- libibverbs/man/ibv_poll_cq.3 | 5 +++++ libibverbs/verbs.h | 15 ++++++++++++++- providers/hns/hns_roce_u_hw_v2.c | 20 +++++++++++++++++++- 3 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/libibverbs/man/ibv_poll_cq.3 b/libibverbs/man/ibv_poll_cq.3 index 76dda96..2928e69 100644 --- a/libibverbs/man/ibv_poll_cq.3 +++ b/libibverbs/man/ibv_poll_cq.3 @@ -42,7 +42,12 @@ uint32_t src_qp; /* Source QP number (remote QP number) o unsigned int wc_flags; /* Flags of the completed WR */ uint16_t pkey_index; /* P_Key index (valid only for GSI QPs) */ uint16_t slid; /* Source LID */ +union { +.in +8 uint8_t sl; /* Service Level */ +uint8_t pktype; /* Packet Type */ +.in -8 +}; uint8_t dlid_path_bits; /* DLID path bits (not applicable for multicast messages) */ .in -8 }; diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index 713cce6..6c8bbc3 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -582,6 +582,12 @@ enum ibv_wc_flags { IBV_WC_TM_DATA_VALID = 1 << 6, };
+enum IBV_PKTYPE_ROCE { + IBV_PKTYPE_ROCE_V1, + IBV_PKTYPE_ROCE_V2_IPV6, + IBV_PKTYPE_ROCE_V2_IPV4, +}; + struct ibv_wc { uint64_t wr_id; enum ibv_wc_status status; @@ -600,7 +606,14 @@ struct ibv_wc { unsigned int wc_flags; uint16_t pkey_index; uint16_t slid; - uint8_t sl; + /* SL for IB or packet type + * (GRH/IPv4/IPv6) for RoCE in the CQE + */ + union { + uint8_t sl; + /* use enum IBV_PKTYPE_ROCE*/ + uint8_t pktype; + }; uint8_t dlid_path_bits; };
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c index a49b50d..61c7c00 100644 --- a/providers/hns/hns_roce_u_hw_v2.c +++ b/providers/hns/hns_roce_u_hw_v2.c @@ -521,9 +521,17 @@ static void handle_recv_rq_inl(struct hns_roce_v2_cqe *cqe, handle_recv_inl_data(cqe, &(cur_qp->rq_rinl_buf), wr_num, wqe_buf); }
+static const uint8_t sl_for_ud[] = { + IBV_PKTYPE_ROCE_V1, + IBV_PKTYPE_ROCE_V2_IPV6, + IBV_PKTYPE_ROCE_V2_IPV4 +}; + static void parse_for_ud_qp(struct hns_roce_v2_cqe *cqe, struct ibv_wc *wc) { - wc->sl = hr_reg_read(cqe, CQE_SL); + uint8_t port_type = hr_reg_read(cqe, CQE_PORT_TYPE); + + wc->pktype = sl_for_ud[port_type]; wc->src_qp = hr_reg_read(cqe, CQE_RMT_QPN); wc->slid = 0; wc->wc_flags |= hr_reg_read(cqe, CQE_GRH) ? IBV_WC_GRH : 0; @@ -2130,7 +2138,17 @@ static uint32_t wc_read_slid(struct ibv_cq_ex *current) static uint8_t wc_read_sl(struct ibv_cq_ex *current) { struct hns_roce_cq *cq = to_hr_cq(ibv_cq_ex_to_cq(current)); + struct hns_roce_context *ctx = to_hr_ctx(current->context); + struct hns_roce_qp *hr_qp; + uint8_t port_type; + uint32_t qpn;
+ qpn = hr_reg_read(cq->cqe, CQE_LCL_QPN); + hr_qp = hns_roce_v2_find_qp(ctx, qpn); + if (hr_qp->verbs_qp.qp.qp_type == IBV_QPT_UD) { + port_type = hr_reg_read(cq->cqe, CQE_PORT_TYPE); + return sl_for_ud[port_type]; + } return (uint8_t)hr_reg_read(cq->cqe, CQE_SL); }