mailweb.openeuler.org
Manage this list
×
Keyboard Shortcuts
Thread View
j
: Next unread message
k
: Previous unread message
j a
: Jump to all threads
j l
: Jump to MailingList overview
2025
January
2024
December
November
October
September
August
July
June
May
April
March
February
January
2023
December
November
October
September
August
July
June
May
April
March
February
January
2022
December
November
October
September
August
July
June
May
April
March
February
January
2021
December
November
October
September
August
July
June
May
April
March
February
January
2020
December
November
October
September
List overview
Download
High-performance-network
January 2025
----- 2025 -----
January 2025
----- 2024 -----
December 2024
November 2024
October 2024
September 2024
August 2024
July 2024
June 2024
May 2024
April 2024
March 2024
February 2024
January 2024
----- 2023 -----
December 2023
November 2023
October 2023
September 2023
August 2023
July 2023
June 2023
May 2023
April 2023
March 2023
February 2023
January 2023
----- 2022 -----
December 2022
November 2022
October 2022
September 2022
August 2022
July 2022
June 2022
May 2022
April 2022
March 2022
February 2022
January 2022
----- 2021 -----
December 2021
November 2021
October 2021
September 2021
August 2021
July 2021
June 2021
May 2021
April 2021
March 2021
February 2021
January 2021
----- 2020 -----
December 2020
November 2020
October 2020
September 2020
high-performance-network@openeuler.org
1 participant
2 discussions
Start a new thread
[PATCH] libhns: Add error logs to help diagnosis
by Chengchang Tang
03 Jan '25
03 Jan '25
From: Xinghai Cen <cenxinghai(a)h-partners.com> Add error logs to help diagnosis. Signed-off-by: Xinghai Cen <cenxinghai(a)h-partners.com> --- ...hns-Add-error-logs-to-help-diagnosis.patch | 242 ++++++++++++++++++ rdma-core.spec | 9 +- 2 files changed, 250 insertions(+), 1 deletion(-) create mode 100644 0042-libhns-Add-error-logs-to-help-diagnosis.patch diff --git a/0042-libhns-Add-error-logs-to-help-diagnosis.patch b/0042-libhns-Add-error-logs-to-help-diagnosis.patch new file mode 100644 index 0000000..9d880ea --- /dev/null +++ b/0042-libhns-Add-error-logs-to-help-diagnosis.patch @@ -0,0 +1,242 @@ +From 60c45b5f7c2cd0c2e7139d472406f071f327bb91 Mon Sep 17 00:00:00 2001 +From: Junxian Huang <huangjunxian6(a)hisilicon.com> +Date: Fri, 27 Dec 2024 14:02:29 +0800 +Subject: [PATCH] libhns: Add error logs to help diagnosis + +mainline inclusion +from mainline-master +commit 7849f1b17f89b8baa0065adaf9cd04204698ea82 +category: feature +bugzilla:
https://gitee.com/src-openeuler/rdma-core/issues/IBFGPH
+CVE: NA + +Reference:
https://github.com/linux-rdma/rdma-core/pull/1533/commits/7849f1b17f89b8baa…
+ +---------------------------------------------------------------------- + +Add error logs to help diagnosis. + +Signed-off-by: Junxian Huang <huangjunxian6(a)hisilicon.com> +--- + providers/hns/hns_roce_u.c | 4 +- + providers/hns/hns_roce_u_hw_v2.c | 3 ++ + providers/hns/hns_roce_u_verbs.c | 79 ++++++++++++++++++++++++++------ + 3 files changed, 70 insertions(+), 16 deletions(-) + +diff --git a/providers/hns/hns_roce_u.c b/providers/hns/hns_roce_u.c +index e219b9e..ec995e7 100644 +--- a/providers/hns/hns_roce_u.c ++++ b/providers/hns/hns_roce_u.c +@@ -424,8 +424,10 @@ static struct verbs_context *hns_roce_alloc_context(struct ibv_device *ibdev, + + context->uar = mmap(NULL, hr_dev->page_size, PROT_READ | PROT_WRITE, + MAP_SHARED, cmd_fd, 0); +- if (context->uar == MAP_FAILED) ++ if (context->uar == MAP_FAILED) { ++ verbs_err(&context->ibv_ctx, "failed to mmap uar page.\n"); + goto err_set_attr; ++ } + + if (init_dca_context(context, cmd_fd, + &resp, ctx_attr, hr_dev->page_size)) +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index c746e03..0628646 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -3057,6 +3057,9 @@ static int fill_send_wr_ops(const struct ibv_qp_init_attr_ex *attr, + fill_send_wr_ops_ud(qp_ex); + break; + default: ++ verbs_err(verbs_get_ctx(qp_ex->qp_base.context), ++ "QP type %d not supported for qp_ex send ops.\n", ++ attr->qp_type); + return -EOPNOTSUPP; + } + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index c733b21..e9acfab 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -422,8 +422,11 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + { + struct hns_roce_pad *pad = to_hr_pad(attr->parent_domain); + +- if (!attr->cqe || attr->cqe > context->max_cqe) ++ if (!attr->cqe || attr->cqe > context->max_cqe) { ++ verbs_err(&context->ibv_ctx, "unsupported cq depth %u.\n", ++ 
attr->cqe); + return EINVAL; ++ } + + if (!check_comp_mask(attr->comp_mask, CREATE_CQ_SUPPORTED_COMP_MASK)) { + verbs_err(&context->ibv_ctx, "unsupported cq comps 0x%x\n", +@@ -431,8 +434,11 @@ static int verify_cq_create_attr(struct ibv_cq_init_attr_ex *attr, + return EOPNOTSUPP; + } + +- if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) ++ if (!check_comp_mask(attr->wc_flags, CREATE_CQ_SUPPORTED_WC_FLAGS)) { ++ verbs_err(&context->ibv_ctx, "unsupported wc flags 0x%llx.\n", ++ attr->wc_flags); + return EOPNOTSUPP; ++ } + + if (attr->comp_mask & IBV_CQ_INIT_ATTR_MASK_PD) { + if (!pad) { +@@ -504,8 +510,11 @@ static int exec_cq_create_cmd(struct ibv_context *context, + ret = ibv_cmd_create_cq_ex(context, attr, &cq->verbs_cq, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex), 0); +- if (ret) ++ if (ret) { ++ verbs_err(verbs_get_ctx(context), ++ "failed to exec create cq cmd, ret = %d.\n", ret); + return ret; ++ } + + cq->cqn = resp_drv->cqn; + cq->flags = resp_drv->cap_flags; +@@ -724,13 +733,20 @@ static int verify_srq_create_attr(struct hns_roce_context *context, + struct ibv_srq_init_attr_ex *attr) + { + if (attr->srq_type != IBV_SRQT_BASIC && +- attr->srq_type != IBV_SRQT_XRC) ++ attr->srq_type != IBV_SRQT_XRC) { ++ verbs_err(&context->ibv_ctx, ++ "unsupported srq type, type = %d.\n", attr->srq_type); + return -EINVAL; ++ } + + if (!attr->attr.max_sge || + attr->attr.max_wr > context->max_srq_wr || +- attr->attr.max_sge > context->max_srq_sge) ++ attr->attr.max_sge > context->max_srq_sge) { ++ verbs_err(&context->ibv_ctx, ++ "invalid srq attr size, max_wr = %u, max_sge = %u.\n", ++ attr->attr.max_wr, attr->attr.max_sge); + return -EINVAL; ++ } + + attr->attr.max_wr = max_t(uint32_t, attr->attr.max_wr, + HNS_ROCE_MIN_SRQ_WQE_NUM); +@@ -862,8 +878,12 @@ static int exec_srq_create_cmd(struct ibv_context *context, + ret = ibv_cmd_create_srq_ex(context, &srq->verbs_srq, init_attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + 
&resp_ex.ibv_resp, sizeof(resp_ex)); +- if (ret) ++ if (ret) { ++ verbs_err(verbs_get_ctx(context), ++ "failed to exec create srq cmd, ret = %d.\n", ++ ret); + return ret; ++ } + + srq->srqn = resp_ex.srqn; + srq->cap_flags = resp_ex.cap_flags; +@@ -1086,9 +1106,12 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + struct ibv_qp_init_attr_ex *attr) + { + struct hns_roce_device *hr_dev = to_hr_dev(ctx->ibv_ctx.context.device); ++ int ret = 0; + +- if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) +- return EOPNOTSUPP; ++ if (!check_comp_mask(attr->comp_mask, CREATE_QP_SUP_COMP_MASK)) { ++ ret = EOPNOTSUPP; ++ goto out; ++ } + + if (attr->comp_mask & IBV_QP_INIT_ATTR_SEND_OPS_FLAGS && + !check_comp_mask(attr->send_ops_flags, SEND_OPS_FLAG_MASK)) +@@ -1102,17 +1125,21 @@ static int check_qp_create_mask(struct hns_roce_context *ctx, + case IBV_QPT_RC: + case IBV_QPT_XRC_SEND: + if (!(attr->comp_mask & IBV_QP_INIT_ATTR_PD)) +- return EINVAL; ++ ret = EINVAL; + break; + case IBV_QPT_XRC_RECV: + if (!(attr->comp_mask & IBV_QP_INIT_ATTR_XRCD)) +- return EINVAL; ++ ret = EINVAL; + break; + default: + return EOPNOTSUPP; + } + +- return 0; ++out: ++ if (ret) ++ verbs_err(&ctx->ibv_ctx, "invalid comp_mask 0x%x.\n", ++ attr->comp_mask); ++ return ret; + } + + static int hns_roce_qp_has_rq(struct ibv_qp_init_attr_ex *attr) +@@ -1137,8 +1164,13 @@ static int verify_qp_create_cap(struct hns_roce_context *ctx, + if (cap->max_send_wr > ctx->max_qp_wr || + cap->max_recv_wr > ctx->max_qp_wr || + cap->max_send_sge > ctx->max_sge || +- cap->max_recv_sge > ctx->max_sge) ++ cap->max_recv_sge > ctx->max_sge) { ++ verbs_err(&ctx->ibv_ctx, ++ "invalid qp cap size, max_send/recv_wr = {%u, %u}, max_send/recv_sge = {%u, %u}.\n", ++ cap->max_send_wr, cap->max_recv_wr, ++ cap->max_send_sge, cap->max_recv_sge); + return -EINVAL; ++ } + + has_rq = hns_roce_qp_has_rq(attr); + if (!has_rq) { +@@ -1147,12 +1179,20 @@ static int verify_qp_create_cap(struct hns_roce_context 
*ctx, + } + + min_wqe_num = HNS_ROCE_V2_MIN_WQE_NUM; +- if (cap->max_send_wr < min_wqe_num) ++ if (cap->max_send_wr < min_wqe_num) { ++ verbs_debug(&ctx->ibv_ctx, ++ "change sq depth from %u to minimum %u.\n", ++ cap->max_send_wr, min_wqe_num); + cap->max_send_wr = min_wqe_num; ++ } + + if (cap->max_recv_wr) { +- if (cap->max_recv_wr < min_wqe_num) ++ if (cap->max_recv_wr < min_wqe_num) { ++ verbs_debug(&ctx->ibv_ctx, ++ "change rq depth from %u to minimum %u.\n", ++ cap->max_recv_wr, min_wqe_num); + cap->max_recv_wr = min_wqe_num; ++ } + + if (!cap->max_recv_sge) + return -EINVAL; +@@ -1646,6 +1686,11 @@ static int qp_exec_create_cmd(struct ibv_qp_init_attr_ex *attr, + ret = ibv_cmd_create_qp_ex2(&ctx->ibv_ctx.context, &qp->verbs_qp, attr, + &cmd_ex.ibv_cmd, sizeof(cmd_ex), + &resp_ex.ibv_resp, sizeof(resp_ex)); ++ if (ret) { ++ verbs_err(&ctx->ibv_ctx, ++ "failed to exec create qp cmd, ret = %d.\n", ret); ++ return ret; ++ } + + qp->flags = resp_ex.drv_payload.cap_flags; + *dwqe_mmap_key = resp_ex.drv_payload.dwqe_mmap_key; +@@ -1707,8 +1752,12 @@ static int mmap_dwqe(struct ibv_context *ibv_ctx, struct hns_roce_qp *qp, + { + qp->dwqe_page = mmap(NULL, HNS_ROCE_DWQE_PAGE_SIZE, PROT_WRITE, + MAP_SHARED, ibv_ctx->cmd_fd, dwqe_mmap_key); +- if (qp->dwqe_page == MAP_FAILED) ++ if (qp->dwqe_page == MAP_FAILED) { ++ verbs_err(verbs_get_ctx(ibv_ctx), ++ "failed to mmap direct wqe page, QPN = %u.\n", ++ qp->verbs_qp.qp.qp_num); + return -EINVAL; ++ } + + return 0; + } +-- +2.33.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 121f2f5..db8e113 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 50.0 -Release: 17 +Release: 18 Summary: RDMA core userspace libraries and daemons License: GPL-2.0-only OR BSD-2-Clause AND BSD-3-Clause Url:
https://github.com/linux-rdma/rdma-core
@@ -47,6 +47,7 @@ patch38: 0038-libhns-Fix-reference-to-uninitialized-cq-pointer.patch patch39: 0039-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch patch40: 0040-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch patch41: 0041-libhns-Fix-coredump-during-QP-destruction-when-send_.patch +patch42: 0042-libhns-Add-error-logs-to-help-diagnosis.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) systemd systemd-devel @@ -626,6 +627,12 @@ fi %doc %{_docdir}/%{name}-%{version}/70-persistent-ipoib.rules %changelog +* Fri Jan 3 2025 Xinghai Cen <cenxinghai(a)h-partners.com> - 50.0-18 +- Type: requirement +- ID: NA +- SUG: NA +- DESC: Add error logs to help diagnosis + * Thu Nov 28 2024 Xinghai Cen <cenxinghai(a)h-partners.com> - 50.0-17 - Type: bugfix - ID: NA -- 2.33.0
1
0
0
0
[PATCH] libhns: Fixed two bugs in libhns
by Chengchang Tang
03 Jan '25
03 Jan '25
From: Xinghai Cen <cenxinghai(a)h-partners.com> Fixed two bugs in libhns: libhns: Fix bypassed vendor check in hnsdv_query_device() libhns: Fix coredump during QP destruction when send_cq: == recv_cq Signed-off-by: Xinghai Cen <cenxinghai(a)h-partners.com> --- ...sed-vendor-check-in-hnsdv_query_devi.patch | 40 ++++++++++++++ ...ump-during-QP-destruction-when-send_.patch | 53 +++++++++++++++++++ rdma-core.spec | 10 +++- 3 files changed, 102 insertions(+), 1 deletion(-) create mode 100644 0089-libhns-Fix-bypassed-vendor-check-in-hnsdv_query_devi.patch create mode 100644 0090-libhns-Fix-coredump-during-QP-destruction-when-send_.patch diff --git a/0089-libhns-Fix-bypassed-vendor-check-in-hnsdv_query_devi.patch b/0089-libhns-Fix-bypassed-vendor-check-in-hnsdv_query_devi.patch new file mode 100644 index 0000000..14f2edc --- /dev/null +++ b/0089-libhns-Fix-bypassed-vendor-check-in-hnsdv_query_devi.patch @@ -0,0 +1,40 @@ +From 485cddd47c83d6f229450b28d55d8e07f60ddcc0 Mon Sep 17 00:00:00 2001 +From: Yuyu Li <liyuyu6(a)huawei.com> +Date: Thu, 21 Nov 2024 21:37:15 +0800 +Subject: [PATCH] libhns: Fix bypassed vendor check in hnsdv_query_device() + +driver inclusion +category: bugfix +bugzilla:
https://gitee.com/src-openeuler/rdma-core/issues/IBF87T
+ +-------------------------------------------------------------------------- + +The device vendor check is actually bypassed currently due +to the wrong if-condition. It should be a '||' statement. + +Fixes: 19e1eabc154f ("libhns: Add input parameter check for hnsdv_query_device()") +Signed-off-by: Yuyu Li <liyuyu6(a)huawei.com> +--- + providers/hns/hns_roce_u_verbs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/providers/hns/hns_roce_u_verbs.c b/providers/hns/hns_roce_u_verbs.c +index 090efbf..a6afce2 100644 +--- a/providers/hns/hns_roce_u_verbs.c ++++ b/providers/hns/hns_roce_u_verbs.c +@@ -128,10 +128,10 @@ int hnsdv_query_device(struct ibv_context *context, + struct hns_roce_context *ctx = context ? to_hr_ctx(context) : NULL; + struct hns_roce_device *hr_dev; + +- if (!ctx || !attrs_out) ++ if (!ctx || !context->device || !attrs_out) + return EINVAL; + +- if (!context->device && !is_hns_dev(context->device)) { ++ if (!is_hns_dev(context->device)) { + verbs_err(verbs_get_ctx(context), "not a HNS RoCE device!\n"); + return EOPNOTSUPP; + } +-- +2.33.0 + diff --git a/0090-libhns-Fix-coredump-during-QP-destruction-when-send_.patch b/0090-libhns-Fix-coredump-during-QP-destruction-when-send_.patch new file mode 100644 index 0000000..4909bd3 --- /dev/null +++ b/0090-libhns-Fix-coredump-during-QP-destruction-when-send_.patch @@ -0,0 +1,53 @@ +From ad5055f9b32ab0915803575385985fb10a29434a Mon Sep 17 00:00:00 2001 +From: Yuyu Li <liyuyu6(a)huawei.com> +Date: Mon, 25 Nov 2024 15:42:16 +0800 +Subject: [PATCH] libhns: Fix coredump during QP destruction when send_cq + == recv_cq + +driver inclusion +category: bugfix +bugzilla:
https://gitee.com/src-openeuler/rdma-core/issues/IBF87T
+ +-------------------------------------------------------------------------- + +If the specified send CQ and recv CQ are both +the same CQ, the QP node in SCQ is not deleted. +which causes a segfault to occur when recreating +the QP. Here fix it. + +coredump info: +0x0000ffff8fbc37d4 in list_add_before_ +0x0000ffff8fbc381c in list_add_tail_ +0x0000ffff8fbc9d9c in add_qp_to_cq_list +0x0000ffff8fbca008 in create_qp +0x0000ffff8fbca110 in hns_roce_u_create_qp +0x0000ffff8feae39c in __ibv_create_qp_1_1 +0x0000000000401420 in test_ctrl_path + +Fixes: 5bebdb5ba77b ("libhns: Support reporting wc as software mode") +Signed-off-by: Yuyu Li <liyuyu6(a)huawei.com> +--- + providers/hns/hns_roce_u_hw_v2.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c +index 8f071e1..48a7566 100644 +--- a/providers/hns/hns_roce_u_hw_v2.c ++++ b/providers/hns/hns_roce_u_hw_v2.c +@@ -2033,9 +2033,10 @@ static int hns_roce_u_v2_destroy_qp(struct ibv_qp *ibqp) + list_del(&qp->rcq_node); + } + +- if (ibqp->send_cq && ibqp->send_cq != ibqp->recv_cq) { +- __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, +- NULL); ++ if (ibqp->send_cq) { ++ if (ibqp->send_cq != ibqp->recv_cq) ++ __hns_roce_v2_cq_clean(to_hr_cq(ibqp->send_cq), ibqp->qp_num, ++ NULL); + list_del(&qp->scq_node); + } + +-- +2.33.0 + diff --git a/rdma-core.spec b/rdma-core.spec index 229810a..5e7be99 100644 --- a/rdma-core.spec +++ b/rdma-core.spec @@ -1,6 +1,6 @@ Name: rdma-core Version: 41.0 -Release: 28 +Release: 29 Summary: RDMA core userspace libraries and daemons License: GPLv2 or BSD Url:
https://github.com/linux-rdma/rdma-core
@@ -94,6 +94,8 @@ patch85: 0085-libhns-Fix-memory-leakage-when-DCA-is-enabled.patch patch86: 0086-libhns-Fix-the-exception-branch-of-wr_start-is-not-l.patch patch87: 0087-libhns-Fix-out-of-order-issue-of-requester-when-sett.patch patch88: 0088-libhns-Fix-reference-to-uninitialized-cq-pointer.patch +patch89: 0089-libhns-Fix-bypassed-vendor-check-in-hnsdv_query_devi.patch +patch90: 0090-libhns-Fix-coredump-during-QP-destruction-when-send_.patch BuildRequires: binutils cmake >= 2.8.11 gcc libudev-devel pkgconfig pkgconfig(libnl-3.0) BuildRequires: pkgconfig(libnl-route-3.0) valgrind-devel systemd systemd-devel @@ -341,6 +343,12 @@ fi %{_mandir}/* %changelog +* Thu Jan 2 2025 Xinghai Cen <cenxinghai(a)h-partners.com> - 41.0-29 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Fixed two bugs in libhns + * Thu Nov 21 2024 Wentao Hu <huwentao19(a)h-partners.com> - 41.0-28 - Type: bugfix - ID: NA -- 2.33.0
1
0
0
0
Results per page:
10
25
50
100
200