
From: Guofeng Yue <yueguofeng@h-partners.com> Sync some patches from mainline Signed-off-by: Guofeng Yue <yueguofeng@h-partners.com> --- ...replace-rand-with-getrandom-during-M.patch | 91 +++++++++++++++ ...m-buffer-initialization-optimization.patch | 82 ++++++++++++++ ...st-Fix-perform-warm-up-process-stuck.patch | 64 +++++++++++ ...lock-free-mode-not-working-for-SRQ-X.patch | 105 ++++++++++++++++++ 0020-Perftest-Fix-recv-cq-leak.patch | 54 +++++++++ perftest.spec | 13 ++- 6 files changed, 408 insertions(+), 1 deletion(-) create mode 100644 0016-Revert-Perftest-replace-rand-with-getrandom-during-M.patch create mode 100644 0017-Perftest-random-buffer-initialization-optimization.patch create mode 100644 0018-Perftest-Fix-perform-warm-up-process-stuck.patch create mode 100644 0019-Perftest-Fix-TD-lock-free-mode-not-working-for-SRQ-X.patch create mode 100644 0020-Perftest-Fix-recv-cq-leak.patch diff --git a/0016-Revert-Perftest-replace-rand-with-getrandom-during-M.patch b/0016-Revert-Perftest-replace-rand-with-getrandom-during-M.patch new file mode 100644 index 0000000..0551e6d --- /dev/null +++ b/0016-Revert-Perftest-replace-rand-with-getrandom-during-M.patch @@ -0,0 +1,91 @@ +From 454a41de4caa020a900eb9511fc49069ef10c53d Mon Sep 17 00:00:00 2001 +From: Guofeng Yue <yueguofeng@h-partners.com> +Date: Mon, 9 Jun 2025 14:51:20 +0800 +Subject: [PATCH 16/20] Revert "Perftest: replace rand() with getrandom() + during MR buffer initialization" + +This reverts commit 189406b72d9d94c3c95298ba65ad9ce4ae90405b. 
+--- + configure.ac | 1 - + src/perftest_resources.c | 31 +++++-------------------------- + 2 files changed, 5 insertions(+), 27 deletions(-) + +diff --git a/configure.ac b/configure.ac +index d976663..a756488 100755 +--- a/configure.ac ++++ b/configure.ac +@@ -60,7 +60,6 @@ AC_PROG_LIBTOOL + AC_PROG_RANLIB + AC_HEADER_STDC + AC_CHECK_HEADERS([infiniband/verbs.h],,[AC_MSG_ERROR([ibverbs header files not found])]) +-AC_CHECK_HEADERS([sys/random.h],,) + AC_CHECK_LIB([ibverbs], [ibv_get_device_list], [], [AC_MSG_ERROR([libibverbs not found])]) + AC_CHECK_LIB([rdmacm], [rdma_create_event_channel], [], AC_MSG_ERROR([librdmacm-devel not found])) + AC_CHECK_LIB([ibumad], [umad_init], [LIBUMAD=-libumad], AC_MSG_ERROR([libibumad not found])) +diff --git a/src/perftest_resources.c b/src/perftest_resources.c +index 843c45f..6609afc 100755 +--- a/src/perftest_resources.c ++++ b/src/perftest_resources.c +@@ -22,9 +22,6 @@ + #ifdef HAVE_CONFIG_H + #include <config.h> + #endif +-#ifdef HAVE_SYS_RANDOM_H +-#include <sys/random.h> +-#endif + #ifdef HAVE_SRD + #include <infiniband/efadv.h> + #endif +@@ -1604,33 +1601,12 @@ int create_cqs(struct pingpong_context *ctx, struct perftest_parameters *user_pa + return ret; + } + +-static void random_data(char *buf, int buff_size) +-{ +- int i; +-#ifdef HAVE_SYS_RANDOM_H +- char *tmp = buf; +- int ret; +- +- for(i = buff_size; i > 0;) { +- ret = getrandom(tmp, i, 0); +- if(ret < 0) +- goto fall_back; +- tmp += ret; +- i -= ret; +- } +- return; +-fall_back: +-#endif +- srand(time(NULL)); +- for (i = 0; i < buff_size; i++) +- buf[i] = (char)rand(); +-} +- + /****************************************************************************** + * + ******************************************************************************/ + int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *user_param, int qp_index) + { ++ int i; + int flags = IBV_ACCESS_LOCAL_WRITE; + + +@@ -1769,10 +1745,13 @@ int create_single_mr(struct 
pingpong_context *ctx, struct perftest_parameters *u + #ifdef HAVE_CUDA + if (!user_param->use_cuda) { + #endif ++ srand(time(NULL)); + if (user_param->verb == WRITE && user_param->tst == LAT) { + memset(ctx->buf[qp_index], 0, ctx->buff_size); + } else { +- random_data(ctx->buf[qp_index], ctx->buff_size); ++ for (i = 0; i < ctx->buff_size; i++) { ++ ((char*)ctx->buf[qp_index])[i] = (char)rand(); ++ } + } + #ifdef HAVE_CUDA + } +-- +2.33.0 + diff --git a/0017-Perftest-random-buffer-initialization-optimization.patch b/0017-Perftest-random-buffer-initialization-optimization.patch new file mode 100644 index 0000000..fd48325 --- /dev/null +++ b/0017-Perftest-random-buffer-initialization-optimization.patch @@ -0,0 +1,82 @@ +From eef2e242bf7db2879b7b87fb53312030513754b6 Mon Sep 17 00:00:00 2001 +From: Shmuel Shaul <sshaul@nvidia.com> +Date: Mon, 21 Apr 2025 14:58:47 +0300 +Subject: [PATCH 17/20] Perftest: random buffer initialization optimization + +Replace the standard rand() function with PCG32 algorithm in buffer +initialization +to improve performance. 
The PCG32 implementation: +- Generates 32-bit random numbers (0 to 4,294,967,295) +- Uses /dev/urandom for initial seeding with fallback to time+pid+clock +- Provides better performance than standard rand() +- Maintains good randomness properties + +Signed-off-by: Shmuel Shaul <sshaul@nvidia.com> +--- + src/perftest_resources.c | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +diff --git a/src/perftest_resources.c b/src/perftest_resources.c +index 6609afc..7c01da7 100755 +--- a/src/perftest_resources.c ++++ b/src/perftest_resources.c +@@ -38,6 +38,7 @@ static enum ibv_wr_opcode opcode_atomic_array[] = {IBV_WR_ATOMIC_CMP_AND_SWP,IBV + struct perftest_parameters* duration_param; + struct check_alive_data check_alive_data; + ++ + /****************************************************************************** + * Beginning + ******************************************************************************/ +@@ -320,6 +321,33 @@ static int pp_free_mmap(struct pingpong_context *ctx) + return 0; + } + ++static uint32_t perftest_rand(uint32_t *state) { ++ uint32_t x = *state; ++ *state = x * 747796405 + 2891336453; ++ uint32_t word = ((x >> ((x >> 28) + 4)) ^ x) * 277803737; ++ return (word >> 22) ^ word; ++ } ++ ++ // Proper initialization the rand algorithm ++ static uint32_t init_perftest_rand_state() { ++ uint32_t seed; ++ ++ FILE* f = fopen("/dev/urandom", "rb"); ++ if (f) { ++ if (fread(&seed, sizeof(seed), 1, f) == 1) { ++ fclose(f); ++ return seed; ++ } ++ fclose(f); ++ } ++ ++ seed = (uint32_t)time(NULL); ++ seed ^= (uint32_t)getpid(); ++ seed ^= (uint32_t)clock(); ++ ++ return seed; ++ } ++ + static int next_word_string(char* input, char* output, int from_index) + { + int i = from_index; +@@ -1745,12 +1773,12 @@ int create_single_mr(struct pingpong_context *ctx, struct perftest_parameters *u + #ifdef HAVE_CUDA + if (!user_param->use_cuda) { + #endif +- srand(time(NULL)); ++ uint32_t rng_state = init_perftest_rand_state(); + 
if (user_param->verb == WRITE && user_param->tst == LAT) { + memset(ctx->buf[qp_index], 0, ctx->buff_size); + } else { + for (i = 0; i < ctx->buff_size; i++) { +- ((char*)ctx->buf[qp_index])[i] = (char)rand(); ++ ((char*)ctx->buf[qp_index])[i] = (char)perftest_rand(&rng_state); + } + } + #ifdef HAVE_CUDA +-- +2.33.0 + diff --git a/0018-Perftest-Fix-perform-warm-up-process-stuck.patch b/0018-Perftest-Fix-perform-warm-up-process-stuck.patch new file mode 100644 index 0000000..8054456 --- /dev/null +++ b/0018-Perftest-Fix-perform-warm-up-process-stuck.patch @@ -0,0 +1,64 @@ +From eeb0572c2500ade41860dc9b2bb89619aa13b07a Mon Sep 17 00:00:00 2001 +From: Guofeng Yue <yueguofeng@h-partners.com> +Date: Tue, 15 Apr 2025 17:09:47 +0800 +Subject: [PATCH 18/20] Perftest: Fix perform warm up process stuck + +In perform_warm_up mode, if the length of post_list is 1 and the +message size is less than or equal to 8192, all send_flags in WRs +are 0 and CQEs will not be generated since IBV_SEND_SIGNALED is +not set. As a result, the perform_warm_up process will get stuck in +an infinite poll-CQ loop. + +Set IBV_SEND_SIGNALED in this case to request a CQE, and clear the +flag after post_send_method to avoid affecting subsequent tests. 
+ +Fixes: 56d025e4f19a ("Allow overriding CQ moderation on post list mode (#58)") +Signed-off-by: Guofeng Yue <yueguofeng@h-partners.com> +Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> +--- + src/perftest_resources.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/src/perftest_resources.c b/src/perftest_resources.c +index 7c01da7..d123e79 100755 +--- a/src/perftest_resources.c ++++ b/src/perftest_resources.c +@@ -3301,6 +3301,7 @@ int perform_warm_up(struct pingpong_context *ctx,struct perftest_parameters *use + struct ibv_wc *wc_for_cleaning = NULL; + int num_of_qps = user_param->num_of_qps; + int return_value = 0; ++ int set_signaled = 0; + + if(user_param->duplex && (user_param->use_xrc || user_param->connection_type == DC)) + num_of_qps /= 2; +@@ -3317,9 +3318,13 @@ int perform_warm_up(struct pingpong_context *ctx,struct perftest_parameters *use + ne = ibv_poll_cq(ctx->send_cq,user_param->tx_depth,wc_for_cleaning); + + for (index=0 ; index < num_of_qps ; index++) { ++ /* ask for completion on this wr */ ++ if (user_param->post_list == 1 && !(ctx->wr[index].send_flags & IBV_SEND_SIGNALED)) { ++ ctx->wr[index].send_flags |= IBV_SEND_SIGNALED; ++ set_signaled = 1; ++ } + + for (warmindex = 0 ;warmindex < warmupsession ;warmindex += user_param->post_list) { +- + err = post_send_method(ctx, index, user_param); + if (err) { + fprintf(stderr,"Couldn't post send during warm up: qp %d scnt=%d \n",index,warmindex); +@@ -3328,6 +3333,12 @@ int perform_warm_up(struct pingpong_context *ctx,struct perftest_parameters *use + } + } + ++ /* Clear the flag to avoid affecting subsequent tests. 
*/ ++ if (set_signaled) { ++ ctx->wr[index].send_flags &= ~IBV_SEND_SIGNALED; ++ set_signaled = 0; ++ } ++ + do { + + ne = ibv_poll_cq(ctx->send_cq,1,&wc); +-- +2.33.0 + diff --git a/0019-Perftest-Fix-TD-lock-free-mode-not-working-for-SRQ-X.patch b/0019-Perftest-Fix-TD-lock-free-mode-not-working-for-SRQ-X.patch new file mode 100644 index 0000000..5f7957f --- /dev/null +++ b/0019-Perftest-Fix-TD-lock-free-mode-not-working-for-SRQ-X.patch @@ -0,0 +1,105 @@ +From 68fd12d94e24a6cd250e682f8242d9f2be2d4ba5 Mon Sep 17 00:00:00 2001 +From: Guofeng Yue <yueguofeng@h-partners.com> +Date: Tue, 15 Apr 2025 17:09:46 +0800 +Subject: [PATCH 19/20] Perftest: Fix TD lock-free mode not working for SRQ/XRC + QP + +When creating SRQ/XRC QP in TD lock-free mode, pass in ctx->pad +instead of ctx->pd, otherwise lock-free mode won't work. + +Besides, use ctx->pad directly when creating QP/SRQ since pad +is designed to be interchangeable with the usual pd. When +lock-free mode is disabled, pad is exactly the usual pd. 
+ +Fixes: b6f957f6bc6c ("Perftest: Add support for TD lock-free mode") +Signed-off-by: Guofeng Yue <yueguofeng@h-partners.com> +Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> +--- + src/perftest_resources.c | 21 ++++++++++++--------- + src/perftest_resources.h | 2 +- + 2 files changed, 13 insertions(+), 10 deletions(-) + +diff --git a/src/perftest_resources.c b/src/perftest_resources.c +index d123e79..b388a45 100755 +--- a/src/perftest_resources.c ++++ b/src/perftest_resources.c +@@ -913,7 +913,8 @@ static int ctx_xrc_srq_create(struct pingpong_context *ctx, + else + srq_init_attr.cq = ctx->send_cq; + +- srq_init_attr.pd = ctx->pd; ++ srq_init_attr.pd = ctx->pad; ++ + ctx->srq = ibv_create_srq_ex(ctx->context, &srq_init_attr); + if (ctx->srq == NULL) { + fprintf(stderr, "Couldn't open XRC SRQ\n"); +@@ -956,7 +957,8 @@ static struct ibv_qp *ctx_xrc_qp_create(struct pingpong_context *ctx, + qp_init_attr.cap.max_send_wr = user_param->tx_depth; + qp_init_attr.cap.max_send_sge = 1; + qp_init_attr.comp_mask = IBV_QP_INIT_ATTR_PD; +- qp_init_attr.pd = ctx->pd; ++ qp_init_attr.pd = ctx->pad; ++ + #ifdef HAVE_IBV_WR_API + if (!user_param->use_old_post_send) + qp_init_attr.comp_mask |= IBV_QP_INIT_ATTR_SEND_OPS_FLAGS; +@@ -1994,6 +1996,10 @@ int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_para + fprintf(stderr, "Couldn't allocate PAD\n"); + return FAILURE; + } ++ } else { ++ #endif ++ ctx->pad = ctx->pd; ++ #ifdef HAVE_TD_API + } + #endif + +@@ -2111,7 +2117,7 @@ int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_para + attr.comp_mask = IBV_SRQ_INIT_ATTR_TYPE | IBV_SRQ_INIT_ATTR_PD; + attr.attr.max_wr = user_param->rx_depth; + attr.attr.max_sge = 1; +- attr.pd = ctx->pd; ++ attr.pd = ctx->pad; + + attr.srq_type = IBV_SRQT_BASIC; + ctx->srq = ibv_create_srq_ex(ctx->context, &attr); +@@ -2132,7 +2138,7 @@ int ctx_init(struct pingpong_context *ctx, struct perftest_parameters *user_para + .max_sge = 1 + } + }; +- 
ctx->srq = ibv_create_srq(ctx->pd, &attr); ++ ctx->srq = ibv_create_srq(ctx->pad, &attr); + if (!ctx->srq) { + fprintf(stderr, "Couldn't create SRQ\n"); + return FAILURE; +@@ -2319,11 +2325,8 @@ struct ibv_qp* ctx_qp_create(struct pingpong_context *ctx, + else if (opcode == IBV_WR_RDMA_READ) + attr_ex.send_ops_flags |= IBV_QP_EX_WITH_RDMA_READ; + } +- #ifdef HAVE_TD_API +- attr_ex.pd = user_param->no_lock ? ctx->pad : ctx->pd; +- #else +- attr_ex.pd = ctx->pd; +- #endif ++ ++ attr_ex.pd = ctx->pad; + attr_ex.comp_mask |= IBV_QP_INIT_ATTR_SEND_OPS_FLAGS | IBV_QP_INIT_ATTR_PD; + attr_ex.send_cq = attr.send_cq; + attr_ex.recv_cq = attr.recv_cq; +diff --git a/src/perftest_resources.h b/src/perftest_resources.h +index ba8630b..fb11d44 100755 +--- a/src/perftest_resources.h ++++ b/src/perftest_resources.h +@@ -172,8 +172,8 @@ struct pingpong_context { + struct ibv_pd *pd; + #ifdef HAVE_TD_API + struct ibv_td *td; +- struct ibv_pd *pad; + #endif ++ struct ibv_pd *pad; + struct ibv_mr **mr; + struct ibv_cq *send_cq; + struct ibv_cq *recv_cq; +-- +2.33.0 + diff --git a/0020-Perftest-Fix-recv-cq-leak.patch b/0020-Perftest-Fix-recv-cq-leak.patch new file mode 100644 index 0000000..9a06f3c --- /dev/null +++ b/0020-Perftest-Fix-recv-cq-leak.patch @@ -0,0 +1,54 @@ +From 7dc37bf199b64d9deb7ae041bc5c66819fdd6c32 Mon Sep 17 00:00:00 2001 +From: Junxian Huang <huangjunxian6@hisilicon.com> +Date: Thu, 21 Jul 2022 16:14:09 +0300 +Subject: [PATCH 20/20] Perftest: Fix recv-cq leak + +Perftest creates both send-cq and recv-cq but only destroys send-cq +on the SEND client. This further leads to failure in deallocating the parent +domain due to the pad refcount design in the driver: + +Failed to deallocate PAD - No data available +Failed to deallocate TD - No data available +Failed to deallocate PD - No data available + +The original mainline PR was: +https://github.com/linux-rdma/perftest/commit/869f96161be03850c9ace80bbac488... 
+ +Signed-off-by: Shmuel Shaul <sshaul@nvidia.com> +Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com> +--- + src/perftest_resources.c | 11 +++++------ + 1 file changed, 5 insertions(+), 6 deletions(-) + +diff --git a/src/perftest_resources.c b/src/perftest_resources.c +index b388a45..b6a0da6 100755 +--- a/src/perftest_resources.c ++++ b/src/perftest_resources.c +@@ -1253,6 +1253,7 @@ int destroy_ctx(struct pingpong_context *ctx, + int i, first, dereg_counter, rc; + int test_result = 0; + int num_of_qps = user_param->num_of_qps; ++ int dct_only = (user_param->machine == SERVER && !(user_param->duplex || user_param->tst == LAT)); + + if (user_param->wait_destroy) { + printf(" Waiting %u seconds before releasing resources...\n", +@@ -1347,12 +1348,10 @@ int destroy_ctx(struct pingpong_context *ctx, + test_result = 1; + } + +- if (user_param->verb == SEND && (user_param->tst == LAT || user_param->machine == SERVER || user_param->duplex || (ctx->channel)) ) { +- if (!(user_param->connection_type == DC && user_param->machine == SERVER)) { +- if (ibv_destroy_cq(ctx->recv_cq)) { +- fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno)); +- test_result = 1; +- } ++ if ((user_param->verb == SEND) || (user_param->connection_type == DC && !dct_only)){ ++ if (ibv_destroy_cq(ctx->recv_cq)) { ++ fprintf(stderr, "Failed to destroy CQ - %s\n", strerror(errno)); ++ test_result = 1; + } + } + +-- +2.33.0 + diff --git a/perftest.spec b/perftest.spec index 9aa4b46..cf405e8 100644 --- a/perftest.spec +++ b/perftest.spec @@ -1,6 +1,6 @@ Name: perftest Version: 4.5 -Release: 13 +Release: 14 License: GPL-2.0-only OR BSD-2-Clause Summary: RDMA Performance Testing Tools Url: https://github.com/linux-rdma/perftest @@ -21,6 +21,11 @@ Patch12: 0012-Perftest-Add-support-for-TD-lock-free-mode.patch Patch13: 0013-Perftest-Fix-TD-lock-free-mode-not-working-for-QP.patch Patch14: 0014-Perftest-Fix-failure-in-creating-cq-when-create-cq-e.patch Patch15: 
0015-Perftest-modify-source_ip-to-bind_sounce_ip-to-fix-i.patch +Patch16: 0016-Revert-Perftest-replace-rand-with-getrandom-during-M.patch +Patch17: 0017-Perftest-random-buffer-initialization-optimization.patch +Patch18: 0018-Perftest-Fix-perform-warm-up-process-stuck.patch +Patch19: 0019-Perftest-Fix-TD-lock-free-mode-not-working-for-SRQ-X.patch +Patch20: 0020-Perftest-Fix-recv-cq-leak.patch BuildRequires: automake gcc libibverbs-devel >= 1.2.0 librdmacm-devel >= 1.0.21 libibumad-devel >= 1.3.10.2 BuildRequires: pciutils-devel libibverbs librdmacm libibumad @@ -47,6 +52,12 @@ done %_bindir/* %changelog +* Tue Jun 10 2025 Guofeng Yue <yueguofeng@h-partners.com> - 4.5-14 +- Type: bugfix +- ID: NA +- SUG: NA +- DESC: Sync some patches from mainline + * Wed Mar 12 2025 Funda Wang <fundawang@yeah.net> - 4.5-13 - Type: bugfix - ID: NA -- 2.33.0