[PATCH OLK-6.6 00/10] OLK-6.6 Bperf

Ian Rogers (1): perf test bpf-counters: Add test for BPF event modifier Tengda Wu (2): perf stat: Support inherit events during fork() for bperf perf test: Use sqrtloop workload to test bperf event Tyrone Wu (1): selftests/bpf: fix perf_event link info name_len assertion Veronika Molnarova (1): perf test stat_bpf_counter.sh: Stabilize the test results Xiaomeng Zhang (5): perf stat: Increase perf_attr_map entries perf stat: Fix incorrect display of bperf when event count is 0 Revert "[Backport] selftests/bpf: fix perf_event link info name_len assertion" Revert "[Backport] selftests/bpf: Add cookies check for perf_event fill_link_info test" Revert "[Backport] selftests/bpf: Use bpf_link__destroy in fill_link_info tests" tools/lib/perf/include/perf/bpf_perf.h | 1 + tools/perf/builtin-stat.c | 1 + tools/perf/tests/shell/stat_bpf_counters.sh | 79 ++++++++++----- tools/perf/util/bpf_counter.c | 61 +++++++++--- tools/perf/util/bpf_skel/bperf_follower.bpf.c | 98 +++++++++++++++++-- tools/perf/util/bpf_skel/bperf_u.h | 5 + tools/perf/util/target.h | 1 + .../selftests/bpf/prog_tests/fill_link_info.c | 62 +++++------- 8 files changed, 220 insertions(+), 88 deletions(-) -- 2.34.1

FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing
list has been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/16628
Mailing list address:
https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/56I...

From: Tengda Wu <wutengda@huaweicloud.com>

bperf has a nice ability to share PMUs, but it still does not support
inheriting events across fork(), resulting in deviations in its stat
results compared with plain perf.

perf stat result:

$ ./perf stat -e cycles,instructions -- ./perf test -w sqrtloop

 Performance counter stats for './perf test -w sqrtloop':

     2,316,038,116      cycles
     2,859,350,725      instructions

       1.009603637 seconds time elapsed
       1.004196000 seconds user
       0.003950000 seconds sys

bperf stat result:

$ ./perf stat --bpf-counters -e cycles,instructions -- \
      ./perf test -w sqrtloop

 Performance counter stats for './perf test -w sqrtloop':

        18,762,093      cycles
        23,487,766      instructions

       1.008913769 seconds time elapsed
       1.003248000 seconds user
       0.004069000 seconds sys

In order to support event inheritance, two new bpf programs are added
to monitor the fork and exit of tasks, respectively. When a task is
created, add it to the filter map to enable counting, and reuse the
`accum_key` of its parent task so that parent and child are counted
together. When a task exits, remove it from the filter map to disable
counting.

After support:

$ ./perf stat --bpf-counters -e cycles,instructions -- \
      ./perf test -w sqrtloop

 Performance counter stats for './perf test -w sqrtloop':

     2,316,252,189      cycles
     2,859,946,547      instructions

       1.009422314 seconds time elapsed
       1.003597000 seconds user
       0.004270000 seconds sys

Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Cc: song@kernel.org
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20241021110201.325617-2-wutengda@huaweicloud.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/builtin-stat.c                     |  1 +
 tools/perf/util/bpf_counter.c                 | 35 +++++--
 tools/perf/util/bpf_skel/bperf_follower.bpf.c | 98 +++++++++++++++++--
 tools/perf/util/bpf_skel/bperf_u.h            |  5 +
 tools/perf/util/target.h                      |  1 +
 5 files changed, 126 insertions(+), 14 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9692ebdd7f11..712c170eb778 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2624,6 +2624,7 @@ int cmd_stat(int argc, const char **argv)
 	} else if (big_num_opt == 0) /* User passed --no-big-num */
 		stat_config.big_num = false;
 
+	target.inherit = !stat_config.no_inherit;
 	err = target__validate(&target);
 	if (err) {
 		target__strerror(&target, err, errbuf, BUFSIZ);
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 6732cbbcf9b3..8afa07917312 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -391,6 +391,7 @@ static int bperf_check_target(struct evsel *evsel,
 }
 
 static struct perf_cpu_map *all_cpu_map;
+static __u32 filter_entry_cnt;
 
 static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 				       struct perf_event_attr_map_entry *entry)
@@ -441,12 +442,32 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 	return err;
 }
 
+static int bperf_attach_follower_program(struct bperf_follower_bpf *skel,
+					 enum bperf_filter_type filter_type,
+					 bool inherit)
+{
+	struct bpf_link *link;
+	int err = 0;
+
+	if ((filter_type == BPERF_FILTER_PID ||
+	     filter_type == BPERF_FILTER_TGID) && inherit)
+		/* attach all follower bpf progs to enable event inheritance */
+		err = bperf_follower_bpf__attach(skel);
+	else {
+		link = bpf_program__attach(skel->progs.fexit_XXX);
+		if (IS_ERR(link))
+			err = PTR_ERR(link);
+	}
+
+	return err;
+}
+
 static int bperf__load(struct evsel *evsel, struct target *target)
 {
 	struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
 	int attr_map_fd, diff_map_fd = -1, err;
 	enum bperf_filter_type filter_type;
-	__u32 filter_entry_cnt, i;
+	__u32 i;
 
 	if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
 		return -1;
@@ -526,9 +547,6 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 	/* set up reading map */
 	bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings,
 				 filter_entry_cnt);
-	/* set up follower filter based on target */
-	bpf_map__set_max_entries(evsel->follower_skel->maps.filter,
-				 filter_entry_cnt);
 	err = bperf_follower_bpf__load(evsel->follower_skel);
 	if (err) {
 		pr_err("Failed to load follower skeleton\n");
@@ -540,6 +558,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 	for (i = 0; i < filter_entry_cnt; i++) {
 		int filter_map_fd;
 		__u32 key;
+		struct bperf_filter_value fval = { i, 0 };
 
 		if (filter_type == BPERF_FILTER_PID ||
 		    filter_type == BPERF_FILTER_TGID)
@@ -550,12 +569,14 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 			break;
 
 		filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter);
-		bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY);
+		bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY);
 	}
 
 	evsel->follower_skel->bss->type = filter_type;
+	evsel->follower_skel->bss->inherit = target->inherit;
 
-	err = bperf_follower_bpf__attach(evsel->follower_skel);
+	err = bperf_attach_follower_program(evsel->follower_skel, filter_type,
+					    target->inherit);
 
 out:
 	if (err && evsel->bperf_leader_link_fd >= 0)
@@ -620,7 +641,7 @@ static int bperf__read(struct evsel *evsel)
 	bperf_sync_counters(evsel);
 	reading_map_fd = bpf_map__fd(skel->maps.accum_readings);
 
-	for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) {
+	for (i = 0; i < filter_entry_cnt; i++) {
 		struct perf_cpu entry;
 		__u32 cpu;
diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
index f193998530d4..0595063139a3 100644
--- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c
+++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c
@@ -5,6 +5,8 @@
 #include <bpf/bpf_tracing.h>
 #include "bperf_u.h"
 
+#define MAX_ENTRIES 102400
+
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 	__uint(key_size, sizeof(__u32));
@@ -22,25 +24,29 @@ struct {
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bperf_filter_value));
+	__uint(max_entries, MAX_ENTRIES);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
 } filter SEC(".maps");
 
 enum bperf_filter_type type = 0;
 int enabled = 0;
+int inherit;
 
 SEC("fexit/XXX")
 int BPF_PROG(fexit_XXX)
 {
 	struct bpf_perf_event_value *diff_val, *accum_val;
 	__u32 filter_key, zero = 0;
-	__u32 *accum_key;
+	__u32 accum_key;
+	struct bperf_filter_value *fval;
 
 	if (!enabled)
 		return 0;
 
 	switch (type) {
 	case BPERF_FILTER_GLOBAL:
-		accum_key = &zero;
+		accum_key = zero;
 		goto do_add;
 	case BPERF_FILTER_CPU:
 		filter_key = bpf_get_smp_processor_id();
@@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX)
 		filter_key = bpf_get_current_pid_tgid() & 0xffffffff;
 		break;
 	case BPERF_FILTER_TGID:
-		filter_key = bpf_get_current_pid_tgid() >> 32;
+		/* Use pid as the filter_key to exclude new task counts
+		 * when inherit is disabled. Don't worry about the existing
+		 * children in TGID losing their counts, bpf_counter has
+		 * already added them to the filter map via perf_thread_map
+		 * before this bpf prog runs.
+		 */
+		filter_key = inherit ?
+			     bpf_get_current_pid_tgid() >> 32 :
+			     bpf_get_current_pid_tgid() & 0xffffffff;
 		break;
 	default:
 		return 0;
 	}
 
-	accum_key = bpf_map_lookup_elem(&filter, &filter_key);
-	if (!accum_key)
+	fval = bpf_map_lookup_elem(&filter, &filter_key);
+	if (!fval)
 		return 0;
 
+	accum_key = fval->accum_key;
+	if (fval->exited)
+		bpf_map_delete_elem(&filter, &filter_key);
+
 do_add:
 	diff_val = bpf_map_lookup_elem(&diff_readings, &zero);
 	if (!diff_val)
 		return 0;
 
-	accum_val = bpf_map_lookup_elem(&accum_readings, accum_key);
+	accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key);
 	if (!accum_val)
 		return 0;
 
@@ -75,4 +93,70 @@ int BPF_PROG(fexit_XXX)
 	return 0;
 }
 
+/* The program is only used for PID or TGID filter types. */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags)
+{
+	__u32 parent_key, child_key;
+	struct bperf_filter_value *parent_fval;
+	struct bperf_filter_value child_fval = { 0 };
+
+	if (!enabled)
+		return 0;
+
+	switch (type) {
+	case BPERF_FILTER_PID:
+		parent_key = bpf_get_current_pid_tgid() & 0xffffffff;
+		child_key = task->pid;
+		break;
+	case BPERF_FILTER_TGID:
+		parent_key = bpf_get_current_pid_tgid() >> 32;
+		child_key = task->tgid;
+		if (child_key == parent_key)
+			return 0;
+		break;
+	default:
+		return 0;
+	}
+
+	/* Check if the current task is one of the target tasks to be counted */
+	parent_fval = bpf_map_lookup_elem(&filter, &parent_key);
+	if (!parent_fval)
+		return 0;
+
+	/* Start counting for the new task by adding it into filter map,
+	 * inherit the accum key of its parent task so that they can be
+	 * counted together.
+	 */
+	child_fval.accum_key = parent_fval->accum_key;
+	child_fval.exited = 0;
+	bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST);
+
+	return 0;
+}
+
+/* The program is only used for PID or TGID filter types. */
+SEC("tp_btf/sched_process_exit")
+int BPF_PROG(on_exittask, struct task_struct *task)
+{
+	__u32 pid;
+	struct bperf_filter_value *fval;
+
+	if (!enabled)
+		return 0;
+
+	/* Stop counting for this task by removing it from filter map.
+	 * For TGID type, if the pid can be found in the map, it means that
+	 * this pid belongs to the leader task. After the task exits, the
+	 * tgid of its child tasks (if any) will be 1, so the pid can be
+	 * safely removed.
+	 */
+	pid = task->pid;
+	fval = bpf_map_lookup_elem(&filter, &pid);
+	if (fval)
+		fval->exited = 1;
+
+	return 0;
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h
index 1ce0c2c905c1..4a4a753980be 100644
--- a/tools/perf/util/bpf_skel/bperf_u.h
+++ b/tools/perf/util/bpf_skel/bperf_u.h
@@ -11,4 +11,9 @@ enum bperf_filter_type {
 	BPERF_FILTER_TGID,
 };
 
+struct bperf_filter_value {
+	__u32 accum_key;
+	__u8 exited;
+};
+
 #endif /* __BPERF_STAT_U_H */
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
index d582cae8e105..2ee2cc30340f 100644
--- a/tools/perf/util/target.h
+++ b/tools/perf/util/target.h
@@ -17,6 +17,7 @@ struct target {
 	bool	     default_per_cpu;
 	bool	     per_thread;
 	bool	     use_bpf;
+	bool	     inherit;
 	int	     initial_delay;
 	const char   *attr_map;
 };
--
2.34.1
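A quick way to exercise both paths locally is to toggle inheritance from
the command line. This is only a sketch (counts vary by machine), but
with the series applied, disabling inheritance should bring the numbers
back down to roughly the old follower-only values, since only the parent
task stays in the filter map:

  # inherit enabled (default): forked children are counted
  $ ./perf stat --bpf-counters -e cycles -- ./perf test -w sqrtloop

  # inherit disabled via --no-inherit: only the parent task is counted
  $ ./perf stat --no-inherit --bpf-counters -e cycles -- ./perf test -w sqrtloop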

From: Ian Rogers <irogers@google.com>

Refactor test to better enable sharing of logic, to give an idea of
progress and introduce test functions. Add test of measuring both
cycles and cycles:b simultaneously.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Song Liu <song@kernel.org>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20240416170014.985191-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/stat_bpf_counters.sh | 75 ++++++++++++++-------
 1 file changed, 52 insertions(+), 23 deletions(-)

diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index a87bb2814b4c..ac1035a56e05 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -4,21 +4,59 @@
 
 set -e
 
+workload="perf bench sched messaging -g 1 -l 100 -t"
+
 # check whether $2 is within +/- 10% of $1
 compare_number()
 {
-	first_num=$1
-	second_num=$2
-
-	# upper bound is first_num * 110%
-	upper=$(expr $first_num + $first_num / 10 )
-	# lower bound is first_num * 90%
-	lower=$(expr $first_num - $first_num / 10 )
-
-	if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
-		echo "The difference between $first_num and $second_num are greater than 10%."
-		exit 1
-	fi
+	first_num=$1
+	second_num=$2
+
+	# upper bound is first_num * 120%
+	upper=$(expr $first_num + $first_num / 5 )
+	# lower bound is first_num * 80%
+	lower=$(expr $first_num - $first_num / 5 )
+
+	if [ $second_num -gt $upper ] || [ $second_num -lt $lower ]; then
+		echo "The difference between $first_num and $second_num are greater than 20%."
+		exit 1
+	fi
+}
+
+check_counts()
+{
+	base_cycles=$1
+	bpf_cycles=$2
+
+	if [ "$base_cycles" = "<not" ]; then
+		echo "Skipping: cycles event not counted"
+		exit 2
+	fi
+	if [ "$bpf_cycles" = "<not" ]; then
+		echo "Failed: cycles not counted with --bpf-counters"
+		exit 1
+	fi
+}
+
+test_bpf_counters()
+{
+	printf "Testing --bpf-counters "
+	base_cycles=$(perf stat --no-big-num -e cycles -- $workload 2>&1 | awk '/cycles/ {print $1}')
+	bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- $workload 2>&1 | awk '/cycles/ {print $1}')
+	check_counts $base_cycles $bpf_cycles
+	compare_number $base_cycles $bpf_cycles
+	echo "[Success]"
+}
+
+test_bpf_modifier()
+{
+	printf "Testing bpf event modifier "
+	stat_output=$(perf stat --no-big-num -e cycles/name=base_cycles/,cycles/name=bpf_cycles/b -- $workload 2>&1)
+	base_cycles=$(echo "$stat_output"| awk '/base_cycles/ {print $1}')
+	bpf_cycles=$(echo "$stat_output"| awk '/bpf_cycles/ {print $1}')
+	check_counts $base_cycles $bpf_cycles
+	compare_number $base_cycles $bpf_cycles
+	echo "[Success]"
 }
 
 # skip if --bpf-counters is not supported
@@ -30,16 +68,7 @@ if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
 	exit 2
 fi
 
-base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$base_cycles" = "<not" ]; then
-	echo "Skipping: cycles event not counted"
-	exit 2
-fi
-bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$bpf_cycles" = "<not" ]; then
-	echo "Failed: cycles not counted with --bpf-counters"
-	exit 1
-fi
+test_bpf_counters
+test_bpf_modifier
 
-compare_number $base_cycles $bpf_cycles
 exit 0
--
2.34.1
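For reference, the `b` modifier exercised by the new test_bpf_modifier()
case asks perf to use a BPF-based counter for that one event, so a single
perf stat invocation can compare a regular counter against a bperf one. A
minimal manual run mirroring the test:

  $ perf stat --no-big-num \
        -e cycles/name=base_cycles/,cycles/name=bpf_cycles/b \
        -- perf bench sched messaging -g 1 -l 100 -t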

From: Veronika Molnarova <vmolnaro@redhat.com>

The test has been failing for some time when two separate runs of a perf
benchmark are recorded for the cycles event and their counts are
compared, once with the --bpf-counters option and once without it. The
two counts are expected to fall within a certain range of each other:
the allowed difference was first set to 10% and later raised to 20%.
However, the test case keeps failing on certain architectures, as
recording the provided benchmark can produce completely different counts
depending on the current load of the system.

Sampling two separate runs on intel-eaglestream-spr-13 of "perf stat
--no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t":

 Performance counter stats for 'perf bench sched messaging -g 1 -l 100 -t':

         396782898      cycles

       0.010051983 seconds time elapsed
       0.008664000 seconds user
       0.097058000 seconds sys

 Performance counter stats for 'perf bench sched messaging -g 1 -l 100 -t':

        1431133032      cycles

       0.021803714 seconds time elapsed
       0.023377000 seconds user
       0.349918000 seconds sys

which ranges from roughly 400 million to 1400 million cycles.

Instead of recording cycles, use the instructions event, which provides
much more stable values. At the same time, change the tested workload to
one of the testing workloads provided by perf that is not based on the
scheduler, removing another dependency on the current system load.

Sampling the instructions event with the new workload provides much more
stable results on intel-eaglestream-spr-13 of "perf stat --no-big-num -e
instructions -- perf test -w brstack":

 Performance counter stats for 'perf test -w brstack':

          64584494      instructions

       0.009173945 seconds time elapsed
       0.007262000 seconds user
       0.002071000 seconds sys

 Performance counter stats for 'perf test -w brstack':

          64672669      instructions

       0.008888135 seconds time elapsed
       0.005018000 seconds user
       0.004018000 seconds sys

Signed-off-by: Veronika Molnarova <vmolnaro@redhat.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: mpetlan@redhat.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240625092001.10909-1-vmolnaro@redhat.com
---
 tools/perf/tests/shell/stat_bpf_counters.sh | 36 ++++++++++-----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index ac1035a56e05..892ddccae1e0 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-workload="perf bench sched messaging -g 1 -l 100 -t"
+workload="perf test -w brstack"
 
 # check whether $2 is within +/- 10% of $1
 compare_number()
@@ -25,15 +25,15 @@ compare_number()
 
 check_counts()
 {
-	base_cycles=$1
-	bpf_cycles=$2
+	base_instructions=$1
+	bpf_instructions=$2
 
-	if [ "$base_cycles" = "<not" ]; then
-		echo "Skipping: cycles event not counted"
+	if [ "$base_instructions" = "<not" ]; then
+		echo "Skipping: instructions event not counted"
 		exit 2
 	fi
-	if [ "$bpf_cycles" = "<not" ]; then
-		echo "Failed: cycles not counted with --bpf-counters"
+	if [ "$bpf_instructions" = "<not" ]; then
+		echo "Failed: instructions not counted with --bpf-counters"
 		exit 1
 	fi
 }
@@ -41,29 +41,29 @@ check_counts()
 test_bpf_counters()
 {
 	printf "Testing --bpf-counters "
-	base_cycles=$(perf stat --no-big-num -e cycles -- $workload 2>&1 | awk '/cycles/ {print $1}')
-	bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- $workload 2>&1 | awk '/cycles/ {print $1}')
-	check_counts $base_cycles $bpf_cycles
-	compare_number $base_cycles $bpf_cycles
+	base_instructions=$(perf stat --no-big-num -e instructions -- $workload 2>&1 | awk '/instructions/ {print $1}')
+	bpf_instructions=$(perf stat --no-big-num --bpf-counters -e instructions -- $workload 2>&1 | awk '/instructions/ {print $1}')
+	check_counts $base_instructions $bpf_instructions
+	compare_number $base_instructions $bpf_instructions
 	echo "[Success]"
 }
 
 test_bpf_modifier()
 {
 	printf "Testing bpf event modifier "
-	stat_output=$(perf stat --no-big-num -e cycles/name=base_cycles/,cycles/name=bpf_cycles/b -- $workload 2>&1)
-	base_cycles=$(echo "$stat_output"| awk '/base_cycles/ {print $1}')
-	bpf_cycles=$(echo "$stat_output"| awk '/bpf_cycles/ {print $1}')
-	check_counts $base_cycles $bpf_cycles
-	compare_number $base_cycles $bpf_cycles
+	stat_output=$(perf stat --no-big-num -e instructions/name=base_instructions/,instructions/name=bpf_instructions/b -- $workload 2>&1)
+	base_instructions=$(echo "$stat_output"| awk '/base_instructions/ {print $1}')
+	bpf_instructions=$(echo "$stat_output"| awk '/bpf_instructions/ {print $1}')
+	check_counts $base_instructions $bpf_instructions
+	compare_number $base_instructions $bpf_instructions
 	echo "[Success]"
 }
 
 # skip if --bpf-counters is not supported
-if ! perf stat -e cycles --bpf-counters true > /dev/null 2>&1; then
+if ! perf stat -e instructions --bpf-counters true > /dev/null 2>&1; then
 	if [ "$1" = "-v" ]; then
 		echo "Skipping: --bpf-counters not supported"
-		perf --no-pager stat -e cycles --bpf-counters true || true
+		perf --no-pager stat -e instructions --bpf-counters true || true
 	fi
 	exit 2
 fi
--
2.34.1
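The run-to-run spread that motivated this change is easy to reproduce; a
simple sketch is to repeat the old baseline measurement a few times and
compare the printed counts:

  $ for i in 1 2 3 4 5; do
        perf stat --no-big-num -e cycles -- \
            perf bench sched messaging -g 1 -l 100 -t 2>&1 |
            awk '/cycles/ {print $1}'
    done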

From: Tengda Wu <wutengda@huaweicloud.com>

Replace `brstack` workload with `sqrtloop` workload, because
`sqrtloop` workload contains fork(), which is suitable for testing
the bperf event inheritance feature.

Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Cc: song@kernel.org
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20241021110201.325617-3-wutengda@huaweicloud.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/tests/shell/stat_bpf_counters.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh
index 892ddccae1e0..d5c135225cc1 100755
--- a/tools/perf/tests/shell/stat_bpf_counters.sh
+++ b/tools/perf/tests/shell/stat_bpf_counters.sh
@@ -4,7 +4,7 @@
 
 set -e
 
-workload="perf test -w brstack"
+workload="perf test -w sqrtloop"
 
 # check whether $2 is within +/- 10% of $1
 compare_number()
--
2.34.1
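The workloads are built into the perf binary itself, so the new one can
be run standalone; sqrtloop forks a child, which is exactly what the
inherit path needs to see, while brstack does not fork:

  $ perf test -w sqrtloop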

bperf restricts the size of perf_attr_map's entries to 16, which cannot
hold all events in many scenarios. A typical example is when the user
specifies `-a -ddd` ([0]), and other cases, such as top-down analysis,
often require more than 16 PMU events to be collected simultaneously.

Fix this by increasing the perf_attr_map entries to 100, and introduce
an event count check in bperf__load() so that users get a friendly
error message when the event limit is reached.

[0] https://lore.kernel.org/all/20230104064402.1551516-3-namhyung@kernel.org/

Fixes: 7fac83aaf2ee ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF")
Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Signed-off-by: Xiaomeng Zhang <zhangxiaomeng13@huawei.com>
---
 tools/perf/util/bpf_counter.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 8afa07917312..57fc6bab64ba 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -28,7 +28,7 @@
 #include "bpf_skel/bperf_leader.skel.h"
 #include "bpf_skel/bperf_follower.skel.h"
 
-#define ATTR_MAP_SIZE 16
+#define ATTR_MAP_SIZE 100
 
 static inline void *u64_to_ptr(__u64 ptr)
 {
@@ -469,6 +469,12 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 	enum bperf_filter_type filter_type;
 	__u32 i;
 
+	if (evsel->evlist->core.nr_entries > ATTR_MAP_SIZE) {
+		pr_err("Too many events, please limit to %d or less\n",
+		       ATTR_MAP_SIZE);
+		return -1;
+	}
+
 	if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt))
 		return -1;
--
2.34.1
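For scale, the `-a -ddd` case from [0] is an easy way to hit the old
limit, since three levels of -d expand the default event set well past
16 events. A hypothetical session (the exact event count depends on the
machine's PMU):

  $ perf stat -a -ddd --bpf-counters -- sleep 1

  # with more than 100 events, bperf now fails up front with:
  #   Too many events, please limit to 100 or less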

There are two possible reasons for an event count being 0: the event is
not supported, or it was not counted. Perf distinguishes between these
two possibilities through `evsel->supported`, but in bperf this value is
always false. This is because bperf breaks or continues prematurely in
the evlist__for_each_cpu loop under __run_perf_stat(), skipping the
`counter->supported` assignment, so bperf incorrectly displays
<not supported> when the count is 0.

The most direct fix would be to assign `evsel->supported` when opening
an event in bperf. However, since bperf only opens events when loading
the leader, follower sessions have no way of knowing whether the event
is supported. Therefore, store the `evsel->supported` value in a common
location, the `perf_event_attr_map`, so that event support is
synchronized across perf sessions.

Fixes: 7fac83aaf2ee ("perf stat: Introduce 'bperf' to share hardware PMCs with BPF")
Signed-off-by: Tengda Wu <wutengda@huaweicloud.com>
Signed-off-by: Xiaomeng Zhang <zhangxiaomeng13@huawei.com>
---
 tools/lib/perf/include/perf/bpf_perf.h |  1 +
 tools/perf/util/bpf_counter.c          | 18 ++++++++++--------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/tools/lib/perf/include/perf/bpf_perf.h b/tools/lib/perf/include/perf/bpf_perf.h
index e7cf6ba7b674..64c8d211726d 100644
--- a/tools/lib/perf/include/perf/bpf_perf.h
+++ b/tools/lib/perf/include/perf/bpf_perf.h
@@ -23,6 +23,7 @@ struct perf_event_attr_map_entry {
 	__u32 link_id;
 	__u32 diff_map_id;
+	__u8 supported;
 };
 
 /* default attr_map name */
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index 57fc6bab64ba..6e54b6318766 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -423,18 +423,19 @@ static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd,
 	diff_map_fd = bpf_map__fd(skel->maps.diff_readings);
 	entry->link_id = bpf_link_get_id(link_fd);
 	entry->diff_map_id = bpf_map_get_id(diff_map_fd);
-	err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
-	assert(err == 0);
-
-	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
-	assert(evsel->bperf_leader_link_fd >= 0);
-
 	/*
 	 * save leader_skel for install_pe, which is called within
 	 * following evsel__open_per_cpu call
 	 */
 	evsel->leader_skel = skel;
-	evsel__open_per_cpu(evsel, all_cpu_map, -1);
+	if (!evsel__open_per_cpu(evsel, all_cpu_map, -1))
+		entry->supported = true;
+
+	err = bpf_map_update_elem(attr_map_fd, &evsel->core.attr, entry, BPF_ANY);
+	assert(err == 0);
+
+	evsel->bperf_leader_link_fd = bpf_link_get_fd_by_id(entry->link_id);
+	assert(evsel->bperf_leader_link_fd >= 0);
 
 out:
 	bperf_leader_bpf__destroy(skel);
@@ -464,7 +465,7 @@ static int bperf_attach_follower_program(struct bperf_follower_bpf *skel,
 
 static int bperf__load(struct evsel *evsel, struct target *target)
 {
-	struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff};
+	struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff, false};
 	int attr_map_fd, diff_map_fd = -1, err;
 	enum bperf_filter_type filter_type;
 	__u32 i;
@@ -512,6 +513,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 		err = -1;
 		goto out;
 	}
+	evsel->supported = entry.supported;
 	/*
 	 * The bpf_link holds reference to the leader program, and the
 	 * leader program holds reference to the maps. Therefore, if
--
2.34.1
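The visible difference is in perf stat's right-hand annotation. A sketch
of the intended behavior after this patch (whether a given event opens
successfully depends on the hardware):

  $ perf stat --bpf-counters -e cycles -- true

  # an event that failed evsel__open_per_cpu() in the leader session is
  # shown as:   <not supported>
  # an event that opened fine but read 0 is shown as:   <not counted>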

Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBIYG4

--------------------------------

LTS commit 27ea1a6931df ("[Backport] selftests/bpf: fix perf_event link
info name_len assertion") was backported together with its stable
dependency patches:

- commit 3b090f4cd4d5 ("[Backport] selftests/bpf: Add cookies check for
  perf_event fill_link_info test")
- commit 0ed80b43ed34 ("[Backport] selftests/bpf: Use bpf_link__destroy
  in fill_link_info tests")

However, we do not need these two dependency patches, and they cause a
build error in the bpf selftests. So revert both of them and re-adapt
the target commit.

Fixes: 27ea1a6931df ("[Backport] selftests/bpf: fix perf_event link info name_len assertion")
Signed-off-by: Xiaomeng Zhang <zhangxiaomeng13@huawei.com>
---
 tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index e0208b0e53f1..e81f4d114c7a 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -65,9 +65,8 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 		ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie");
 
-		ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1,
-			  "name_len");
 		if (!info.perf_event.kprobe.func_name) {
+			ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len");
 			info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
 			info.perf_event.kprobe.name_len = sizeof(buf);
 			goto again;
@@ -78,9 +77,8 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 		ASSERT_EQ(err, 0, "cmp_kprobe_func_name");
 		break;
 	case BPF_PERF_EVENT_TRACEPOINT:
-		ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1,
-			  "name_len");
 		if (!info.perf_event.tracepoint.tp_name) {
+			ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len");
 			info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf);
 			info.perf_event.tracepoint.name_len = sizeof(buf);
 			goto again;
@@ -96,9 +94,8 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 	case BPF_PERF_EVENT_URETPROBE:
 		ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
 
-		ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
-			  "name_len");
 		if (!info.perf_event.uprobe.file_name) {
+			ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len");
 			info.perf_event.uprobe.file_name = ptr_to_u64(&buf);
 			info.perf_event.uprobe.name_len = sizeof(buf);
 			goto again;
--
2.34.1

Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBIYG4

--------------------------------

LTS commit 27ea1a6931df ("[Backport] selftests/bpf: fix perf_event link
info name_len assertion") was backported together with its stable
dependency patches:

- commit 3b090f4cd4d5 ("[Backport] selftests/bpf: Add cookies check for
  perf_event fill_link_info test")
- commit 0ed80b43ed34 ("[Backport] selftests/bpf: Use bpf_link__destroy
  in fill_link_info tests")

However, we do not need these two dependency patches, and they cause a
build error in the bpf selftests. So revert both of them and re-adapt
the target commit.

Fixes: 3b090f4cd4d5 ("[Backport] selftests/bpf: Add cookies check for perf_event fill_link_info test")
Signed-off-by: Xiaomeng Zhang <zhangxiaomeng13@huawei.com>
---
 .../selftests/bpf/prog_tests/fill_link_info.c | 26 ++++---------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index e81f4d114c7a..d07ac9f3de78 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -30,8 +30,6 @@ static noinline void uprobe_func(void)
 	asm volatile ("");
 }
 
-#define PERF_EVENT_COOKIE 0xdeadbeef
-
 static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
 				 ssize_t offset, ssize_t entry_offset)
 {
@@ -63,8 +61,6 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 		ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset,
 			  "kprobe_addr");
 
-		ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie");
-
 		if (!info.perf_event.kprobe.func_name) {
 			ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len");
 			info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
@@ -84,8 +80,6 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 			goto again;
 		}
 
-		ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie");
-
 		err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME,
 			      strlen(TP_NAME));
 		ASSERT_EQ(err, 0, "cmp_tp_name");
@@ -101,8 +95,6 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 			goto again;
 		}
 
-		ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie");
-
 		err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE,
 			      strlen(UPROBE_FILE));
 		ASSERT_EQ(err, 0, "cmp_file_name");
@@ -146,7 +138,6 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
 	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
 		.attach_mode = PROBE_ATTACH_MODE_LINK,
 		.retprobe = type == BPF_PERF_EVENT_KRETPROBE,
-		.bpf_cookie = PERF_EVENT_COOKIE,
 	);
 	ssize_t entry_offset = 0;
 	struct bpf_link *link;
@@ -175,13 +166,10 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
 
 static void test_tp_fill_link_info(struct test_fill_link_info *skel)
 {
-	DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts,
-		.bpf_cookie = PERF_EVENT_COOKIE,
-	);
 	struct bpf_link *link;
 	int link_fd, err;
 
-	link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts);
+	link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME);
 	if (!ASSERT_OK_PTR(link, "attach_tp"))
 		return;
 
@@ -194,17 +182,13 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel)
 static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
 				       enum bpf_perf_event_type type)
 {
-	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
-		.retprobe = type == BPF_PERF_EVENT_URETPROBE,
-		.bpf_cookie = PERF_EVENT_COOKIE,
-	);
 	struct bpf_link *link;
 	int link_fd, err;
 
-	link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
-					       0, /* self pid */
-					       UPROBE_FILE, uprobe_offset,
-					       &opts);
+	link = bpf_program__attach_uprobe(skel->progs.uprobe_run,
+					  type == BPF_PERF_EVENT_URETPROBE,
+					  0, /* self pid */
+					  UPROBE_FILE, uprobe_offset);
 	if (!ASSERT_OK_PTR(link, "attach_uprobe"))
 		return;
--
2.34.1

Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBIYG4

--------------------------------

LTS commit 27ea1a6931df ("[Backport] selftests/bpf: fix perf_event link
info name_len assertion") was backported together with its stable
dependency patches:

- commit 3b090f4cd4d5 ("[Backport] selftests/bpf: Add cookies check for
  perf_event fill_link_info test")
- commit 0ed80b43ed34 ("[Backport] selftests/bpf: Use bpf_link__destroy
  in fill_link_info tests")

However, we do not need these two dependency patches, and they cause a
build error in the bpf selftests. So revert both of them and re-adapt
the target commit.

Fixes: 0ed80b43ed34 ("[Backport] selftests/bpf: Use bpf_link__destroy in fill_link_info tests")
Signed-off-by: Xiaomeng Zhang <zhangxiaomeng13@huawei.com>
---
 .../selftests/bpf/prog_tests/fill_link_info.c | 44 +++++++++----------
 1 file changed, 21 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index d07ac9f3de78..c5a51138a376 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -140,14 +140,14 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
 		.retprobe = type == BPF_PERF_EVENT_KRETPROBE,
 	);
 	ssize_t entry_offset = 0;
-	struct bpf_link *link;
 	int link_fd, err;
 
-	link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts);
-	if (!ASSERT_OK_PTR(link, "attach_kprobe"))
+	skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run,
+								 KPROBE_FUNC, &opts);
+	if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe"))
 		return;
 
-	link_fd = bpf_link__fd(link);
+	link_fd = bpf_link__fd(skel->links.kprobe_run);
 	if (!invalid) {
 		/* See also arch_adjust_kprobe_addr(). */
 		if (skel->kconfig->CONFIG_X86_KERNEL_IBT)
@@ -161,41 +161,39 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
 	} else {
 		kprobe_fill_invalid_user_buffer(link_fd);
 	}
-	bpf_link__destroy(link);
+	bpf_link__detach(skel->links.kprobe_run);
 }
 
 static void test_tp_fill_link_info(struct test_fill_link_info *skel)
 {
-	struct bpf_link *link;
 	int link_fd, err;
 
-	link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME);
-	if (!ASSERT_OK_PTR(link, "attach_tp"))
+	skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME);
+	if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp"))
 		return;
 
-	link_fd = bpf_link__fd(link);
+	link_fd = bpf_link__fd(skel->links.tp_run);
 	err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0);
 	ASSERT_OK(err, "verify_perf_link_info");
-	bpf_link__destroy(link);
+	bpf_link__detach(skel->links.tp_run);
 }
 
 static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
 				       enum bpf_perf_event_type type)
 {
-	struct bpf_link *link;
 	int link_fd, err;
 
-	link = bpf_program__attach_uprobe(skel->progs.uprobe_run,
-					  type == BPF_PERF_EVENT_URETPROBE,
-					  0, /* self pid */
-					  UPROBE_FILE, uprobe_offset);
-	if (!ASSERT_OK_PTR(link, "attach_uprobe"))
+	skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run,
+							    type == BPF_PERF_EVENT_URETPROBE,
+							    0, /* self pid */
+							    UPROBE_FILE, uprobe_offset);
+	if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe"))
 		return;
 
-	link_fd = bpf_link__fd(link);
+	link_fd = bpf_link__fd(skel->links.uprobe_run);
 	err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
 	ASSERT_OK(err, "verify_perf_link_info");
-	bpf_link__destroy(link);
+	bpf_link__detach(skel->links.uprobe_run);
 }
 
 static int verify_kmulti_link_info(int fd, bool retprobe)
@@ -284,24 +282,24 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
 					     bool retprobe, bool invalid)
 {
 	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
-	struct bpf_link *link;
 	int link_fd, err;
 
 	opts.syms = kmulti_syms;
 	opts.cnt = KMULTI_CNT;
 	opts.retprobe = retprobe;
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts);
-	if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
+	skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run,
+								       NULL, &opts);
+	if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi"))
 		return;
 
-	link_fd = bpf_link__fd(link);
+	link_fd = bpf_link__fd(skel->links.kmulti_run);
 	if (!invalid) {
 		err = verify_kmulti_link_info(link_fd, retprobe);
 		ASSERT_OK(err, "verify_kmulti_link_info");
 	} else {
 		verify_kmulti_invalid_user_buffer(link_fd);
 	}
-	bpf_link__destroy(link);
+	bpf_link__detach(skel->links.kmulti_run);
 }
 
 void test_fill_link_info(void)
--
2.34.1

From: Tyrone Wu <wudevelops@gmail.com>

mainline inclusion
from mainline-v6.12-rc4
commit 4538a38f654a1c292fe489a9b66179262bfed088
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IBIYG4
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

Fix the `name_len` field assertions in `bpf_link_info.perf_event` for
kprobe/uprobe/tracepoint to validate the correct name size instead of 0.

Fixes: 23cf7aa539dc ("selftests/bpf: Add selftest for fill_link_info")
Signed-off-by: Tyrone Wu <wudevelops@gmail.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Yafang Shao <laoar.shao@gmail.com>
Link: https://lore.kernel.org/r/20241008164312.46269-2-wudevelops@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

Conflicts:
	tools/testing/selftests/bpf/prog_tests/fill_link_info.c
[The conflicts were due to a minor issue.]

Signed-off-by: Xiaomeng Zhang <zhangxiaomeng13@huawei.com>
---
 tools/testing/selftests/bpf/prog_tests/fill_link_info.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index c5a51138a376..86d133805f4d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -61,8 +61,9 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 		ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset,
 			  "kprobe_addr");
 
+		ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1,
+			  "name_len");
 		if (!info.perf_event.kprobe.func_name) {
-			ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len");
 			info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
 			info.perf_event.kprobe.name_len = sizeof(buf);
 			goto again;
@@ -73,8 +74,9 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 		ASSERT_EQ(err, 0, "cmp_kprobe_func_name");
 		break;
 	case BPF_PERF_EVENT_TRACEPOINT:
+		ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1,
+			  "name_len");
 		if (!info.perf_event.tracepoint.tp_name) {
-			ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len");
 			info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf);
 			info.perf_event.tracepoint.name_len = sizeof(buf);
 			goto again;
@@ -88,8 +90,9 @@ static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long add
 	case BPF_PERF_EVENT_URETPROBE:
 		ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
 
+		ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
+			  "name_len");
 		if (!info.perf_event.uprobe.file_name) {
-			ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len");
 			info.perf_event.uprobe.file_name = ptr_to_u64(&buf);
 			info.perf_event.uprobe.name_len = sizeof(buf);
 			goto again;
--
2.34.1
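The corrected assertions encode the kernel's actual contract: name_len
reports the full name size (including the trailing NUL) even on the
first query, before a user buffer has been supplied. The same link info
the test asserts on can be eyeballed from userspace with bpftool, which
prints the probed function/tracepoint/file name for perf_event links
(output shape is illustrative, not verbatim):

  $ bpftool link show
  $ bpftool link show -j    # JSON output includes the perf_event details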