From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5RMFU
CVE: NA
--------------------------------
1. Change the argument type of 'bpf_get_cpumask_info' so that the bpf
   program stack does not exceed 512 bytes.
2. Fix a back-edge error in the sample 'sched_select_core'.
3. Fix a "loop too complex" error in the sample 'sched_select_core'.
Changes in v2: Move cpu initialization out of the for loop.
Fixes: 1bf0417b95a9 ("sched: programmable: Add helper function for cpu topo...")
Fixes: 2c1189e32bf9 ("samples:bpf: Add samples for cfs select core")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
---
 include/uapi/linux/bpf.h             |  2 +-
 kernel/sched/bpf_topology.c          |  9 ++-
 samples/bpf/sched_select_core_kern.c | 32 ++++++++---
 tools/include/uapi/linux/bpf.h       |  2 +-
 tools/lib/bpf/libbpf_sched.h         | 86 +++++++++++++++++++++-------
 5 files changed, 96 insertions(+), 35 deletions(-)
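For context on item 1 above, a minimal sketch (not taken from the patch) of the
pattern it relies on: a one-entry per-CPU array map holds the large struct so
it never sits on the 512-byte BPF program stack, and loops are bounded by an
explicit constant so the verifier can prove termination. The names
'struct big_info', 'scratch_map', 'LOOP_MAX' and 'count_nonzero_bytes' are
hypothetical stand-ins for bpf_cpumask_info, map_cpumask_info,
BPF_SCHED_LOOP_MAX and the libbpf_sched.h helpers:

/* Sketch only: keep a large struct in map memory instead of on the
 * 512-byte BPF stack, and bound loops with an explicit constant.
 * All names below are hypothetical stand-ins, not part of the patch.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

#define LOOP_MAX 1024	/* a larger bound may make the verifier reject the program */

struct big_info {
	__u8 payload[1024];	/* well over the 512-byte stack limit */
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, __u32);
	__type(value, struct big_info);
	__uint(max_entries, 1);
} scratch_map SEC(".maps");

static __always_inline int count_nonzero_bytes(void)
{
	struct big_info *info;
	__u32 key = 0;
	int i, n = 0;

	/* The lookup returns a pointer into map memory, so the large
	 * struct is never copied onto the program stack. */
	info = bpf_map_lookup_elem(&scratch_map, &key);
	if (!info)
		return -1;

	/* Explicitly bounded loop; an open-ended walk would be rejected. */
	for (i = 0; i < LOOP_MAX && i < sizeof(info->payload); i++)
		if (info->payload[i])
			n++;

	return n;
}

A per-CPU array keeps the scratch value private to each CPU and the lookup
O(1); the only cost is the extra bpf_map_lookup_elem() call on every use.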
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index db585d960d64..7e474c6fc4a7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3903,7 +3903,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_cpumask_info(struct bpf_cpumask_info *cpus, int len)
+ * int bpf_get_cpumask_info(struct bpf_map *map, struct bpf_cpumask_info *cpus)
  *	Description
  *		Get system cpus returned in *cpus*.
  *	Return
diff --git a/kernel/sched/bpf_topology.c b/kernel/sched/bpf_topology.c
index 9c2eda139e2a..843b6092a64f 100644
--- a/kernel/sched/bpf_topology.c
+++ b/kernel/sched/bpf_topology.c
@@ -70,10 +70,9 @@ const struct bpf_func_proto bpf_init_cpu_topology_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
-BPF_CALL_2(bpf_get_cpumask_info, struct bpf_cpumask_info *, cpus,
-	   int, len)
+BPF_CALL_2(bpf_get_cpumask_info, struct bpf_map *, map, struct bpf_cpumask_info *, cpus)
 {
-	if (len != sizeof(*cpus))
+	if (!cpus)
 		return -EINVAL;
 
 	cpumask_copy(&cpus->cpu_possible_cpumask, cpu_possible_mask);
@@ -92,6 +91,6 @@ const struct bpf_func_proto bpf_get_cpumask_info_proto = {
 	.func		= bpf_get_cpumask_info,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
-	.arg2_type	= ARG_CONST_SIZE,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_MAP_VALUE_OR_NULL,
 };
diff --git a/samples/bpf/sched_select_core_kern.c b/samples/bpf/sched_select_core_kern.c
index 18617e89b395..30765ae65f85 100644
--- a/samples/bpf/sched_select_core_kern.c
+++ b/samples/bpf/sched_select_core_kern.c
@@ -62,7 +62,7 @@ struct tag_info {
 
 struct tag_info tag_tbl[] = {
 	{TAG_NONE, ""},
-	{TAG_ID(1), "0-3"},
+	{TAG_ID(1), "0-1"},
 	{TAG_ID(2), "4-7"},
 	{TAG_MAX, ""},
 };
@@ -94,13 +94,17 @@ static struct cpumask *select_better_cpus(struct task_struct *p,
 	long min_util = INT_MIN;
 	struct task_group *tg;
 	long spare;
-	int cpu;
+	int cpu, i;
 
 	if (!prefer_cpus_valid(prefer_cpus, (void *)getVal(p->cpus_ptr)))
 		return (void *)getVal(p->cpus_ptr);
 
 	tg = p->sched_task_group;
-	libbpf_for_each_cpu(cpu, prefer_cpus) {
+	for (i = 0, cpu = -1; i < BPF_SCHED_LOOP_MAX; i++) {
+		cpu = libbpf_cpumask_next(cpu, (void *)getVal(prefer_cpus));
+		if (cpu >= libbpf_nr_cpus_ids())
+			break;
+
 		if (idlest_cpu && libbpf_available_idle_cpu(cpu)) {
 			*idlest_cpu = cpu;
 		} else if (idlest_cpu) {
@@ -159,9 +163,14 @@ int BPF_PROG(cfs_select_cpu_range, struct sched_migrate_ctx *h_ctx)
 SEC("sched/cfs_select_rq_exit")
 int BPF_PROG(cfs_select_cpu_range_exit, struct sched_migrate_ctx *h_ctx)
 {
+	struct task_struct *p = getVal(h_ctx->task);
+	long tag = getVal(p->tag);
 	int *idlest_cpu;
 	int key = 0;
 
+	if (tag <= TAG_NONE || tag >= TAG_MAX)
+		return SELECT_RQ_EXIT_CPU_VALID;
+
 	idlest_cpu = bpf_map_lookup_elem(&map_idlest_cpu, &key);
 	if (!idlest_cpu) {
 		libbpf_sched_set_task_cpus_ptr(h_ctx, (void *)getVal(h_ctx->cpus_allowed));
@@ -186,7 +195,7 @@ static int find_idlest_cpu(struct task_struct *p, int parent)
 	int cpu;
 	int i;
 
-	for (i = 0, cpu = -1; i < NR_CPUS; i++) {
+	for (i = 0, cpu = -1; i < BPF_SCHED_LOOP_MAX; i++) {
 		cpu = libbpf_cpumask_next(cpu, (void *)getVal(p->cpus_ptr));
 		if (cpu >= libbpf_nr_cpus_ids())
 			break;
@@ -203,17 +212,26 @@ static int find_idlest_cpu(struct task_struct *p, int parent)
 
 static int select_idle_cpu(struct task_struct *p, int parent, int prev_cpu)
 {
-	int cpu;
+	int cpu, i;
 
 	if (libbpf_available_idle_cpu(prev_cpu))
 		return prev_cpu;
 
 	if (libbpf_available_idle_cpu(parent))
-		return prev_cpu;
+		return parent;
+
+	cpu = libbpf_cpumask_next_wrap(prev_cpu - 1,
+				       (void *)getVal(p->cpus_ptr),
+				       prev_cpu, false);
+	for (i = 0; i < BPF_SCHED_LOOP_MAX; i++) {
+		if (cpu >= libbpf_nr_cpumask_bits())
+			break;
 
-	libbpf_for_each_cpu_wrap(cpu, (void *)getVal(p->cpus_ptr), prev_cpu) {
 		if (libbpf_available_idle_cpu(cpu))
 			return cpu;
+
+		cpu = libbpf_cpumask_next_wrap(cpu, (void *)getVal(p->cpus_ptr),
+					       prev_cpu, true);
 	}
 
 	return prev_cpu;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 57b927e99092..22f7880a0bb6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4613,7 +4613,7 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
- * int bpf_get_cpumask_info(struct bpf_cpumask_info *cpus, int len)
+ * int bpf_get_cpumask_info(struct bpf_map *map, struct bpf_cpumask_info *cpus)
  *	Description
  *		Get system cpus returned in *cpus*.
  *	Return
diff --git a/tools/lib/bpf/libbpf_sched.h b/tools/lib/bpf/libbpf_sched.h
index 6cb30e8e81f8..187e854b99b3 100644
--- a/tools/lib/bpf/libbpf_sched.h
+++ b/tools/lib/bpf/libbpf_sched.h
@@ -21,6 +21,8 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
+/* set bigger value may lead verifier failed */
+#define BPF_SCHED_LOOP_MAX 1024
 #define INVALID_PTR ((void *)(0UL))
 #define getVal(P) \
 ({ \
@@ -69,6 +71,13 @@ static __always_inline int libbpf_nr_cpumask_bits(void);
 
 #endif
 
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, struct bpf_cpumask_info);
+	__uint(max_entries, 1);
+} map_cpumask_info SEC(".maps");
+
 static __always_inline long libbpf_cpumask_copy(struct cpumask *dst,
 						struct cpumask *src)
 {
@@ -228,58 +237,93 @@ static __always_inline long libbpf_cpumask_cpulist_parse(char *src1,
 
 static __always_inline int libbpf_num_active_cpus(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.nums_active_cpus);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->nums_active_cpus);
 }
 
 static __always_inline int libbpf_num_possible_cpus(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.nums_possible_cpus);
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
+
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->nums_possible_cpus);
 }
 
 static __always_inline void libbpf_possible_cpus_mask(struct cpumask *mask)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	libbpf_cpumask_copy(mask, &cpus.cpu_possible_cpumask);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	libbpf_cpumask_copy(mask, &cpus->cpu_possible_cpumask);
 }
 
 static __always_inline void libbpf_active_cpus_mask(struct cpumask *mask)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	libbpf_cpumask_copy(mask, &cpus.cpu_active_cpumask);
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return;
+
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	libbpf_cpumask_copy(mask, &cpus->cpu_active_cpumask);
 }
 
 static __always_inline void libbpf_isolate_cpus_mask(struct cpumask *mask)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	libbpf_cpumask_copy(mask, &cpus.cpu_isolate_cpumask);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	libbpf_cpumask_copy(mask, &cpus->cpu_isolate_cpumask);
 }
 
 static __always_inline int libbpf_nr_cpus_ids(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.nr_cpu_ids);
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
+
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->nr_cpu_ids);
 }
 
 static __always_inline int libbpf_nr_cpumask_bits(void)
 {
-	struct bpf_cpumask_info cpus;
+	struct bpf_cpumask_info *cpus;
+	int key = 0;
+
+	cpus = bpf_map_lookup_elem(&map_cpumask_info, &key);
+	if (!cpus)
+		return -1;
 
-	bpf_get_cpumask_info(&cpus, sizeof(cpus));
-	return getVal(cpus.bpf_nr_cpumask_bits);
+	bpf_get_cpumask_info(&map_cpumask_info, cpus);
+	return getVal(cpus->bpf_nr_cpumask_bits);
 }
 
 static __always_inline unsigned long libbpf_cfs_load_avg_of(int cpu)