From: Ren Zhijie renzhijie2@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
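Add the BPF helper function bpf_init_cpu_topology(), which obtains CPU topology information through the topology_* macros defined in include/linux/topology.h and saves it in a BPF map. Also add the BPF helper function bpf_get_cpumask_info(), which fills a struct bpf_cpumask_info with the system's CPU masks and CPU counts.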
The CPU topology information is useful for selecting cores in userspace.
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/linux/bpf_topology.h | 46 ++++++++++++++++ include/uapi/linux/bpf.h | 14 +++++ kernel/sched/Makefile | 3 +- kernel/sched/bpf_sched.c | 8 +++ kernel/sched/bpf_topology.c | 97 ++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 4 ++ tools/include/uapi/linux/bpf.h | 14 +++++ 7 files changed, 185 insertions(+), 1 deletion(-) create mode 100644 include/linux/bpf_topology.h create mode 100644 kernel/sched/bpf_topology.c
diff --git a/include/linux/bpf_topology.h b/include/linux/bpf_topology.h new file mode 100644 index 000000000000..0c7ee492edde --- /dev/null +++ b/include/linux/bpf_topology.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BPF_TOPOLOGY_H +#define _LINUX_BPF_TOPOLOGY_H + +#include <linux/cpumask.h> + +struct bpf_cpu_topology { + int cpu; + int core_id; + int cluster_id; + int die_id; + int physical_package_id; + int numa_node; + struct cpumask thread_siblings; + struct cpumask core_siblings; + struct cpumask cluster_cpus; + struct cpumask die_cpus; + struct cpumask package_cpus; + struct cpumask node_cpu_lists; +}; + +struct bpf_cpumask_info { + unsigned int nums_possible_cpus; + unsigned int nums_active_cpus; + unsigned int nums_isolate_cpus; + unsigned int nr_cpu_ids; + unsigned int bpf_nr_cpumask_bits; + struct cpumask cpu_possible_cpumask; + struct cpumask cpu_active_cpumask; + struct cpumask cpu_isolate_cpumask; +}; + +#endif /* _LINUX_BPF_TOPOLOGY_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index b898cae70b0a..06ae8a7f9ef3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3846,6 +3846,18 @@ union bpf_attr { * Get *cpu* capacity and store in *ctx*. * Return * 0 on success, or a negative error in case of failure. 
+ * + * long bpf_init_cpu_topology(struct bpf_map *map, u64 flags) + * Description + * Initializing the cpu topology which used for bpf prog. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_cpumask_info(struct bpf_cpumask_info *cpus, int len) + * Description + * Get system cpus returned in *cpus*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4020,6 +4032,8 @@ union bpf_attr { FN(sched_cpu_nr_running_of), \ FN(sched_cpu_idle_stat_of), \ FN(sched_cpu_capacity_of), \ + FN(init_cpu_topology), \ + FN(get_cpumask_info), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 8ae9e39eb83a..c809d5c28424 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -36,4 +36,5 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_CPU_ISOLATION) += isolation.o obj-$(CONFIG_PSI) += psi.o -obj-$(CONFIG_BPF_SCHED) += bpf_sched.o \ No newline at end of file +obj-$(CONFIG_BPF_SCHED) += bpf_sched.o +obj-$(CONFIG_BPF_SCHED) += bpf_topology.o \ No newline at end of file diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index db2ca47f2937..6f2200170093 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -4,6 +4,7 @@ #include <linux/bpf_verifier.h> #include <linux/bpf_sched.h> #include <linux/btf_ids.h> +#include <linux/bpf_topology.h> #include "sched.h"
DEFINE_STATIC_KEY_FALSE(bpf_sched_enabled_key); @@ -26,6 +27,9 @@ BTF_SET_START(bpf_sched_hooks) #undef BPF_SCHED_HOOK BTF_SET_END(bpf_sched_hooks)
+const struct bpf_func_proto bpf_init_cpu_topology_proto __weak; +const struct bpf_func_proto bpf_get_cpumask_info_proto __weak; + int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, const struct bpf_prog *prog) { @@ -421,6 +425,10 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_cpu_idle_stat_of_proto; case BPF_FUNC_sched_cpu_capacity_of: return &bpf_sched_cpu_capacity_of_proto; + case BPF_FUNC_init_cpu_topology: + return &bpf_init_cpu_topology_proto; + case BPF_FUNC_get_cpumask_info: + return &bpf_get_cpumask_info_proto; default: return bpf_base_func_proto(func_id); } diff --git a/kernel/sched/bpf_topology.c b/kernel/sched/bpf_topology.c new file mode 100644 index 000000000000..9c2eda139e2a --- /dev/null +++ b/kernel/sched/bpf_topology.c @@ -0,0 +1,97 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/bpf_verifier.h> +#include <linux/topology.h> +#include <linux/cpumask.h> +#include <linux/bpf_topology.h> +#include <linux/sched/isolation.h> + +static void bpf_update_cpu_topology(struct bpf_cpu_topology *cpu_topology, int cpu) +{ + cpu_topology->cpu = cpu; + cpu_topology->core_id = topology_core_id(cpu); + cpu_topology->cluster_id = topology_cluster_id(cpu); + cpu_topology->die_id = topology_die_id(cpu); + cpu_topology->physical_package_id = topology_physical_package_id(cpu); + cpu_topology->numa_node = cpu_to_node(cpu); + cpumask_copy(&cpu_topology->thread_siblings, topology_sibling_cpumask(cpu)); + cpumask_copy(&cpu_topology->core_siblings, topology_core_cpumask(cpu)); + cpumask_copy(&cpu_topology->cluster_cpus, topology_cluster_cpumask(cpu)); + cpumask_copy(&cpu_topology->die_cpus, topology_die_cpumask(cpu)); + cpumask_copy(&cpu_topology->package_cpus, topology_core_cpumask(cpu)); + cpumask_copy(&cpu_topology->node_cpu_lists, cpumask_of_node(cpu_to_node(cpu))); +} + +BPF_CALL_2(bpf_init_cpu_topology, struct bpf_map *, map, u64, flags) +{ + const struct cpumask *cpu_map = cpu_active_mask; + int ret = 0; + int i = -1; + + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + for_each_cpu(i, cpu_map) { + struct bpf_cpu_topology topo; + + bpf_update_cpu_topology(&topo, i); + ret = map->ops->map_update_elem(map, &i, &topo, flags); + if (ret) { + int idx = i; + + for (; idx >= 0; idx--) + map->ops->map_delete_elem(map, &idx); + break; + } + } + + return ret; +} + +BTF_ID_LIST_SINGLE(bpf_cpu_topology_ids, struct, bpf_cpu_topology) + +const struct bpf_func_proto bpf_init_cpu_topology_proto = { + .func = bpf_init_cpu_topology, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_get_cpumask_info, struct bpf_cpumask_info *, cpus, + int, len) +{ + if (len != sizeof(*cpus)) + return -EINVAL; + + 
cpumask_copy(&cpus->cpu_possible_cpumask, cpu_possible_mask); + cpumask_copy(&cpus->cpu_active_cpumask, cpu_active_mask); + cpumask_copy(&cpus->cpu_isolate_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN)); + cpus->nums_possible_cpus = num_possible_cpus(); + cpus->nums_active_cpus = num_active_cpus(); + cpus->nums_isolate_cpus = cpumask_weight(&cpus->cpu_isolate_cpumask); + cpus->nr_cpu_ids = nr_cpu_ids; + cpus->bpf_nr_cpumask_bits = nr_cpumask_bits; + + return 0; +} + +const struct bpf_func_proto bpf_get_cpumask_info_proto = { + .func = bpf_get_cpumask_info, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index f2b5e63801ca..b99981bf62f2 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -441,6 +441,8 @@ class PrinterHelpers(Printer): 'struct bpf_sched_cpu_nr_running', 'struct bpf_sched_cpu_idle_stat', 'struct bpf_sched_cpu_capacity', + 'struct bpf_cpu_topology', + 'struct bpf_cpumask_info', ] known_types = { '...', @@ -490,6 +492,8 @@ class PrinterHelpers(Printer): 'struct bpf_sched_cpu_nr_running', 'struct bpf_sched_cpu_idle_stat', 'struct bpf_sched_cpu_capacity', + 'struct bpf_cpu_topology', + 'struct bpf_cpumask_info', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 97295cd863c4..b3be7de118d5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3846,6 +3846,18 @@ union bpf_attr { * Get *cpu* capacity and store in *ctx*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_init_cpu_topology(struct bpf_map *map, u64 flags) + * Description + * Initializing the cpu topology which used for bpf prog. + * Return + * 0 on success, or a negative error in case of failure. 
+ * + * int bpf_get_cpumask_info(struct bpf_cpumask_info *cpus, int len) + * Description + * Get system cpus returned in *cpus*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4020,6 +4032,8 @@ union bpf_attr { FN(sched_cpu_nr_running_of), \ FN(sched_cpu_idle_stat_of), \ FN(sched_cpu_capacity_of), \ + FN(init_cpu_topology), \ + FN(get_cpumask_info), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper