From: Hui Tang <tanghui20@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIT1
CVE: NA
--------------------------------
Support kfuncs for BPF_PROG_TYPE_SCHED.

Add helper functions to get CPU statistics, as follows:
1. acquire cfs/rt/irq CPU load statistics.
2. acquire multiple types of nr_running statistics.
3. acquire CPU idle statistics.
4. acquire CPU capacity.

Based on CPU statistics in different dimensions, specific scheduling
policies can be implemented in BPF programs; see the usage sketch below.
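For example, a program attached to a scheduler hook could use
bpf_sched_cpu_stats_of() to pick the CPU with the fewest runnable tasks.
A minimal sketch (the section name, program signature, and CPU loop bound
are illustrative assumptions; only the kfunc declarations come from this
patch; assumes vmlinux.h and bpf_helpers.h are included):

	extern struct bpf_sched_cpu_stats *bpf_sched_cpustats_create(void) __ksym;
	extern void bpf_sched_cpustats_release(struct bpf_sched_cpu_stats *cpustats) __ksym;
	extern s32 bpf_sched_cpu_stats_of(int cpuid, struct bpf_sched_cpu_stats *ctx) __ksym;

	SEC("sched")	/* hypothetical BPF_PROG_TYPE_SCHED hook */
	int pick_least_loaded(void *ctx)
	{
		struct bpf_sched_cpu_stats *stats;
		unsigned int min_nr = (unsigned int)-1;
		int cpu, best = 0;

		/* Acquired reference; must be released before returning. */
		stats = bpf_sched_cpustats_create();
		if (!stats)
			return 0;

		for (cpu = 0; cpu < 8; cpu++) {	/* fixed bound keeps the verifier happy */
			if (bpf_sched_cpu_stats_of(cpu, stats))
				continue;
			if (stats->nr_running < min_nr) {
				min_nr = stats->nr_running;
				best = cpu;
			}
		}

		bpf_sched_cpustats_release(stats);
		return best;
	}

What the hook's return value means is up to the hook's contract; the point
here is the create/query/release pattern.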
Signed-off-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
---
 include/linux/sched.h    |  20 ++++++
 kernel/bpf/btf.c         |   3 +
 kernel/sched/bpf_sched.c | 140 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 163 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b8be76b0c120..d65ec3179225 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2557,6 +2557,26 @@ struct sched_migrate_node {
 	int dst_cpu;
 	int dst_node;
 };
+
+struct bpf_sched_cpu_stats {
+	refcount_t usage;
+	unsigned int nr_running;
+	unsigned int cfs_nr_running;
+	unsigned int cfs_h_nr_running;
+	unsigned int cfs_idle_h_nr_running;
+	unsigned int rt_nr_running;
+	unsigned int rr_nr_running;
+
+	KABI_RESERVE(1)
+	KABI_RESERVE(2)
+	KABI_RESERVE(3)
+	KABI_RESERVE(4)
+	KABI_RESERVE(5)
+	KABI_RESERVE(6)
+	KABI_RESERVE(7)
+	KABI_RESERVE(8)
+};
+
 #endif
 
 #ifdef CONFIG_SCHED_CORE
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index e197476f3c28..7a82194ad51d 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -217,6 +217,7 @@ enum btf_kfunc_hook {
 	BTF_KFUNC_HOOK_SOCKET_FILTER,
 	BTF_KFUNC_HOOK_LWT,
 	BTF_KFUNC_HOOK_NETFILTER,
+	BTF_KFUNC_HOOK_SCHED,
 	BTF_KFUNC_HOOK_MAX,
 };
@@ -7864,6 +7865,8 @@ static int bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
 		return BTF_KFUNC_HOOK_LWT;
 	case BPF_PROG_TYPE_NETFILTER:
 		return BTF_KFUNC_HOOK_NETFILTER;
+	case BPF_PROG_TYPE_SCHED:
+		return BTF_KFUNC_HOOK_SCHED;
 	default:
 		return BTF_KFUNC_HOOK_MAX;
 	}
diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c
index e2525bd60abf..7eb3be52110f 100644
--- a/kernel/sched/bpf_sched.c
+++ b/kernel/sched/bpf_sched.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bpf.h>
+#include <linux/bpf_mem_alloc.h>
 #include <linux/cgroup.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bpf_sched.h>
@@ -62,3 +63,142 @@ const struct bpf_verifier_ops bpf_sched_verifier_ops = {
 	.get_func_proto = bpf_sched_func_proto,
 	.is_valid_access = btf_ctx_access,
 };
+
+static struct bpf_mem_alloc bpf_cpustats_ma;
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in BTF");
+
+/**
+ * bpf_sched_cpustats_create() - Create a mutable BPF cpustats context.
+ *
+ * Allocates a cpustats context that can be queried, mutated, acquired, and
+ * released by a BPF program. The cpustats context returned by this function
+ * must either be embedded in a map as a kptr, or freed with
+ * bpf_sched_cpustats_release().
+ *
+ * bpf_sched_cpustats_create() allocates memory using the BPF memory allocator,
+ * and will not block. It may return NULL if no memory is available.
+ */
+__bpf_kfunc struct bpf_sched_cpu_stats *bpf_sched_cpustats_create(void)
+{
+	struct bpf_sched_cpu_stats *cpustats;
+
+	cpustats = bpf_mem_cache_alloc(&bpf_cpustats_ma);
+	if (!cpustats)
+		return NULL;
+
+	memset(cpustats, 0, sizeof(*cpustats));
+	refcount_set(&cpustats->usage, 1);
+
+	return cpustats;
+}
+
+/**
+ * bpf_sched_cpustats_acquire() - Acquire a reference to a BPF cpustats.
+ * @cpustats: The BPF cpustats being acquired. The cpustats must be a trusted
+ *	      pointer.
+ *
+ * Acquires a reference to a BPF cpustats. The cpustats returned by this
+ * function must either be embedded in a map as a kptr, or freed with
+ * bpf_sched_cpustats_release().
+ */
+__bpf_kfunc struct bpf_sched_cpu_stats *bpf_sched_cpustats_acquire(
+		struct bpf_sched_cpu_stats *cpustats)
+{
+	refcount_inc(&cpustats->usage);
+	return cpustats;
+}
+
+/**
+ * bpf_sched_cpustats_release() - Release a previously acquired BPF cpustats.
+ * @cpustats: The cpustats being released.
+ *
+ * Releases a previously acquired reference to a BPF cpustats. When the final
+ * reference of the BPF cpustats has been released, it is subsequently freed in
+ * an RCU callback in the BPF memory allocator.
+ */
+__bpf_kfunc void
+bpf_sched_cpustats_release(struct bpf_sched_cpu_stats *cpustats)
+{
+	if (!refcount_dec_and_test(&cpustats->usage))
+		return;
+
+	migrate_disable();
+	bpf_mem_cache_free_rcu(&bpf_cpustats_ma, cpustats);
+	migrate_enable();
+}
+
+/**
+ * bpf_sched_cpu_stats_of() - Get cpu sched statistics.
+ * @cpuid: CPU ID, input.
+ * @ctx: The buffer where the cpu statistics are stored.
+ *
+ * Return:
+ * 0 - Success.
+ * <0 - Fail.
+ */
+__bpf_kfunc s32 bpf_sched_cpu_stats_of(int cpuid,
+				       struct bpf_sched_cpu_stats *ctx)
+{
+	struct rq *rq;
+	int cpu = cpuid;
+
+	if (!ctx)
+		return -EINVAL;
+
+	if ((unsigned int)cpu >= nr_cpu_ids)
+		return -EINVAL;
+
+	rq = cpu_rq(cpu);
+	memset(ctx, 0, sizeof(*ctx));
+
+	SCHED_WARN_ON(!rcu_read_lock_held());
+	/* nr_running */
+	ctx->nr_running = rq->nr_running;
+	ctx->cfs_nr_running = rq->cfs.nr_running;
+	ctx->cfs_h_nr_running = rq->cfs.h_nr_running;
+	ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running;
+	ctx->rt_nr_running = rq->rt.rt_nr_running;
+	ctx->rr_nr_running = rq->rt.rr_nr_running;
+
+	return 0;
+}
+
+__diag_pop();
+
+BTF_SET8_START(sched_cpustats_kfunc_btf_ids)
+BTF_ID_FLAGS(func, bpf_sched_cpustats_create, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_sched_cpustats_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_sched_cpustats_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_sched_cpu_stats_of, KF_RCU)
+BTF_SET8_END(sched_cpustats_kfunc_btf_ids)
+
+static const struct btf_kfunc_id_set cpustats_kfunc_set = {
+	.owner = THIS_MODULE,
+	.set = &sched_cpustats_kfunc_btf_ids,
+};
+
+BTF_ID_LIST(cpustats_dtor_ids)
+BTF_ID(struct, bpf_sched_cpu_stats)
+BTF_ID(func, bpf_sched_cpustats_release)
+
+static int __init bpf_kfunc_init(void)
+{
+	int ret;
+	const struct btf_id_dtor_kfunc cpustats_dtors[] = {
+		{
+			.btf_id = cpustats_dtor_ids[0],
+			.kfunc_btf_id = cpustats_dtor_ids[1]
+		},
+	};
+
+	ret = bpf_mem_alloc_init(&bpf_cpustats_ma,
+				 sizeof(struct bpf_sched_cpu_stats), false);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL,
+					       &cpustats_kfunc_set);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED,
+					       &cpustats_kfunc_set);
+	return ret ?: register_btf_id_dtor_kfuncs(cpustats_dtors,
+						  ARRAY_SIZE(cpustats_dtors),
+						  THIS_MODULE);
+}
+late_initcall(bpf_kfunc_init);
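
A note on the lifetime rules documented above: a cpustats pointer returned
by bpf_sched_cpustats_create() or bpf_sched_cpustats_acquire() must either
be passed to bpf_sched_cpustats_release() or stashed in a map as a kptr.
A minimal sketch of the kptr path (map name, value layout, and the helper
stash_cpustats() are illustrative; assumes vmlinux.h and bpf_helpers.h):

	struct stats_slot {
		struct bpf_sched_cpu_stats __kptr *stats;
	};

	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__uint(max_entries, 1);
		__type(key, int);
		__type(value, struct stats_slot);
	} stats_map SEC(".maps");

	extern void bpf_sched_cpustats_release(struct bpf_sched_cpu_stats *cpustats) __ksym;

	/* Move ownership of @stats into the map; drop whatever was there. */
	static int stash_cpustats(struct bpf_sched_cpu_stats *stats)
	{
		struct bpf_sched_cpu_stats *old;
		struct stats_slot *slot;
		int key = 0;

		slot = bpf_map_lookup_elem(&stats_map, &key);
		if (!slot) {
			bpf_sched_cpustats_release(stats);
			return -1;
		}

		old = bpf_kptr_xchg(&slot->stats, stats);
		if (old)
			bpf_sched_cpustats_release(old);
		return 0;
	}

The dtor registered in bpf_kfunc_init() via register_btf_id_dtor_kfuncs()
is what allows a cpustats still stashed in a map to be freed when the map
itself is destroyed.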