From: Guan Jing guanjing6@huawei.com
maillist inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIT1
Reference: https://lore.kernel.org/all/20210916162451.709260-1-guro@fb.com/
-------------------
This commit introduces basic definitions and infrastructure for scheduler bpf programs. It defines the BPF_PROG_TYPE_SCHED program type and the BPF_SCHED attachment type.
The implementation is inspired by lsm bpf programs and is based on kretprobes. This will allow to add new hooks with a minimal changes to the kernel code and without any changes to libbpf/bpftool. It's very convenient as I anticipate a large number of private patches being used for a long time before (or if at all) reaching upstream.
Sched programs are expected to return an int, which meaning will be context defined.
This patch doesn't add any real scheduler hooks (only a stub), it will be done by following patches in the series.
Scheduler bpf programs as now are very restricted in what they can do: only the bpf_printk() helper is available. The scheduler context can impose significant restrictions on what's safe and what's not. So let's extend their abilities on case by case basis when a need arise.
Signed-off-by: Roman Gushchin guro@fb.com Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com Signed-off-by: Guan Jing guanjing6@huawei.com --- include/linux/bpf_sched.h | 26 ++++++++++++++ include/linux/bpf_types.h | 4 +++ include/linux/sched_hook_defs.h | 2 ++ include/uapi/linux/bpf.h | 2 ++ kernel/bpf/btf.c | 3 ++ kernel/bpf/syscall.c | 30 ++++++++++++++++ kernel/bpf/trampoline.c | 3 ++ kernel/bpf/verifier.c | 23 ++++++++++++ kernel/sched/bpf_sched.c | 62 +++++++++++++++++++++++++++++++++ kernel/sched/build_utility.c | 4 +++ tools/include/uapi/linux/bpf.h | 2 ++ tools/lib/bpf/bpf.c | 1 + 12 files changed, 162 insertions(+) create mode 100644 include/linux/bpf_sched.h create mode 100644 include/linux/sched_hook_defs.h create mode 100644 kernel/sched/bpf_sched.c
diff --git a/include/linux/bpf_sched.h b/include/linux/bpf_sched.h new file mode 100644 index 000000000000..874393e6a6aa --- /dev/null +++ b/include/linux/bpf_sched.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_BPF_SCHED_H +#define _LINUX_BPF_SCHED_H + +#include <linux/bpf.h> + +#ifdef CONFIG_BPF_SCHED + +#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) \ + RET bpf_sched_##NAME(__VA_ARGS__); +#include <linux/sched_hook_defs.h> +#undef BPF_SCHED_HOOK + +int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog); + +#else /* !CONFIG_BPF_SCHED */ + +static inline int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_BPF_SCHED */ +#endif /* _LINUX_BPF_SCHED_H */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index fc0d6f32c687..dd79463eea4e 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -83,6 +83,10 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_SYSCALL, bpf_syscall, BPF_PROG_TYPE(BPF_PROG_TYPE_NETFILTER, netfilter, struct bpf_nf_ctx, struct bpf_nf_ctx) #endif +#ifdef CONFIG_BPF_SCHED +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED, bpf_sched, + void *, void *) +#endif /* CONFIG_BPF_SCHED */
BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h new file mode 100644 index 000000000000..14344004e335 --- /dev/null +++ b/include/linux/sched_hook_defs.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +BPF_SCHED_HOOK(int, 0, dummy, void) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4924f0cde1bc..9dd0b85549b6 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -988,6 +988,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + BPF_PROG_TYPE_SCHED, };
enum bpf_attach_type { @@ -1040,6 +1041,7 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_SCHED, __MAX_BPF_ATTACH_TYPE };
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 8090d7fb11ef..b8aa9e66e356 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -5982,6 +5982,9 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return true; t = btf_type_by_id(btf, t->type); break; +#ifdef CONFIG_BPF_SCHED + case BPF_SCHED: +#endif case BPF_MODIFY_RETURN: /* For now the BPF_MODIFY_RETURN can only be attached to * functions that return an int. diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index d77b2f8b9364..793dfbced1ac 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2412,6 +2412,9 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_STRUCT_OPS: case BPF_PROG_TYPE_EXT: +#ifdef CONFIG_BPF_SCHED + case BPF_PROG_TYPE_SCHED: +#endif break; default: return -EINVAL; @@ -2539,6 +2542,9 @@ static bool is_perfmon_prog_type(enum bpf_prog_type prog_type) case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */ case BPF_PROG_TYPE_EXT: /* extends any prog */ +#ifdef CONFIG_BPF_SCHED + case BPF_PROG_TYPE_SCHED: +#endif return true; default: return false; @@ -3115,6 +3121,14 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, goto out_put_prog; } break; +#ifdef CONFIG_BPF_SCHED + case BPF_PROG_TYPE_SCHED: + if (prog->expected_attach_type != BPF_SCHED) { + err = -EINVAL; + goto out_put_prog; + } + break; +#endif default: err = -EINVAL; goto out_put_prog; @@ -3582,6 +3596,9 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog, case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_EXT: case BPF_PROG_TYPE_LSM: +#ifdef CONFIG_BPF_SCHED + case BPF_PROG_TYPE_SCHED: +#endif if (user_tp_name) /* The attach point for this category of programs * should be specified via btf_id during program load. @@ -3717,6 +3734,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_TCX_INGRESS: case BPF_TCX_EGRESS: return BPF_PROG_TYPE_SCHED_CLS; +#ifdef CONFIG_BPF_SCHED + case BPF_SCHED: + return BPF_PROG_TYPE_SCHED; +#endif default: return BPF_PROG_TYPE_UNSPEC; } @@ -3744,6 +3765,12 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, -EINVAL : 0; case BPF_PROG_TYPE_EXT: return 0; +#ifdef CONFIG_BPF_SCHED + case BPF_PROG_TYPE_SCHED: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +#endif case BPF_PROG_TYPE_NETFILTER: if (attach_type != BPF_NETFILTER) return -EINVAL; @@ -4922,6 +4949,9 @@ static int link_create(union bpf_attr *attr, bpfptr_t uattr) ret = cgroup_bpf_link_attach(attr, prog); break; case BPF_PROG_TYPE_EXT: +#ifdef CONFIG_BPF_SCHED + case BPF_PROG_TYPE_SCHED: +#endif ret = bpf_tracing_prog_attach(prog, attr->link_create.target_fd, attr->link_create.target_btf_id, diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index e97aeda3a86b..e5b97eb226e8 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -493,6 +493,9 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: return BPF_TRAMP_FENTRY; +#ifdef CONFIG_BPF_SCHED + case BPF_SCHED: +#endif case BPF_MODIFY_RETURN: return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 824531d4c262..d3090e8778e1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -28,6 +28,10 @@ #include <linux/cpumask.h> #include <net/xdp.h>
+#ifdef CONFIG_BPF_SCHED +#include <linux/bpf_sched.h> +#endif + #include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { @@ -19453,6 +19457,9 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, case BPF_LSM_CGROUP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: +#ifdef CONFIG_BPF_SCHED + case BPF_SCHED: +#endif if (!btf_type_is_func(t)) { bpf_log(log, "attach_btf_id %u is not a function\n", btf_id); @@ -19627,10 +19634,18 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) return check_struct_ops_btf_id(env);
+#ifdef CONFIG_BPF_SCHED + if (prog->type != BPF_PROG_TYPE_TRACING && + prog->type != BPF_PROG_TYPE_LSM && + prog->type != BPF_PROG_TYPE_EXT && + prog->type != BPF_PROG_TYPE_SCHED) + return 0; +#else if (prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_EXT) return 0; +#endif
ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info); if (ret) @@ -19673,6 +19688,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return -EINVAL; }
+#ifdef CONFIG_BPF_SCHED + if (prog->type == BPF_PROG_TYPE_SCHED) { + ret = bpf_sched_verify_prog(&env->log, prog); + if (ret < 0) + return ret; + } +#endif + key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id); tr = bpf_trampoline_get(key, &tgt_info); if (!tr) diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c new file mode 100644 index 000000000000..2360404d4a07 --- /dev/null +++ b/kernel/sched/bpf_sched.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/cgroup.h> +#include <linux/bpf_verifier.h> +#include <linux/bpf_sched.h> +#include <linux/btf_ids.h> +#include "sched.h" + +/* + * For every hook declare a nop function where a BPF program can be attached. + */ +#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) \ +noinline RET bpf_sched_##NAME(__VA_ARGS__) \ +{ \ + return DEFAULT; \ +} + +#include <linux/sched_hook_defs.h> +#undef BPF_SCHED_HOOK + +#define BPF_SCHED_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_sched_##NAME) +BTF_SET_START(bpf_sched_hooks) +#include <linux/sched_hook_defs.h> +#undef BPF_SCHED_HOOK +BTF_SET_END(bpf_sched_hooks) + +int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, + const struct bpf_prog *prog) +{ + if (!prog->gpl_compatible) { + bpf_log(vlog, + "sched programs must have a GPL compatible license\n"); + return -EINVAL; + } + + if (!btf_id_set_contains(&bpf_sched_hooks, prog->aux->attach_btf_id)) { + bpf_log(vlog, "attach_btf_id %u points to wrong type name %s\n", + prog->aux->attach_btf_id, prog->aux->attach_func_name); + return -EINVAL; + } + + return 0; +} + +static const struct bpf_func_proto * +bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); + default: + return bpf_base_func_proto(func_id); + } +} + +const struct bpf_prog_ops bpf_sched_prog_ops = { +}; + +const struct bpf_verifier_ops bpf_sched_verifier_ops = { + .get_func_proto = bpf_sched_func_proto, + .is_valid_access = btf_ctx_access, +}; diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c index 99bdd96f454f..d44c584d9bc7 100644 --- a/kernel/sched/build_utility.c +++ b/kernel/sched/build_utility.c @@ -108,3 +108,7 @@ #ifdef CONFIG_SCHED_AUTOGROUP # include "autogroup.c" #endif + +#ifdef CONFIG_BPF_SCHED +# include "bpf_sched.c" +#endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4924f0cde1bc..9dd0b85549b6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -988,6 +988,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + BPF_PROG_TYPE_SCHED, };
enum bpf_attach_type { @@ -1040,6 +1041,7 @@ enum bpf_attach_type { BPF_TCX_INGRESS, BPF_TCX_EGRESS, BPF_TRACE_UPROBE_MULTI, + BPF_SCHED, __MAX_BPF_ATTACH_TYPE };
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index b0f1913763a3..ddbb16be651f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -781,6 +781,7 @@ int bpf_link_create(int prog_fd, int target_fd, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: + case BPF_SCHED: case BPF_LSM_MAC: attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0); if (!OPTS_ZEROED(opts, tracing))