The patchset as follow:
Guan Jing (3): libbpf: add support for scheduler bpf programs bpftool: recognize scheduler programs sched: Add helper functions to get cpu statistics
include/linux/sched.h | 9 ++++++++ include/uapi/linux/bpf.h | 7 +++++++ kernel/sched/bpf_sched.c | 38 ++++++++++++++++++++++++++++++++++ scripts/bpf_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 7 +++++++ tools/lib/bpf/libbpf.c | 23 +++++++++++++++++++- tools/lib/bpf/libbpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 8 files changed, 88 insertions(+), 1 deletion(-)
From: Guan Jing guanjing6@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7X7WW
-------------------
This patch adds a support for loading and attaching scheduler bpf programs.
Signed-off-by: Roman Gushchin guro@fb.com Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com Signed-off-by: Guan Jing guanjing6@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- tools/lib/bpf/libbpf.c | 21 ++++++++++++++++++++- tools/lib/bpf/libbpf.h | 2 ++ tools/lib/bpf/libbpf.map | 1 + 3 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index de35b9a21dad..86d03f132d7f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3030,7 +3030,8 @@ static int bpf_object_fixup_btf(struct bpf_object *obj) static bool prog_needs_vmlinux_btf(struct bpf_program *prog) { if (prog->type == BPF_PROG_TYPE_STRUCT_OPS || - prog->type == BPF_PROG_TYPE_LSM) + prog->type == BPF_PROG_TYPE_LSM || + prog->type == BPF_PROG_TYPE_SCHED) return true;
/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs @@ -8770,6 +8771,7 @@ static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, stru static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link); static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link); +static int attach_sched(const struct bpf_program *prog, long cookie, struct bpf_link **link);
static const struct bpf_sec_def section_defs[] = { SEC_DEF("socket", SOCKET_FILTER, 0, SEC_NONE), @@ -8864,6 +8866,7 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE), SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE), SEC_DEF("netfilter", NETFILTER, BPF_NETFILTER, SEC_NONE), + SEC_DEF("sched/", SCHED, BPF_SCHED, SEC_ATTACH_BTF, attach_sched), };
int libbpf_register_prog_handler(const char *sec, @@ -9243,6 +9246,7 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj, #define BTF_TRACE_PREFIX "btf_trace_" #define BTF_LSM_PREFIX "bpf_lsm_" #define BTF_ITER_PREFIX "bpf_iter_" +#define BTF_SCHED_PREFIX "bpf_sched_" #define BTF_MAX_NAME_SIZE 128
void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -9262,6 +9266,10 @@ void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, *prefix = BTF_ITER_PREFIX; *kind = BTF_KIND_FUNC; break; + case BPF_SCHED: + *prefix = BTF_SCHED_PREFIX; + *kind = BTF_KIND_FUNC; + break; default: *prefix = ""; *kind = BTF_KIND_FUNC; @@ -12119,6 +12127,17 @@ struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, return link; }
+struct bpf_link *bpf_program__attach_sched(const struct bpf_program *prog) +{ + return bpf_program__attach_btf_id(prog, NULL); +} + +static int attach_sched(const struct bpf_program *prog, long cookie, struct bpf_link **link) +{ + *link = bpf_program__attach_sched(prog); + return libbpf_get_error(*link); +} + struct bpf_link *bpf_program__attach(const struct bpf_program *prog) { struct bpf_link *link = NULL; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0e52621cba43..aabdd973c1a5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -769,6 +769,8 @@ bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex); LIBBPF_API struct bpf_link * bpf_program__attach_freplace(const struct bpf_program *prog, int target_fd, const char *attach_func_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_sched(const struct bpf_program *prog);
struct bpf_netfilter_opts { /* size of this struct, for forward/backward compatibility */ diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 57712321490f..228ab00a5e69 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -236,6 +236,7 @@ LIBBPF_0.2.0 { perf_buffer__buffer_fd; perf_buffer__epoll_fd; perf_buffer__consume_buffer; + bpf_program__attach_sched; } LIBBPF_0.1.0;
LIBBPF_0.3.0 {
From: Guan Jing guanjing6@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7X7WW
--------------------------------
Teach bpftool to recognize scheduler bpf programs.
Signed-off-by: Roman Gushchin guro@fb.com Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com Signed-off-by: Guan Jing guanjing6@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- tools/lib/bpf/libbpf.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 86d03f132d7f..03c811065cc6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -122,6 +122,7 @@ static const char * const attach_type_name[] = { [BPF_TCX_INGRESS] = "tcx_ingress", [BPF_TCX_EGRESS] = "tcx_egress", [BPF_TRACE_UPROBE_MULTI] = "trace_uprobe_multi", + [BPF_SCHED] = "sched", };
static const char * const link_type_name[] = { @@ -210,6 +211,7 @@ static const char * const prog_type_name[] = { [BPF_PROG_TYPE_SK_LOOKUP] = "sk_lookup", [BPF_PROG_TYPE_SYSCALL] = "syscall", [BPF_PROG_TYPE_NETFILTER] = "netfilter", + [BPF_PROG_TYPE_SCHED] = "sched", };
static int __base_pr(enum libbpf_print_level level, const char *format,
From: Guan Jing guanjing6@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7X7WW
--------------------------------
Add the helper functions to get cpu statistics, as follows: 1.acquire cfs/rt/irq cpu load statitic. 2.acquire multiple types of nr_running statitic. 3.acquire cpu idle statitic. 4.acquire cpu capacity.
Based on CPU statistics in different dimensions, specific scheduling policies can be implemented in bpf program.
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com Signed-off-by: Guan Jing guanjing6@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 9 ++++++++ include/uapi/linux/bpf.h | 7 +++++++ kernel/sched/bpf_sched.c | 38 ++++++++++++++++++++++++++++++++++ scripts/bpf_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 7 +++++++ 5 files changed, 63 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index f40411aa7b70..d9709c8b6182 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2530,6 +2530,15 @@ static inline void rseq_syscall(struct pt_regs *regs) #ifdef CONFIG_BPF_SCHED extern void sched_settag(struct task_struct *tsk, s64 tag);
+struct bpf_sched_cpu_stats { + unsigned int nr_running; + unsigned int cfs_nr_running; + unsigned int cfs_h_nr_running; + unsigned int cfs_idle_h_nr_running; + unsigned int rt_nr_running; + unsigned int rr_nr_running; +}; + struct sched_migrate_ctx { struct task_struct *task; struct cpumask *select_idle_mask; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c112c6f7c766..15fc72f92968 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -5669,6 +5669,12 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * int bpf_sched_cpu_stats_of(int *cpu, struct bpf_sched_cpu_stats *ctx, int len) + * Description + * Get multiple types of *cpu* statistics and store in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5883,6 +5889,7 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(sched_cpu_stats_of, 212, ##ctx) \ /* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index e2525bd60abf..1ddff44b6a93 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -44,12 +44,50 @@ int bpf_sched_verify_prog(struct bpf_verifier_log *vlog, return 0; }
+BPF_CALL_3(bpf_sched_cpu_stats_of, int *, cpuid, + struct bpf_sched_cpu_stats *, ctx, + int, len) +{ + struct rq *rq; + int cpu = *cpuid; + + if ((unsigned int)cpu >= nr_cpu_ids) { + memset(ctx, 0, len); + return -EINVAL; + } + + rq = cpu_rq(cpu); + memset(ctx, 0, len); + + SCHED_WARN_ON(!rcu_read_lock_held()); + /* nr_running */ + ctx->nr_running = rq->nr_running; + ctx->cfs_nr_running = rq->cfs.nr_running; + ctx->cfs_h_nr_running = rq->cfs.h_nr_running; + ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running; + ctx->rt_nr_running = rq->rt.rt_nr_running; + ctx->rr_nr_running = rq->rt.rr_nr_running; + + return 0; +} + +static const struct bpf_func_proto bpf_sched_cpu_stats_of_proto = { + .func = bpf_sched_cpu_stats_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_INT, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); + case BPF_FUNC_sched_cpu_stats_of: + return &bpf_sched_cpu_stats_of_proto; default: return bpf_base_func_proto(func_id); } diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 0232545d54b7..65d80fa1de25 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -701,6 +701,7 @@ class PrinterHelpers(Printer): 'struct iphdr', 'struct ipv6hdr', 'struct sched_migrate_ctx', + 'struct bpf_sched_cpu_stats', ] known_types = { '...', @@ -757,6 +758,7 @@ class PrinterHelpers(Printer): 'struct iphdr', 'struct ipv6hdr', 'struct sched_migrate_ctx', + 'struct bpf_sched_cpu_stats', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index c112c6f7c766..15fc72f92968 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -5669,6 +5669,12 @@ union bpf_attr { * 0 on success. * * **-ENOENT** if the bpf_local_storage cannot be found. + * + * int bpf_sched_cpu_stats_of(int *cpu, struct bpf_sched_cpu_stats *ctx, int len) + * Description + * Get multiple types of *cpu* statistics and store in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. */ #define ___BPF_FUNC_MAPPER(FN, ctx...) \ FN(unspec, 0, ##ctx) \ @@ -5883,6 +5889,7 @@ union bpf_attr { FN(user_ringbuf_drain, 209, ##ctx) \ FN(cgrp_storage_get, 210, ##ctx) \ FN(cgrp_storage_delete, 211, ##ctx) \ + FN(sched_cpu_stats_of, 212, ##ctx) \ /* */
/* backwards-compatibility macros for users of __BPF_FUNC_MAPPER that don't
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/11931 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/11931 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...