Add the programmable scheduler feature for openEuler-22.09.
Chen Hui (12):
  sched: programmable: Add a tag for the task group
  sched: programmable: Add user interface of task group tag
  sched: programmable: Add a tag for the task
  sched: programmable: Add user interface of task tag
  sched: programmable: add bpf_sched_task_tag_of helper function
  sched: programmable: Add convenient helper functions to convert sched entity
  bpf: BPF samples support SCHED program type
  samples: bpf: Add bpf sched preempt sample program
  bpf:programmable: Add four helper functions to get cpu stat
  bpf:programmable: Add cpumask ops collection
  sched: programmable: Add lib for sched programmable
  sched: programmable: Add hook for select_task_rq_fair
Guan Jing (2):
  sched: programmable: Add hook for entity_before
  samples: bpf: Add bpf sched pick task sample
Hui Tang (3):
  bpf:programmable: Add helper func to check cpu share cache
  bpf:programmable: Add helper func to set cpus_ptr in task
  samples:bpf: Add samples for cfs select core
Ren Zhijie (3):
  sched: programmable: add bpf_sched_tg_tag_of helper function
  sched: programmable: Add helpers to set tag of task or task_group
  sched: programmable: Add helper function for cpu topology.
 fs/proc/base.c                       |  65 ++++
 include/linux/bpf_topology.h         |  46 +++
 include/linux/sched.h                |  85 ++++++
 include/linux/sched_hook_defs.h      |   7 +-
 include/uapi/linux/bpf.h             | 112 +++++++
 init/init_task.c                     |   3 +
 kernel/bpf/helpers.c                 |  12 +
 kernel/bpf/verifier.c                |   4 +-
 kernel/sched/Makefile                |   3 +-
 kernel/sched/bpf_sched.c             | 409 +++++++++++++++++++++++++
 kernel/sched/bpf_topology.c          |  99 ++++++
 kernel/sched/core.c                  | 103 +++++++
 kernel/sched/fair.c                  |  58 ++++
 kernel/sched/sched.h                 |   8 +
 samples/bpf/Makefile                 |   9 +
 samples/bpf/bpf_load.c               |   8 +-
 samples/bpf/sched_pick_task_kern.c   |  62 ++++
 samples/bpf/sched_pick_task_user.c   |  92 ++++++
 samples/bpf/sched_preempt_kern.c     | 151 ++++++++++
 samples/bpf/sched_preempt_user.c     | 139 +++++++++
 samples/bpf/sched_select_core_kern.c | 239 +++++++++++++++
 samples/bpf/sched_select_core_user.c | 114 +++++++
 scripts/bpf_helpers_doc.py           |  20 ++
 tools/include/uapi/linux/bpf.h       | 112 +++++++
 tools/lib/bpf/libbpf_sched.h         | 435 +++++++++++++++++++++++++++
 25 files changed, 2389 insertions(+), 6 deletions(-)
 create mode 100644 include/linux/bpf_topology.h
 create mode 100644 kernel/sched/bpf_topology.c
 create mode 100644 samples/bpf/sched_pick_task_kern.c
 create mode 100644 samples/bpf/sched_pick_task_user.c
 create mode 100644 samples/bpf/sched_preempt_kern.c
 create mode 100644 samples/bpf/sched_preempt_user.c
 create mode 100644 samples/bpf/sched_select_core_kern.c
 create mode 100644 samples/bpf/sched_select_core_user.c
 create mode 100644 tools/lib/bpf/libbpf_sched.h
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add a tag for the task group, to support the tag-based scheduling mechanism.
The tag is used to identify a special task or a type of special task. There are many such tasks in the real world, for example foreground and background tasks, or online and offline tasks. With the tag we can identify these tasks and apply specific scheduling policies to them.
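For illustration only (not part of this patch): the sample programs later in this series use a small set of tag values along these lines, with untagged tasks (tag == 0) treated as normal:

  enum task_type {
          TASK_TYPE_OFFLINE = -1,  /* background / best-effort work */
          TASK_TYPE_NORMAL  = 0,   /* default: task has no special tag */
          TASK_TYPE_ONLINE  = 1,   /* latency-sensitive foreground work */
  };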
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- kernel/sched/core.c | 19 +++++++++++++++++++ kernel/sched/sched.h | 5 +++++ 2 files changed, 24 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 51c707897c8d..c53c032a378a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7755,6 +7755,13 @@ static void sched_free_group(struct task_group *tg) kmem_cache_free(task_group_cache, tg); }
+#ifdef CONFIG_BPF_SCHED +static inline void tg_init_tag(struct task_group *tg, struct task_group *ptg) +{ + tg->tag = ptg->tag; +} +#endif + /* allocate runqueue etc for a new task group */ struct task_group *sched_create_group(struct task_group *parent) { @@ -7775,6 +7782,10 @@ struct task_group *sched_create_group(struct task_group *parent) if (!alloc_rt_sched_group(tg, parent)) goto err;
+#ifdef CONFIG_BPF_SCHED + tg_init_tag(tg, parent); +#endif + alloc_uclamp_sched_group(tg, parent);
return tg; @@ -7846,6 +7857,14 @@ static void sched_change_group(struct task_struct *tsk, int type) sched_change_qos_group(tsk, tg); #endif
+#ifdef CONFIG_BPF_SCHED + /* + * This function has cleared and restored the task status, + * so we do not need to dequeue and enqueue the task again. + */ + tsk->tag = tg->tag; +#endif + #ifdef CONFIG_FAIR_GROUP_SCHED if (tsk->sched_class->task_change_group) tsk->sched_class->task_change_group(tsk, type); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 42d5fb7d9464..67fd5cbbefea 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -455,6 +455,11 @@ struct task_group { struct uclamp_se uclamp[UCLAMP_CNT]; #endif
+#ifdef CONFIG_BPF_SCHED + /* Used to pad the tag of a group */ + long tag; +#endif + KABI_RESERVE(1) KABI_RESERVE(2) KABI_RESERVE(3)
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add a user interface for the task group tag, bridging the information gap between user mode and kernel mode.
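For illustration, a minimal user-space sketch (not part of this patch) that tags every task in an existing cpu cgroup through the new cpu.tag cftype; the v1 mount point /sys/fs/cgroup/cpu and the group name are assumptions:

  #include <stdio.h>

  /* Writes the cgroup's cpu.tag file; the kernel side then propagates the
   * tag to the group's descendants and to all member tasks.
   */
  static int set_group_tag(const char *group, long tag)
  {
          char path[256];
          FILE *fp;

          snprintf(path, sizeof(path), "/sys/fs/cgroup/cpu/%s/cpu.tag", group);
          fp = fopen(path, "w");
          if (!fp)
                  return -1;
          fprintf(fp, "%ld\n", tag);
          fclose(fp);
          return 0;
  }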
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- kernel/sched/core.c | 81 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c53c032a378a..2256ac22d26d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8637,6 +8637,80 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css, } #endif
+#ifdef CONFIG_BPF_SCHED +static void sched_settag(struct task_struct *tsk, s64 tag) +{ + int queued, running, queue_flags = + DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK; + struct rq_flags rf; + struct rq *rq; + + if (tsk->tag == tag) + return; + + rq = task_rq_lock(tsk, &rf); + + running = task_current(rq, tsk); + queued = task_on_rq_queued(tsk); + + update_rq_clock(rq); + if (queued) + dequeue_task(rq, tsk, queue_flags); + if (running) + put_prev_task(rq, tsk); + + tsk->tag = tag; + + if (queued) + enqueue_task(rq, tsk, queue_flags); + if (running) + set_next_task(rq, tsk); + + task_rq_unlock(rq, tsk, &rf); +} + +static int tg_change_tag(struct task_group *tg, void *data) +{ + struct css_task_iter it; + struct task_struct *tsk; + s64 tag = *(s64 *)data; + struct cgroup_subsys_state *css = &tg->css; + + tg->tag = tag; + + css_task_iter_start(css, 0, &it); + while ((tsk = css_task_iter_next(&it))) + sched_settag(tsk, tag); + css_task_iter_end(&it); + + return 0; +} + +static int cpu_tag_write(struct cgroup_subsys_state *css, + struct cftype *cftype, s64 tag) +{ + struct task_group *tg = css_tg(css); + + if (tg == &root_task_group) + return -EINVAL; + + if (tg->tag == tag) + return 0; + + rcu_read_lock(); + walk_tg_tree_from(tg, tg_change_tag, tg_nop, (void *)(&tag)); + rcu_read_unlock(); + + return 0; +} + +static inline s64 cpu_tag_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + return css_tg(css)->tag; +} +#endif + static struct cftype cpu_legacy_files[] = { #ifdef CONFIG_FAIR_GROUP_SCHED { @@ -8698,6 +8772,13 @@ static struct cftype cpu_legacy_files[] = { .read_s64 = cpu_qos_read, .write_s64 = cpu_qos_write, }, +#endif +#ifdef CONFIG_BPF_SCHED + { + .name = "tag", + .read_s64 = cpu_tag_read, + .write_s64 = cpu_tag_write, + }, #endif { } /* Terminate */ };
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add a tag for the task, in the same way as for the task group.
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/linux/sched.h | 5 +++++ init/init_task.c | 3 +++ kernel/sched/core.c | 3 +++ 3 files changed, 11 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 47f462040f4d..08803b8664b3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1397,6 +1397,11 @@ struct task_struct { */ randomized_struct_fields_end
+#ifdef CONFIG_BPF_SCHED + /* Used to pad the tag of a task */ + long tag; +#endif + KABI_RESERVE(1) KABI_RESERVE(2) KABI_RESERVE(3) diff --git a/init/init_task.c b/init/init_task.c index 5fa18ed59d33..7003426df677 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -213,6 +213,9 @@ struct task_struct init_task #ifdef CONFIG_SECCOMP_FILTER .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif +#ifdef CONFIG_BPF_SCHED + .tag = 0, +#endif }; EXPORT_SYMBOL(init_task);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 2256ac22d26d..8f385aa1c201 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3113,6 +3113,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p) #ifdef CONFIG_SMP p->wake_entry.u_flags = CSD_TYPE_TTWU; #endif +#ifdef CONFIG_BPF_SCHED + p->tag = 0; +#endif }
DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add a user interface for the task tag, bridging the information gap between user mode and kernel mode.
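For illustration, a minimal user-space sketch (not part of this patch). The "tag" file is registered in tid_base_stuff, i.e. under /proc/<pid>/task/<tid>/; the function name is an assumption:

  #include <stdio.h>
  #include <sys/types.h>

  /* Writes the per-thread tag file; the kernel parses the value with
   * kstrtoint() and applies it via sched_settag().
   */
  static int set_thread_tag(pid_t pid, pid_t tid, int tag)
  {
          char path[64];
          FILE *fp;

          snprintf(path, sizeof(path), "/proc/%d/task/%d/tag", (int)pid, (int)tid);
          fp = fopen(path, "w");
          if (!fp)
                  return -1;
          fprintf(fp, "%d\n", tag);
          fclose(fp);
          return 0;
  }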
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- fs/proc/base.c | 65 +++++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 4 +++ kernel/sched/core.c | 2 +- 3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c index b9052be86e8d..8ae7c2be70c2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3644,6 +3644,68 @@ static const struct inode_operations proc_tid_comm_inode_operations = { .permission = proc_tid_comm_permission, };
+#ifdef CONFIG_BPF_SCHED +static ssize_t pid_tag_write(struct file *file, const char __user *buf, + size_t count, loff_t *offset) +{ + struct inode *inode = file_inode(file); + struct task_struct *tsk; + char buffer[PROC_NUMBUF]; + int err = 0, tag = 0; + + tsk = get_proc_task(inode); + if (!tsk) + return -ESRCH; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &tag); + if (err) + goto out; + + sched_settag(tsk, tag); + +out: + put_task_struct(tsk); + return err < 0 ? err : count; +} + +static int pid_tag_show(struct seq_file *m, void *v) +{ + struct inode *inode = m->private; + struct task_struct *tsk; + + tsk = get_proc_task(inode); + if (!tsk) + return -ESRCH; + + seq_printf(m, "%ld\n", tsk->tag); + put_task_struct(tsk); + + return 0; +} + +static int pid_tag_open(struct inode *inode, struct file *flip) +{ + return single_open(flip, pid_tag_show, inode); +} + +static const struct file_operations proc_pid_tag_operations = { + .open = pid_tag_open, + .read = seq_read, + .write = pid_tag_write, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + /* * Tasks */ @@ -3751,6 +3813,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_ASCEND_SHARE_POOL ONE("sp_group", 0444, proc_sp_group_state), #endif +#ifdef CONFIG_BPF_SCHED + REG("tag", 0644, proc_pid_tag_operations), +#endif };
static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/include/linux/sched.h b/include/linux/sched.h index 08803b8664b3..4981139e42e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2181,4 +2181,8 @@ static inline int sched_qos_cpu_overload(void) return 0; } #endif + +#ifdef CONFIG_BPF_SCHED +extern void sched_settag(struct task_struct *tsk, s64 tag); +#endif #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8f385aa1c201..1eb5f5e03295 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8641,7 +8641,7 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css, #endif
#ifdef CONFIG_BPF_SCHED -static void sched_settag(struct task_struct *tsk, s64 tag) +void sched_settag(struct task_struct *tsk, s64 tag) { int queued, running, queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
From: Ren Zhijie <renzhijie2@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
This helper function reads the tag of a task group (struct task_group).
Signed-off-by: Ren Zhijie renzhijie2@huawei.com Signed-off-by: Chen Hui judy.chenhui@huawei.com --- include/uapi/linux/bpf.h | 7 +++++++ kernel/bpf/verifier.c | 4 ++-- kernel/sched/bpf_sched.c | 23 +++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ tools/include/uapi/linux/bpf.h | 7 +++++++ 5 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d5fbbc28b6a0..0bff54992b85 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3777,6 +3777,12 @@ union bpf_attr { * to be enabled. * Return * 1 if the sched entity belongs to a cgroup, 0 otherwise. + * + * long bpf_sched_tg_tag_of(struct task_group *tg) + * Description + * Return task group tag of *tg* if CONFIG_CGROUP_SCHED enabled. + * Return + * Task group tag, if CONFIG_CGROUP_SCHED enabled, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3940,6 +3946,7 @@ union bpf_attr { FN(sched_entity_to_tgidpid), \ FN(sched_entity_to_cgrpid), \ FN(sched_entity_belongs_to_cgrp), \ + FN(sched_tg_tag_of), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d26104b258ba..7acc2cd0081f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5020,10 +5020,10 @@ static bool check_btf_id_ok(const struct bpf_func_proto *fn) int i;
for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) return false;
- if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) return false; }
diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 2ce2afcacb17..44a2ae6be1ec 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -112,6 +112,27 @@ static const struct bpf_func_proto bpf_sched_entity_belongs_to_cgrp_proto = { .arg2_type = ARG_ANYTHING, };
+BPF_CALL_1(bpf_sched_tg_tag_of, struct task_group *, tg) +{ + int ret = 0; + +#ifdef CONFIG_CGROUP_SCHED + ret = tg->tag; +#endif + + return ret; +} + +BTF_ID_LIST_SINGLE(btf_sched_tg_ids, struct, task_group) + +static const struct bpf_func_proto bpf_sched_tg_tag_of_proto = { + .func = bpf_sched_tg_tag_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_tg_ids[0], +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -124,6 +145,8 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_entity_to_cgrpid_proto; case BPF_FUNC_sched_entity_belongs_to_cgrp: return &bpf_sched_entity_belongs_to_cgrp_proto; + case BPF_FUNC_sched_tg_tag_of: + return &bpf_sched_tg_tag_of_proto; default: return bpf_base_func_proto(func_id); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index be21512ee7be..f8a778ac9ce1 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -436,6 +436,7 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct sched_entity', + 'struct task_group', ] known_types = { '...', @@ -480,6 +481,7 @@ class PrinterHelpers(Printer): 'struct path', 'struct btf_ptr', 'struct sched_entity', + 'struct task_group', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b2a0b189b797..aed46483130e 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3777,6 +3777,12 @@ union bpf_attr { * to be enabled. * Return * 1 if the sched entity belongs to a cgroup, 0 otherwise. + * + * long bpf_sched_tg_tag_of(struct task_group *tg) + * Description + * Return task group tag of *tg* if CONFIG_CGROUP_SCHED enabled. + * Return + * Task group tag, if CONFIG_CGROUP_SCHED enabled, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3940,6 +3946,7 @@ union bpf_attr { FN(sched_entity_to_tgidpid), \ FN(sched_entity_to_cgrpid), \ FN(sched_entity_belongs_to_cgrp), \ + FN(sched_tg_tag_of), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
This helper function reads the tag of a task (struct task_struct).
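For illustration, a minimal BPF sketch (modelled on the preempt sample added later in this series) that uses the new helper in the wakeup-preemption hook; the program and variable names are assumptions:

  #include <linux/sched.h>
  #include <uapi/linux/bpf.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  SEC("sched/cfs_check_preempt_wakeup")
  int BPF_PROG(check_preempt_wakeup, struct task_struct *curr, struct task_struct *p)
  {
          long curr_tag = bpf_sched_task_tag_of(curr);
          long p_tag = bpf_sched_task_tag_of(p);

          if (curr_tag > p_tag)           /* current task is "more online" */
                  return -1;              /* do not preempt */
          if (curr_tag < p_tag)           /* waking task is "more online" */
                  return 1;               /* preempt */
          return 0;                       /* keep the default CFS decision */
  }

  char _license[] SEC("license") = "GPL";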
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/uapi/linux/bpf.h | 7 +++++++ kernel/sched/bpf_sched.c | 17 +++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++++ 3 files changed, 31 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0bff54992b85..7297c6e4f882 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3783,6 +3783,12 @@ union bpf_attr { * Return task group tag of *tg* if CONFIG_CGROUP_SCHED enabled. * Return * Task group tag, if CONFIG_CGROUP_SCHED enabled, 0 otherwise. + * + * long bpf_sched_task_tag_of(struct task_struct *tsk) + * Description + * Return task tag of *tsk*. + * Return + * Task tag, if used, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3947,6 +3953,7 @@ union bpf_attr { FN(sched_entity_to_cgrpid), \ FN(sched_entity_belongs_to_cgrp), \ FN(sched_tg_tag_of), \ + FN(sched_task_tag_of), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 44a2ae6be1ec..cea13d36272f 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -133,6 +133,21 @@ static const struct bpf_func_proto bpf_sched_tg_tag_of_proto = { .arg1_btf_id = &btf_sched_tg_ids[0], };
+BPF_CALL_1(bpf_sched_task_tag_of, struct task_struct *, tsk) +{ + return tsk->tag; +} + +BTF_ID_LIST_SINGLE(btf_sched_task_ids, struct, task_struct) + +static const struct bpf_func_proto bpf_sched_task_tag_of_proto = { + .func = bpf_sched_task_tag_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_task_ids[0], +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -147,6 +162,8 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_entity_belongs_to_cgrp_proto; case BPF_FUNC_sched_tg_tag_of: return &bpf_sched_tg_tag_of_proto; + case BPF_FUNC_sched_task_tag_of: + return &bpf_sched_task_tag_of_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index aed46483130e..874b7b66451b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3783,6 +3783,12 @@ union bpf_attr { * Return task group tag of *tg* if CONFIG_CGROUP_SCHED enabled. * Return * Task group tag, if CONFIG_CGROUP_SCHED enabled, 0 otherwise. + * + * long bpf_sched_task_tag_of(struct task_struct *tsk) + * Description + * Return task tag of *tsk*. + * Return + * Task tag, if used, 0 otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3947,6 +3953,7 @@ union bpf_attr { FN(sched_entity_to_cgrpid), \ FN(sched_entity_belongs_to_cgrp), \ FN(sched_tg_tag_of), \ + FN(sched_task_tag_of), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add three helper functions:
1) bpf_sched_entity_is_task() checks whether a sched entity is a task.
2) bpf_sched_entity_to_task() converts a sched entity to its task_struct.
3) bpf_sched_entity_to_tg() converts a sched entity to its task_group.
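An illustrative sketch (modelled on the preempt-tick sample later in this series, assuming the same includes as a normal sched BPF sample): it resolves a sched entity to either its task or its task group before reading the tag:

  SEC("sched/cfs_check_preempt_tick")
  int BPF_PROG(check_preempt_tick, struct sched_entity *curr, unsigned long delta_exec)
  {
          long tag = 0;

          if (bpf_sched_entity_is_task(curr)) {
                  struct task_struct *tsk = bpf_sched_entity_to_task(curr);

                  if (tsk)
                          tag = bpf_sched_task_tag_of(tsk);
          } else {
                  struct task_group *tg = bpf_sched_entity_to_tg(curr);

                  if (tg)
                          tag = bpf_sched_tg_tag_of(tg);
          }

          /* Only positively tagged ("online") entities ask for a resched. */
          return tag > 0 ? 1 : 0;
  }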
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/uapi/linux/bpf.h | 21 ++++++++++++ kernel/sched/bpf_sched.c | 61 ++++++++++++++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 21 ++++++++++++ 3 files changed, 103 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7297c6e4f882..59c94dd320a2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3789,6 +3789,24 @@ union bpf_attr { * Return task tag of *tsk*. * Return * Task tag, if used, 0 otherwise. + * + * long bpf_sched_entity_is_task(struct sched_entity *se) + * Description + * Checks whether the sched entity is a task. + * Return + * 1 if true, 0 otherwise. + * + * struct task_struct *bpf_sched_entity_to_task(struct sched_entity *se) + * Description + * Return task struct of *se* if se is a task. + * Return + * Task struct if se is a task, NULL otherwise. + * + * struct task_group *bpf_sched_entity_to_tg(struct sched_entity *se) + * Description + * Return task group of *se* if se is a task group. + * Return + * Task struct if se is a task group, NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3954,6 +3972,9 @@ union bpf_attr { FN(sched_entity_belongs_to_cgrp), \ FN(sched_tg_tag_of), \ FN(sched_task_tag_of), \ + FN(sched_entity_is_task), \ + FN(sched_entity_to_task), \ + FN(sched_entity_to_tg), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index cea13d36272f..d95dea1e3a66 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -148,6 +148,61 @@ static const struct bpf_func_proto bpf_sched_task_tag_of_proto = { .arg1_btf_id = &btf_sched_task_ids[0], };
+BPF_CALL_1(bpf_sched_entity_is_task, struct sched_entity *, se) +{ + return entity_is_task(se) ? 1 : 0; +} + +static const struct bpf_func_proto bpf_sched_entity_is_task_proto = { + .func = bpf_sched_entity_is_task, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], +}; + +BPF_CALL_1(bpf_sched_entity_to_task, struct sched_entity *, se) +{ + if (entity_is_task(se)) { + struct task_struct *tsk = task_of(se); + + return (unsigned long)tsk; + } + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_sched_entity_to_task_proto = { + .func = bpf_sched_entity_to_task, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &btf_sched_task_ids[0], + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], +}; + +BPF_CALL_1(bpf_sched_entity_to_tg, struct sched_entity *, se) +{ +#if CONFIG_FAIR_GROUP_SCHED + if (!entity_is_task(se)) { + struct task_group *tg = group_cfs_rq(se)->tg; + + return (unsigned long)tg; + } +#endif + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_sched_entity_to_tg_proto = { + .func = bpf_sched_entity_to_tg, + .gpl_only = false, + .ret_type = RET_PTR_TO_BTF_ID_OR_NULL, + .ret_btf_id = &btf_sched_tg_ids[0], + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_entity_ids[0], +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -164,6 +219,12 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_tg_tag_of_proto; case BPF_FUNC_sched_task_tag_of: return &bpf_sched_task_tag_of_proto; + case BPF_FUNC_sched_entity_is_task: + return &bpf_sched_entity_is_task_proto; + case BPF_FUNC_sched_entity_to_task: + return &bpf_sched_entity_to_task_proto; + case BPF_FUNC_sched_entity_to_tg: + return &bpf_sched_entity_to_tg_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 874b7b66451b..7aede7f62f5b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3789,6 +3789,24 @@ union bpf_attr { * Return task tag of *tsk*. * Return * Task tag, if used, 0 otherwise. + * + * long bpf_sched_entity_is_task(struct sched_entity *se) + * Description + * Checks whether the sched entity is a task. + * Return + * 1 if true, 0 otherwise. + * + * struct task_struct *bpf_sched_entity_to_task(struct sched_entity *se) + * Description + * Return task struct of *se* if se is a task. + * Return + * Task struct if se is a task, NULL otherwise. + * + * struct task_group *bpf_sched_entity_to_tg(struct sched_entity *se) + * Description + * Return task group of *se* if se is a task group. + * Return + * Task struct if se is a task group, NULL otherwise. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3954,6 +3972,9 @@ union bpf_attr { FN(sched_entity_belongs_to_cgrp), \ FN(sched_tg_tag_of), \ FN(sched_task_tag_of), \ + FN(sched_entity_is_task), \ + FN(sched_entity_to_task), \ + FN(sched_entity_to_tg), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Make the BPF samples support the SCHED program type: the sample loader (samples/bpf/bpf_load.c) loads sections named "sched/*" as BPF_PROG_TYPE_SCHED.
Signed-off-by: Chen Hui judy.chenhui@huawei.com --- samples/bpf/bpf_load.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index c5ad528f046e..4dfb45d254b1 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -87,6 +87,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_sockops = strncmp(event, "sockops", 7) == 0; bool is_sk_skb = strncmp(event, "sk_skb", 6) == 0; bool is_sk_msg = strncmp(event, "sk_msg", 6) == 0; + bool is_sched = strncmp(event, "sched/", 6) == 0; size_t insns_cnt = size / sizeof(struct bpf_insn); enum bpf_prog_type prog_type; char buf[256]; @@ -120,6 +121,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_SK_SKB; } else if (is_sk_msg) { prog_type = BPF_PROG_TYPE_SK_MSG; + } else if (is_sched) { + prog_type = BPF_PROG_TYPE_SCHED; } else { printf("Unknown event '%s'\n", event); return -1; @@ -137,7 +140,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
prog_fd[prog_cnt++] = fd;
- if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) + if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk || is_sched) return 0;
if (is_socket || is_sockops || is_sk_skb || is_sk_msg) { @@ -643,7 +646,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map) memcmp(shname, "cgroup/", 7) == 0 || memcmp(shname, "sockops", 7) == 0 || memcmp(shname, "sk_skb", 6) == 0 || - memcmp(shname, "sk_msg", 6) == 0) { + memcmp(shname, "sk_msg", 6) == 0 || + memcmp(shname, "sched/", 6) == 0) { ret = load_and_attach(shname, data->d_buf, data->d_size); if (ret != 0)
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
The sample selects, via the [-W|-T|-E] options, which BPF_SCHED programs to attach to the preemption hooks; the attached programs then match tasks or task groups tagged TASK_TYPE_ONLINE or TASK_TYPE_OFFLINE.

Each program returns a value to its hook that indicates whether to preempt the current sched entity.
To run,
# sched_preempt [-W|-T|-E|-h]
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- samples/bpf/Makefile | 3 + samples/bpf/sched_preempt_kern.c | 151 +++++++++++++++++++++++++++++++ samples/bpf/sched_preempt_user.c | 139 ++++++++++++++++++++++++++++ 3 files changed, 293 insertions(+) create mode 100644 samples/bpf/sched_preempt_kern.c create mode 100644 samples/bpf/sched_preempt_user.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index aeebf5d12f32..e473bad76549 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -54,6 +54,7 @@ tprogs-y += task_fd_query tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm +tprogs-y += sched_preempt
# Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -111,6 +112,7 @@ task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) +sched_preempt-objs := sched_preempt_user.o
# Tell kbuild to always build the programs always-y := $(tprogs-y) @@ -172,6 +174,7 @@ always-y += ibumad_kern.o always-y += hbm_out_kern.o always-y += hbm_edt_kern.o always-y += xdpsock_kern.o +always-y += sched_preempt_kern.o
ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/sched_preempt_kern.c b/samples/bpf/sched_preempt_kern.c new file mode 100644 index 000000000000..1e315af9e009 --- /dev/null +++ b/samples/bpf/sched_preempt_kern.c @@ -0,0 +1,151 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/version.h> +#include <linux/sched.h> +#include <uapi/linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +unsigned long idea_runtime = 1000000UL; + +enum task_type { + TASK_TYPE_OFFLINE = -1, + TASK_TYPE_NORMAL, + TASK_TYPE_ONLINE, +}; + +#define getVal(P) \ + ({ \ + typeof(P) val = 0; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) + +#define bprintk(fmt, ...) \ + ({ \ + char ____fmt[] = fmt; \ + bpf_trace_printk(____fmt, sizeof(____fmt), \ + ##__VA_ARGS__); \ + }) + +SEC("sched/cfs_check_preempt_wakeup") +int BPF_PROG(sched_cfs_check_preempt_wakeup, struct task_struct *curr, struct task_struct *p) +{ + long curr_type, p_type; + int ret = 0; + + curr_type = bpf_sched_task_tag_of(curr); + p_type = bpf_sched_task_tag_of(p); + + if (curr_type == TASK_TYPE_ONLINE && p_type == TASK_TYPE_OFFLINE) + ret = -1; + + if (curr_type == TASK_TYPE_OFFLINE && p_type == TASK_TYPE_ONLINE) + ret = 1; + + bprintk("check_preempt_wakeup: curr id = %d, p id = %d, preempt result is %d\n", + getVal(curr->pid), getVal(p->pid), ret); + + return ret; +} + +SEC("sched/cfs_check_preempt_tick") +int BPF_PROG(sched_cfs_check_preempt_tick, struct sched_entity *curr, unsigned long delta_exec) +{ + long curr_type = TASK_TYPE_NORMAL; + int ret = 0, id = 0; + + if (bpf_sched_entity_is_task(curr)) { + struct task_struct *tsk = bpf_sched_entity_to_task(curr); + + if (tsk) { + curr_type = bpf_sched_task_tag_of(tsk); + id = getVal(tsk->pid); + } + bprintk("check_preempt_tick: delta = %lu,entity is task, id = %d\n", + delta_exec, id); + } else { + struct task_group *tg = bpf_sched_entity_to_tg(curr); + + if (tg) { + curr_type = bpf_sched_tg_tag_of(tg); + id = bpf_sched_entity_to_cgrpid(curr); + } + bprintk("check_preempt_tick: delta = %lu,entity is group, id = %d\n", + delta_exec, id); + } + + if (curr_type == TASK_TYPE_ONLINE) + ret = delta_exec >= idea_runtime ? 
1 : -1; + + bprintk("check_preempt_tick: preempt result = %d\n", ret); + return ret; +} + +SEC("sched/cfs_wakeup_preempt_entity") +int BPF_PROG(sched_cfs_wakeup_preempt_entity, struct sched_entity *curr, struct sched_entity *se) +{ + long curr_type = TASK_TYPE_NORMAL; + long p_type = TASK_TYPE_NORMAL; + int curr_id = 0, p_id = 0; + int ret = 0; + + if (bpf_sched_entity_is_task(curr)) { + struct task_struct *tsk = bpf_sched_entity_to_task(curr); + + if (tsk) { + curr_type = bpf_sched_task_tag_of(tsk); + curr_id = getVal(tsk->pid); + bprintk("wakeup_preempt_entity: curr entity is task, id = %d\n", curr_id); + } + } else { + struct task_group *tg = bpf_sched_entity_to_tg(curr); + + if (tg) { + curr_type = bpf_sched_tg_tag_of(tg); + curr_id = bpf_sched_entity_to_cgrpid(curr); + bprintk("wakeup_preempt_entity: curr entity is group, id = %d\n", curr_id); + } + } + + if (bpf_sched_entity_is_task(se)) { + struct task_struct *p = bpf_sched_entity_to_task(se); + + if (p) { + p_type = bpf_sched_task_tag_of(p); + p_id = getVal(p->pid); + bprintk("wakeup_preempt_entity: se entity is task, id = %d\n", p_id); + } + } else { + struct task_group *tg1 = bpf_sched_entity_to_tg(se); + + if (tg1) { + p_type = bpf_sched_tg_tag_of(tg1); + p_id = bpf_sched_entity_to_cgrpid(se); + bprintk("wakeup_preempt_entity: se entity is group, id = %d\n", p_id); + } + } + + if (curr_type == TASK_TYPE_ONLINE && p_type == TASK_TYPE_OFFLINE) + ret = -1; + + if (curr_type == TASK_TYPE_OFFLINE && p_type == TASK_TYPE_ONLINE) + ret = 1; + + bprintk("wakeup_preempt_entity: preempt result = %d\n", ret); + + return ret; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sched_preempt_user.c b/samples/bpf/sched_preempt_user.c new file mode 100644 index 000000000000..26f1f36d4558 --- /dev/null +++ b/samples/bpf/sched_preempt_user.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/resource.h> +#include <bpf/libbpf.h> + +#define MAX_PROGS (3) +#define TRACE_DIR "/sys/kernel/debug/tracing/" +#define BUF_SIZE (4096) + +int progindex[MAX_PROGS]; + +static void usage(void) +{ + printf("USAGE: sched_preempt [...]\n"); + printf(" -W # Test sched preempt wakeup\n"); + printf(" -T # Test sched preempt tick\n"); + printf(" -E # Test wakeup preempt entity\n"); + printf(" -h # Display this help\n"); +} + +/* read trace logs from debug fs */ +static void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(TRACE_DIR "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[BUF_SIZE]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf) - 1); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} + +static inline bool check_attach_prog(int index) +{ + return progindex[index] ? 
true : false; +} + +int main(int argc, char **argv) +{ + int opt; + int index; + char filename[256]; + struct bpf_object *obj; + struct bpf_program *prog; + struct bpf_link *link = NULL; + + char prognames[MAX_PROGS][256] = { + "sched_cfs_check_preempt_wakeup", + "sched_cfs_check_preempt_tick", + "sched_cfs_wakeup_preempt_entity", + }; + + while ((opt = getopt(argc, argv, "WTEh")) != -1) { + switch (opt) { + case 'W': + progindex[0] = 1; + break; + case 'T': + progindex[1] = 1; + break; + case 'E': + progindex[2] = 1; + break; + case 'h': + default: + usage(); + goto out; + } + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + goto out; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + for (index = 0; index < MAX_PROGS; ++index) { + if (check_attach_prog(index)) { + prog = bpf_object__find_program_by_name(obj, prognames[index]); + if (libbpf_get_error(prog)) { + fprintf(stderr, "ERROR: finding a prog:%s in obj file failed\n", + prognames[index]); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: bpf_program__attach failed\n"); + link = NULL; + goto cleanup; + } + } + } + + printf("preempt BPF started, hit Ctrl+C to stop!\n"); + + read_trace_pipe(); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); + +out: + return 0; +}
From: Ren Zhijie <renzhijie2@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add the helper functions bpf_sched_set_tg_tag() and bpf_sched_set_task_tag() to set the tag of a task group or a task.
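For illustration only, a hedged sketch that copies a parent's tag to a newly forked child. The attach point (the sched_process_fork tracepoint), the vmlinux.h-style type information and the assumption that the runqueue lock is not held there are not part of this patch; note that bpf_sched_set_task_tag() ends up in sched_settag(), which takes task_rq_lock(), so it must not be called where that lock is already held:

  #include "vmlinux.h"
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  SEC("tp_btf/sched_process_fork")
  int BPF_PROG(tag_child, struct task_struct *parent, struct task_struct *child)
  {
          /* __sched_fork() zeroes the child's tag, so copy the parent's. */
          if (parent->tag)
                  bpf_sched_set_task_tag(child, parent->tag);

          return 0;
  }

  char _license[] SEC("license") = "GPL";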
Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/uapi/linux/bpf.h | 14 ++++++++++++ kernel/bpf/helpers.c | 6 +++++ kernel/sched/bpf_sched.c | 41 ++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 2 +- kernel/sched/sched.h | 3 +++ tools/include/uapi/linux/bpf.h | 14 ++++++++++++ 6 files changed, 79 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 59c94dd320a2..091877cea33e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3807,6 +3807,18 @@ union bpf_attr { * Return task group of *se* if se is a task group. * Return * Task struct if se is a task group, NULL otherwise. + * + * int bpf_sched_set_tg_tag(struct task_group *tg, s64 tag) + * Description + * Set tag to *tg* and its descendants. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_sched_set_task_tag(struct task_struct *tsk, s64 tag) + * Description + * Set tag to *tsk*. + * Return + * Nothing. Always succeeds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3975,6 +3987,8 @@ union bpf_attr { FN(sched_entity_is_task), \ FN(sched_entity_to_task), \ FN(sched_entity_to_tg), \ + FN(sched_set_tg_tag), \ + FN(sched_set_task_tag), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 4bb5921a7d21..41c273db0ca8 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -658,6 +658,8 @@ const struct bpf_func_proto bpf_probe_read_user_proto __weak; const struct bpf_func_proto bpf_probe_read_user_str_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; +const struct bpf_func_proto bpf_sched_set_tg_tag_proto __weak; +const struct bpf_func_proto bpf_sched_set_task_tag_proto __weak;
const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) @@ -715,6 +717,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_per_cpu_ptr_proto; case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; + case BPF_FUNC_sched_set_tg_tag: + return &bpf_sched_set_tg_tag_proto; + case BPF_FUNC_sched_set_task_tag: + return &bpf_sched_set_task_tag_proto; default: break; } diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index d95dea1e3a66..50218064d74a 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -203,6 +203,47 @@ static const struct bpf_func_proto bpf_sched_entity_to_tg_proto = { .arg1_btf_id = &btf_sched_entity_ids[0], };
+BPF_CALL_2(bpf_sched_set_tg_tag, struct task_group *, tg, s64, tag) +{ +#if CONFIG_CGROUP_SCHED + if (tg == &root_task_group) + return -EINVAL; + + if (tg->tag == tag) + return 0; + + rcu_read_lock(); + walk_tg_tree_from(tg, tg_change_tag, tg_nop, (void *)(&tag)); + rcu_read_unlock(); + + return 0; +#endif + return -EPERM; +} + +const struct bpf_func_proto bpf_sched_set_tg_tag_proto = { + .func = bpf_sched_set_tg_tag, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_tg_ids[0], + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_sched_set_task_tag, struct task_struct *, tsk, s64, tag) +{ + sched_settag(tsk, tag); + return 0; +} + +const struct bpf_func_proto bpf_sched_set_task_tag_proto = { + .func = bpf_sched_set_task_tag, + .gpl_only = false, + .ret_type = RET_VOID, + .arg1_type = PTR_MAYBE_NULL | ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_sched_task_ids[0], + .arg2_type = ARG_ANYTHING, +}; static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1eb5f5e03295..c0fd31446c70 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -8672,7 +8672,7 @@ void sched_settag(struct task_struct *tsk, s64 tag) task_rq_unlock(rq, tsk, &rf); }
-static int tg_change_tag(struct task_group *tg, void *data) +int tg_change_tag(struct task_group *tg, void *data) { struct css_task_iter it; struct task_struct *tsk; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 67fd5cbbefea..0423f7d422b1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -498,6 +498,9 @@ static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data) }
extern int tg_nop(struct task_group *tg, void *data); +#ifdef CONFIG_BPF_SCHED +extern int tg_change_tag(struct task_group *tg, void *data); +#endif
extern void free_fair_sched_group(struct task_group *tg); extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7aede7f62f5b..a751da128857 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3807,6 +3807,18 @@ union bpf_attr { * Return task group of *se* if se is a task group. * Return * Task struct if se is a task group, NULL otherwise. + * + * int bpf_sched_set_tg_tag(struct task_group *tg, s64 tag) + * Description + * Set tag to *tg* and its descendants. + * Return + * 0 on success, or a negative error in case of failure. + * + * void bpf_sched_set_task_tag(struct task_struct *tsk, s64 tag) + * Description + * Set tag to *tsk*. + * Return + * Nothing. Always succeeds. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3975,6 +3987,8 @@ union bpf_attr { FN(sched_entity_is_task), \ FN(sched_entity_to_task), \ FN(sched_entity_to_tg), \ + FN(sched_set_tg_tag), \ + FN(sched_set_task_tag), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Chen Hui <judy.chenhui@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add four helper functions to get CPU statistics: bpf_sched_cpu_load_of(), bpf_sched_cpu_nr_running_of(), bpf_sched_cpu_idle_stat_of() and bpf_sched_cpu_capacity_of().
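For illustration, a hedged helper routine a sched BPF program could call (the struct definitions come from the kernel headers added here, e.g. via the sample includes or vmlinux.h; the idleness test itself is an assumption, not part of this patch):

  static __always_inline int cpu_looks_idle(int cpu)
  {
          struct bpf_sched_cpu_nr_running nr = {};
          struct bpf_sched_cpu_idle_stat idle = {};

          if (bpf_sched_cpu_nr_running_of(cpu, &nr, sizeof(nr)))
                  return 0;
          if (bpf_sched_cpu_idle_stat_of(cpu, &idle, sizeof(idle)))
                  return 0;

          /* Treat the CPU as idle if the kernel says so or no CFS task runs. */
          return idle.available_idle || nr.cfs_nr_running == 0;
  }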
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/linux/sched.h | 33 ++++++++ include/uapi/linux/bpf.h | 28 +++++++ kernel/sched/bpf_sched.c | 139 +++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 8 ++ tools/include/uapi/linux/bpf.h | 28 +++++++ 5 files changed, 236 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 4981139e42e4..6796437eec45 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2184,5 +2184,38 @@ static inline int sched_qos_cpu_overload(void)
#ifdef CONFIG_BPF_SCHED extern void sched_settag(struct task_struct *tsk, s64 tag); + +struct bpf_sched_cpu_load { + unsigned long cfs_load_avg; + unsigned long cfs_runnable_avg; + unsigned long cfs_util_avg; + unsigned long rt_load_avg; + unsigned long rt_runnable_avg; + unsigned long rt_util_avg; + unsigned long irq_load_avg; + unsigned long irq_runnable_avg; + unsigned long irq_util_avg; +}; + +struct bpf_sched_cpu_nr_running { + unsigned int nr_running; + unsigned int cfs_nr_running; + unsigned int cfs_h_nr_runing; + unsigned int cfs_idle_h_nr_running; + unsigned int rt_nr_running; + unsigned int rr_nr_running; +}; + +struct bpf_sched_cpu_idle_stat { + int available_idle; + unsigned int exit_latency; + unsigned long idle_stamp; + unsigned long avg_idle; +}; + +struct bpf_sched_cpu_capacity { + unsigned long capacity; + unsigned long capacity_orig; +}; #endif #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 091877cea33e..c53cb91f67d1 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3819,6 +3819,30 @@ union bpf_attr { * Set tag to *tsk*. * Return * Nothing. Always succeeds. + * + * int bpf_sched_cpu_load_of(int cpu, struct bpf_sched_cpu_load *ctx, int len) + * Description + * Get *cpu* load returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_cpu_nr_running_of(int cpu, struct bpf_sched_cpu_nr_running *ctx, int len) + * Description + * Get *cpu* nr running returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_cpu_idle_stat_of(int cpu, struct bpf_sched_cpu_idle_stat *ctx, int len) + * Description + * Get *cpu* idle state returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_cpu_capacity_of(int cpu, struct bpf_sched_cpu_capacity *ctx, int len) + * Description + * Get *cpu* capacity returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3989,6 +4013,10 @@ union bpf_attr { FN(sched_entity_to_tg), \ FN(sched_set_tg_tag), \ FN(sched_set_task_tag), \ + FN(sched_cpu_load_of), \ + FN(sched_cpu_nr_running_of), \ + FN(sched_cpu_idle_stat_of), \ + FN(sched_cpu_capacity_of), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 50218064d74a..cc63638b5e81 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -244,6 +244,137 @@ const struct bpf_func_proto bpf_sched_set_task_tag_proto = { .arg1_btf_id = &btf_sched_task_ids[0], .arg2_type = ARG_ANYTHING, }; + +BPF_CALL_3(bpf_sched_cpu_load_of, int, cpu, + struct bpf_sched_cpu_load *, ctx, + int, len) +{ + struct rq *rq = cpu_rq(cpu); + + if (len != sizeof(*ctx)) + return -EINVAL; + + memset(ctx, 0, sizeof(struct bpf_sched_cpu_load)); +#ifdef CONFIG_SMP + SCHED_WARN_ON(!rcu_read_lock_held()); + ctx->cfs_load_avg = rq->cfs.avg.load_avg; + ctx->cfs_runnable_avg = rq->cfs.avg.runnable_avg; + ctx->cfs_util_avg = rq->cfs.avg.util_avg; + ctx->rt_load_avg = rq->avg_rt.load_avg; + ctx->rt_runnable_avg = rq->avg_rt.runnable_avg; + ctx->rt_util_avg = rq->avg_rt.util_avg; +#ifdef CONFIG_HAVE_SCHED_AVG_IRQ + ctx->irq_load_avg = rq->avg_irq.load_avg; + ctx->irq_runnable_avg = rq->avg_irq.runnable_avg; + ctx->irq_util_avg = rq->avg_irq.util_avg; +#endif +#endif + + return 0; +} + +static const struct bpf_func_proto bpf_sched_cpu_load_of_proto = { + .func = bpf_sched_cpu_load_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_sched_cpu_nr_running_of, int, cpu, + struct bpf_sched_cpu_nr_running *, ctx, + int, len) +{ + struct rq *rq = cpu_rq(cpu); + + if (len != sizeof(*ctx)) + return -EINVAL; + + SCHED_WARN_ON(!rcu_read_lock_held()); + + ctx->nr_running = rq->nr_running; + ctx->cfs_nr_running = rq->cfs.nr_running; + ctx->cfs_h_nr_runing = rq->cfs.h_nr_running; + ctx->cfs_idle_h_nr_running = rq->cfs.idle_h_nr_running; + ctx->rt_nr_running = rq->rt.rt_nr_running; + ctx->rr_nr_running = rq->rt.rr_nr_running; + + return 0; +} + +static const struct bpf_func_proto bpf_sched_cpu_nr_running_of_proto = { + .func = bpf_sched_cpu_nr_running_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_sched_cpu_idle_stat_of, int, cpu, + struct bpf_sched_cpu_idle_stat *, ctx, + int, len) +{ + struct rq *rq = cpu_rq(cpu); + struct cpuidle_state *idle = NULL; + + if (len != sizeof(*ctx)) + return -EINVAL; + + memset(ctx, 0, sizeof(struct bpf_sched_cpu_idle_stat)); + SCHED_WARN_ON(!rcu_read_lock_held()); + ctx->available_idle = available_idle_cpu(cpu); + idle = idle_get_state(rq); + if (idle) + ctx->exit_latency = idle->exit_latency; + +#ifdef CONFIG_SMP + ctx->idle_stamp = rq->idle_stamp; + ctx->avg_idle = rq->avg_idle; +#endif + + return 0; +} + +static const struct bpf_func_proto bpf_sched_cpu_idle_stat_of_proto = { + .func = bpf_sched_cpu_idle_stat_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + +BPF_CALL_3(bpf_sched_cpu_capacity_of, int, cpu, + struct bpf_sched_cpu_capacity *, ctx, + int, len) +{ + struct rq *rq = cpu_rq(cpu); + + if (len != sizeof(*ctx)) + return -EINVAL; + + memset(ctx, 0, sizeof(struct bpf_sched_cpu_capacity)); +#ifdef CONFIG_SMP + SCHED_WARN_ON(!rcu_read_lock_held()); + ctx->capacity = rq->cpu_capacity; + ctx->capacity_orig = rq->cpu_capacity_orig; +#endif + + return 0; +} + +static const struct bpf_func_proto bpf_sched_cpu_capacity_of_proto = { + .func = 
bpf_sched_cpu_capacity_of, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_UNINIT_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -266,6 +397,14 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_entity_to_task_proto; case BPF_FUNC_sched_entity_to_tg: return &bpf_sched_entity_to_tg_proto; + case BPF_FUNC_sched_cpu_load_of: + return &bpf_sched_cpu_load_of_proto; + case BPF_FUNC_sched_cpu_nr_running_of: + return &bpf_sched_cpu_nr_running_of_proto; + case BPF_FUNC_sched_cpu_idle_stat_of: + return &bpf_sched_cpu_idle_stat_of_proto; + case BPF_FUNC_sched_cpu_capacity_of: + return &bpf_sched_cpu_capacity_of_proto; default: return bpf_base_func_proto(func_id); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index f8a778ac9ce1..f2b5e63801ca 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -437,6 +437,10 @@ class PrinterHelpers(Printer): 'struct btf_ptr', 'struct sched_entity', 'struct task_group', + 'struct bpf_sched_cpu_load', + 'struct bpf_sched_cpu_nr_running', + 'struct bpf_sched_cpu_idle_stat', + 'struct bpf_sched_cpu_capacity', ] known_types = { '...', @@ -482,6 +486,10 @@ class PrinterHelpers(Printer): 'struct btf_ptr', 'struct sched_entity', 'struct task_group', + 'struct bpf_sched_cpu_load', + 'struct bpf_sched_cpu_nr_running', + 'struct bpf_sched_cpu_idle_stat', + 'struct bpf_sched_cpu_capacity', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a751da128857..7d315debb910 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3819,6 +3819,30 @@ union bpf_attr { * Set tag to *tsk*. * Return * Nothing. Always succeeds. + * + * int bpf_sched_cpu_load_of(int cpu, struct bpf_sched_cpu_load *ctx, int len) + * Description + * Get *cpu* load returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_cpu_nr_running_of(int cpu, struct bpf_sched_cpu_nr_running *ctx, int len) + * Description + * Get *cpu* nr running returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_cpu_idle_stat_of(int cpu, struct bpf_sched_cpu_idle_stat *ctx, int len) + * Description + * Get *cpu* idle state returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_sched_cpu_capacity_of(int cpu, struct bpf_sched_cpu_capacity *ctx, int len) + * Description + * Get *cpu* capacity returned in *ctx*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3989,6 +4013,10 @@ union bpf_attr { FN(sched_entity_to_tg), \ FN(sched_set_tg_tag), \ FN(sched_set_task_tag), \ + FN(sched_cpu_load_of), \ + FN(sched_cpu_nr_running_of), \ + FN(sched_cpu_idle_stat_of), \ + FN(sched_cpu_capacity_of), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Ren Zhijie <renzhijie2@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB
CVE: NA
--------------------------------
Add the bpf helper function bpf_init_cpu_topology(), which obtains CPU topology information through the topology_* macros defined in include/linux/topology.h and saves it in a BPF map. A second helper, bpf_get_system_cpus(), reports the system-wide CPU masks and counts.
The CPU topology information is useful for core selection.
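For illustration, a hedged BPF-side sketch of how the two helpers might be used; the map name, the BPF_CPU_NR bound and the one-shot flag are assumptions, not part of this patch:

  #define BPF_CPU_NR 1024         /* assumed upper bound on CPU ids */

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, BPF_CPU_NR);
          __type(key, int);
          __type(value, struct bpf_cpu_topology);
  } map_cpu_topology SEC(".maps");

  static int topo_ready;

  static __always_inline void topology_init_once(void)
  {
          struct bpf_system_cpus cpus = {};

          if (topo_ready)
                  return;

          /* Fill one bpf_cpu_topology entry per active CPU. */
          if (!bpf_init_cpu_topology(&map_cpu_topology, BPF_ANY))
                  topo_ready = 1;

          /* System-wide masks and counts, e.g. cpus.nr_cpu_ids. */
          bpf_get_system_cpus(&cpus, sizeof(cpus));
  }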
Signed-off-by: Ren Zhijie renzhijie2@huawei.com Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Ren Zhijie renzhijie2@huawei.com --- include/linux/bpf_topology.h | 46 ++++++++++++++++ include/uapi/linux/bpf.h | 14 +++++ kernel/bpf/helpers.c | 6 +++ kernel/sched/Makefile | 3 +- kernel/sched/bpf_sched.c | 1 + kernel/sched/bpf_topology.c | 99 ++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 4 ++ tools/include/uapi/linux/bpf.h | 14 +++++ 8 files changed, 186 insertions(+), 1 deletion(-) create mode 100644 include/linux/bpf_topology.h create mode 100644 kernel/sched/bpf_topology.c
diff --git a/include/linux/bpf_topology.h b/include/linux/bpf_topology.h new file mode 100644 index 000000000000..d0b7540c3580 --- /dev/null +++ b/include/linux/bpf_topology.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BPF_TOPOLOGY_H +#define _LINUX_BPF_TOPOLOGY_H + +#include <linux/cpumask.h> + +struct bpf_cpu_topology { + int cpu; + int core_id; + int cluster_id; + int die_id; + int physical_package_id; + int numa_node; + struct cpumask thread_siblings; + struct cpumask core_siblings; + struct cpumask cluster_cpus; + struct cpumask die_cpus; + struct cpumask package_cpus; + struct cpumask node_cpu_lists; +}; + +struct bpf_system_cpus { + unsigned int nums_possible_cpus; + unsigned int nums_active_cpus; + unsigned int nums_isolate_cpus; + unsigned int nr_cpu_ids; + unsigned int bpf_nr_cpumask_bits; + struct cpumask cpu_possible_cpumask; + struct cpumask cpu_active_cpumask; + struct cpumask cpu_isolate_cpumask; +}; + +#endif /* _LINUX_BPF_TOPOLOGY_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c53cb91f67d1..ac8e951c9990 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3843,6 +3843,18 @@ union bpf_attr { * Get *cpu* capacity returned in *ctx*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_init_cpu_topology(struct bpf_map *map, u64 flags) + * Description + * Initializing the cpu topology which used for bpf prog. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_system_cpus(struct bpf_system_cpus *cpus, int len) + * Description + * Get system cpus returned in *cpus*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4017,6 +4029,8 @@ union bpf_attr { FN(sched_cpu_nr_running_of), \ FN(sched_cpu_idle_stat_of), \ FN(sched_cpu_capacity_of), \ + FN(init_cpu_topology), \ + FN(get_system_cpus), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 41c273db0ca8..391361b6d045 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -660,6 +660,8 @@ const struct bpf_func_proto bpf_probe_read_kernel_proto __weak; const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak; const struct bpf_func_proto bpf_sched_set_tg_tag_proto __weak; const struct bpf_func_proto bpf_sched_set_task_tag_proto __weak; +const struct bpf_func_proto bpf_init_cpu_topology_proto __weak; +const struct bpf_func_proto bpf_get_system_cpus_proto __weak;
const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) @@ -699,6 +701,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_init_cpu_topology: + return &bpf_init_cpu_topology_proto; + case BPF_FUNC_get_system_cpus: + return &bpf_get_system_cpus_proto; default: break; } diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index 8ae9e39eb83a..c809d5c28424 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -36,4 +36,5 @@ obj-$(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) += cpufreq_schedutil.o obj-$(CONFIG_MEMBARRIER) += membarrier.o obj-$(CONFIG_CPU_ISOLATION) += isolation.o obj-$(CONFIG_PSI) += psi.o -obj-$(CONFIG_BPF_SCHED) += bpf_sched.o \ No newline at end of file +obj-$(CONFIG_BPF_SCHED) += bpf_sched.o +obj-$(CONFIG_BPF_SCHED) += bpf_topology.o \ No newline at end of file diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index cc63638b5e81..89845caae2f2 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -4,6 +4,7 @@ #include <linux/bpf_verifier.h> #include <linux/bpf_sched.h> #include <linux/btf_ids.h> +#include <linux/bpf_topology.h> #include "sched.h"
DEFINE_STATIC_KEY_FALSE(bpf_sched_enabled_key); diff --git a/kernel/sched/bpf_topology.c b/kernel/sched/bpf_topology.c new file mode 100644 index 000000000000..7106da506f72 --- /dev/null +++ b/kernel/sched/bpf_topology.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/bpf.h> +#include <linux/btf_ids.h> +#include <linux/bpf_verifier.h> +#include <linux/topology.h> +#include <linux/cpumask.h> +#include <linux/bpf_topology.h> +#include <linux/sched/isolation.h> + +static void bpf_update_cpu_topology(struct bpf_cpu_topology *cpu_topology, int cpu) +{ + cpu_topology->cpu = cpu; + cpu_topology->core_id = topology_core_id(cpu); + cpu_topology->cluster_id = topology_cluster_id(cpu); + cpu_topology->die_id = topology_die_id(cpu); + cpu_topology->physical_package_id = topology_physical_package_id(cpu); + cpu_topology->numa_node = cpu_to_node(cpu); + cpumask_copy(&cpu_topology->thread_siblings, topology_sibling_cpumask(cpu)); + cpumask_copy(&cpu_topology->core_siblings, topology_core_cpumask(cpu)); + cpumask_copy(&cpu_topology->cluster_cpus, topology_cluster_cpumask(cpu)); + cpumask_copy(&cpu_topology->die_cpus, topology_die_cpumask(cpu)); + cpumask_copy(&cpu_topology->package_cpus, topology_core_cpumask(cpu)); + cpumask_copy(&cpu_topology->node_cpu_lists, cpumask_of_node(cpu_to_node(cpu))); +} + +BPF_CALL_2(bpf_init_cpu_topology, struct bpf_map *, map, u64, flags) +{ + const struct cpumask *cpu_map = cpu_active_mask; + int ret = 0; + int i = -1; + + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); + for_each_cpu(i, cpu_map) { + struct bpf_cpu_topology topo; + + bpf_update_cpu_topology(&topo, i); + ret = map->ops->map_update_elem(map, &i, &topo, flags); + if (ret) { + int idx = i; + + for (; idx >= 0; idx--) + map->ops->map_delete_elem(map, &idx); + break; + } + } + + return ret; +} + +BTF_ID_LIST_SINGLE(bpf_cpu_topology_ids, struct, bpf_cpu_topology) + +const struct bpf_func_proto bpf_init_cpu_topology_proto = { + .func = bpf_init_cpu_topology, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_2(bpf_get_system_cpus, struct bpf_system_cpus *, cpus, + int, len) +{ + if (len != sizeof(*cpus)) + return -EINVAL; + + memset(cpus, 0, sizeof(struct bpf_system_cpus)); + + cpumask_copy(&cpus->cpu_possible_cpumask, cpu_possible_mask); + cpumask_copy(&cpus->cpu_active_cpumask, cpu_active_mask); + cpumask_copy(&cpus->cpu_isolate_cpumask, housekeeping_cpumask(HK_FLAG_DOMAIN)); + cpus->nums_possible_cpus = num_possible_cpus(); + cpus->nums_active_cpus = num_active_cpus(); + cpus->nums_isolate_cpus = cpumask_weight(&cpus->cpu_isolate_cpumask); + cpus->nr_cpu_ids = nr_cpu_ids; + cpus->bpf_nr_cpumask_bits = nr_cpumask_bits; + + return 0; +} + +const struct bpf_func_proto bpf_get_system_cpus_proto = { + .func = bpf_get_system_cpus, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_UNINIT_MEM, + .arg2_type = ARG_CONST_SIZE, +}; diff --git 
a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index f2b5e63801ca..1a55adee2a6d 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -441,6 +441,8 @@ class PrinterHelpers(Printer): 'struct bpf_sched_cpu_nr_running', 'struct bpf_sched_cpu_idle_stat', 'struct bpf_sched_cpu_capacity', + 'struct bpf_cpu_topology', + 'struct bpf_system_cpus', ] known_types = { '...', @@ -490,6 +492,8 @@ class PrinterHelpers(Printer): 'struct bpf_sched_cpu_nr_running', 'struct bpf_sched_cpu_idle_stat', 'struct bpf_sched_cpu_capacity', + 'struct bpf_cpu_topology', + 'struct bpf_system_cpus', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7d315debb910..8a09dfc1dad5 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3843,6 +3843,18 @@ union bpf_attr { * Get *cpu* capacity returned in *ctx*. * Return * 0 on success, or a negative error in case of failure. + * + * long bpf_init_cpu_topology(struct bpf_map *map, u64 flags) + * Description + * Initializing the cpu topology which used for bpf prog. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_get_system_cpus(struct bpf_system_cpus *cpus, int len) + * Description + * Get system cpus returned in *cpus*. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4017,6 +4029,8 @@ union bpf_attr { FN(sched_cpu_nr_running_of), \ FN(sched_cpu_idle_stat_of), \ FN(sched_cpu_capacity_of), \ + FN(init_cpu_topology), \ + FN(get_system_cpus), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Chen Hui judy.chenhui@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add a collection of cpumask operations, such as cpumask_empty, cpumask_and, cpumask_andnot, cpumask_subset, cpumask_equal and cpumask_copy, exposed to BPF programs through a single bpf_cpumask_op() helper.
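As an illustration (not part of the patch itself), a scheduler BPF program is expected to drive the new helper roughly as follows; the wrapper name is made up, and the cpumask pointers are assumed to be masks the program may legitimately hand back to the kernel, for example masks taken from a scheduler hook context:

static __always_inline int example_cpumask_and(struct cpumask *dst,
                                               struct cpumask *src1,
                                               struct cpumask *src2)
{
        struct cpumask_op_args op;

        op.op_type = CPUMASK_AND;
        op.arg1 = dst;
        op.arg2 = src1;
        op.arg3 = src2;
        op.arg4 = (void *)0;

        /* returns the cpumask_and() result: non-zero if dst is not empty */
        return bpf_cpumask_op(&op, sizeof(op));
}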
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 23 +++++++++ include/uapi/linux/bpf.h | 7 +++ kernel/sched/bpf_sched.c | 86 ++++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 4 ++ tools/include/uapi/linux/bpf.h | 7 +++ 5 files changed, 127 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 6796437eec45..08e7e6d64f55 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2217,5 +2217,28 @@ struct bpf_sched_cpu_capacity { unsigned long capacity; unsigned long capacity_orig; }; + +struct cpumask_op_args { + unsigned int op_type; + void *arg1; + void *arg2; + void *arg3; + void *arg4; +}; + +enum cpumask_op_type { + CPUMASK_EMPTY, + CPUMASK_AND, + CPUMASK_ANDNOT, + CPUMASK_SUBSET, + CPUMASK_EQUAL, + CPUMASK_TEST_CPU, + CPUMASK_COPY, + CPUMASK_WEIGHT, + CPUMASK_NEXT, + CPUMASK_NEXT_WRAP, + CPUMASK_NEXT_AND, + CPUMASK_CPULIST_PARSE +}; #endif #endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index ac8e951c9990..5f794a297c79 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3855,6 +3855,12 @@ union bpf_attr { * Get system cpus returned in *cpus*. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_cpumask_op(struct cpumask_op_args *op, int len) + * Description + * cpumask operation collections. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4031,6 +4037,7 @@ union bpf_attr { FN(sched_cpu_capacity_of), \ FN(init_cpu_topology), \ FN(get_system_cpus), \ + FN(cpumask_op), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 89845caae2f2..00f3186a011b 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -375,6 +375,90 @@ static const struct bpf_func_proto bpf_sched_cpu_capacity_of_proto = { .arg3_type = ARG_CONST_SIZE, };
+BPF_CALL_2(bpf_cpumask_op, struct cpumask_op_args *, op, int, len) +{ + int ret; + + if (len != sizeof(*op) || !op->arg1) + return -EINVAL; + + switch (op->op_type) { + case CPUMASK_EMPTY: + return cpumask_empty((const struct cpumask *)op->arg1); + case CPUMASK_AND: + if (!op->arg2 || !op->arg3) + return -EINVAL; + return cpumask_and((struct cpumask *)op->arg1, + (const struct cpumask *)op->arg2, + (const struct cpumask *)op->arg3); + case CPUMASK_ANDNOT: + if (!op->arg2 || !op->arg3) + return -EINVAL; + cpumask_andnot((struct cpumask *)op->arg1, + (const struct cpumask *)op->arg2, + (const struct cpumask *)op->arg3); + break; + case CPUMASK_SUBSET: + if (!op->arg2) + return -EINVAL; + return cpumask_subset((const struct cpumask *)op->arg1, + (const struct cpumask *)op->arg2); + case CPUMASK_EQUAL: + if (!op->arg2) + return -EINVAL; + return cpumask_equal((const struct cpumask *)op->arg1, + (const struct cpumask *)op->arg2); + case CPUMASK_TEST_CPU: + if (!op->arg2) + return -EINVAL; + return cpumask_test_cpu(*(int *)op->arg1, op->arg2); + case CPUMASK_COPY: + if (!op->arg2) + return -EINVAL; + cpumask_copy((struct cpumask *)op->arg1, + (const struct cpumask *)op->arg2); + break; + case CPUMASK_WEIGHT: + return cpumask_weight((const struct cpumask *)op->arg1); + case CPUMASK_NEXT: + if (!op->arg2) + return -EINVAL; + return cpumask_next(*(int *)op->arg1, + (const struct cpumask *)op->arg2); + case CPUMASK_NEXT_WRAP: + if (!op->arg2 || !op->arg3 || !op->arg4) + return -EINVAL; + return cpumask_next_wrap(*(int *)op->arg1, + (const struct cpumask *)op->arg2, + *(int *)op->arg3, *(int *)op->arg4); + case CPUMASK_NEXT_AND: + if (!op->arg2 || !op->arg3) + return -EINVAL; + return cpumask_next_and(*(int *)op->arg1, + (const struct cpumask *)op->arg2, + (const struct cpumask *)op->arg3); + case CPUMASK_CPULIST_PARSE: + if (!op->arg2) + return -EINVAL; + + op->arg1 = (void *)strstrip((void *)op->arg1); + ret = cpulist_parse((void *)op->arg1, + (struct cpumask *)op->arg2); + return ret; + default: + return -EINVAL; + } + + return 0; +} + +static const struct bpf_func_proto bpf_cpumask_op_proto = { + .func = bpf_cpumask_op, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, +};
static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) @@ -406,6 +490,8 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_cpu_idle_stat_of_proto; case BPF_FUNC_sched_cpu_capacity_of: return &bpf_sched_cpu_capacity_of_proto; + case BPF_FUNC_cpumask_op: + return &bpf_cpumask_op_proto; default: return bpf_base_func_proto(func_id); } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 1a55adee2a6d..8ec9f0302dac 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -443,6 +443,8 @@ class PrinterHelpers(Printer): 'struct bpf_sched_cpu_capacity', 'struct bpf_cpu_topology', 'struct bpf_system_cpus', + 'struct cpumask', + 'struct cpumask_op_args', ] known_types = { '...', @@ -494,6 +496,8 @@ class PrinterHelpers(Printer): 'struct bpf_sched_cpu_capacity', 'struct bpf_cpu_topology', 'struct bpf_system_cpus', + 'struct cpumask', + 'struct cpumask_op_args', } mapped_types = { 'u8': '__u8', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8a09dfc1dad5..7afd2958934d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3855,6 +3855,12 @@ union bpf_attr { * Get system cpus returned in *cpus*. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_cpumask_op(struct cpumask_op_args *op, int len) + * Description + * cpumask operation collections. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4031,6 +4037,7 @@ union bpf_attr { FN(sched_cpu_capacity_of), \ FN(init_cpu_topology), \ FN(get_system_cpus), \ + FN(cpumask_op), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Hui Tang tanghui20@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add a helper function to check whether two CPUs share the same LLC (last-level cache).
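For illustration only (the function name and CPU arguments are placeholders), a wakeup-path program could use the helper to prefer a cache-hot CPU:

static __always_inline int example_pick_cache_hot(int prev_cpu, int target_cpu)
{
        /* non-zero when both CPUs sit under the same last-level cache */
        if (bpf_cpus_share_cache(prev_cpu, target_cpu))
                return target_cpu;      /* migrating keeps the working set warm */

        return prev_cpu;                /* otherwise stay on the previous CPU */
}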
Signed-off-by: Hui Tang tanghui20@huawei.com --- include/uapi/linux/bpf.h | 7 +++++++ kernel/sched/bpf_sched.c | 18 ++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++++ 3 files changed, 32 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5f794a297c79..bfd7ddc6bd54 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3861,6 +3861,12 @@ union bpf_attr { * cpumask operation collections. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_cpus_share_cache(int src_cpu, int dst_cpu) + * Description + * check src_cpu whether share cache with dst_cpu. + * Return + * true yes, false no. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4038,6 +4044,7 @@ union bpf_attr { FN(init_cpu_topology), \ FN(get_system_cpus), \ FN(cpumask_op), \ + FN(cpus_share_cache), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index 00f3186a011b..d4224c80d5cf 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -335,6 +335,8 @@ BPF_CALL_3(bpf_sched_cpu_idle_stat_of, int, cpu, ctx->avg_idle = rq->avg_idle; #endif
+ ctx->available_idle = available_idle_cpu(cpu); + return 0; }
@@ -460,6 +462,20 @@ static const struct bpf_func_proto bpf_cpumask_op_proto = { .arg2_type = ARG_CONST_SIZE, };
+BPF_CALL_2(bpf_cpus_share_cache, int, src_cpu, + int, dst_cpu) +{ + return cpus_share_cache(src_cpu, dst_cpu); +} + +static const struct bpf_func_proto bpf_cpus_share_cache_proto = { + .func = bpf_cpus_share_cache, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -492,6 +508,8 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_cpu_capacity_of_proto; case BPF_FUNC_cpumask_op: return &bpf_cpumask_op_proto; + case BPF_FUNC_cpus_share_cache: + return &bpf_cpus_share_cache_proto; default: return bpf_base_func_proto(func_id); } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7afd2958934d..a88c9b495824 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3861,6 +3861,12 @@ union bpf_attr { * cpumask operation collections. * Return * 0 on success, or a negative error in case of failure. + * + * int bpf_cpus_share_cache(int src_cpu, int dst_cpu) + * Description + * check src_cpu whether share cache with dst_cpu. + * Return + * true yes, false no. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4038,6 +4044,7 @@ union bpf_attr { FN(init_cpu_topology), \ FN(get_system_cpus), \ FN(cpumask_op), \ + FN(cpus_share_cache), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Chen Hui judy.chenhui@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add a library for the programmable scheduler; these helper functions make it easier for users to write scheduler BPF programs.
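For example, a program that includes the header could scan a candidate mask for an idle CPU and fall back to the least utilized one, much like the samples added later in this series (the function name is illustrative):

#include <bpf/libbpf_sched.h>

static __always_inline int example_pick_cpu(struct cpumask *mask)
{
        unsigned long best_util = (unsigned long)-1;
        unsigned long util;
        int best_cpu = -1;
        int cpu;

        libbpf_for_each_cpu(cpu, mask) {
                if (libbpf_available_idle_cpu(cpu))
                        return cpu;     /* an idle CPU wins immediately */

                /* otherwise remember the least utilized CPU seen so far */
                util = libbpf_cfs_util_avg_of(cpu);
                if (util < best_util) {
                        best_util = util;
                        best_cpu = cpu;
                }
        }

        return best_cpu;
}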
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- tools/lib/bpf/libbpf_sched.h | 435 +++++++++++++++++++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100644 tools/lib/bpf/libbpf_sched.h
diff --git a/tools/lib/bpf/libbpf_sched.h b/tools/lib/bpf/libbpf_sched.h new file mode 100644 index 000000000000..b937640bf0fc --- /dev/null +++ b/tools/lib/bpf/libbpf_sched.h @@ -0,0 +1,435 @@ +// PDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ +#ifndef __LIBBPF_LIBSCHED_H +#define __LIBBPF_LIBSCHED_H + +#include <linux/bpf_topology.h> +#include <linux/version.h> +#include <uapi/linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define INVALID_PTR ((void *)(0UL)) +#define getVal(P) \ + ({ \ + typeof(P) val = 0; \ + bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ + val; \ + }) + +static __always_inline long libbpf_cpumask_next(int n, struct cpumask *mask); +static __always_inline long libbpf_cpumask_next_wrap(int n, struct cpumask *mask, int start, int wrap); +static __always_inline long libbpf_cpumask_next_and(int n, struct cpumask *mask1, struct cpumask *mask2); +static __always_inline int libbpf_nr_cpus_ids(void); +static __always_inline int libbpf_nr_cpumask_bits(void); + +#if NR_CPUS == 1 + +#define libbpf_for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define libbpf_for_each_cpu_wrap(cpu, mask, start) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)(start)) +#define libbpf_for_each_cpu_and(cpu, mask1, mask2) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask1, (void)mask2) + +#else + +#define libbpf_for_each_cpu(cpu, mask) \ + for (int __i = 0, (cpu) = -1; \ + (cpu) = libbpf_cpumask_next((cpu), (mask)), \ + (cpu) < libbpf_nr_cpus_ids() && __i < NR_CPUS; __i++) + +#define libbpf_for_each_cpu_wrap(cpu, mask, start) \ + for (int __i = 0, (cpu) = libbpf_cpumask_next_wrap((start) - 1, (mask), (start), false); \ + (cpu) < libbpf_nr_cpumask_bits() && __i < NR_CPUS; \ + (cpu) = libbpf_cpumask_next_wrap((cpu), (mask), (start), true), __i++) + +#define libbpf_for_each_cpu_and(cpu, mask1, mask2) \ + for (int __i = 0, (cpu) = -1; \ + (cpu) = libbpf_cpumask_next_and((cpu), (mask1), (mask2)), \ + (cpu) < libbpf_nr_cpus_ids() && __i < NR_CPUS; __i++) + +#endif + +static __always_inline long libbpf_cpumask_copy(struct cpumask *dst, struct cpumask *src) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_COPY; + op.arg1 = dst; + op.arg2 = src; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_empty(struct cpumask *mask) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_EMPTY; + op.arg1 = mask; + op.arg2 = INVALID_PTR; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_and(struct cpumask *dst, struct cpumask *src1, struct cpumask *src2) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_AND; + op.arg1 = dst; + op.arg2 = src1; + op.arg3 = src2; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_andnot(struct cpumask *dst, struct cpumask *src1, struct 
cpumask *src2) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_ANDNOT; + op.arg1 = dst; + op.arg2 = src1; + op.arg3 = src2; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_subset(struct cpumask *src1, struct cpumask *src2) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_SUBSET; + op.arg1 = src1; + op.arg2 = src2; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_equal(struct cpumask *src1, struct cpumask *src2) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_EQUAL; + op.arg1 = src1; + op.arg2 = src2; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_weight(struct cpumask *src1) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_WEIGHT; + op.arg1 = src1; + op.arg2 = INVALID_PTR; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_test_cpu(int cpu, struct cpumask *mask) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_TEST_CPU; + op.arg1 = &cpu; + op.arg2 = mask; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_next(int n, struct cpumask *mask) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_NEXT; + op.arg1 = &n; + op.arg2 = mask; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_next_wrap(int n, struct cpumask *mask, int start, int wrap) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_NEXT_WRAP; + op.arg1 = &n; + op.arg2 = mask; + op.arg3 = &start; + op.arg4 = &wrap; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_next_and(int n, struct cpumask *mask1, struct cpumask *mask2) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_NEXT_AND; + op.arg1 = &n; + op.arg2 = mask1; + op.arg3 = mask2; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline long libbpf_cpumask_cpulist_parse(char *src1, struct cpumask *src2) +{ + struct cpumask_op_args op; + + op.op_type = CPUMASK_CPULIST_PARSE; + op.arg1 = src1; + op.arg2 = src2; + op.arg3 = INVALID_PTR; + op.arg4 = INVALID_PTR; + return bpf_cpumask_op(&op, sizeof(op)); +} + +static __always_inline int libbpf_num_active_cpus(void) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + return getVal(cpus.nums_active_cpus); +} + +static __always_inline int libbpf_num_possible_cpus(void) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + return getVal(cpus.nums_possible_cpus); +} + +static __always_inline void libbpf_possible_cpus_mask(struct cpumask *mask) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + libbpf_cpumask_copy(mask, &cpus.cpu_possible_cpumask); +} + +static __always_inline void libbpf_active_cpus_mask(struct cpumask *mask) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + libbpf_cpumask_copy(mask, &cpus.cpu_active_cpumask); +} + +static __always_inline void libbpf_isolate_cpus_mask(struct cpumask *mask) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + libbpf_cpumask_copy(mask, &cpus.cpu_isolate_cpumask); +} + +static __always_inline 
int libbpf_nr_cpus_ids(void) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + return getVal(cpus.nr_cpu_ids); +} + +static __always_inline int libbpf_nr_cpumask_bits(void) +{ + struct bpf_system_cpus cpus; + + bpf_get_system_cpus(&cpus, sizeof(cpus)); + return getVal(cpus.bpf_nr_cpumask_bits); +} + +static __always_inline unsigned long libbpf_cfs_load_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return getVal(load.cfs_load_avg); +} + +static __always_inline unsigned long libbpf_cfs_runnable_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return getVal(load.cfs_runnable_avg); +} + +static __always_inline unsigned long libbpf_cfs_util_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return getVal(load.cfs_util_avg); +} + +static __always_inline unsigned long libbpf_rt_load_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return load.rt_load_avg; +} + +static __always_inline unsigned long libbpf_rt_runnable_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return load.rt_runnable_avg; +} + +static __always_inline unsigned long libbpf_rt_util_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return load.rt_util_avg; +} + +static __always_inline unsigned long libbpf_irq_load_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return load.irq_load_avg; +} + +static __always_inline unsigned long libbpf_irq_util_avg_of(int cpu) +{ + struct bpf_sched_cpu_load load; + + bpf_sched_cpu_load_of(cpu, &load, sizeof(load)); + return load.irq_util_avg; +} + +static __always_inline unsigned int libbpf_nr_running_of(int cpu) +{ + struct bpf_sched_cpu_nr_running running; + + bpf_sched_cpu_nr_running_of(cpu, &running, sizeof(running)); + return getVal(running.nr_running); +} + +static __always_inline unsigned int libbpf_cfs_nr_running_of(int cpu) +{ + struct bpf_sched_cpu_nr_running running; + + bpf_sched_cpu_nr_running_of(cpu, &running, sizeof(running)); + return getVal(running.cfs_nr_running); +} + +static __always_inline unsigned int libbpf_cfs_h_nr_running_of(int cpu) +{ + struct bpf_sched_cpu_nr_running running; + + bpf_sched_cpu_nr_running_of(cpu, &running, sizeof(running)); + return getVal(running.cfs_h_nr_runing); +} + +static __always_inline unsigned int libbpf_cfs_idle_h_nr_running_of(int cpu) +{ + struct bpf_sched_cpu_nr_running running; + + bpf_sched_cpu_nr_running_of(cpu, &running, sizeof(running)); + return running.cfs_idle_h_nr_running; +} + +static __always_inline unsigned int libbpf_rt_nr_running_of(int cpu) +{ + struct bpf_sched_cpu_nr_running running; + + bpf_sched_cpu_nr_running_of(cpu, &running, sizeof(running)); + return getVal(running.rt_nr_running); +} + +static __always_inline unsigned int libbpf_rr_nr_running_of(int cpu) +{ + struct bpf_sched_cpu_nr_running running; + + bpf_sched_cpu_nr_running_of(cpu, &running, sizeof(running)); + return running.rr_nr_running; +} + +static __always_inline unsigned int libbpf_exit_latency_of(int cpu) +{ + struct bpf_sched_cpu_idle_stat stat; + + bpf_sched_cpu_idle_stat_of(cpu, &stat, sizeof(stat)); + return stat.exit_latency; +} + +static __always_inline unsigned long libbpf_idle_stamp_of(int cpu) +{ + struct 
bpf_sched_cpu_idle_stat stat; + + bpf_sched_cpu_idle_stat_of(cpu, &stat, sizeof(stat)); + return stat.idle_stamp; +} + +static __always_inline unsigned long libbpf_avg_idle_of(int cpu) +{ + struct bpf_sched_cpu_idle_stat stat; + + bpf_sched_cpu_idle_stat_of(cpu, &stat, sizeof(stat)); + return stat.avg_idle; +} + +static __always_inline unsigned long libbpf_available_idle_cpu(int cpu) +{ + struct bpf_sched_cpu_idle_stat stat; + + bpf_sched_cpu_idle_stat_of(cpu, &stat, sizeof(stat)); + return getVal(stat.available_idle); +} + +static __always_inline unsigned long libbpf_capacity_of(int cpu) +{ + struct bpf_sched_cpu_capacity cap; + + bpf_sched_cpu_capacity_of(cpu, &cap, sizeof(cap)); + return getVal(cap.capacity); +} + +static __always_inline unsigned long libbpf_capacity_orig_of(int cpu) +{ + struct bpf_sched_cpu_capacity cap; + + bpf_sched_cpu_capacity_of(cpu, &cap, sizeof(cap)); + return cap.capacity_orig; +} + +static __always_inline int libbpf_cpus_share_cache(int src_cpu, int dst_cpu) +{ + return bpf_cpus_share_cache(src_cpu, dst_cpu); +} + +static __always_inline void libbpf_sched_set_cpus_ptr(struct sched_migrate_ctx *c, + struct cpumask *cpus) +{ + bpf_sched_set_cpus_ptr(c, cpus, sizeof(*cpus)); +} +#endif
From: Chen Hui judy.chenhui@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add hooks on the select_task_rq_fair() path: cfs_select_rq to override the CPU selection, cfs_wake_affine for the wake-affine decision, and cfs_select_rq_exit to adjust the result before the function returns.
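A minimal sketch of a program for the cfs_select_rq hook (the program name is made up; returning any negative value keeps the CPU the kernel would have chosen on its own):

#include <linux/version.h>
#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/libbpf_sched.h>

SEC("sched/cfs_select_rq")
int BPF_PROG(example_stick_to_prev, struct sched_migrate_ctx *c)
{
        int prev_cpu = getVal(c->prev_cpu);

        /* keep the task where it last ran as long as that CPU is idle */
        if (libbpf_available_idle_cpu(prev_cpu))
                return prev_cpu;

        return -1;      /* negative: fall back to the kernel's own selection */
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;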
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 20 +++++++++++++ include/linux/sched_hook_defs.h | 3 ++ kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ 4 files changed, 75 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 08e7e6d64f55..700a80d3f6d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2240,5 +2240,25 @@ enum cpumask_op_type { CPUMASK_NEXT_AND, CPUMASK_CPULIST_PARSE }; + +struct sched_migrate_ctx { + struct task_struct *task; + struct cpumask *cpus_allowed; + struct cpumask *select_idle_mask; + int prev_cpu; + int curr_cpu; + int is_sync; + int want_affine; + int wake_flags; + int sd_flag; + int new_cpu; +}; + +struct sched_affine_ctx { + struct task_struct *task; + int prev_cpu; + int curr_cpu; + int is_sync; +}; #endif #endif diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h index e2f65e4b8895..9f87609cde8a 100644 --- a/include/linux/sched_hook_defs.h +++ b/include/linux/sched_hook_defs.h @@ -3,3 +3,6 @@ BPF_SCHED_HOOK(int, 0, cfs_check_preempt_tick, struct sched_entity *curr, unsign BPF_SCHED_HOOK(int, 0, cfs_check_preempt_wakeup, struct task_struct *curr, struct task_struct *p) BPF_SCHED_HOOK(int, 0, cfs_wakeup_preempt_entity, struct sched_entity *curr, struct sched_entity *se) +BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx) +BPF_SCHED_HOOK(int, -1, cfs_wake_affine, struct sched_affine_ctx *ctx) +BPF_SCHED_HOOK(int, 0, cfs_select_rq_exit, struct sched_migrate_ctx *ctx) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3395d102b43e..79e0a475599a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6020,6 +6020,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, { int target = nr_cpumask_bits;
+#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + struct sched_affine_ctx ctx; + int ret; + + ctx.task = p; + ctx.prev_cpu = prev_cpu; + ctx.curr_cpu = this_cpu; + ctx.is_sync = sync; + + ret = bpf_sched_cfs_wake_affine(&ctx); + if (ret >= 0 && ret < nr_cpumask_bits) + return ret; + } +#endif + if (sched_feat(WA_IDLE)) target = wake_affine_idle(this_cpu, prev_cpu, sync);
@@ -6884,6 +6900,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int new_cpu = prev_cpu; int want_affine = 0; int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); +#ifdef CONFIG_BPF_SCHED + struct sched_migrate_ctx ctx; + int ret; +#endif
time = schedstat_start_time();
@@ -6901,6 +6921,26 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f }
rcu_read_lock(); +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + ctx.task = p; + ctx.prev_cpu = prev_cpu; + ctx.curr_cpu = cpu; + ctx.is_sync = sync; + ctx.wake_flags = wake_flags; + ctx.want_affine = want_affine; + ctx.sd_flag = sd_flag; + ctx.cpus_allowed = (void *)p->cpus_ptr; + ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask); + + ret = bpf_sched_cfs_select_rq(&ctx); + if (ret >= 0) { + rcu_read_unlock(); + return ret; + } + } +#endif + for_each_domain(cpu, tmp) { /* * If both 'cpu' and 'prev_cpu' are part of this domain, @@ -6932,6 +6972,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (want_affine) current->recent_used_cpu = cpu; } + +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + ctx.new_cpu = new_cpu; + ret = bpf_sched_cfs_select_rq_exit(&ctx); + if (ret >= 0) + new_cpu = ret; + } +#endif + rcu_read_unlock(); schedstat_end_time(cpu_rq(cpu), time);
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 8ec9f0302dac..c8e87fae7b17 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -445,6 +445,7 @@ class PrinterHelpers(Printer): 'struct bpf_system_cpus', 'struct cpumask', 'struct cpumask_op_args', + 'struct sched_migrate_ctx', ] known_types = { '...', @@ -498,6 +499,7 @@ class PrinterHelpers(Printer): 'struct bpf_system_cpus', 'struct cpumask', 'struct cpumask_op_args', + 'struct sched_migrate_ctx', } mapped_types = { 'u8': '__u8',
From: Hui Tang tanghui20@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add a helper function to set cpus_ptr in the task.
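A rough sketch of the intended use, mirroring the select-core sample added later in the series: narrow the task's cpus_ptr inside cfs_select_rq and restore the original mask in cfs_select_rq_exit. The program names and the "0-3" cpulist are made up for illustration:

#include <linux/version.h>
#include <linux/sched.h>
#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/libbpf_sched.h>

char prefer_cpulist[] = "0-3";          /* made-up preferred CPU range */

SEC("sched/cfs_select_rq")
int BPF_PROG(example_narrow_cpus, struct sched_migrate_ctx *c)
{
        struct cpumask *prefer_cpus = getVal(c->select_idle_mask);

        if (libbpf_cpumask_cpulist_parse(prefer_cpulist, prefer_cpus))
                return -1;              /* parse failed, change nothing */

        /* let the fair-class selection below only see the preferred CPUs */
        libbpf_sched_set_cpus_ptr(c, prefer_cpus);

        return -1;                      /* still let the kernel pick the CPU */
}

SEC("sched/cfs_select_rq_exit")
int BPF_PROG(example_restore_cpus, struct sched_migrate_ctx *c)
{
        /* always hand the original mask back before leaving the hook */
        libbpf_sched_set_cpus_ptr(c, (void *)getVal(c->cpus_allowed));

        return -1;                      /* negative: keep the chosen CPU */
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;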
Signed-off-by: Hui Tang tanghui20@huawei.com --- include/uapi/linux/bpf.h | 7 +++++++ kernel/sched/bpf_sched.c | 23 +++++++++++++++++++++++ tools/include/uapi/linux/bpf.h | 7 +++++++ 3 files changed, 37 insertions(+)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bfd7ddc6bd54..8cc17cbac415 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3867,6 +3867,12 @@ union bpf_attr { * check src_cpu whether share cache with dst_cpu. * Return * true yes, false no. + * + * int bpf_sched_set_cpus_ptr(struct sched_migrate_ctx *c, struct cpumask *cpus, int len) + * Description + * set cpus_ptr in task. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4045,6 +4051,7 @@ union bpf_attr { FN(get_system_cpus), \ FN(cpumask_op), \ FN(cpus_share_cache), \ + FN(sched_set_cpus_ptr), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper diff --git a/kernel/sched/bpf_sched.c b/kernel/sched/bpf_sched.c index d4224c80d5cf..18ed8bce35e1 100644 --- a/kernel/sched/bpf_sched.c +++ b/kernel/sched/bpf_sched.c @@ -476,6 +476,27 @@ static const struct bpf_func_proto bpf_cpus_share_cache_proto = { .arg2_type = ARG_ANYTHING, };
+BPF_CALL_3(bpf_sched_set_cpus_ptr, struct sched_migrate_ctx *, c, + struct cpumask *, cpus, int, len) +{ + if (len != sizeof(*cpus)) + return -EINVAL; + + c->task->cpus_ptr = cpus; + return 0; +} + +BTF_ID_LIST_SINGLE(bpf_sched_migrate_ctx_ids, struct, sched_migrate_ctx) + +static const struct bpf_func_proto bpf_sched_set_cpus_ptr_proto = { + .func = bpf_sched_set_cpus_ptr, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &bpf_sched_migrate_ctx_ids[0], + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -508,6 +529,8 @@ bpf_sched_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sched_cpu_capacity_of_proto; case BPF_FUNC_cpumask_op: return &bpf_cpumask_op_proto; + case BPF_FUNC_sched_set_cpus_ptr: + return &bpf_sched_set_cpus_ptr_proto; case BPF_FUNC_cpus_share_cache: return &bpf_cpus_share_cache_proto; default: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a88c9b495824..f3206757cedf 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3867,6 +3867,12 @@ union bpf_attr { * check src_cpu whether share cache with dst_cpu. * Return * true yes, false no. + * + * int bpf_sched_set_cpus_ptr(struct sched_migrate_ctx *c, struct cpumask *cpus, int len) + * Description + * set cpus_ptr in task. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -4045,6 +4051,7 @@ union bpf_attr { FN(get_system_cpus), \ FN(cpumask_op), \ FN(cpus_share_cache), \ + FN(sched_set_cpus_ptr), \ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
From: Hui Tang tanghui20@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
1. Samples support the 'cfs_select_rq' hook
2. Samples support the 'cfs_wake_affine' hook
3. Samples support the 'cfs_select_rq_exit' hook
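For example, running './sched_select_core -R' loads sched_select_core_kern.o, attaches the cfs_select_cpu_range and cfs_select_cpu_range_exit programs, and then dumps trace_pipe until interrupted; '-C' and '-W' attach the plain select-core and wake-affine programs instead.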
Signed-off-by: Hui Tang tanghui20@huawei.com --- samples/bpf/Makefile | 3 + samples/bpf/sched_select_core_kern.c | 239 +++++++++++++++++++++++++++ samples/bpf/sched_select_core_user.c | 114 +++++++++++++ 3 files changed, 356 insertions(+) create mode 100644 samples/bpf/sched_select_core_kern.c create mode 100644 samples/bpf/sched_select_core_user.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index e473bad76549..62dadae992a2 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -55,6 +55,7 @@ tprogs-y += xdp_sample_pkts tprogs-y += ibumad tprogs-y += hbm tprogs-y += sched_preempt +tprogs-y += sched_select_core
# Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -113,6 +114,7 @@ xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) sched_preempt-objs := sched_preempt_user.o +sched_select_core-objs := sched_select_core_user.o
# Tell kbuild to always build the programs always-y := $(tprogs-y) @@ -175,6 +177,7 @@ always-y += hbm_out_kern.o always-y += hbm_edt_kern.o always-y += xdpsock_kern.o always-y += sched_preempt_kern.o +always-y += sched_select_core_kern.o
ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/sched_select_core_kern.c b/samples/bpf/sched_select_core_kern.c new file mode 100644 index 000000000000..3a01899819f5 --- /dev/null +++ b/samples/bpf/sched_select_core_kern.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2022 Huawei + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/version.h> +#include <linux/sched.h> +#include <uapi/linux/bpf.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/libbpf_sched.h> +#include <linux/cpumask.h> + +#define STR_MAX (32) +#define SELECT_RQ_RANGE (-1) +#define SELECT_RQ_EXIT_CPU_VALID (-2) + +/* From kernel/sched/sched.h */ +#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */ +#define WF_FORK 0x02 /* Child wakeup after fork */ +#define WF_MIGRATED 0x04 /* Internal use, task got migrated */ +#define WF_ON_CPU 0x08 /* Wakee is on_cpu */ + +#define TAG_ID(id) TAG_##id + +enum tag_id { + TAG_NONE, + TAG_ID(1), + TAG_ID(2), + TAG_MAX +}; + +struct tag_info { + long tag; + char buf[STR_MAX]; +}; + +struct tag_info tag_tbl[] = { + {TAG_NONE, ""}, + {TAG_ID(1), "0-3"}, + {TAG_ID(2), "4-7"}, + {TAG_MAX, ""}, +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, u32); + __type(value, int); + __uint(max_entries, 1); +} map_idlest_cpu SEC(".maps"); + +int sysctl_sched_util_low_pct = 85; + +static inline bool prefer_cpus_valid(struct cpumask *prefer_cpus, + struct cpumask *cpus_allowed) +{ + return !libbpf_cpumask_empty(prefer_cpus) && + !libbpf_cpumask_equal(prefer_cpus, cpus_allowed) && + libbpf_cpumask_subset(prefer_cpus, cpus_allowed); +} + +static struct cpumask *get_better_cpus(struct task_struct *p, + struct cpumask *prefer_cpus, + int *idlest_cpu) +{ + unsigned long util_avg_sum = 0; + unsigned long tg_capacity = 0; + unsigned int weight; + long min_util = INT_MIN; + struct task_group *tg; + long spare; + int cpu; + + if (!prefer_cpus_valid(prefer_cpus, (void *)getVal(p->cpus_ptr))) + return (void *)getVal(p->cpus_ptr); + + tg = p->sched_task_group; + libbpf_for_each_cpu(cpu, prefer_cpus) { + if (idlest_cpu && libbpf_available_idle_cpu(cpu)) { + *idlest_cpu = cpu; + } else if (idlest_cpu) { + spare = (long)(libbpf_capacity_of(cpu) - libbpf_cfs_util_avg_of(cpu)); + if (spare > min_util) { + min_util = spare; + *idlest_cpu = cpu; + } + } + + if (libbpf_available_idle_cpu(cpu)) { + return getVal(prefer_cpus); + } + + util_avg_sum += libbpf_cfs_util_avg_of(cpu); + tg_capacity += libbpf_capacity_of(cpu); + } + + weight = libbpf_cpumask_weight(prefer_cpus); + if (tg_capacity > weight && + util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) { + return getVal(prefer_cpus); + } + + return (void *)getVal(p->cpus_ptr); +} + +SEC("sched/cfs_select_rq") +int BPF_PROG(cfs_select_cpu_range, struct sched_migrate_ctx *c) +{ + struct cpumask *prefer_cpus = getVal(c->select_idle_mask); + struct task_struct *p = getVal(c->task); + struct cpumask *cpus_ptr; + int type = SELECT_RQ_RANGE; + long tag = getVal(p->tag); + int *idlest_cpu = 0; + int key = 0; + int ret; + + if (tag <= TAG_NONE || tag >= TAG_MAX) + return type; + + ret = libbpf_cpumask_cpulist_parse(tag_tbl[tag].buf, prefer_cpus); + if (ret) + return type; + + idlest_cpu = bpf_map_lookup_elem(&map_idlest_cpu, &key); + if (!idlest_cpu) + return type; 
+ + cpus_ptr = get_better_cpus(p, prefer_cpus, idlest_cpu); + libbpf_sched_set_cpus_ptr((void *)c, getVal(cpus_ptr)); + + return type; +} + +SEC("sched/cfs_select_rq_exit") +int BPF_PROG(cfs_select_cpu_range_exit, struct sched_migrate_ctx *c) +{ + int *idlest_cpu; + int key = 0; + + idlest_cpu = bpf_map_lookup_elem(&map_idlest_cpu, &key); + if (!idlest_cpu) { + libbpf_sched_set_cpus_ptr(c, (void *)getVal(c->cpus_allowed)); + return SELECT_RQ_EXIT_CPU_VALID; + } + + if (!libbpf_cpumask_test_cpu(getVal(c->new_cpu), + (void *)getVal(c->task->cpus_ptr))) { + libbpf_sched_set_cpus_ptr(c, (void *)getVal(c->cpus_allowed)); + return *idlest_cpu; + } + + libbpf_sched_set_cpus_ptr(c, (void *)getVal(c->cpus_allowed)); + return SELECT_RQ_EXIT_CPU_VALID; +} + +static int find_idlest_cpu(struct task_struct *p, int parent) +{ + unsigned long min = INT_MAX; + int min_load_cpu = 0; + unsigned long load; + int cpu; + int i; + + for (i = 0, cpu = -1; i < NR_CPUS; i++) { + cpu = libbpf_cpumask_next(cpu, (void *)getVal(p->cpus_ptr)); + if (cpu >= libbpf_nr_cpus_ids()) + break; + + load = libbpf_cfs_load_avg_of(cpu); + if (load < min) { + min = load; + min_load_cpu = cpu; + } + } + + return min_load_cpu; +} + +static int select_idle_cpu(struct task_struct *p, int parent, int prev_cpu) +{ + int cpu; + + if (libbpf_available_idle_cpu(prev_cpu)) + return prev_cpu; + + if (libbpf_available_idle_cpu(parent)) + return prev_cpu; + + libbpf_for_each_cpu_wrap(cpu, (void *)getVal(p->cpus_ptr), prev_cpu) { + if (libbpf_available_idle_cpu(cpu)) + return cpu; + } + + return prev_cpu; +} + +SEC("sched/cfs_select_rq") +int BPF_PROG(cfs_select_cpu, struct sched_migrate_ctx *c) +{ + struct task_struct *p = getVal(c->task); + int wake_flags = getVal(c->wake_flags); + int prev_cpu = getVal(c->prev_cpu); + int cpu = getVal(c->curr_cpu); + int new_cpu; + + if (wake_flags == WF_FORK) { + /* Slow path */ + new_cpu = find_idlest_cpu(p, cpu); + } else { + /* Fast path */ + new_cpu = select_idle_cpu(p, cpu, prev_cpu); + } + + return new_cpu; +} + +SEC("sched/cfs_wake_affine") +int BPF_PROG(cfs_wake_affine, struct sched_affine_ctx *c) +{ + int prev_cpu = getVal(c->prev_cpu); + int curr_cpu = getVal(c->curr_cpu); + int sync = getVal(c->is_sync); + + if (libbpf_available_idle_cpu(curr_cpu) && + libbpf_cpus_share_cache(curr_cpu, prev_cpu)) + return libbpf_available_idle_cpu(prev_cpu) ? 
prev_cpu : curr_cpu; + + if (sync && libbpf_nr_running_of(curr_cpu) == 1) + return curr_cpu; + + return prev_cpu; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/sched_select_core_user.c b/samples/bpf/sched_select_core_user.c new file mode 100644 index 000000000000..2cfe7a6223f1 --- /dev/null +++ b/samples/bpf/sched_select_core_user.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2022 Huawei +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/resource.h> +#include <bpf/libbpf.h> + +static void usage(void) +{ + printf("USAGE: test sched select core [...]\n"); + printf(" -W wakeup affine # Test sched wake wakeup\n"); + printf(" -C select core # Test sched select core\n"); + printf(" -R select core range # Test sched select core range\n"); + printf(" -h # Display this help\n"); +} + +#define TRACE_DIR "/sys/kernel/debug/tracing/" +#define BUF_SIZE (4096) + +/* read trace logs from debug fs */ +static void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(TRACE_DIR "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[BUF_SIZE]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf) - 1); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} + +int main(int argc, char **argv) +{ + int opt; + char filename[256]; + char progname[4][256]; + struct bpf_object *obj; + struct bpf_program *prog[4] = {NULL}; + struct bpf_link *link[4] = {NULL}; + int prog_num = 1; + int i = 0; + + while ((opt = getopt(argc, argv, "C::R::W::E::")) != -1) { + switch (opt) { + case 'C': + snprintf(progname[0], sizeof(progname[0]), "cfs_select_cpu"); + break; + case 'R': + snprintf(progname[0], sizeof(progname[0]), "cfs_select_cpu_range"); + snprintf(progname[1], sizeof(progname[1]), "cfs_select_cpu_range_exit"); + prog_num = 2; + break; + case 'W': + snprintf(progname[0], sizeof(progname[0]), "cfs_wake_affine"); + break; + default: + usage(); + goto out; + } + } + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + goto out; + } + + /* load BPF program */ + if (bpf_object__load(obj)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + for (i = 0; i < prog_num; i++) { + prog[i] = bpf_object__find_program_by_name(obj, progname[i]); + if (libbpf_get_error(prog[i])) { + fprintf(stderr, "ERROR: finding a prog %d in obj file failed\n", i); + goto cleanup; + } + + link[i] = bpf_program__attach(prog[i]); + if (libbpf_get_error(link[i])) { + fprintf(stderr, "ERROR: bpf_program__attach %d failed\n", i); + link[i] = NULL; + goto cleanup; + } + } + + printf("select rq BPF started, hit Ctrl+C to stop!\n"); + + read_trace_pipe(); + +cleanup: + for (; i >= 0; i--) + bpf_link__destroy(link[i]); + bpf_object__close(obj); +out: + return 0; +}
From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add a hook, cfs_tag_pick_next_entity (invoked through bpf_sched_cfs_tag_pick_next_entity()), in entity_before(). When the attached BPF program returns 1, the first entity is ordered before the second, which lets the program pick an online entity ahead of an offline one.
Signed-off-by: Guan Jing guanjing6@huawei.com --- include/linux/sched_hook_defs.h | 4 +++- kernel/sched/fair.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h index 9f87609cde8a..8b4db585d31f 100644 --- a/include/linux/sched_hook_defs.h +++ b/include/linux/sched_hook_defs.h @@ -2,7 +2,9 @@ BPF_SCHED_HOOK(int, 0, cfs_check_preempt_tick, struct sched_entity *curr, unsigned long delta_exec) BPF_SCHED_HOOK(int, 0, cfs_check_preempt_wakeup, struct task_struct *curr, struct task_struct *p) BPF_SCHED_HOOK(int, 0, cfs_wakeup_preempt_entity, struct sched_entity *curr, - struct sched_entity *se) + struct sched_entity *se) BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx) BPF_SCHED_HOOK(int, -1, cfs_wake_affine, struct sched_affine_ctx *ctx) BPF_SCHED_HOOK(int, 0, cfs_select_rq_exit, struct sched_migrate_ctx *ctx) +BPF_SCHED_HOOK(int, 0, cfs_tag_pick_next_entity, struct sched_entity *curr, + struct sched_entity *next) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 79e0a475599a..b192a9873091 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -513,6 +513,14 @@ static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime) static inline int entity_before(struct sched_entity *a, struct sched_entity *b) { +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + int ret = bpf_sched_cfs_tag_pick_next_entity(a, b); + if (ret == 1) + return 1; /* pick online se */ + } +#endif + return (s64)(a->vruntime - b->vruntime) < 0; }
From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add bpf sched pick task sample.
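Running './sched_pick_task' loads sched_pick_task_kern.o, attaches the sched_cfs_tag_pick_next_entity program to the new cfs_tag_pick_next_entity hook, and prints trace_pipe output until interrupted.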
Signed-off-by: Guan Jing guanjing6@huawei.com --- samples/bpf/Makefile | 3 + samples/bpf/sched_pick_task_kern.c | 62 ++++++++++++++++++++ samples/bpf/sched_pick_task_user.c | 92 ++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+) create mode 100644 samples/bpf/sched_pick_task_kern.c create mode 100644 samples/bpf/sched_pick_task_user.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 62dadae992a2..1d92e87565ad 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -56,6 +56,7 @@ tprogs-y += ibumad tprogs-y += hbm tprogs-y += sched_preempt tprogs-y += sched_select_core +tprogs-y += sched_pick_task
# Libbpf dependencies LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a @@ -115,6 +116,7 @@ ibumad-objs := bpf_load.o ibumad_user.o $(TRACE_HELPERS) hbm-objs := bpf_load.o hbm.o $(CGROUP_HELPERS) sched_preempt-objs := sched_preempt_user.o sched_select_core-objs := sched_select_core_user.o +sched_pick_task-objs := sched_pick_task_user.o
# Tell kbuild to always build the programs always-y := $(tprogs-y) @@ -178,6 +180,7 @@ always-y += hbm_edt_kern.o always-y += xdpsock_kern.o always-y += sched_preempt_kern.o always-y += sched_select_core_kern.o +always-y += sched_pick_task_kern.o
ifeq ($(ARCH), arm) # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux diff --git a/samples/bpf/sched_pick_task_kern.c b/samples/bpf/sched_pick_task_kern.c new file mode 100644 index 000000000000..5cd09e26f806 --- /dev/null +++ b/samples/bpf/sched_pick_task_kern.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <linux/version.h> +#include <linux/sched.h> +#include <uapi/linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define PICK_CURR 1; +#define PICK_NOMAL 0; +#define ERROR -1; + +enum task_type { + TASK_TYPE_OFFLINE = -1, + TASK_TYPE_ONLINE, + TASK_TYPE_MAX +}; + +int querry_se_tag(struct sched_entity *se) +{ + int se_tag = TASK_TYPE_ONLINE; + + if (bpf_sched_entity_is_task(se)) { + struct task_struct *task = bpf_sched_entity_to_task(se); + se_tag = bpf_sched_task_tag_of(task); + } else { + struct task_group *tg = bpf_sched_entity_to_tg(se); + se_tag = bpf_sched_tg_tag_of(tg); + } + + return se_tag; +} + +SEC("sched/cfs_tag_pick_next_entity") +int BPF_PROG(sched_cfs_tag_pick_next_entity, struct sched_entity *curr, struct sched_entity *next) +{ + int curr_type = 0; + int next_type = 0; + + if (curr == NULL || next == NULL) + return PICK_NOMAL; + + curr_type = querry_se_tag(curr); + next_type = querry_se_tag(next); + + if (curr_type > next_type) + return PICK_CURR; + + return PICK_NOMAL; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/sched_pick_task_user.c b/samples/bpf/sched_pick_task_user.c new file mode 100644 index 000000000000..316ff8a601e1 --- /dev/null +++ b/samples/bpf/sched_pick_task_user.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2022. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/resource.h> +#include <bpf/libbpf.h> + +#define TRACE_DIR "/sys/kernel/debug/tracing/" +#define BUF_SIZE (4096) + +/* read trace logs from debug fs */ +void read_trace_pipe(void) +{ + int trace_fd; + + trace_fd = open(TRACE_DIR "trace_pipe", O_RDONLY, 0); + if (trace_fd < 0) + return; + + while (1) { + static char buf[BUF_SIZE]; + ssize_t sz; + + sz = read(trace_fd, buf, sizeof(buf) - 1); + if (sz > 0) { + buf[sz] = 0; + puts(buf); + } + } +} + +int main(int argc, char **argv) +{ + char filename[256]; + struct bpf_object *obj; + struct bpf_program *prog; + struct bpf_link *link; + int err; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + /* Open BPF application */ + obj = bpf_object__open_file(filename, NULL); + if (libbpf_get_error(obj)) { + fprintf(stderr, "ERROR: opening BPF object file failed\n"); + return 1; + } + + /* Load and verify BPF program */ + err = bpf_object__load(obj); + if (err) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + goto cleanup; + } + + prog = bpf_object__find_program_by_name(obj, "sched_cfs_tag_pick_next_entity"); + if (libbpf_get_error(prog)) { + fprintf(stderr, "ERROR: finding a prog in obj file failed\n"); + goto cleanup; + } + + link = bpf_program__attach(prog); + if (libbpf_get_error(link)) { + fprintf(stderr, "ERROR: loading BPF object file failed\n"); + link = NULL; + goto cleanup; + } + + printf("preempt BPF started, hit Ctrl+C to stop!\n"); + + read_trace_pipe(); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); +out: + return 0; +}