hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIT1
--------------------------------
Add a sched-type hook, 'cfs_select_rq', in select_task_rq_fair(). It can be
used to replace the original core-selection policy or to implement dynamic
CPU affinity.
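For illustration only (not part of this patch): a minimal bpf program using
the hook could look like the sketch below. The SEC() name and the BPF_PROG()
wrapper are assumptions based on the hook name above; the kernel still
validates any returned CPU with is_cpu_allowed() and falls back to the default
policy on a negative return.

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>
  #include <bpf/bpf_tracing.h>

  /* Hypothetical policy: keep synchronous wakeups on the waking CPU,
   * otherwise leave core selection to the default CFS policy.
   */
  SEC("sched/cfs_select_rq")
  int BPF_PROG(cfs_select_rq, struct sched_migrate_ctx *ctx)
  {
  	if (ctx->is_sync)
  		return ctx->curr_cpu;

  	/* A negative value keeps the original select_task_rq_fair() path. */
  	return -1;
  }

  char _license[] SEC("license") = "GPL";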
Signed-off-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Guan Jing <guanjing6@huawei.com>
---
 include/linux/sched.h           | 12 ++++++++++++
 include/linux/sched_hook_defs.h |  2 +-
 kernel/sched/core.c             | 15 +++++++++++++++
 kernel/sched/fair.c             | 29 +++++++++++++++++++++++++++++
 kernel/sched/sched.h            |  4 ++++
 scripts/bpf_doc.py              |  2 ++
 6 files changed, 63 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c968bd562a9f..94e6cbb056fd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2481,6 +2481,18 @@ struct bpf_sched_cpu_stats {
 	unsigned int rr_nr_running;
 };
 
+struct sched_migrate_ctx {
+	struct task_struct *task;
+	struct cpumask *select_idle_mask;
+	int prev_cpu;
+	int curr_cpu;
+	int is_sync;
+	int want_affine;
+	int wake_flags;
+	int sd_flag;
+	int new_cpu;
+};
+
 #endif
 
 #endif
diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h
index 14344004e335..e97c11774fa4 100644
--- a/include/linux/sched_hook_defs.h
+++ b/include/linux/sched_hook_defs.h
@@ -1,2 +1,2 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-BPF_SCHED_HOOK(int, 0, dummy, void)
+BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx)
\ No newline at end of file
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ff2a439754f1..a5f56608fd43 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2468,7 +2468,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq)
  * Per-CPU kthreads are allowed to run on !active && online CPUs, see
  * __set_cpus_allowed_ptr() and select_fallback_rq().
  */
+#ifdef CONFIG_BPF_SCHED
+inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+#else
 static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+#endif
 {
 	/* When not in the task's cpumask, no point in looking further. */
 	if (!cpumask_test_cpu(cpu, p->cpus_ptr))
@@ -9955,6 +9959,10 @@ LIST_HEAD(task_groups);
 static struct kmem_cache *task_group_cache __read_mostly;
 #endif
 
+#ifdef CONFIG_BPF_SCHED
+DECLARE_PER_CPU(cpumask_var_t, select_idle_mask);
+#endif
+
 void __init sched_init(void)
 {
 	unsigned long ptr = 0;
@@ -10010,6 +10018,13 @@ void __init sched_init(void)
 		global_rt_period(), global_rt_runtime());
 #endif /* CONFIG_RT_GROUP_SCHED */
 
+#if defined(CONFIG_CPUMASK_OFFSTACK) && defined(CONFIG_BPF_SCHED)
+	for_each_possible_cpu(i) {
+		per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node(
+			cpumask_size(), GFP_KERNEL, cpu_to_node(i));
+	}
+#endif
+
 #ifdef CONFIG_CGROUP_SCHED
 	task_group_cache = KMEM_CACHE(task_group, 0);
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 318258ea011e..36bd03741d0e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -52,6 +52,7 @@
 #include <asm/switch_to.h>
 
 #include <linux/sched/cond_resched.h>
+#include <linux/bpf_sched.h>
 
#include "sched.h" #include "stats.h" @@ -99,6 +100,10 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
 const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
 
+#ifdef CONFIG_BPF_SCHED
+DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
+#endif
+
 int sched_thermal_decay_shift;
 static int __init setup_sched_thermal_decay_shift(char *str)
 {
@@ -8154,6 +8159,11 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	if (!pd || READ_ONCE(rd->overutilized))
 		goto unlock;
 
+#ifdef CONFIG_BPF_SCHED
+	struct sched_migrate_ctx ctx;
+	int ret;
+#endif
+
 	/*
 	 * Energy-aware wake-up happens on the lowest sched_domain starting
 	 * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
@@ -8475,6 +8485,25 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
 	}
 
 	rcu_read_lock();
+#ifdef CONFIG_BPF_SCHED
+	if (bpf_sched_enabled()) {
+		ctx.task = p;
+		ctx.prev_cpu = prev_cpu;
+		ctx.curr_cpu = cpu;
+		ctx.is_sync = sync;
+		ctx.wake_flags = wake_flags;
+		ctx.want_affine = want_affine;
+		ctx.sd_flag = sd_flag;
+		ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask);
+
+		ret = bpf_sched_cfs_select_rq(&ctx);
+		if (ret >= 0 && is_cpu_allowed(p, ret)) {
+			rcu_read_unlock();
+			return ret;
+		}
+	}
+#endif
+
 	for_each_domain(cpu, tmp) {
 		/*
 		 * If both 'cpu' and 'prev_cpu' are part of this domain,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 05a7f09f2bba..830087ca204c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3597,4 +3597,8 @@ static inline void init_sched_mm_cid(struct task_struct *t) { }
 extern u64 avg_vruntime(struct cfs_rq *cfs_rq);
 extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
 
+#ifdef CONFIG_BPF_SCHED
+inline bool is_cpu_allowed(struct task_struct *p, int cpu);
+#endif
+
 #endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index fd0c5f5d25bd..359373bc8dab 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -701,6 +701,7 @@ class PrinterHelpers(Printer):
             'struct iphdr',
             'struct ipv6hdr',
             'struct bpf_sched_cpu_stats',
+            'struct sched_migrate_ctx',
     ]
     known_types = {
             '...',
@@ -757,6 +758,7 @@
             'struct iphdr',
             'struct ipv6hdr',
             'struct bpf_sched_cpu_stats',
+            'struct sched_migrate_ctx',
     }
     mapped_types = {
             'u8': '__u8',