From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8OIT1
--------------------------------
Add hook of sched type in select_task_rq_fair(), as follows: 'cfs_select_rq' Replace the original core selection policy or implement dynamic CPU affinity.
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com Signed-off-by: Guan Jing guanjing6@huawei.com --- include/linux/sched.h | 12 ++++++++++++ include/linux/sched_hook_defs.h | 2 +- kernel/sched/core.c | 15 +++++++++++++++ kernel/sched/fair.c | 28 ++++++++++++++++++++++++++++ kernel/sched/sched.h | 4 ++++ scripts/bpf_doc.py | 2 ++ 6 files changed, 62 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 857c0d8df803..569d055518cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2480,6 +2480,18 @@ static inline void rseq_syscall(struct pt_regs *regs)
#ifdef CONFIG_BPF_SCHED extern void sched_settag(struct task_struct *tsk, s64 tag); + +struct sched_migrate_ctx { + struct task_struct *task; + struct cpumask *select_idle_mask; + int prev_cpu; + int curr_cpu; + int is_sync; + int want_affine; + int wake_flags; + int sd_flag; + int new_cpu; +}; #endif
#ifdef CONFIG_SCHED_CORE diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h index 14344004e335..0e91209826a1 100644 --- a/include/linux/sched_hook_defs.h +++ b/include/linux/sched_hook_defs.h @@ -1,2 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 */ -BPF_SCHED_HOOK(int, 0, dummy, void) +BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index dc48b7dd9036..f49884275d02 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2469,7 +2469,11 @@ static inline bool rq_has_pinned_tasks(struct rq *rq) * Per-CPU kthreads are allowed to run on !active && online CPUs, see * __set_cpus_allowed_ptr() and select_fallback_rq(). */ +#ifdef CONFIG_BPF_SCHED +inline bool is_cpu_allowed(struct task_struct *p, int cpu) +#else static inline bool is_cpu_allowed(struct task_struct *p, int cpu) +#endif { /* When not in the task's cpumask, no point in looking further. */ if (!cpumask_test_cpu(cpu, p->cpus_ptr)) @@ -9956,6 +9960,10 @@ LIST_HEAD(task_groups); static struct kmem_cache *task_group_cache __read_mostly; #endif
+#ifdef CONFIG_BPF_SCHED +DECLARE_PER_CPU(cpumask_var_t, select_idle_mask); +#endif + void __init sched_init(void) { unsigned long ptr = 0; @@ -10011,6 +10019,13 @@ void __init sched_init(void) global_rt_period(), global_rt_runtime()); #endif /* CONFIG_RT_GROUP_SCHED */
+#if defined(CONFIG_CPUMASK_OFFSTACK) && defined(CONFIG_BPF_SCHED) + for_each_possible_cpu(i) { + per_cpu(select_idle_mask, i) = (cpumask_var_t)kzalloc_node( + cpumask_size(), GFP_KERNEL, cpu_to_node(i)); + } +#endif + #ifdef CONFIG_CGROUP_SCHED task_group_cache = KMEM_CACHE(task_group, 0);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 9e32aae696ba..fc17a3af11b1 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -52,6 +52,7 @@ #include <asm/switch_to.h>
#include <linux/sched/cond_resched.h> +#include <linux/bpf_sched.h>
#include "sched.h" #include "stats.h" @@ -99,6 +100,10 @@ unsigned int sysctl_sched_child_runs_first __read_mostly;
const_debug unsigned int sysctl_sched_migration_cost = 500000UL;
+#ifdef CONFIG_BPF_SCHED +DEFINE_PER_CPU(cpumask_var_t, select_idle_mask); +#endif + int sched_thermal_decay_shift; static int __init setup_sched_thermal_decay_shift(char *str) { @@ -8468,6 +8473,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY int idlest_cpu = -1; #endif +#ifdef CONFIG_BPF_SCHED + struct sched_migrate_ctx ctx; + int ret; +#endif
time = schedstat_start_time();
@@ -8502,6 +8511,25 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) }
rcu_read_lock(); +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + ctx.task = p; + ctx.prev_cpu = prev_cpu; + ctx.curr_cpu = cpu; + ctx.is_sync = sync; + ctx.wake_flags = wake_flags; + ctx.want_affine = want_affine; + ctx.sd_flag = sd_flag; + ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask); + + ret = bpf_sched_cfs_select_rq(&ctx); + if (ret >= 0 && is_cpu_allowed(p, ret)) { + rcu_read_unlock(); + return ret; + } + } +#endif + for_each_domain(cpu, tmp) { /* * If both 'cpu' and 'prev_cpu' are part of this domain, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index de81c7f2ad93..b7ba69337453 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3621,4 +3621,8 @@ static inline void init_sched_mm_cid(struct task_struct *t) { } extern u64 avg_vruntime(struct cfs_rq *cfs_rq); extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
+#ifdef CONFIG_BPF_SCHED +inline bool is_cpu_allowed(struct task_struct *p, int cpu); +#endif + #endif /* _KERNEL_SCHED_SCHED_H */ diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py index 61b7dddedc46..a8a547fdb321 100755 --- a/scripts/bpf_doc.py +++ b/scripts/bpf_doc.py @@ -700,6 +700,7 @@ class PrinterHelpers(Printer): 'struct bpf_dynptr', 'struct iphdr', 'struct ipv6hdr', + 'struct sched_migrate_ctx', ] known_types = { '...', @@ -755,6 +756,7 @@ class PrinterHelpers(Printer): 'const struct bpf_dynptr', 'struct iphdr', 'struct ipv6hdr', + 'struct sched_migrate_ctx', } mapped_types = { 'u8': '__u8',