From: Chen Hui judy.chenhui@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I5KUFB CVE: NA
--------------------------------
Add three sched-type hooks to select_task_rq_fair(), as follows:
'cfs_select_rq': Replace the original core selection policy or implement dynamic CPU affinity.
'cfs_select_rq_exit': Restore the CPU affinity of the task before exiting 'select_task_rq_fair'.
To be used together with the 'cfs_select_rq' hook to implement dynamic CPU affinity.
'cfs_wake_affine': Determine on which CPU the task can run soonest. Allows the user to implement different policies.
Signed-off-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Hui Tang tanghui20@huawei.com --- include/linux/sched.h | 20 +++++++++++++ include/linux/sched_hook_defs.h | 3 ++ kernel/sched/fair.c | 50 +++++++++++++++++++++++++++++++++ scripts/bpf_helpers_doc.py | 2 ++ 4 files changed, 75 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 1b0cc2bfbd75..626cc23b11ec 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2241,5 +2241,25 @@ enum cpumask_op_type { CPUMASK_NEXT_AND, CPUMASK_CPULIST_PARSE }; + +struct sched_migrate_ctx { + struct task_struct *task; + struct cpumask *cpus_allowed; + struct cpumask *select_idle_mask; + int prev_cpu; + int curr_cpu; + int is_sync; + int want_affine; + int wake_flags; + int sd_flag; + int new_cpu; +}; + +struct sched_affine_ctx { + struct task_struct *task; + int prev_cpu; + int curr_cpu; + int is_sync; +}; #endif #endif diff --git a/include/linux/sched_hook_defs.h b/include/linux/sched_hook_defs.h index e2f65e4b8895..4e359649db4b 100644 --- a/include/linux/sched_hook_defs.h +++ b/include/linux/sched_hook_defs.h @@ -3,3 +3,6 @@ BPF_SCHED_HOOK(int, 0, cfs_check_preempt_tick, struct sched_entity *curr, unsign BPF_SCHED_HOOK(int, 0, cfs_check_preempt_wakeup, struct task_struct *curr, struct task_struct *p) BPF_SCHED_HOOK(int, 0, cfs_wakeup_preempt_entity, struct sched_entity *curr, struct sched_entity *se) +BPF_SCHED_HOOK(int, -1, cfs_select_rq, struct sched_migrate_ctx *ctx) +BPF_SCHED_HOOK(int, -1, cfs_wake_affine, struct sched_affine_ctx *ctx) +BPF_SCHED_HOOK(int, -1, cfs_select_rq_exit, struct sched_migrate_ctx *ctx) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 79e36870b206..23206e6320f0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6023,6 +6023,22 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, { int target = nr_cpumask_bits;
+#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + struct sched_affine_ctx ctx; + int ret; + + ctx.task = p; + ctx.prev_cpu = prev_cpu; + ctx.curr_cpu = this_cpu; + ctx.is_sync = sync; + + ret = bpf_sched_cfs_wake_affine(&ctx); + if (ret >= 0 && ret < nr_cpumask_bits) + return ret; + } +#endif + if (sched_feat(WA_IDLE)) target = wake_affine_idle(this_cpu, prev_cpu, sync);
@@ -6887,6 +6903,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f int new_cpu = prev_cpu; int want_affine = 0; int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING); +#ifdef CONFIG_BPF_SCHED + struct sched_migrate_ctx ctx; + int ret; +#endif
time = schedstat_start_time();
@@ -6904,6 +6924,26 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f }
rcu_read_lock(); +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + ctx.task = p; + ctx.prev_cpu = prev_cpu; + ctx.curr_cpu = cpu; + ctx.is_sync = sync; + ctx.wake_flags = wake_flags; + ctx.want_affine = want_affine; + ctx.sd_flag = sd_flag; + ctx.cpus_allowed = (void *)p->cpus_ptr; + ctx.select_idle_mask = this_cpu_cpumask_var_ptr(select_idle_mask); + + ret = bpf_sched_cfs_select_rq(&ctx); + if (ret >= 0) { + rcu_read_unlock(); + return ret; + } + } +#endif + for_each_domain(cpu, tmp) { /* * If both 'cpu' and 'prev_cpu' are part of this domain, @@ -6935,6 +6975,16 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f if (want_affine) current->recent_used_cpu = cpu; } + +#ifdef CONFIG_BPF_SCHED + if (bpf_sched_enabled()) { + ctx.new_cpu = new_cpu; + ret = bpf_sched_cfs_select_rq_exit(&ctx); + if (ret >= 0) + new_cpu = ret; + } +#endif + rcu_read_unlock(); schedstat_end_time(cpu_rq(cpu), time);
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 2f9d2160b5fb..fd89d2f2a86d 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -445,6 +445,7 @@ class PrinterHelpers(Printer): 'struct bpf_cpumask_info', 'struct cpumask', 'struct cpumask_op_args', + 'struct sched_migrate_ctx', ] known_types = { '...', @@ -498,6 +499,7 @@ class PrinterHelpers(Printer): 'struct bpf_cpumask_info', 'struct cpumask', 'struct cpumask_op_args', + 'struct sched_migrate_ctx', } mapped_types = { 'u8': '__u8',