A task is migrated off a physical core only when the core's load exceeds the configured threshold; otherwise the task is kept on the physical core.
External impacts:
1) default config in arm64, x86: CONFIG_SCHED_KEEP_ON_CORE=y
2) sysctl: /proc/sys/kernel/sched_util_keep_on_core
3) sched features: KEEP_ON_CORE (default NO_KEEP_ON_CORE)
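For illustration only (not part of the change itself), the check added to
fair.c boils down to a percentage comparison between the utilization and the
capacity of the core's first SMT sibling. Below is a stand-alone userspace
sketch of that comparison; util, capacity and pct are made-up stand-ins for
rq->cfs.avg.util_avg, rq->cpu_capacity and sysctl_sched_util_keep_on_core:

  #include <stdbool.h>
  #include <stdio.h>

  /* Stand-in for the kernel helper core_has_spare(): 'util' and 'capacity'
   * mimic rq->cfs.avg.util_avg and rq->cpu_capacity of the core's first SMT
   * sibling, 'pct' mimics the sched_util_keep_on_core sysctl (default 100).
   */
  static bool core_has_spare(unsigned long util, unsigned long capacity, int pct)
  {
          return util * 100 < capacity * pct;
  }

  int main(void)
  {
          /* Half-loaded core, default threshold 100%: keep the task there. */
          printf("%d\n", core_has_spare(512, 1024, 100));
          /* Fully loaded core: the check fails and migration is allowed. */
          printf("%d\n", core_has_spare(1024, 1024, 100));
          return 0;
  }

On SMT systems with KEEP_ON_CORE enabled, select_task_rq_fair() redirects the
chosen CPU to the first CPU in its SMT mask while this check holds, and
can_migrate_task() refuses to move a task onto a secondary sibling of a core
that still has spare capacity.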
Signed-off-by: Cheng Yu <serein.chengyu@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 include/linux/sched/sysctl.h           |  4 +++
 init/Kconfig                           |  9 +++++++
 kernel/sched/fair.c                    | 36 ++++++++++++++++++++++++++
 kernel/sched/features.h                |  4 +++
 kernel/sysctl.c                        | 10 +++++++
 7 files changed, 65 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 69ff0b64ba59..8c95a5332b40 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -189,6 +189,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+CONFIG_SCHED_KEEP_ON_CORE=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 # CONFIG_SYSFS_DEPRECATED is not set
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index f3b810d0cf47..65ef2f183b02 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -194,6 +194,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+CONFIG_SCHED_KEEP_ON_CORE=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 # CONFIG_SYSFS_DEPRECATED is not set
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5cd5b3c579d3..fb1436286994 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -35,6 +35,10 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern int sysctl_sched_util_low_pct;
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+extern int sysctl_sched_util_keep_on_core;
+#endif
+
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 extern unsigned int sysctl_smart_grid_strategy_ctrl;
 extern int sysctl_affinity_adjust_delay_ms;
diff --git a/init/Kconfig b/init/Kconfig
index e552194efbea..ee94515b2f04 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1370,6 +1370,15 @@ config SCHED_STEAL
 
 	  If unsure, say N here.
 
+config SCHED_KEEP_ON_CORE
+	bool "Prefer physical cores when migrating tasks"
+	depends on SMP
+	default n
+	help
+	  A task is migrated off a physical core only when the core's load
+	  exceeds the configured threshold; otherwise the task is kept on
+	  the physical core.
+
 config CHECKPOINT_RESTORE
 	bool "Checkpoint/restore support"
 	select PROC_CHILDREN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 95d1841f8a20..4f3d81537bab 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4497,6 +4497,12 @@ static inline void overload_clear(struct rq *rq) {}
 static inline void overload_set(struct rq *rq) {}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+static int core_has_spare(int cpu);
+#else
+static inline int core_has_spare(int cpu) { return 0; }
+#endif
+
 #else /* CONFIG_SMP */
 
 #define UPDATE_TG	0x0
@@ -4523,6 +4529,7 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 static inline void rq_idle_stamp_update(struct rq *rq) {}
 static inline void rq_idle_stamp_clear(struct rq *rq) {}
 static inline int try_steal(struct rq *this_rq, struct rq_flags *rf) { return 0; }
+static inline int core_has_spare(int cpu) { return 0; }
 static inline void overload_clear(struct rq *rq) {}
 static inline void overload_set(struct rq *rq) {}
 
@@ -8210,6 +8217,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	if (static_branch_likely(&sched_smt_present) &&
+	    sched_feat(KEEP_ON_CORE))
+		if (core_has_spare(new_cpu))
+			new_cpu = cpumask_first(cpu_smt_mask((new_cpu)));
+#endif
+
 	rcu_read_unlock();
 
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
@@ -9701,6 +9715,14 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	if (static_branch_likely(&sched_smt_present) &&
+	    sched_feat(KEEP_ON_CORE))
+		if (core_has_spare(env->dst_cpu) &&
+		    cpumask_first(cpu_smt_mask((env->dst_cpu))) != env->dst_cpu)
+			return 0;
+#endif
+
 	/*
 	 * We do not migrate tasks that are:
 	 * 1) throttled_lb_pair, or
@@ -13189,6 +13211,20 @@ static int try_steal(struct rq *dst_rq, struct rq_flags *dst_rf)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+int sysctl_sched_util_keep_on_core = 100;
+
+static int core_has_spare(int cpu)
+{
+	int core_id = cpumask_first(cpu_smt_mask(cpu));
+	struct rq *rq = cpu_rq(core_id);
+	unsigned long util = rq->cfs.avg.util_avg;
+	unsigned long capacity = rq->cpu_capacity;
+
+	return util * 100 < capacity * sysctl_sched_util_keep_on_core;
+}
+#endif
+
 static void rq_online_fair(struct rq *rq)
 {
 	update_sysctl();
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 76fade025c4b..fb885b20ba34 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -70,6 +70,10 @@ SCHED_FEAT(SIS_UTIL, false)
 SCHED_FEAT(STEAL, false)
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+SCHED_FEAT(KEEP_ON_CORE, false)
+#endif
+
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
  * in a single rq->lock section. Default disabled because the
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3941856c19d1..9abc01982645 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2817,6 +2817,16 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one_hundred,
 	},
 #endif
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	{
+		.procname	= "sched_util_keep_on_core",
+		.data		= &sysctl_sched_util_keep_on_core,
+		.maxlen		= sizeof(sysctl_sched_util_keep_on_core),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 	{
 		.procname	= "smart_grid_strategy_ctrl",