hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAJEHU
CVE: NA
--------------------------------
When cpu hyperthreading is enabled, one physical core presents multiple logical cpus. Assume that physical core0 presents two logical cpus, cpu0 and cpu1. A task is migrated to cpu1 only when the load on cpu0 exceeds the configured ratio of cpu0's capacity; otherwise the task is not migrated and keeps running on cpu0.
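For example, assuming a cpu_capacity of 1024, the default sysctl_sched_util_ratio of 100 only lets tasks spill to cpu1 once cpu0's util_avg reaches 1024, while a ratio of 80 lowers that threshold to about 819; the underlying check is util * 100 < capacity * sysctl_sched_util_ratio (see core_has_spare() below).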
External impacts:
1) default config: CONFIG_SCHED_KEEP_ON_CORE=y in the arm64 openeuler_defconfig (left unset in the x86 one)
2) sysctl: /proc/sys/kernel/sched_util_ratio
3) sched features: KEEP_ON_CORE (default NO_KEEP_ON_CORE)
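A minimal runtime example, assuming CONFIG_SCHED_DEBUG is enabled and the sched_features file lives at /sys/kernel/debug/sched_features (newer kernels may use /sys/kernel/debug/sched/features); the sysctl path is the one added by this patch:

  # enable the feature (default is NO_KEEP_ON_CORE)
  echo KEEP_ON_CORE > /sys/kernel/debug/sched_features
  # allow tasks to spill to the SMT sibling once the first logical cpu
  # exceeds 80% of its capacity
  echo 80 > /proc/sys/kernel/sched_util_ratio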
Signed-off-by: Cheng Yu <serein.chengyu@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 include/linux/sched/sysctl.h           |  4 ++++
 init/Kconfig                           | 12 ++++++++++
 kernel/sched/fair.c                    | 31 ++++++++++++++++++++++++++
 kernel/sched/features.h                |  4 ++++
 kernel/sysctl.c                        | 10 +++++++++
 7 files changed, 63 insertions(+)

diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 69ff0b64ba59..8c95a5332b40 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -189,6 +189,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+CONFIG_SCHED_KEEP_ON_CORE=y
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 # CONFIG_SYSFS_DEPRECATED is not set
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index f3b810d0cf47..c2ebdb75bbe3 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -194,6 +194,7 @@ CONFIG_USER_NS=y
 CONFIG_PID_NS=y
 CONFIG_NET_NS=y
 CONFIG_SCHED_STEAL=y
+# CONFIG_SCHED_KEEP_ON_CORE is not set
 CONFIG_CHECKPOINT_RESTORE=y
 CONFIG_SCHED_AUTOGROUP=y
 # CONFIG_SYSFS_DEPRECATED is not set
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 5cd5b3c579d3..09214349bddf 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -35,6 +35,10 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern int sysctl_sched_util_low_pct;
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+extern int sysctl_sched_util_ratio;
+#endif
+
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 extern unsigned int sysctl_smart_grid_strategy_ctrl;
 extern int sysctl_affinity_adjust_delay_ms;
diff --git a/init/Kconfig b/init/Kconfig
index e552194efbea..9dcc12704729 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1370,6 +1370,18 @@ config SCHED_STEAL
 
 	  If unsure, say N here.
 
+config SCHED_KEEP_ON_CORE
+	bool "Prefer physical cores when migrating tasks"
+	depends on SCHED_SMT
+	default n
+	help
+	  When cpu hyperthreading is enabled, one physical core can virtualize
+	  multiple logical cpus. Assume that physical core0 virtualizes two
+	  logical cpus, cpu0 and cpu1. Only when the load of cpu0 exceeds the
+	  ratio to the capacity of cpu0, the task will be migrated to the cpu1,
+	  otherwise the task will not be migrated and the cpu0 will still be
+	  used.
+
 config CHECKPOINT_RESTORE
 	bool "Checkpoint/restore support"
 	select PROC_CHILDREN
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 95d1841f8a20..8dc1d2afe1b7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7242,6 +7242,20 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 	return -1;
 }
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+int sysctl_sched_util_ratio = 100;
+
+static int core_has_spare(int cpu)
+{
+	int core_id = cpumask_first(cpu_smt_mask(cpu));
+	struct rq *rq = cpu_rq(core_id);
+	unsigned long util = rq->cfs.avg.util_avg;
+	unsigned long capacity = rq->cpu_capacity;
+
+	return util * 100 < capacity * sysctl_sched_util_ratio;
+}
+#endif
+
 #else /* CONFIG_SCHED_SMT */
 
 static inline void set_idle_cores(int cpu, int val)
@@ -8210,6 +8224,14 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_f
 	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	if (sched_feat(KEEP_ON_CORE) &&
+	    static_branch_likely(&sched_smt_present)) {
+		if (core_has_spare(new_cpu))
+			new_cpu = cpumask_first(cpu_smt_mask((new_cpu)));
+	}
+#endif
+
 	rcu_read_unlock();
 
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
@@ -9701,6 +9723,15 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 	}
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	if (sched_feat(KEEP_ON_CORE) &&
+	    static_branch_likely(&sched_smt_present)) {
+		if (core_has_spare(env->dst_cpu) &&
+		    cpumask_first(cpu_smt_mask((env->dst_cpu))) != env->dst_cpu)
+			return 0;
+	}
+#endif
+
 	/*
 	 * We do not migrate tasks that are:
 	 * 1) throttled_lb_pair, or
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 76fade025c4b..fb885b20ba34 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -70,6 +70,10 @@ SCHED_FEAT(SIS_UTIL, false)
 SCHED_FEAT(STEAL, false)
 #endif
 
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+SCHED_FEAT(KEEP_ON_CORE, false)
+#endif
+
 /*
  * Issue a WARN when we do multiple update_rq_clock() calls
  * in a single rq->lock section. Default disabled because the
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3941856c19d1..bd7b17be9ba4 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2817,6 +2817,16 @@ static struct ctl_table kern_table[] = {
 		.extra2 = &one_hundred,
 	},
 #endif
+#ifdef CONFIG_SCHED_KEEP_ON_CORE
+	{
+		.procname = "sched_util_ratio",
+		.data = &sysctl_sched_util_ratio,
+		.maxlen = sizeof(sysctl_sched_util_ratio),
+		.mode = 0644,
+		.proc_handler = proc_dointvec_minmax,
+		.extra1 = SYSCTL_ZERO,
+	},
+#endif
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 	{
 		.procname = "smart_grid_strategy_ctrl",
Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/10887
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...