Add smart halt polling and scheduler wakeup IPI optimization
Li Hua (2):
  arm: Optimize ttwu IPI
  sched/idle: Add IAS_SMART_HALT_POLL config for smart halt polling feature

Xiangyou Xie (1):
  sched/idle: introduce smart halt polling

 arch/arm/include/asm/thread_info.h |  2 ++
 include/linux/kernel.h             |  4 +++
 init/Kconfig                       |  8 ++++++
 kernel/sched/idle.c                | 42 ++++++++++++++++++++++++++++++
 kernel/sysctl.c                    |  9 +++++++
 5 files changed, 65 insertions(+)
From: Li Hua <hucool.lihua@huawei.com>

hulk inclusion
category: feature
bugzilla: 176961, https://gitee.com/openeuler/kernel/issues/I4E05T
-------------------
When waking up a task on a remote CPU that shares the LLC, we can simply set the need_resched flag: a CPU that is polling in idle will notice it and wake up without an IPI.

The prerequisite is that the architecture supports _TIF_POLLING_NRFLAG, so define it for arm.
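As a rough illustration of the generic path this relies on, here is a simplified sketch modeled on set_nr_if_polling() in kernel/sched/core.c (not code added by this patch; the _sketch name is illustrative):

/*
 * Try to wake a polling idle CPU by setting TIF_NEED_RESCHED on its
 * idle task instead of sending a reschedule IPI.  Returns true if the
 * flag was set (or was already pending), false if the target is not
 * polling and the caller has to fall back to an IPI.
 */
static bool set_nr_if_polling_sketch(struct task_struct *p)
{
	struct thread_info *ti = task_thread_info(p);
	typeof(ti->flags) old, val = READ_ONCE(ti->flags);

	for (;;) {
		if (!(val & _TIF_POLLING_NRFLAG))
			return false;		/* not polling: IPI required */
		if (val & _TIF_NEED_RESCHED)
			return true;		/* reschedule already pending */
		old = cmpxchg(&ti->flags, val, val | _TIF_NEED_RESCHED);
		if (old == val)
			return true;		/* polling loop will see the flag */
		val = old;			/* flags changed under us, retry */
	}
}

The remote wakeup path only falls back to smp_send_reschedule() when such a check fails, which is why arm needs _TIF_POLLING_NRFLAG defined before the IPI can be skipped.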
Signed-off-by: Li Hua <hucool.lihua@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
---
 arch/arm/include/asm/thread_info.h | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h
index 2e4733a2e737..bc6a86fa3142 100644
--- a/arch/arm/include/asm/thread_info.h
+++ b/arch/arm/include/asm/thread_info.h
@@ -145,6 +145,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define TIF_SECCOMP		7	/* seccomp syscall filtering active */
 #define TIF_PATCH_PENDING	8	/* pending live patching update */
+#define TIF_POLLING_NRFLAG	16	/* idle is polling for TIF_NEED_RESCHED */
 #define TIF_USING_IWMMXT	17
 #define TIF_MEMDIE		18	/* is terminating due to OOM killer */
 #define TIF_RESTORE_SIGMASK	20
@@ -157,6 +158,7 @@ extern int vfp_restore_user_hwstate(struct user_vfp *,
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
+#define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_USING_IWMMXT	(1 << TIF_USING_IWMMXT)
 #define _TIF_PATCH_PENDING	(1 << TIF_PATCH_PENDING)
From: Xiangyou Xie <xiexiangyou@huawei.com>

hulk inclusion
category: feature
bugzilla: 176961, https://gitee.com/openeuler/kernel/issues/I4E05T
-------------------------------------------------
In a guest, poll for a while before entering real idle. If TIF_NEED_RESCHED is set on the current task during the polling window, break out of the polling loop immediately.

The maximum polling time, poll_threshold_ns, can be tuned via sysctl so that the host CPU is not kept at 100% usage; adjust it to match the workload. An example of setting it from user space is shown below.

This optimization requires _TIF_POLLING_NRFLAG support, which allows ttwu to skip the wakeup IPI. The wakeup response delay is reduced from 4us to 1us.
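For reference, a minimal user-space sketch for tuning the threshold; it assumes the sysctl added below is exposed as /proc/sys/kernel/halt_poll_threshold (it is registered in kern_table), and the 50000 ns value is only an example:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Set the maximum idle polling window to 50 us (value is in nanoseconds). */
	FILE *f = fopen("/proc/sys/kernel/halt_poll_threshold", "w");

	if (!f) {
		perror("halt_poll_threshold");
		return EXIT_FAILURE;
	}
	fprintf(f, "%lu\n", 50000UL);
	fclose(f);
	return EXIT_SUCCESS;
}

Writing 0 (the default) disables the extra polling and enters real idle directly.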
Signed-off-by: Xiangyou Xie <xiexiangyou@huawei.com>
Signed-off-by: Li Hua <hucool.lihua@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
---
 include/linux/kernel.h |  2 ++
 kernel/sched/idle.c    | 30 +++++++++++++++++++++++++++++-
 kernel/sysctl.c        |  7 +++++++
 3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 0b809c92d817..63555b947d96 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -555,6 +555,8 @@ extern int sysctl_panic_on_rcu_stall;
 extern int sysctl_panic_on_stackoverflow;
 
 extern bool crash_kexec_post_notifiers;
+extern unsigned long poll_threshold_ns;
+
 
 /*
  * panic_cpu is used for synchronizing panic() and crash_kexec() execution. It
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 36b545f17206..0bd53051ac71 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -13,6 +13,12 @@
 /* Linker adds these: start and end of __cpuidle functions */
 extern char __cpuidle_text_start[], __cpuidle_text_end[];
 
+/*
+ * Poll_threshold_ns indicates the maximum polling time before
+ * entering real idle.
+ */
+unsigned long poll_threshold_ns;
+
 /**
  * sched_idle_set_state - Record idle state for the current CPU.
  * @idle_state: State to record.
@@ -52,12 +58,31 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
 __setup("hlt", cpu_idle_nopoll_setup);
 #endif
 
+static void smart_idle_poll(void)
+{
+	unsigned long poll_duration = poll_threshold_ns;
+	ktime_t cur, stop;
+
+	if (!poll_duration)
+		return;
+
+	stop = ktime_add_ns(ktime_get(), poll_duration);
+
+	do {
+		cpu_relax();
+		if (tif_need_resched())
+			break;
+		cur = ktime_get();
+	} while (ktime_before(cur, stop));
+}
+
 static noinline int __cpuidle cpu_idle_poll(void)
 {
 	trace_cpu_idle(0, smp_processor_id());
 	stop_critical_timings();
 	rcu_idle_enter();
 	local_irq_enable();
+	smart_idle_poll();
 
 	while (!tif_need_resched() &&
 	       (cpu_idle_force_poll || tick_check_broadcast_expired()))
@@ -261,6 +286,7 @@ static void cpuidle_idle_call(void)
 static void do_idle(void)
 {
 	int cpu = smp_processor_id();
+	unsigned long idle_poll_flag = poll_threshold_ns;
 	/*
 	 * If the arch has a polling bit, we maintain an invariant:
 	 *
@@ -293,9 +319,11 @@ static void do_idle(void)
 		 * broadcast device expired for us, we don't want to go deep
 		 * idle as we know that the IPI is going to arrive right away.
 		 */
-		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+		if (cpu_idle_force_poll || tick_check_broadcast_expired() ||
+		    idle_poll_flag) {
 			tick_nohz_idle_restart_tick();
 			cpu_idle_poll();
+			idle_poll_flag = 0;
 		} else {
 			cpuidle_idle_call();
 		}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 513cb217232a..261388afe482 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1849,6 +1849,13 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= sysctl_sched_uclamp_handler,
 	},
 #endif
+	{
+		.procname	= "halt_poll_threshold",
+		.data		= &poll_threshold_ns,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",
From: Li Hua <hucool.lihua@huawei.com>

hulk inclusion
category: feature
bugzilla: 176961, https://gitee.com/openeuler/kernel/issues/I4E05T
-------------------
Add the IAS_SMART_HALT_POLL Kconfig option so that the smart halt polling feature can be enabled or disabled at build time.
Signed-off-by: Li Hua <hucool.lihua@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
---
 include/linux/kernel.h |  2 ++
 init/Kconfig           |  8 ++++++++
 kernel/sched/idle.c    | 14 ++++++++++++++
 kernel/sysctl.c        |  2 ++
 4 files changed, 26 insertions(+)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 63555b947d96..b8cce99fd8eb 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -555,7 +555,9 @@ extern int sysctl_panic_on_rcu_stall;
 extern int sysctl_panic_on_stackoverflow;
 
 extern bool crash_kexec_post_notifiers;
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 extern unsigned long poll_threshold_ns;
+#endif
 
 
 /*
diff --git a/init/Kconfig b/init/Kconfig
index 66d7f2708b95..cedc5ab247a5 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -774,6 +774,14 @@ config GENERIC_SCHED_CLOCK
 
 menu "Scheduler features"
 
+config IAS_SMART_HALT_POLL
+	bool "Enable smart halt poll"
+	default n
+	help
+	  Before entering the real idle, polling for a while. if the current
+	  task is set TIF_NEED_RESCHED during the polling process, it will
+	  immediately break from the polling loop.
+
 config UCLAMP_TASK
 	bool "Enable utilization clamping for RT/FAIR tasks"
 	depends on CPU_FREQ_GOV_SCHEDUTIL
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 0bd53051ac71..4f7b0ee06144 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -13,11 +13,13 @@
 /* Linker adds these: start and end of __cpuidle functions */
 extern char __cpuidle_text_start[], __cpuidle_text_end[];
 
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 /*
  * Poll_threshold_ns indicates the maximum polling time before
  * entering real idle.
  */
 unsigned long poll_threshold_ns;
+#endif
 
 /**
  * sched_idle_set_state - Record idle state for the current CPU.
@@ -58,6 +60,7 @@ static int __init cpu_idle_nopoll_setup(char *__unused)
 __setup("hlt", cpu_idle_nopoll_setup);
 #endif
 
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 static void smart_idle_poll(void)
 {
 	unsigned long poll_duration = poll_threshold_ns;
@@ -75,6 +78,7 @@ static void smart_idle_poll(void)
 		cur = ktime_get();
 	} while (ktime_before(cur, stop));
 }
+#endif
 
 static noinline int __cpuidle cpu_idle_poll(void)
 {
@@ -82,7 +86,9 @@ static noinline int __cpuidle cpu_idle_poll(void)
 	stop_critical_timings();
 	rcu_idle_enter();
 	local_irq_enable();
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 	smart_idle_poll();
+#endif
 
 	while (!tif_need_resched() &&
 	       (cpu_idle_force_poll || tick_check_broadcast_expired()))
@@ -286,7 +292,9 @@ static void cpuidle_idle_call(void)
 static void do_idle(void)
 {
 	int cpu = smp_processor_id();
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 	unsigned long idle_poll_flag = poll_threshold_ns;
+#endif
 	/*
 	 * If the arch has a polling bit, we maintain an invariant:
 	 *
@@ -319,11 +327,17 @@ static void do_idle(void)
 		 * broadcast device expired for us, we don't want to go deep
 		 * idle as we know that the IPI is going to arrive right away.
 		 */
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 		if (cpu_idle_force_poll || tick_check_broadcast_expired() ||
 		    idle_poll_flag) {
+#else
+		if (cpu_idle_force_poll || tick_check_broadcast_expired()) {
+#endif
 			tick_nohz_idle_restart_tick();
 			cpu_idle_poll();
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 			idle_poll_flag = 0;
+#endif
 		} else {
 			cpuidle_idle_call();
 		}
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 261388afe482..a573817a6fe0 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1849,6 +1849,7 @@ static struct ctl_table kern_table[] = {
 		.proc_handler	= sysctl_sched_uclamp_handler,
 	},
 #endif
+#ifdef CONFIG_IAS_SMART_HALT_POLL
 	{
 		.procname	= "halt_poll_threshold",
 		.data		= &poll_threshold_ns,
@@ -1856,6 +1857,7 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+#endif
 #ifdef CONFIG_SCHED_AUTOGROUP
 	{
 		.procname	= "sched_autogroup_enabled",