sched/idle: introduce smart halt polling
arch/arm64/Kconfig | 3 + arch/arm64/configs/openeuler_defconfig | 3 + arch/arm64/include/asm/thread_info.h | 2 + arch/arm64/kernel/process.c | 4 ++ drivers/cpuidle/Kconfig | 4 +- drivers/cpuidle/cpuidle-haltpoll.c | 98 ++++++++++++++++++++++---- drivers/cpuidle/governors/haltpoll.c | 6 +- drivers/cpuidle/poll_state.c | 3 + include/asm-generic/kvm_para.h | 2 +- include/linux/kernel.h | 1 + kernel/sched/idle.c | 30 +++++++- kernel/sysctl.c | 7 ++ 12 files changed, 143 insertions(+), 20 deletions(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I912VN
-------------------------------------------------
In a guest, poll for a while before entering the real idle state. If TIF_NEED_RESCHED is set for the current task during polling, the polling loop is exited immediately.
The polling time, poll_threshold_ns, can be adjusted via sysctl to avoid consuming 100% of the CPU on the host. This value can be tuned according to the requirements.
This optimization requires support for _TIF_POLLING_NRFLAG, which reduces the overhead of the ttwu IPI. Wakeup response delay is reduced from 4us to 1us. ------------------------------------------------------------------
arm64: Add some definitions of kvm_para*
Currently, ARM does not support the kvm_para* functions of KVM_GUEST. We provide definitions of the kvm_para* functions, although each is only a simple return. ------------------------------------------------------------------
cpuidle: haltpoll: Only check boot_option_idle_override in x86
boot_option_idle_override is defined only in x86/ia64. Since haltpoll supports x86 and arm64, let's check boot_option_idle_override only in x86. ------------------------------------------------------------------
ARM: cpuidle: Add support for cpuidle-haltpoll driver for ARM
Add support for cpuidle-haltpoll driver for ARM. Allow ARM to use the cpuidle-haltpoll driver. ------------------------------------------------------------------
config: enable CONFIG_CPU_IDLE_GOV_HALTPOLL and CONFIG_HALTPOLL_CPUIDLE for arm
We enable haltpoll by default to improve performance. x86 is already supported; now we provide it on ARM as well. ------------------------------------------------------------------
arm64: Optimize ttwu IPI
When waking up a task on a remote CPU that shares the LLC, we can simply set the need_resched flag to wake up a CPU that is in polling idle. This wakeup does not require an IPI.
However, this requires support for _TIF_POLLING_NRFLAG. ------------------------------------------------------------------
cpuidle: add cpuidle-haltpoll driver module parameter
To ensure energy efficiency, haltpoll is disabled by default. However, in performance-sensitive scenarios, you can enable haltpoll as follows:
echo Y > /sys/module/cpuidle_haltpoll/parameters/force
Signed-off-by: liangtian liangtian13@huawei.com --- arch/arm64/Kconfig | 3 + arch/arm64/configs/openeuler_defconfig | 3 + arch/arm64/include/asm/thread_info.h | 2 + arch/arm64/kernel/process.c | 4 ++ drivers/cpuidle/Kconfig | 4 +- drivers/cpuidle/cpuidle-haltpoll.c | 98 ++++++++++++++++++++++---- drivers/cpuidle/governors/haltpoll.c | 6 +- drivers/cpuidle/poll_state.c | 3 + include/asm-generic/kvm_para.h | 2 +- include/linux/kernel.h | 1 + kernel/sched/idle.c | 30 +++++++- kernel/sysctl.c | 7 ++ 12 files changed, 143 insertions(+), 20 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 85ac1e83f747..83612218295d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -413,6 +413,9 @@ config KASAN_SHADOW_OFFSET config UNWIND_TABLES bool
+config ARCH_HAS_CPU_RELAX + def_bool y + source "arch/arm64/Kconfig.platforms"
source "kernel/livepatch/Kconfig" diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 2ddea5999010..276ebd171f90 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -612,10 +612,13 @@ CONFIG_CPU_IDLE=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y CONFIG_CPU_IDLE_GOV_TEO=y +CONFIG_CPU_IDLE_GOV_HALTPOLL=y
# # ARM CPU Idle Drivers # +CONFIG_ARM_CPUIDLE=y +CONFIG_HALTPOLL_CPUIDLE=y # CONFIG_ARM_PSCI_CPUIDLE is not set # end of ARM CPU Idle Drivers # end of CPU Idle diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index a2596f942500..5cc94a855f4a 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -72,6 +72,7 @@ void arch_setup_new_exec(void); #define TIF_SYSCALL_TRACEPOINT 10 /* syscall tracepoint for ftrace */ #define TIF_SECCOMP 11 /* syscall secure computing */ #define TIF_SYSCALL_EMU 12 /* syscall emulation active */ +#define TIF_POLLING_NRFLAG 16 /* idle is polling for TIF_NEED_RESCHED */ #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -101,6 +102,7 @@ void arch_setup_new_exec(void); #define _TIF_MTE_ASYNC_FAULT (1 << TIF_MTE_ASYNC_FAULT) #define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) #define _TIF_32BIT_AARCH64 (1 << TIF_32BIT_AARCH64) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 068e5bb2661b..e1e51ed94736 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -68,6 +68,10 @@ EXPORT_SYMBOL(__stack_chk_guard); void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off);
+#if defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) +EXPORT_SYMBOL(arch_cpu_idle); +#endif + #ifdef CONFIG_HOTPLUG_CPU void __noreturn arch_cpu_idle_dead(void) { diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index cac5997dca50..65a60235836e 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -35,7 +35,7 @@ config CPU_IDLE_GOV_TEO
config CPU_IDLE_GOV_HALTPOLL bool "Haltpoll governor (for virtualized systems)" - depends on KVM_GUEST + depends on KVM_GUEST || ARM64 help This governor implements haltpoll idle state selection, to be used in conjunction with the haltpoll cpuidle driver, allowing @@ -73,7 +73,7 @@ endmenu
config HALTPOLL_CPUIDLE tristate "Halt poll cpuidle driver" - depends on X86 && KVM_GUEST + depends on (X86 && KVM_GUEST) || ARM64 select CPU_IDLE_GOV_HALTPOLL default y help diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index e66df22f9695..7f56eea713b5 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -18,9 +18,17 @@ #include <linux/kvm_para.h> #include <linux/cpuidle_haltpoll.h>
-static bool force __read_mostly; -module_param(force, bool, 0444); -MODULE_PARM_DESC(force, "Load unconditionally"); +static bool force; +MODULE_PARM_DESC(force, "bool, enable haltpoll driver"); +static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp); +static int register_haltpoll_driver(void); +static void unregister_haltpoll_driver(void); + +static const struct kernel_param_ops enable_haltpoll_ops = { + .set = enable_haltpoll_driver, + .get = param_get_bool, +}; +module_param_cb(force, &enable_haltpoll_ops, &force, 0644);
static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; static enum cpuhp_state haltpoll_hp_state; @@ -36,6 +44,42 @@ static int default_enter_idle(struct cpuidle_device *dev, return index; }
+ +static int enable_haltpoll_driver(const char *val, const struct kernel_param *kp) +{ +#ifdef CONFIG_ARM64 + int ret; + bool do_enable; + + if (!val) + return 0; + + ret = strtobool(val, &do_enable); + + if (ret || force == do_enable) + return ret; + + if (do_enable) { + ret = register_haltpoll_driver(); + + if (!ret) { + pr_info("Enable haltpoll driver.\n"); + force = 1; + } else { + pr_err("Fail to enable haltpoll driver.\n"); + } + } else { + unregister_haltpoll_driver(); + force = 0; + pr_info("Unregister haltpoll driver.\n"); + } + + return ret; +#else + return -1; +#endif +} + static struct cpuidle_driver haltpoll_driver = { .name = "haltpoll", .governor = "haltpoll", @@ -84,32 +128,30 @@ static int haltpoll_cpu_offline(unsigned int cpu) return 0; }
-static void haltpoll_uninit(void) -{ - if (haltpoll_hp_state) - cpuhp_remove_state(haltpoll_hp_state); - cpuidle_unregister_driver(&haltpoll_driver);
- free_percpu(haltpoll_cpuidle_devices); - haltpoll_cpuidle_devices = NULL; +static bool haltpoll_want(void) +{ + return kvm_para_has_hint(KVM_HINTS_REALTIME); }
-static bool haltpoll_want(void) +static void haltpoll_uninit(void) { - return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; + unregister_haltpoll_driver(); }
-static int __init haltpoll_init(void) +static int register_haltpoll_driver(void) { int ret; struct cpuidle_driver *drv = &haltpoll_driver;
+#ifdef CONFIG_X86 /* Do not load haltpoll if idle= is passed */ if (boot_option_idle_override != IDLE_NO_OVERRIDE) return -ENODEV;
- if (!kvm_para_available() || !haltpoll_want()) + if (!force && (!kvm_para_available() || !haltpoll_want())) return -ENODEV; +#endif
cpuidle_poll_state_init(drv);
@@ -135,9 +177,35 @@ static int __init haltpoll_init(void) return ret; }
+static void unregister_haltpoll_driver(void) +{ + if (haltpoll_hp_state) + cpuhp_remove_state(haltpoll_hp_state); + cpuidle_unregister_driver(&haltpoll_driver); + + free_percpu(haltpoll_cpuidle_devices); + haltpoll_cpuidle_devices = NULL; + +} + +static int __init haltpoll_init(void) +{ + int ret = 0; +#ifdef CONFIG_X86 + /* Do not load haltpoll if idle= is passed */ + if (boot_option_idle_override != IDLE_NO_OVERRIDE) + return -ENODEV; +#endif + if (force || (haltpoll_want() && kvm_para_available())) + ret = register_haltpoll_driver(); + + return ret; +} + static void __exit haltpoll_exit(void) { - haltpoll_uninit(); + if (haltpoll_cpuidle_devices) + haltpoll_uninit(); }
module_init(haltpoll_init); diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 1dff3a52917d..71c41cba3e96 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -40,6 +40,10 @@ module_param(guest_halt_poll_grow_start, uint, 0644); static bool guest_halt_poll_allow_shrink __read_mostly = true; module_param(guest_halt_poll_allow_shrink, bool, 0644);
+static bool enable __read_mostly = true; +module_param(enable, bool, 0444); +MODULE_PARM_DESC(enable, "Load unconditionally"); + /** * haltpoll_select - selects the next idle state to enter * @drv: cpuidle driver containing state data @@ -143,7 +147,7 @@ static struct cpuidle_governor haltpoll_governor = {
static int __init init_haltpoll(void) { - if (kvm_para_available()) + if (enable) return cpuidle_register_governor(&haltpoll_governor);
return 0; diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 9b6d90a72601..b939e2e25e3d 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -7,6 +7,9 @@ #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/sched/idle.h> +#ifdef CONFIG_ARM64 +#include <linux/cpu.h> +#endif
#define POLL_IDLE_RELAX_COUNT 200
diff --git a/include/asm-generic/kvm_para.h b/include/asm-generic/kvm_para.h index 728e5c5706c4..a0a4cf1cda9a 100644 --- a/include/asm-generic/kvm_para.h +++ b/include/asm-generic/kvm_para.h @@ -4,7 +4,7 @@
#include <uapi/asm-generic/kvm_para.h>
- +#define KVM_HINTS_REALTIME 0 /* * This function is used by architectures that support kvm to avoid issuing * false soft lockup messages. diff --git a/include/linux/kernel.h b/include/linux/kernel.h index cee8fe87e9f4..d0ac98b7d656 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -220,6 +220,7 @@ extern void bust_spinlocks(int yes); extern int root_mountflags;
extern bool early_boot_irqs_disabled; +extern unsigned long poll_threshold_ns;
/* * Values used for system_state. Ordering of the states must not be changed diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 5007b25c5bc6..d09a1ff1c87a 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -10,6 +10,12 @@ /* Linker adds these: start and end of __cpuidle functions */ extern char __cpuidle_text_start[], __cpuidle_text_end[];
+/* + * Poll_threshold_ns indicates the maximum polling time before + * entering real idle. + */ +unsigned long poll_threshold_ns; + /** * sched_idle_set_state - Record idle state for the current CPU. * @idle_state: State to record. @@ -49,6 +55,24 @@ static int __init cpu_idle_nopoll_setup(char *__unused) __setup("hlt", cpu_idle_nopoll_setup); #endif
+static void smart_idle_poll(void) +{ + unsigned long poll_duration = poll_threshold_ns; + ktime_t cur, stop; + + if (!poll_duration) + return; + + stop = ktime_add_ns(ktime_get(), poll_duration); + + do { + cpu_relax(); + if (tif_need_resched()) + break; + cur = ktime_get(); + } while (ktime_before(cur, stop)); +} + static noinline int __cpuidle cpu_idle_poll(void) { instrumentation_begin(); @@ -56,6 +80,7 @@ static noinline int __cpuidle cpu_idle_poll(void) stop_critical_timings(); ct_cpuidle_enter();
+ smart_idle_poll(); raw_local_irq_enable(); while (!tif_need_resched() && (cpu_idle_force_poll || tick_check_broadcast_expired())) @@ -237,6 +262,7 @@ static void cpuidle_idle_call(void) static void do_idle(void) { int cpu = smp_processor_id(); + unsigned long idle_poll_flag = poll_threshold_ns;
/* * Check if we need to update blocked load @@ -275,9 +301,11 @@ static void do_idle(void) * broadcast device expired for us, we don't want to go deep * idle as we know that the IPI is going to arrive right away. */ - if (cpu_idle_force_poll || tick_check_broadcast_expired()) { + if (cpu_idle_force_poll || tick_check_broadcast_expired() || + idle_poll_flag) { tick_nohz_idle_restart_tick(); cpu_idle_poll(); + idle_poll_flag = 0; } else { cpuidle_idle_call(); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e84df0818517..cdc117677647 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2025,6 +2025,13 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, + { + .procname = "halt_poll_threshold", + .data = &poll_threshold_ns, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + }, #ifdef CONFIG_TREE_RCU { .procname = "panic_on_rcu_stall",
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/4618 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/4618 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...