From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-4.20 commit eb40a380bff28f84b6583bba6786b46ef26ef548 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
It is not necessary to update data->last_state_idx in menu_select() as it only is used in menu_update() which only runs when data->needs_update is set and that is set only when updating data->last_state_idx in menu_reflect().
Accordingly, drop the update of data->last_state_idx from menu_select() and get rid of the (now redundant) "out" label from it.
No intentional behavior changes.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/governors/menu.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index ff0ebec40f859..3a48e4e1c175d 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -403,7 +403,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, s->target_residency <= ktime_to_us(delta_next)) idx = i;
- goto out; + return idx; } if (s->exit_latency > latency_req) { /* @@ -450,10 +450,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, } }
-out: - data->last_state_idx = idx; - - return data->last_state_idx; + return idx; }
/**
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-4.20 commit 01bad1c6896db021db82042e71c2bf1f97cc026b category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
If need_resched() returns "false", breaking out of the loop in poll_idle() will cause a new idle state to be selected, so in fact it usually doesn't make sense to spin in it longer than the target residency of the second state. [Note that the "polling" state is used only if there is at least one "real" state defined in addition to it, so the second state is always there.] On the other hand, breaking out of it early (say in case the next state is disabled) shouldn't hurt as it is polling anyway.
For this reason, make the loop in poll_idle() break if the CPU has been spinning longer than the target residency of the second state (the "polling" state can only be state[0]).
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/poll_state.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 36ff5a1d94226..1a00353a5c4b1 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -9,7 +9,6 @@ #include <linux/sched/clock.h> #include <linux/sched/idle.h>
-#define POLL_IDLE_TIME_LIMIT (TICK_NSEC / 16) #define POLL_IDLE_RELAX_COUNT 200
static int __cpuidle poll_idle(struct cpuidle_device *dev, @@ -21,6 +20,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
local_irq_enable(); if (!current_set_polling_and_test()) { + u64 limit = (u64)drv->states[1].target_residency + * NSEC_PER_USEC; unsigned int loop_count = 0;
while (!need_resched()) { @@ -29,7 +30,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, continue;
loop_count = 0; - if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) { + if (local_clock() - time_start > limit) { dev->poll_time_limit = true; break; }
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-5.0 commit 800fb34a99ce7d22dca839c90f869c7a12b50f70 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
When computing the limit of time to spend in the loop in poll_idle(), use the target residency of the first enabled idle state deeper than state 0 instead of always using the target residency of state 1.
This helps when state 1 is disabled for diagnostics, for instance.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/poll_state.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index 1a00353a5c4b1..e1fb542a39ae9 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -20,9 +20,19 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
local_irq_enable(); if (!current_set_polling_and_test()) { - u64 limit = (u64)drv->states[1].target_residency - * NSEC_PER_USEC; unsigned int loop_count = 0; + u64 limit = TICK_USEC; + int i; + + for (i = 1; i < drv->state_count; i++) { + if (drv->states[i].disabled + || dev->states_usage[i].disable) + continue; + + limit = (u64)drv->states[i].target_residency + * NSEC_PER_USEC; + break; + }
while (!need_resched()) { cpu_relax();
From: Doug Smythies doug.smythies@gmail.com
mainline inclusion from mainline-5.0 commit 1617971c6616c87185cbc78fa1a86dfc70dd16b6 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
The default time is declared in units of microsecnds, but is used as nanoseconds, resulting in significant accounting errors for idle state 0 time when all idle states deeper than 0 are disabled.
Under these unusual conditions, we don't really care about the poll time limit anyhow.
Fixes: 800fb34a99ce ("cpuidle: poll_state: Disregard disable idle states") Signed-off-by: Doug Smythies dsmythies@telus.net Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/poll_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index e1fb542a39ae9..a2627e00beea0 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -21,7 +21,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, local_irq_enable(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; - u64 limit = TICK_USEC; + u64 limit = TICK_NSEC; int i;
for (i = 1; i < drv->state_count; i++) {
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.4 commit fa86ee90eb1111267de67cb4272b5ce711f18cbb category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Add a cpuidle driver that calls the architecture default_idle routine.
To be used in conjunction with the haltpoll governor.
Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/process.c | 2 +- drivers/cpuidle/Kconfig | 9 ++++ drivers/cpuidle/Makefile | 1 + drivers/cpuidle/cpuidle-haltpoll.c | 68 ++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 drivers/cpuidle/cpuidle-haltpoll.c
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cd138bfd926c2..aabd8cd9d5e34 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -557,7 +557,7 @@ void __cpuidle default_idle(void) safe_halt(); trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); } -#ifdef CONFIG_APM_MODULE +#if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); #endif
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 7e48eb5bf0a7a..754a99736cafb 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -41,6 +41,15 @@ depends on PPC source "drivers/cpuidle/Kconfig.powerpc" endmenu
+config HALTPOLL_CPUIDLE + tristate "Halt poll cpuidle driver" + depends on X86 && KVM_GUEST + default y + help + This option enables halt poll cpuidle driver, which allows to poll + before halting in the guest (more efficient than polling in the + host via halt_poll_ns for some scenarios). + endif
config ARCH_NEEDS_CPU_IDLE_COUPLED diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 9d7176cee3d37..240227474cd98 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -7,6 +7,7 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o obj-$(CONFIG_DT_IDLE_STATES) += dt_idle_states.o obj-$(CONFIG_ARCH_HAS_CPU_RELAX) += poll_state.o +obj-$(CONFIG_HALTPOLL_CPUIDLE) += cpuidle-haltpoll.o
################################################################################## # ARM SoC drivers diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c new file mode 100644 index 0000000000000..35cfb53e92870 --- /dev/null +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * cpuidle driver for haltpoll governor. + * + * Copyright 2019 Red Hat, Inc. and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Authors: Marcelo Tosatti mtosatti@redhat.com + */ + +#include <linux/init.h> +#include <linux/cpuidle.h> +#include <linux/module.h> +#include <linux/sched/idle.h> +#include <linux/kvm_para.h> + +static int default_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + if (current_clr_polling_and_test()) { + local_irq_enable(); + return index; + } + default_idle(); + return index; +} + +static struct cpuidle_driver haltpoll_driver = { + .name = "haltpoll", + .owner = THIS_MODULE, + .states = { + { /* entry 0 is for polling */ }, + { + .enter = default_enter_idle, + .exit_latency = 1, + .target_residency = 1, + .power_usage = -1, + .name = "haltpoll idle", + .desc = "default architecture idle", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int __init haltpoll_init(void) +{ + struct cpuidle_driver *drv = &haltpoll_driver; + + cpuidle_poll_state_init(drv); + + if (!kvm_para_available()) + return 0; + + return cpuidle_register(&haltpoll_driver, NULL); +} + +static void __exit haltpoll_exit(void) +{ + cpuidle_unregister(&haltpoll_driver); +} + +module_init(haltpoll_init); +module_exit(haltpoll_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marcelo Tosatti mtosatti@redhat.com");
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.4 commit 259231a045616c4101d023a8f4dcc8379af265a6 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Add a poll_limit_ns variable to cpuidle_device structure.
Calculate and configure it in the new cpuidle_poll_time function, in case its zero.
Individual governors are allowed to override this value.
Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Stephen Rothwell sfr@canb.auug.org.au Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle.c | 31 +++++++++++++++++++++++++++++++ drivers/cpuidle/poll_state.c | 13 ++----------- drivers/cpuidle/sysfs.c | 7 +++++++ include/linux/cpuidle.h | 6 ++++++ 4 files changed, 46 insertions(+), 11 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 2d182dc1b49ed..b21f39bc63754 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -319,6 +319,37 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index) cpuidle_curr_governor->reflect(dev, index); }
+/** + * cpuidle_poll_time - return amount of time to poll for, + * governors can override dev->poll_limit_ns if necessary + * + * @drv: the cpuidle driver tied with the cpu + * @dev: the cpuidle device + * + */ +u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + int i; + u64 limit_ns; + + if (dev->poll_limit_ns) + return dev->poll_limit_ns; + + limit_ns = TICK_NSEC; + for (i = 1; i < drv->state_count; i++) { + if (drv->states[i].disabled || dev->states_usage[i].disable) + continue; + + limit_ns = (u64)drv->states[i].target_residency + * NSEC_PER_USEC; + } + + dev->poll_limit_ns = limit_ns; + + return dev->poll_limit_ns; +} + /** * cpuidle_install_idle_handler - installs the cpuidle idle loop handler */ diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index a2627e00beea0..c8459fdb28e8e 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -21,18 +21,9 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev, local_irq_enable(); if (!current_set_polling_and_test()) { unsigned int loop_count = 0; - u64 limit = TICK_NSEC; - int i; + u64 limit;
- for (i = 1; i < drv->state_count; i++) { - if (drv->states[i].disabled - || dev->states_usage[i].disable) - continue; - - limit = (u64)drv->states[i].target_residency - * NSEC_PER_USEC; - break; - } + limit = cpuidle_poll_time(drv, dev);
while (!need_resched()) { cpu_relax(); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 66979dc336807..6799a938f536d 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -328,6 +328,7 @@ struct cpuidle_state_kobj { struct cpuidle_state_usage *state_usage; struct completion kobj_unregister; struct kobject kobj; + struct cpuidle_device *device; };
#ifdef CONFIG_SUSPEND @@ -385,6 +386,7 @@ static inline void cpuidle_remove_s2idle_attr_group(struct cpuidle_state_kobj *k #define kobj_to_state_obj(k) container_of(k, struct cpuidle_state_kobj, kobj) #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) +#define kobj_to_device(k) (kobj_to_state_obj(k)->device) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, @@ -408,10 +410,14 @@ static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, struct cpuidle_state *state = kobj_to_state(kobj); struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj); struct cpuidle_state_attr *cattr = attr_to_stateattr(attr); + struct cpuidle_device *dev = kobj_to_device(kobj);
if (cattr->store) ret = cattr->store(state, state_usage, buf, size);
+ /* reset poll time cache */ + dev->poll_limit_ns = 0; + return ret; }
@@ -462,6 +468,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) } kobj->state = &drv->states[i]; kobj->state_usage = &device->states_usage[i]; + kobj->device = device; init_completion(&kobj->kobj_unregister);
ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 317aecaed8970..7468a7d90fe00 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -85,6 +85,7 @@ struct cpuidle_device { unsigned int cpu;
int last_residency; + u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -141,6 +142,8 @@ extern int cpuidle_select(struct cpuidle_driver *drv, extern int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index); extern void cpuidle_reflect(struct cpuidle_device *dev, int index); +extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev);
extern int cpuidle_register_driver(struct cpuidle_driver *drv); extern struct cpuidle_driver *cpuidle_get_driver(void); @@ -175,6 +178,9 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } +extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{return 0; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv) {return -ENODEV; } static inline struct cpuidle_driver *cpuidle_get_driver(void) {return NULL; }
From: Stephen Rothwell sfr@canb.auug.org.au
mainline inclusion from mainline-5.4 commit 7dcddef6f769d7e60691c732eb6d09cdb1d9df76 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
An x86_64 allmodconfig build produces these errors:
x86_64-linux-gnu-ld: kernel/sched/core.o: in function `cpuidle_poll_time': core.c:(.text+0x230): multiple definition of `cpuidle_poll_time'; arch/x86/= kernel/process.o:process.c:(.text+0xc0): first defined here
(and more)
Fixes: 259231a04561 ("cpuidle: add poll_limit_ns to cpuidle_device structure") Signed-off-by: Stephen Rothwell sfr@canb.auug.org.au Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 7468a7d90fe00..e9c6adaed3c75 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -178,7 +178,7 @@ static inline int cpuidle_enter(struct cpuidle_driver *drv, struct cpuidle_device *dev, int index) {return -ENODEV; } static inline void cpuidle_reflect(struct cpuidle_device *dev, int index) { } -extern u64 cpuidle_poll_time(struct cpuidle_driver *drv, +static inline u64 cpuidle_poll_time(struct cpuidle_driver *drv, struct cpuidle_device *dev) {return 0; } static inline int cpuidle_register_driver(struct cpuidle_driver *drv)
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.5 commit 36fcb4292473cb9c9ce7706d038bcf0eda5cabeb category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Commit 259231a04561 ("cpuidle: add poll_limit_ns to cpuidle_device structure") changed, by mistake, the target residency from the first available sleep state to the last available sleep state (which should be longer).
This might cause excessive polling.
Fixes: 259231a04561 ("cpuidle: add poll_limit_ns to cpuidle_device structure") Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index b21f39bc63754..035ce130dceda 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -343,6 +343,7 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
limit_ns = (u64)drv->states[i].target_residency * NSEC_PER_USEC; + break; }
dev->poll_limit_ns = limit_ns;
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.4 commit 7d4daeedd575bbc3c40c87fc6708a8b88c50fe7e category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Since this field is shared by all governors, move it to cpuidle device structure.
Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/governors/ladder.c | 21 ++++++++++----------- drivers/cpuidle/governors/menu.c | 5 ++--- include/linux/cpuidle.h | 1 + 3 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 704880a6612a7..ecf858e97648e 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -38,7 +38,6 @@ struct ladder_device_state {
struct ladder_device { struct ladder_device_state states[CPUIDLE_STATE_MAX]; - int last_state_idx; };
static DEFINE_PER_CPU(struct ladder_device, ladder_devices); @@ -49,12 +48,13 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices); * @old_idx: the current state index * @new_idx: the new target state index */ -static inline void ladder_do_selection(struct ladder_device *ldev, +static inline void ladder_do_selection(struct cpuidle_device *dev, + struct ladder_device *ldev, int old_idx, int new_idx) { ldev->states[old_idx].stats.promotion_count = 0; ldev->states[old_idx].stats.demotion_count = 0; - ldev->last_state_idx = new_idx; + dev->last_state_idx = new_idx; }
/** @@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv, { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state; - int last_residency, last_idx = ldev->last_state_idx; + int last_residency, last_idx = dev->last_state_idx; int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; int latency_req = cpuidle_governor_latency_req(dev->cpu);
/* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { - ladder_do_selection(ldev, last_idx, 0); + ladder_do_selection(dev, ldev, last_idx, 0); return 0; }
@@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { - ladder_do_selection(ldev, last_idx, last_idx + 1); + ladder_do_selection(dev, ldev, last_idx, last_idx + 1); return last_idx + 1; } } @@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, if (drv->states[i].exit_latency <= latency_req) break; } - ladder_do_selection(ldev, last_idx, i); + ladder_do_selection(dev, ldev, last_idx, i); return i; }
@@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.demotion_count++; last_state->stats.promotion_count = 0; if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { - ladder_do_selection(ldev, last_idx, last_idx - 1); + ladder_do_selection(dev, ldev, last_idx, last_idx - 1); return last_idx - 1; } } @@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv, struct ladder_device_state *lstate; struct cpuidle_state *state;
- ldev->last_state_idx = first_idx; + dev->last_state_idx = first_idx;
for (i = first_idx; i < drv->state_count; i++) { state = &drv->states[i]; @@ -167,9 +167,8 @@ static int ladder_enable_device(struct cpuidle_driver *drv, */ static void ladder_reflect(struct cpuidle_device *dev, int index) { - struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); if (index > 0) - ldev->last_state_idx = index; + dev->last_state_idx = index; }
static struct cpuidle_governor ladder_governor = { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index 3a48e4e1c175d..c447014d1d727 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -119,7 +119,6 @@ */
struct menu_device { - int last_state_idx; int needs_update; int tick_wakeup;
@@ -465,7 +464,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) { struct menu_device *data = this_cpu_ptr(&menu_devices);
- data->last_state_idx = index; + dev->last_state_idx = index; data->needs_update = 1; data->tick_wakeup = tick_nohz_idle_got_tick(); } @@ -478,7 +477,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = this_cpu_ptr(&menu_devices); - int last_idx = data->last_state_idx; + int last_idx = dev->last_state_idx; struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; unsigned int new_factor; diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index e9c6adaed3c75..2efaff2db8e65 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -84,6 +84,7 @@ struct cpuidle_device { unsigned int poll_time_limit:1; unsigned int cpu;
+ int last_state_idx; int last_residency; u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX];
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.4 commit 2cffe9f6b96fece065ee8522673c90e92ef2085d category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
The cpuidle_haltpoll governor, in conjunction with the haltpoll cpuidle driver, allows guest vcpus to poll for a specified amount of time before halting. This provides the following benefits to host side polling:
1) The POLL flag is set while polling is performed, which allows a remote vCPU to avoid sending an IPI (and the associated cost of handling the IPI) when performing a wakeup.
2) The VM-exit cost can be avoided.
The downside of guest side polling is that polling is performed even with other runnable tasks in the host.
Results comparing halt_poll_ns and server/client application where a small packet is ping-ponged:
host --> 31.33 halt_poll_ns=300000 / no guest busy spin --> 33.40 (93.8%) halt_poll_ns=0 / guest_halt_poll_ns=300000 --> 32.73 (95.7%)
For the SAP HANA benchmarks (where idle_spin is a parameter of the previous version of the patch, results should be the same):
hpns == halt_poll_ns
idle_spin=0/ idle_spin=800/ idle_spin=0/ hpns=200000 hpns=0 hpns=800000 DeleteC06T03 (100 thread) 1.76 1.71 (-3%) 1.78 (+1%) InsertC16T02 (100 thread) 2.14 2.07 (-3%) 2.18 (+1.8%) DeleteC00T01 (1 thread) 1.34 1.28 (-4.5%) 1.29 (-3.7%) UpdateC00T03 (1 thread) 4.72 4.18 (-12%) 4.53 (-5%)
Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/virtual/guest-halt-polling.txt | 78 ++++++++++ drivers/cpuidle/Kconfig | 11 ++ drivers/cpuidle/governors/Makefile | 1 + drivers/cpuidle/governors/haltpoll.c | 150 +++++++++++++++++++ 4 files changed, 240 insertions(+) create mode 100644 Documentation/virtual/guest-halt-polling.txt create mode 100644 drivers/cpuidle/governors/haltpoll.c
diff --git a/Documentation/virtual/guest-halt-polling.txt b/Documentation/virtual/guest-halt-polling.txt new file mode 100644 index 0000000000000..b3a2a294532da --- /dev/null +++ b/Documentation/virtual/guest-halt-polling.txt @@ -0,0 +1,78 @@ +Guest halt polling +================== + +The cpuidle_haltpoll driver, with the haltpoll governor, allows +the guest vcpus to poll for a specified amount of time before +halting. +This provides the following benefits to host side polling: + + 1) The POLL flag is set while polling is performed, which allows + a remote vCPU to avoid sending an IPI (and the associated + cost of handling the IPI) when performing a wakeup. + + 2) The VM-exit cost can be avoided. + +The downside of guest side polling is that polling is performed +even with other runnable tasks in the host. + +The basic logic as follows: A global value, guest_halt_poll_ns, +is configured by the user, indicating the maximum amount of +time polling is allowed. This value is fixed. + +Each vcpu has an adjustable guest_halt_poll_ns +("per-cpu guest_halt_poll_ns"), which is adjusted by the algorithm +in response to events (explained below). + +Module Parameters +================= + +The haltpoll governor has 5 tunable module parameters: + +1) guest_halt_poll_ns: +Maximum amount of time, in nanoseconds, that polling is +performed before halting. + +Default: 200000 + +2) guest_halt_poll_shrink: +Division factor used to shrink per-cpu guest_halt_poll_ns when +wakeup event occurs after the global guest_halt_poll_ns. + +Default: 2 + +3) guest_halt_poll_grow: +Multiplication factor used to grow per-cpu guest_halt_poll_ns +when event occurs after per-cpu guest_halt_poll_ns +but before global guest_halt_poll_ns. + +Default: 2 + +4) guest_halt_poll_grow_start: +The per-cpu guest_halt_poll_ns eventually reaches zero +in case of an idle system. This value sets the initial +per-cpu guest_halt_poll_ns when growing. This can +be increased from 10000, to avoid misses during the initial +growth stage: + +10k, 20k, 40k, ... (example assumes guest_halt_poll_grow=2). + +Default: 50000 + +5) guest_halt_poll_allow_shrink: + +Bool parameter which allows shrinking. Set to N +to avoid it (per-cpu guest_halt_poll_ns will remain +high once achieves global guest_halt_poll_ns value). + +Default: Y + +The module parameters can be set from the debugfs files in: + + /sys/module/haltpoll/parameters/ + +Further Notes +============= + +- Care should be taken when setting the guest_halt_poll_ns parameter as a +large value has the potential to drive the cpu usage to 100% on a machine which +would be almost entirely idle otherwise. diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 754a99736cafb..25516996bd950 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -23,6 +23,17 @@ config CPU_IDLE_GOV_LADDER config CPU_IDLE_GOV_MENU bool "Menu governor (for tickless system)"
+config CPU_IDLE_GOV_HALTPOLL + bool "Haltpoll governor (for virtualized systems)" + depends on KVM_GUEST + help + This governor implements haltpoll idle state selection, to be + used in conjunction with the haltpoll cpuidle driver, allowing + for polling for a certain amount of time before entering idle + state. + + Some virtualized workloads benefit from using it. + config DT_IDLE_STATES bool
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile index 1b512722689f0..8877365573721 100644 --- a/drivers/cpuidle/governors/Makefile +++ b/drivers/cpuidle/governors/Makefile @@ -4,3 +4,4 @@
obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o +obj-$(CONFIG_CPU_IDLE_GOV_HALTPOLL) += haltpoll.o diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c new file mode 100644 index 0000000000000..797477bda4866 --- /dev/null +++ b/drivers/cpuidle/governors/haltpoll.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * haltpoll.c - haltpoll idle governor + * + * Copyright 2019 Red Hat, Inc. and/or its affiliates. + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Authors: Marcelo Tosatti mtosatti@redhat.com + */ + +#include <linux/kernel.h> +#include <linux/cpuidle.h> +#include <linux/time.h> +#include <linux/ktime.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/sched.h> +#include <linux/module.h> +#include <linux/kvm_para.h> + +static unsigned int guest_halt_poll_ns __read_mostly = 200000; +module_param(guest_halt_poll_ns, uint, 0644); + +/* division factor to shrink halt_poll_ns */ +static unsigned int guest_halt_poll_shrink __read_mostly = 2; +module_param(guest_halt_poll_shrink, uint, 0644); + +/* multiplication factor to grow per-cpu poll_limit_ns */ +static unsigned int guest_halt_poll_grow __read_mostly = 2; +module_param(guest_halt_poll_grow, uint, 0644); + +/* value in us to start growing per-cpu halt_poll_ns */ +static unsigned int guest_halt_poll_grow_start __read_mostly = 50000; +module_param(guest_halt_poll_grow_start, uint, 0644); + +/* allow shrinking guest halt poll */ +static bool guest_halt_poll_allow_shrink __read_mostly = true; +module_param(guest_halt_poll_allow_shrink, bool, 0644); + +/** + * haltpoll_select - selects the next idle state to enter + * @drv: cpuidle driver containing state data + * @dev: the CPU + * @stop_tick: indication on whether or not to stop the tick + */ +static int haltpoll_select(struct cpuidle_driver *drv, + struct cpuidle_device *dev, + bool *stop_tick) +{ + int latency_req = cpuidle_governor_latency_req(dev->cpu); + + if (!drv->state_count || latency_req == 0) { + *stop_tick = false; + return 0; + } + + if (dev->poll_limit_ns == 0) + return 1; + + /* Last state was poll? */ + if (dev->last_state_idx == 0) { + /* Halt if no event occurred on poll window */ + if (dev->poll_time_limit == true) + return 1; + + *stop_tick = false; + /* Otherwise, poll again */ + return 0; + } + + *stop_tick = false; + /* Last state was halt: poll */ + return 0; +} + +static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) +{ + unsigned int val; + u64 block_ns = block_us*NSEC_PER_USEC; + + /* Grow cpu_halt_poll_us if + * cpu_halt_poll_us < block_ns < guest_halt_poll_us + */ + if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) { + val = dev->poll_limit_ns * guest_halt_poll_grow; + + if (val < guest_halt_poll_grow_start) + val = guest_halt_poll_grow_start; + if (val > guest_halt_poll_ns) + val = guest_halt_poll_ns; + + dev->poll_limit_ns = val; + } else if (block_ns > guest_halt_poll_ns && + guest_halt_poll_allow_shrink) { + unsigned int shrink = guest_halt_poll_shrink; + + val = dev->poll_limit_ns; + if (shrink == 0) + val = 0; + else + val /= shrink; + dev->poll_limit_ns = val; + } +} + +/** + * haltpoll_reflect - update variables and update poll time + * @dev: the CPU + * @index: the index of actual entered state + */ +static void haltpoll_reflect(struct cpuidle_device *dev, int index) +{ + dev->last_state_idx = index; + + if (index != 0) + adjust_poll_limit(dev, dev->last_residency); +} + +/** + * haltpoll_enable_device - scans a CPU's states and does setup + * @drv: cpuidle driver + * @dev: the CPU + */ +static int haltpoll_enable_device(struct cpuidle_driver *drv, + struct cpuidle_device *dev) +{ + dev->poll_limit_ns = 0; + + return 0; +} + +static struct cpuidle_governor haltpoll_governor = { + .name = "haltpoll", + .rating = 21, + .enable = haltpoll_enable_device, + .select = haltpoll_select, + .reflect = haltpoll_reflect, +}; + +static int __init init_haltpoll(void) +{ + if (kvm_para_available()) + return cpuidle_register_governor(&haltpoll_governor); + + return 0; +} + +postcore_initcall(init_haltpoll);
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.3 commit 2d5ba19bdfef4dd06add144eb04287ee98409f75 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Add an MSRs which allows the guest to disable host polling (specifically the cpuidle-haltpoll, when performing polling in the guest, disables host side polling).
Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Paolo Bonzini pbonzini@redhat.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/virtual/kvm/msr.txt | 8 ++++++++ arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/include/uapi/asm/kvm_para.h | 2 ++ arch/x86/kvm/Kconfig | 1 + arch/x86/kvm/cpuid.c | 3 ++- arch/x86/kvm/x86.c | 22 ++++++++++++++++++++++ 6 files changed, 37 insertions(+), 1 deletion(-)
diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt index f3f0d57ced8e1..30ff2ac58d79a 100644 --- a/Documentation/virtual/kvm/msr.txt +++ b/Documentation/virtual/kvm/msr.txt @@ -273,3 +273,11 @@ MSR_KVM_EOI_EN: 0x4b564d04 guest must both read the least significant bit in the memory area and clear it using a single CPU instruction, such as test and clear, or compare and exchange. + +MSR_KVM_POLL_CONTROL: 0x4b564d05 + Control host-side polling. + + data: Bit 0 enables (1) or disables (0) host-side HLT polling logic. + + KVM guests can request the host not to poll on HLT, for example if + they are performing polling themselves. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ebf508fa87dc2..4344e56c9925a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -707,6 +707,8 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; } pv_eoi;
+ u64 msr_kvm_poll_control; + /* * Indicate whether the access faults on its page table in guest * which is set when fix page fault and used to detect unhandeable diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 19980ec1a316e..21d5f0240595f 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,6 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 #define KVM_FEATURE_PV_SEND_IPI 11 +#define KVM_FEATURE_POLL_CONTROL 12
#define KVM_HINTS_REALTIME 0
@@ -47,6 +48,7 @@ #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 +#define MSR_KVM_POLL_CONTROL 0x4b564d05
struct kvm_steal_time { __u64 steal; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 1bbec387d289c..80abc68b3e90c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -42,6 +42,7 @@ config KVM select PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_CPU_RELAX_INTERCEPT + select HAVE_KVM_NO_POLL select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_VFIO select SRCU diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 48c24d0e9e75c..1a0b5843b0e34 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -635,7 +635,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, (1 << KVM_FEATURE_PV_UNHALT) | (1 << KVM_FEATURE_PV_TLB_FLUSH) | (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | - (1 << KVM_FEATURE_PV_SEND_IPI); + (1 << KVM_FEATURE_PV_SEND_IPI) | + (1 << KVM_FEATURE_POLL_CONTROL);
if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d93821ca0b585..a6b3c45a1313a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1135,6 +1135,7 @@ static u32 emulated_msrs[] = { MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, MSR_AMD64_VIRT_SPEC_CTRL, + MSR_KVM_POLL_CONTROL, };
static unsigned num_emulated_msrs; @@ -2622,6 +2623,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; break;
+ case MSR_KVM_POLL_CONTROL: + /* only enable bit supported */ + if (data & (-1ULL << 1)) + return 1; + + vcpu->arch.msr_kvm_poll_control = data; + break; + case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_STATUS: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: @@ -2872,6 +2881,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_KVM_PV_EOI_EN: msr_info->data = vcpu->arch.pv_eoi.msr_val; break; + case MSR_KVM_POLL_CONTROL: + msr_info->data = vcpu->arch.msr_kvm_poll_control; + break; case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_TYPE: case MSR_IA32_MCG_CAP: @@ -8704,6 +8716,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) msr.host_initiated = true; kvm_write_tsc(vcpu, &msr); vcpu_put(vcpu); + + /* poll control enabled by default */ + vcpu->arch.msr_kvm_poll_control = 1; + mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync) @@ -9808,6 +9824,12 @@ bool kvm_vector_hashing_enabled(void) } EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
+bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.msr_kvm_poll_control & 1) == 0; +} +EXPORT_SYMBOL_GPL(kvm_arch_no_poll); + EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
From: Marcelo Tosatti mtosatti@redhat.com
mainline inclusion from mainline-5.4 commit a1c4423b02b2121108e3ea9580741e0f26309a48 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
When performing guest side polling, it is not necessary to also perform host side polling.
So disable host side polling, via the new MSR interface, when loading cpuidle-haltpoll driver.
Signed-off-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/Kconfig | 10 ++++++ arch/x86/include/asm/cpuidle_haltpoll.h | 8 +++++ arch/x86/kernel/kvm.c | 42 +++++++++++++++++++++++++ drivers/cpuidle/cpuidle-haltpoll.c | 9 +++++- include/linux/cpuidle_haltpoll.h | 16 ++++++++++ 5 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/cpuidle_haltpoll.h create mode 100644 include/linux/cpuidle_haltpoll.h
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2e9c5bb959e91..e8d2ca2009d8d 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -786,6 +786,7 @@ config KVM_GUEST bool "KVM Guest support (including kvmclock)" depends on PARAVIRT select PARAVIRT_CLOCK + select ARCH_CPUIDLE_HALTPOLL default y ---help--- This option enables various optimizations for running under the KVM @@ -794,6 +795,15 @@ config KVM_GUEST underlying device model, the host provides the guest with timing infrastructure such as time of day, and system time
+config ARCH_CPUIDLE_HALTPOLL + def_bool n + prompt "Disable host haltpoll when loading haltpoll driver" + help + If virtualized under KVM, disable host haltpoll. When loading + haltpoll driver, It allows the guest vcpus to poll for a + specified amount of time before halting, The execution of + haltpoll on the host is redundant. + config KVM_DEBUG_FS bool "Enable debug information for KVM Guests in debugfs" depends on KVM_GUEST && DEBUG_FS diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h new file mode 100644 index 0000000000000..ff8607d815261 --- /dev/null +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ARCH_HALTPOLL_H +#define _ARCH_HALTPOLL_H + +void arch_haltpoll_enable(void); +void arch_haltpoll_disable(void); + +#endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index cee45d46e67dc..40f40bd08ab73 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -866,3 +866,45 @@ void __init kvm_spinlock_init(void) }
#endif /* CONFIG_PARAVIRT_SPINLOCKS */ + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL + +static void kvm_disable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 0); +} + +static void kvm_enable_host_haltpoll(void *i) +{ + wrmsrl(MSR_KVM_POLL_CONTROL, 1); +} + +void arch_haltpoll_enable(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { + pr_err_once("kvm: host does not support poll control\n"); + pr_err_once("kvm: host upgrade recommended\n"); + return; + } + + preempt_disable(); + /* Enable guest halt poll disables host halt poll */ + kvm_disable_host_haltpoll(NULL); + smp_call_function(kvm_disable_host_haltpoll, NULL, 1); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_enable); + +void arch_haltpoll_disable(void) +{ + if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) + return; + + preempt_disable(); + /* Enable guest halt poll disables host halt poll */ + kvm_enable_host_haltpoll(NULL); + smp_call_function(kvm_enable_host_haltpoll, NULL, 1); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(arch_haltpoll_disable); +#endif diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 35cfb53e92870..9ac093dcbb01f 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/sched/idle.h> #include <linux/kvm_para.h> +#include <linux/cpuidle_haltpoll.h>
static int default_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -47,6 +48,7 @@ static struct cpuidle_driver haltpoll_driver = {
static int __init haltpoll_init(void) { + int ret; struct cpuidle_driver *drv = &haltpoll_driver;
cpuidle_poll_state_init(drv); @@ -54,11 +56,16 @@ static int __init haltpoll_init(void) if (!kvm_para_available()) return 0;
- return cpuidle_register(&haltpoll_driver, NULL); + ret = cpuidle_register(&haltpoll_driver, NULL); + if (ret == 0) + arch_haltpoll_enable(); + + return ret; }
static void __exit haltpoll_exit(void) { + arch_haltpoll_disable(); cpuidle_unregister(&haltpoll_driver); }
diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h new file mode 100644 index 0000000000000..fe5954c2409e6 --- /dev/null +++ b/include/linux/cpuidle_haltpoll.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _CPUIDLE_HALTPOLL_H +#define _CPUIDLE_HALTPOLL_H + +#ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL +#include <asm/cpuidle_haltpoll.h> +#else +static inline void arch_haltpoll_enable(void) +{ +} + +static inline void arch_haltpoll_disable(void) +{ +} +#endif +#endif
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-5.0 commit 61cb5758d3c46bc1ba87694fefc0d9653613ce6b category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Add cpuidle.governor= command line parameter to allow the default cpuidle governor to be replaced.
That is useful, for example, if someone running a tickful kernel wants to use the menu governor on it.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/admin-guide/kernel-parameters.txt | 3 +++ drivers/cpuidle/cpuidle.c | 1 + drivers/cpuidle/cpuidle.h | 1 + drivers/cpuidle/governor.c | 9 +++++++-- 4 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 205adf7e258e3..3186256d9c0ce 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -682,6 +682,9 @@ cpuidle.off=1 [CPU_IDLE] disable the cpuidle sub-system
+ cpuidle.governor= + [CPU_IDLE] Name of the cpuidle governor to use. + cpufreq.off=1 [CPU_FREQ] disable the cpufreq sub-system
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 035ce130dceda..55eb3d152521b 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -735,4 +735,5 @@ static int __init cpuidle_init(void) }
module_param(off, int, 0444); +module_param_string(governor, param_governor, CPUIDLE_NAME_LEN, 0444); core_initcall(cpuidle_init); diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index 2965ab32a583f..d6613101af924 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -7,6 +7,7 @@ #define __DRIVER_CPUIDLE_H
/* For internal use only */ +extern char param_governor[]; extern struct cpuidle_governor *cpuidle_curr_governor; extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 9fed1b8292927..bb93e5cf6a4ae 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -11,10 +11,13 @@ #include <linux/cpu.h> #include <linux/cpuidle.h> #include <linux/mutex.h> +#include <linux/module.h> #include <linux/pm_qos.h>
#include "cpuidle.h"
+char param_governor[CPUIDLE_NAME_LEN]; + LIST_HEAD(cpuidle_governors); struct cpuidle_governor *cpuidle_curr_governor;
@@ -86,9 +89,11 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) mutex_lock(&cpuidle_lock); if (__cpuidle_find_governor(gov->name) == NULL) { ret = 0; - list_add_tail(&gov->governor_list, &cpuidle_governors); if (!cpuidle_curr_governor || - cpuidle_curr_governor->rating < gov->rating) + !strncasecmp(param_governor, gov->name, CPUIDLE_NAME_LEN) || + (cpuidle_curr_governor->rating < gov->rating && + strncasecmp(param_governor, cpuidle_curr_governor->name, + CPUIDLE_NAME_LEN))) cpuidle_switch_governor(gov); } mutex_unlock(&cpuidle_lock);
From: "Rafael J. Wysocki" rafael.j.wysocki@intel.com
mainline inclusion from mainline-5.1 commit 22782b3f9bb8ae21c710e2880db21bc729771e92 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
After commit ("cpuidle: Add cpuidle.governor= command line parameter") new cpuidle governors are not added to the list of available governors, so governor selection via sysfs doesn't work as expected (even though it is rarely used anyway).
Fix that by making cpuidle_register_governor() add new governors to cpuidle_governors again.
Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/governor.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index bb93e5cf6a4ae..9fddf828a76f2 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -89,6 +89,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) mutex_lock(&cpuidle_lock); if (__cpuidle_find_governor(gov->name) == NULL) { ret = 0; + list_add_tail(&gov->governor_list, &cpuidle_governors); if (!cpuidle_curr_governor || !strncasecmp(param_governor, gov->name, CPUIDLE_NAME_LEN) || (cpuidle_curr_governor->rating < gov->rating &&
From: Joao Martins joao.m.martins@oracle.com
mainline inclusion from mainline-5.4 commit cb5d8c45ab6c3daf8269e550cfb2d5018a876fe3 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
The recently introduced haltpoll driver is largely only useful with haltpoll governor. To allow drivers to associate with a particular idle behaviour, add a @governor property to 'struct cpuidle_driver' and thus allow a cpuidle driver to switch to a *preferred* governor on idle driver registration. We save the previous governor, and when an idle driver is unregistered we switch back to that.
The @governor can be overridden by cpuidle.governor= boot param or alternatively be ignored if the governor doesn't exist.
Signed-off-by: Joao Martins joao.m.martins@oracle.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle.h | 2 ++ drivers/cpuidle/driver.c | 25 +++++++++++++++++++++++++ drivers/cpuidle/governor.c | 7 ++++--- include/linux/cpuidle.h | 3 +++ 4 files changed, 34 insertions(+), 3 deletions(-)
diff --git a/drivers/cpuidle/cpuidle.h b/drivers/cpuidle/cpuidle.h index d6613101af924..9f336af17fa60 100644 --- a/drivers/cpuidle/cpuidle.h +++ b/drivers/cpuidle/cpuidle.h @@ -9,6 +9,7 @@ /* For internal use only */ extern char param_governor[]; extern struct cpuidle_governor *cpuidle_curr_governor; +extern struct cpuidle_governor *cpuidle_prev_governor; extern struct list_head cpuidle_governors; extern struct list_head cpuidle_detected_devices; extern struct mutex cpuidle_lock; @@ -22,6 +23,7 @@ extern void cpuidle_install_idle_handler(void); extern void cpuidle_uninstall_idle_handler(void);
/* governors */ +extern struct cpuidle_governor *cpuidle_find_governor(const char *str); extern int cpuidle_switch_governor(struct cpuidle_governor *gov);
/* sysfs */ diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 01acd88c41931..9db154224999c 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -253,12 +253,25 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) */ int cpuidle_register_driver(struct cpuidle_driver *drv) { + struct cpuidle_governor *gov; int ret;
spin_lock(&cpuidle_driver_lock); ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock);
+ if (!ret && !strlen(param_governor) && drv->governor && + (cpuidle_get_driver() == drv)) { + mutex_lock(&cpuidle_lock); + gov = cpuidle_find_governor(drv->governor); + if (gov) { + cpuidle_prev_governor = cpuidle_curr_governor; + if (cpuidle_switch_governor(gov) < 0) + cpuidle_prev_governor = NULL; + } + mutex_unlock(&cpuidle_lock); + } + return ret; } EXPORT_SYMBOL_GPL(cpuidle_register_driver); @@ -273,9 +286,21 @@ EXPORT_SYMBOL_GPL(cpuidle_register_driver); */ void cpuidle_unregister_driver(struct cpuidle_driver *drv) { + bool enabled = (cpuidle_get_driver() == drv); + spin_lock(&cpuidle_driver_lock); __cpuidle_unregister_driver(drv); spin_unlock(&cpuidle_driver_lock); + + if (!enabled) + return; + + mutex_lock(&cpuidle_lock); + if (cpuidle_prev_governor) { + if (!cpuidle_switch_governor(cpuidle_prev_governor)) + cpuidle_prev_governor = NULL; + } + mutex_unlock(&cpuidle_lock); } EXPORT_SYMBOL_GPL(cpuidle_unregister_driver);
diff --git a/drivers/cpuidle/governor.c b/drivers/cpuidle/governor.c index 9fddf828a76f2..349be85446256 100644 --- a/drivers/cpuidle/governor.c +++ b/drivers/cpuidle/governor.c @@ -20,14 +20,15 @@ char param_governor[CPUIDLE_NAME_LEN];
LIST_HEAD(cpuidle_governors); struct cpuidle_governor *cpuidle_curr_governor; +struct cpuidle_governor *cpuidle_prev_governor;
/** - * __cpuidle_find_governor - finds a governor of the specified name + * cpuidle_find_governor - finds a governor of the specified name * @str: the name * * Must be called with cpuidle_lock acquired. */ -static struct cpuidle_governor * __cpuidle_find_governor(const char *str) +struct cpuidle_governor *cpuidle_find_governor(const char *str) { struct cpuidle_governor *gov;
@@ -87,7 +88,7 @@ int cpuidle_register_governor(struct cpuidle_governor *gov) return -ENODEV;
mutex_lock(&cpuidle_lock); - if (__cpuidle_find_governor(gov->name) == NULL) { + if (cpuidle_find_governor(gov->name) == NULL) { ret = 0; list_add_tail(&gov->governor_list, &cpuidle_governors); if (!cpuidle_curr_governor || diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 2efaff2db8e65..43010919581ce 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -130,6 +130,9 @@ struct cpuidle_driver {
/* the driver handles the cpus in cpumask */ struct cpumask *cpumask; + + /* preferred governor to switch at register time */ + const char *governor; };
#ifdef CONFIG_CPU_IDLE
From: Joao Martins joao.m.martins@oracle.com
mainline inclusion from mainline-5.4 commit 7321440829a27d58c88b7fcfcbbc37487b5e39a5 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Right now, guest current governors have the following ratings:
* ladder -> 10 * teo -> 19 * menu -> 20 * haltpoll -> 21 * ladder + nohz=off -> 25
haltpoll governor got introduced and it is now the default governor given its highest rating -- with ladder+nohz being the exception -- regardless of idle driver in the guest. An example of an undesirable case is x86 KVM guests with MWAIT which have intel_idle registered first, and consequently will have haltpoll be used as governor which would get limited to a poll state and state 1 and the other states wouldn't get used.
To keep the previous defaults we decrease rating of governor to 9 (below current lowest rating) and thus rely on @governor switch on cpuidle_register_driver() to tie in haltpoll idle driver and governor together.
Signed-off-by: Joao Martins joao.m.martins@oracle.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle-haltpoll.c | 1 + drivers/cpuidle/governors/haltpoll.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 9ac093dcbb01f..7d9055382e330 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -30,6 +30,7 @@ static int default_enter_idle(struct cpuidle_device *dev,
static struct cpuidle_driver haltpoll_driver = { .name = "haltpoll", + .governor = "haltpoll", .owner = THIS_MODULE, .states = { { /* entry 0 is for polling */ }, diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 797477bda4866..7a703d2e00640 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -133,7 +133,7 @@ static int haltpoll_enable_device(struct cpuidle_driver *drv,
static struct cpuidle_governor haltpoll_governor = { .name = "haltpoll", - .rating = 21, + .rating = 9, .enable = haltpoll_enable_device, .select = haltpoll_select, .reflect = haltpoll_reflect,
From: Joao Martins joao.m.martins@oracle.com
mainline inclusion from mainline-5.4 commit 97d3eb9da84cae0548359b0aecb8619faad003b7 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
When cpus != maxcpus cpuidle-haltpoll will fail to register all vcpus past the online ones and thus fail to register the idle driver. This is because cpuidle_add_sysfs() will return with -ENODEV as a consequence from get_cpu_device() return no device for a non-existing CPU.
Instead switch to cpuidle_register_driver() and manually register each of the present cpus through cpuhp_setup_state() callbacks and future ones that get onlined or offlined. This mimmics similar logic that intel_idle does.
Signed-off-by: Joao Martins joao.m.martins@oracle.com Signed-off-by: Boris Ostrovsky boris.ostrovsky@oracle.com Reviewed-by: Marcelo Tosatti mtosatti@redhat.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/include/asm/cpuidle_haltpoll.h | 4 +- arch/x86/kernel/kvm.c | 14 ++--- drivers/cpuidle/cpuidle-haltpoll.c | 68 +++++++++++++++++++++++-- include/linux/cpuidle_haltpoll.h | 4 +- 4 files changed, 71 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/cpuidle_haltpoll.h b/arch/x86/include/asm/cpuidle_haltpoll.h index ff8607d815261..c8b39c6716ff1 100644 --- a/arch/x86/include/asm/cpuidle_haltpoll.h +++ b/arch/x86/include/asm/cpuidle_haltpoll.h @@ -2,7 +2,7 @@ #ifndef _ARCH_HALTPOLL_H #define _ARCH_HALTPOLL_H
-void arch_haltpoll_enable(void); -void arch_haltpoll_disable(void); +void arch_haltpoll_enable(unsigned int cpu); +void arch_haltpoll_disable(unsigned int cpu);
#endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 40f40bd08ab73..cafd1a3faa04f 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -879,7 +879,7 @@ static void kvm_enable_host_haltpoll(void *i) wrmsrl(MSR_KVM_POLL_CONTROL, 1); }
-void arch_haltpoll_enable(void) +void arch_haltpoll_enable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) { pr_err_once("kvm: host does not support poll control\n"); @@ -887,24 +887,18 @@ void arch_haltpoll_enable(void) return; }
- preempt_disable(); /* Enable guest halt poll disables host halt poll */ - kvm_disable_host_haltpoll(NULL); - smp_call_function(kvm_disable_host_haltpoll, NULL, 1); - preempt_enable(); + smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
-void arch_haltpoll_disable(void) +void arch_haltpoll_disable(unsigned int cpu) { if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) return;
- preempt_disable(); /* Enable guest halt poll disables host halt poll */ - kvm_enable_host_haltpoll(NULL); - smp_call_function(kvm_enable_host_haltpoll, NULL, 1); - preempt_enable(); + smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1); } EXPORT_SYMBOL_GPL(arch_haltpoll_disable); #endif diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 7d9055382e330..519e90d125cfc 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -11,12 +11,16 @@ */
#include <linux/init.h> +#include <linux/cpu.h> #include <linux/cpuidle.h> #include <linux/module.h> #include <linux/sched/idle.h> #include <linux/kvm_para.h> #include <linux/cpuidle_haltpoll.h>
+static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; +static enum cpuhp_state haltpoll_hp_state; + static int default_enter_idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { @@ -47,6 +51,46 @@ static struct cpuidle_driver haltpoll_driver = { .state_count = 2, };
+static int haltpoll_cpu_online(unsigned int cpu) +{ + struct cpuidle_device *dev; + + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + if (!dev->registered) { + dev->cpu = cpu; + if (cpuidle_register_device(dev)) { + pr_notice("cpuidle_register_device %d failed!\n", cpu); + return -EIO; + } + arch_haltpoll_enable(cpu); + } + + return 0; +} + +static int haltpoll_cpu_offline(unsigned int cpu) +{ + struct cpuidle_device *dev; + + dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + if (dev->registered) { + arch_haltpoll_disable(cpu); + cpuidle_unregister_device(dev); + } + + return 0; +} + +static void haltpoll_uninit(void) +{ + if (haltpoll_hp_state) + cpuhp_remove_state(haltpoll_hp_state); + cpuidle_unregister_driver(&haltpoll_driver); + + free_percpu(haltpoll_cpuidle_devices); + haltpoll_cpuidle_devices = NULL; +} + static int __init haltpoll_init(void) { int ret; @@ -57,17 +101,31 @@ static int __init haltpoll_init(void) if (!kvm_para_available()) return 0;
- ret = cpuidle_register(&haltpoll_driver, NULL); - if (ret == 0) - arch_haltpoll_enable(); + ret = cpuidle_register_driver(drv); + if (ret < 0) + return ret; + + haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device); + if (haltpoll_cpuidle_devices == NULL) { + cpuidle_unregister_driver(drv); + return -ENOMEM; + } + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "cpuidle/haltpoll:online", + haltpoll_cpu_online, haltpoll_cpu_offline); + if (ret < 0) { + haltpoll_uninit(); + } else { + haltpoll_hp_state = ret; + ret = 0; + }
return ret; }
static void __exit haltpoll_exit(void) { - arch_haltpoll_disable(); - cpuidle_unregister(&haltpoll_driver); + haltpoll_uninit(); }
module_init(haltpoll_init); diff --git a/include/linux/cpuidle_haltpoll.h b/include/linux/cpuidle_haltpoll.h index fe5954c2409e6..d50c1e0411a2d 100644 --- a/include/linux/cpuidle_haltpoll.h +++ b/include/linux/cpuidle_haltpoll.h @@ -5,11 +5,11 @@ #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL #include <asm/cpuidle_haltpoll.h> #else -static inline void arch_haltpoll_enable(void) +static inline void arch_haltpoll_enable(unsigned int cpu) { }
-static inline void arch_haltpoll_disable(void) +static inline void arch_haltpoll_disable(unsigned int cpu) { } #endif
From: Joao Martins joao.m.martins@oracle.com
mainline inclusion from mainline-5.4 commit 5cc59f597c0666c6a7e1c67aac9063895949fd56 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
When a user loads cpuidle-haltpoll on a non KVM guest the module will successfully load, even though idle driver registration didn't take place.
We should instead return -ENODEV signaling the user that the driver can't be loaded, like other error paths in haltpoll_init(). An example of such error paths is when we return -EBUSY when attempting to register an idle driver when it had one already (e.g. intel_idle loads at boot and then we attempt to insert module cpuidle-haltpoll).
Signed-off-by: Joao Martins joao.m.martins@oracle.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle-haltpoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 519e90d125cfc..7a0239ef717e9 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -99,7 +99,7 @@ static int __init haltpoll_init(void) cpuidle_poll_state_init(drv);
if (!kvm_para_available()) - return 0; + return -ENODEV;
ret = cpuidle_register_driver(drv); if (ret < 0)
From: Joao Martins joao.m.martins@oracle.com
mainline inclusion from mainline-5.4 commit 472f263660832b90e53bede2020f68cd14f8b76c category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
cpuidle-haltpoll can be built as a module to allow optional late load. Given we are setting @owner to THIS_MODULE, cpuidle will attempt to grab a module reference every time a cpuidle_device is registered -- so essentially all online cpus get a reference.
This prevents for the module to be unloaded later, which makes the module_exit callback entirely unused. Thus remove the @owner and allow module to be unloaded.
Signed-off-by: Joao Martins joao.m.martins@oracle.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle-haltpoll.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 7a0239ef717e9..49a65c6fe91e3 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -35,7 +35,6 @@ static int default_enter_idle(struct cpuidle_device *dev, static struct cpuidle_driver haltpoll_driver = { .name = "haltpoll", .governor = "haltpoll", - .owner = THIS_MODULE, .states = { { /* entry 0 is for polling */ }, {
From: Wanpeng Li wanpengli@tencent.com
mainline inclusion from mainline-5.4 commit 1328edca4a142ee3c7442d1eece2c3ca383eff35 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
The downside of guest side polling is that polling is performed even with other runnable tasks in the host. However, even if poll in kvm can aware whether or not other runnable tasks in the same pCPU, it can still incur extra overhead in over-subscribe scenario. Now we can just enable guest polling when dedicated pCPUs are available.
Signed-off-by: Wanpeng Li wanpengli@tencent.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/kvm.c | 1 + drivers/cpuidle/cpuidle-haltpoll.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index cafd1a3faa04f..9c52eec69bfc6 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -703,6 +703,7 @@ unsigned int kvm_arch_para_hints(void) { return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES); } +EXPORT_SYMBOL_GPL(kvm_arch_para_hints);
static uint32_t __init kvm_detect(void) { diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 49a65c6fe91e3..932390b028f1d 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -97,7 +97,8 @@ static int __init haltpoll_init(void)
cpuidle_poll_state_init(drv);
- if (!kvm_para_available()) + if (!kvm_para_available() || + !kvm_para_has_hint(KVM_HINTS_REALTIME)) return -ENODEV;
ret = cpuidle_register_driver(drv);
From: "Maciej S. Szmigiero" maciej.szmigiero@oracle.com
mainline inclusion from mainline-5.7 commit dd52551fb786aa7371bf9fe922573deafc26287e category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Before commit ("cpuidle-haltpoll: Enable kvm guest polling when dedicated physical CPUs are available") the cpuidle-haltpoll driver could also be used in scenarios when the host does not advertise the KVM_HINTS_REALTIME hint.
While the behavior introduced by the aforementioned commit makes sense as the default there are cases where the old behavior is desired, for example, when other kernel changes triggered by presence by this hint are unwanted, for some workloads where the latency benefit from polling overweights the loss from idle CPU capacity that otherwise would be available, or just when running under older Qemu versions that lack this hint.
Let's provide a typical "force" module parameter that allows restoring the old behavior.
Signed-off-by: Maciej S. Szmigiero maciej.szmigiero@oracle.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle-haltpoll.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 932390b028f1d..0583cd80d3520 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -18,6 +18,10 @@ #include <linux/kvm_para.h> #include <linux/cpuidle_haltpoll.h>
+static bool force __read_mostly; +module_param(force, bool, 0444); +MODULE_PARM_DESC(force, "Load unconditionally"); + static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; static enum cpuhp_state haltpoll_hp_state;
@@ -90,6 +94,11 @@ static void haltpoll_uninit(void) haltpoll_cpuidle_devices = NULL; }
+static bool haltpool_want(void) +{ + return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; +} + static int __init haltpoll_init(void) { int ret; @@ -97,8 +106,7 @@ static int __init haltpoll_init(void)
cpuidle_poll_state_init(drv);
- if (!kvm_para_available() || - !kvm_para_has_hint(KVM_HINTS_REALTIME)) + if (!kvm_para_available() || !haltpool_want()) return -ENODEV;
ret = cpuidle_register_driver(drv);
From: Yihao Wu wuyihao@linux.alibaba.com
mainline inclusion from mainline-5.7 commit 4902f7fcb3bcb4ce088db97bfd194401a784cc60 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Fix a spelling typo in cpuidle-haltpoll.c.
Signed-off-by: Yihao Wu wuyihao@linux.alibaba.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle-haltpoll.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 0583cd80d3520..d007c2f650bc5 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -94,7 +94,7 @@ static void haltpoll_uninit(void) haltpoll_cpuidle_devices = NULL; }
-static bool haltpool_want(void) +static bool haltpoll_want(void) { return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; } @@ -106,7 +106,7 @@ static int __init haltpoll_init(void)
cpuidle_poll_state_init(drv);
- if (!kvm_para_available() || !haltpool_want()) + if (!kvm_para_available() || !haltpoll_want()) return -ENODEV;
ret = cpuidle_register_driver(drv);
From: Christian Borntraeger borntraeger@de.ibm.com
mainline inclusion from mainline-5.2 commit cdd6ad3ac63d2fa320baefcf92a02a918375c30f category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
There are cases where halt polling is unwanted. For example when running KVM on an over committed LPAR we rather want to give back the CPU to neighbour LPARs instead of polling. Let us provide a callback that allows architectures to disable polling.
Signed-off-by: Christian Borntraeger borntraeger@de.ibm.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/Kconfig | 3 +++ virt/kvm/kvm_main.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b9443fedf24eb..27e31d972dfb5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1320,6 +1320,16 @@ static inline bool vcpu_valid_wakeup(struct kvm_vcpu *vcpu) } #endif /* CONFIG_HAVE_KVM_INVALID_WAKEUPS */
+#ifdef CONFIG_HAVE_KVM_NO_POLL +/* Callback that tells if we must not poll */ +bool kvm_arch_no_poll(struct kvm_vcpu *vcpu); +#else +static inline bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) +{ + return false; +} +#endif /* CONFIG_HAVE_KVM_NO_POLL */ + #ifdef CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index ea434ddc84992..aad9284c043a0 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -57,3 +57,6 @@ config HAVE_KVM_VCPU_ASYNC_IOCTL
config HAVE_KVM_VCPU_RUN_PID_CHANGE bool + +config HAVE_KVM_NO_POLL + bool diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ff1d6ba8e95f2..20c53b5cf4837 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2219,7 +2219,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) kvm_arch_vcpu_blocking(vcpu);
start = cur = ktime_get(); - if (vcpu->halt_poll_ns) { + if (vcpu->halt_poll_ns && !kvm_arch_no_poll(vcpu)) { ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
++vcpu->stat.halt_attempted_poll;
From: Xiangyou Xie xiexiangyou@huawei.com
hulk inclusion category: config bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: #232
We enable haltpoll by default for the improvement of performance.
Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hailiang Zhang zhang.zhanghailiang@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/configs/openeuler_defconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f8f9fb59c300d..28465599b2bf7 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -576,6 +576,8 @@ CONFIG_X86_SPEEDSTEP_LIB=m CONFIG_CPU_IDLE=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y +CONFIG_CPU_IDLE_GOV_HALTPOLL=y +CONFIG_HALTPOLL_CPUIDLE=y CONFIG_INTEL_IDLE=y
#
From: Xiangyou Xie xiexiangyou@huawei.com
hulk inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
When it is to wake up a task in a remote cpu shared LLC , we can simply set need_resched flag, waking up a cpu that is in polling idle. This wakeup action does not require an IPI.
But the premise is that it need to support _TIF_POLLING_NRFLAG
Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/thread_info.h | 3 +++ 1 file changed, 3 insertions(+)
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index df5fca3c976ef..d0e638f5e3ecb 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -69,6 +69,7 @@ void arch_release_task_struct(struct task_struct *tsk); * TIF_NEED_RESCHED - rescheduling necessary * TIF_NOTIFY_RESUME - callback before returning to user * TIF_USEDFPU - FPU was used by this task this quantum (SMP) + * TIF_POLLING_NRFLAG - idle is polling for TIF_NEED_RESCHED */ #define TIF_SIGPENDING 0 #define TIF_NEED_RESCHED 1 @@ -82,6 +83,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 #define TIF_SECCOMP 11 +#define TIF_POLLING_NRFLAG 16 #define TIF_MEMDIE 18 /* is terminating due to OOM killer */ #define TIF_FREEZE 19 #define TIF_RESTORE_SIGMASK 20 @@ -107,6 +109,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define _TIF_32BIT (1 << TIF_32BIT) #define _TIF_SVE (1 << TIF_SVE) #define _TIF_32BIT_AARCH64 (1 << TIF_32BIT_AARCH64) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_WORK_MASK (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \ _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE | \
From: Xiangyou Xie xiexiangyou@huawei.com
hulk inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Use arch_cpu_idle() to replace default_idle() in default_enter_idle(). default_idle() is defined only in x86.
Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/x86/kernel/process.c | 1 + drivers/cpuidle/cpuidle-haltpoll.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index aabd8cd9d5e34..1de41328b277f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -559,6 +559,7 @@ void __cpuidle default_idle(void) } #if defined(CONFIG_APM_MODULE) || defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) EXPORT_SYMBOL(default_idle); +EXPORT_SYMBOL(arch_cpu_idle); #endif
#ifdef CONFIG_XEN diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index d007c2f650bc5..8b1e6b0c53628 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -32,7 +32,7 @@ static int default_enter_idle(struct cpuidle_device *dev, local_irq_enable(); return index; } - default_idle(); + arch_cpu_idle(); return index; }
From: Xiangyou Xie xiexiangyou@huawei.com
hulk inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Currently, ARM does not support kvm_para* of KVM_GUEST. We provide some definitions of kvm_para* functions, although it is only a simple return.
Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/kvm_para.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 arch/arm64/include/asm/kvm_para.h
diff --git a/arch/arm64/include/asm/kvm_para.h b/arch/arm64/include/asm/kvm_para.h new file mode 100644 index 0000000000000..e1ecc089ee9b8 --- /dev/null +++ b/arch/arm64/include/asm/kvm_para.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_ARM64_KVM_PARA_H +#define _ASM_ARM64_KVM_PARA_H + +#define KVM_HINTS_REALTIME 0 + +static inline bool kvm_check_and_clear_guest_paused(void) +{ + return false; +} + +static inline bool kvm_para_available(void) +{ + return false; +} + +static inline unsigned int kvm_arch_para_features(void) +{ + return 0; +} + +static inline unsigned int kvm_arch_para_hints(void) +{ + return 0; +} + +#endif /* _ASM_ARM64_KVM_PARA_H */
From: Xiangyou Xie xiexiangyou@huawei.com
hulk inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Add support for cpuidle-haltpoll driver for ARM. Allow arm to use the couidle-haltpoll driver.
Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Signed-off-by: Yubo Miao miaoyubo@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/Kconfig | 3 +++ arch/arm64/kernel/process.c | 4 ++++ drivers/cpuidle/Kconfig | 4 ++-- drivers/cpuidle/cpuidle-haltpoll.c | 4 ++-- drivers/cpuidle/governors/haltpoll.c | 6 +++++- drivers/cpuidle/poll_state.c | 3 +++ 6 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 02ea3bc5c1d34..3249528efba27 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -288,6 +288,9 @@ config ARCH_SUPPORTS_UPROBES config ARCH_PROC_KCORE_TEXT def_bool y
+config ARCH_HAS_CPU_RELAX + def_bool y + source "arch/arm64/Kconfig.platforms"
source "kernel/livepatch/Kconfig" diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index ddb1911a2cabe..406744566cbed 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -133,6 +133,10 @@ void arch_cpu_idle(void) trace_cpu_idle_rcuidle(PWR_EVENT_EXIT, smp_processor_id()); }
+#if defined(CONFIG_HALTPOLL_CPUIDLE_MODULE) +EXPORT_SYMBOL(arch_cpu_idle); +#endif + #ifdef CONFIG_HOTPLUG_CPU void arch_cpu_idle_dead(void) { diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 25516996bd950..b379635f19367 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -25,7 +25,7 @@ config CPU_IDLE_GOV_MENU
config CPU_IDLE_GOV_HALTPOLL bool "Haltpoll governor (for virtualized systems)" - depends on KVM_GUEST + depends on KVM_GUEST || ARM64 help This governor implements haltpoll idle state selection, to be used in conjunction with the haltpoll cpuidle driver, allowing @@ -54,7 +54,7 @@ endmenu
config HALTPOLL_CPUIDLE tristate "Halt poll cpuidle driver" - depends on X86 && KVM_GUEST + depends on (X86 && KVM_GUEST) || ARM64 default y help This option enables halt poll cpuidle driver, which allows to poll diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 8b1e6b0c53628..926a6bcfed9bd 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -96,7 +96,7 @@ static void haltpoll_uninit(void)
static bool haltpoll_want(void) { - return kvm_para_has_hint(KVM_HINTS_REALTIME) || force; + return kvm_para_has_hint(KVM_HINTS_REALTIME); }
static int __init haltpoll_init(void) @@ -106,7 +106,7 @@ static int __init haltpoll_init(void)
cpuidle_poll_state_init(drv);
- if (!kvm_para_available() || !haltpoll_want()) + if (!force && (!kvm_para_available() || !haltpoll_want())) return -ENODEV;
ret = cpuidle_register_driver(drv); diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index 7a703d2e00640..ce550ab98e665 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -39,6 +39,10 @@ module_param(guest_halt_poll_grow_start, uint, 0644); static bool guest_halt_poll_allow_shrink __read_mostly = true; module_param(guest_halt_poll_allow_shrink, bool, 0644);
+static bool enable __read_mostly; +module_param(enable, bool, 0444); +MODULE_PARM_DESC(enable, "Load unconditionally"); + /** * haltpoll_select - selects the next idle state to enter * @drv: cpuidle driver containing state data @@ -141,7 +145,7 @@ static struct cpuidle_governor haltpoll_governor = {
static int __init init_haltpoll(void) { - if (kvm_para_available()) + if (kvm_para_available() || enable) return cpuidle_register_governor(&haltpoll_governor);
return 0; diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c index c8459fdb28e8e..aa9842b1f1cc2 100644 --- a/drivers/cpuidle/poll_state.c +++ b/drivers/cpuidle/poll_state.c @@ -8,6 +8,9 @@ #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/sched/idle.h> +#ifdef CONFIG_ARM64 +#include <linux/cpu.h> +#endif
#define POLL_IDLE_RELAX_COUNT 200
From: Xiangyou Xie xiexiangyou@huawei.com
hulk inclusion category: config bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
We enable haltpoll by default for the improvement of performance. X86 has been supported. Now, we will provide it on ARM.
Signed-off-by: Xiangyou Xie xiexiangyou@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Reviewed-by: zhanghailiang zhang.zhanghailiang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/configs/euleros_defconfig | 2 ++ arch/arm64/configs/hulk_defconfig | 2 ++ arch/arm64/configs/openeuler_defconfig | 4 +++- 3 files changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/configs/euleros_defconfig b/arch/arm64/configs/euleros_defconfig index 58252a7bd8cae..b88b5ceffc2bc 100644 --- a/arch/arm64/configs/euleros_defconfig +++ b/arch/arm64/configs/euleros_defconfig @@ -524,11 +524,13 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_CPU_IDLE=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y +# CONFIG_CPU_IDLE_GOV_HALTPOLL is not set
# # ARM CPU Idle Drivers # # CONFIG_ARM_CPUIDLE is not set +# CONFIG_HALTPOLL_CPUIDLE is not set
# # CPU Frequency scaling diff --git a/arch/arm64/configs/hulk_defconfig b/arch/arm64/configs/hulk_defconfig index 9a3db8a88170e..969a41b20f660 100644 --- a/arch/arm64/configs/hulk_defconfig +++ b/arch/arm64/configs/hulk_defconfig @@ -537,11 +537,13 @@ CONFIG_CPU_IDLE_MULTIPLE_DRIVERS=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y CONFIG_DT_IDLE_STATES=y +# CONFIG_CPU_IDLE_GOV_HALTPOLL is not set
# # ARM CPU Idle Drivers # CONFIG_ARM_CPUIDLE=y +# CONFIG_HALTPOLL_CPUIDLE is not set
# # CPU Frequency scaling diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 34b34fa103d0a..6e1fd892e4226 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -530,11 +530,13 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y CONFIG_CPU_IDLE=y # CONFIG_CPU_IDLE_GOV_LADDER is not set CONFIG_CPU_IDLE_GOV_MENU=y +CONFIG_CPU_IDLE_GOV_HALTPOLL=y
# # ARM CPU Idle Drivers # -# CONFIG_ARM_CPUIDLE is not set +CONFIG_ARM_CPUIDLE=y +CONFIG_HALTPOLL_CPUIDLE=y
# # CPU Frequency scaling
From: chenjiajun chenjiajun8@huawei.com
euleros inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=34 CVE: NA
Since we add poll_limit_ns in struct cpuidle_device, governor in struct cpuidle_driver which broken the kabi. So we wrapper to fix kabi broken.
Signed-off-by: Jiajun Chen chenjiajun8@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/cpuidle/cpuidle-haltpoll.c | 53 ++++++++++++++++------------ drivers/cpuidle/cpuidle.c | 10 +++--- drivers/cpuidle/driver.c | 6 ++-- drivers/cpuidle/governors/haltpoll.c | 27 +++++++++----- drivers/cpuidle/governors/ladder.c | 21 +++++------ drivers/cpuidle/governors/menu.c | 5 +-- drivers/cpuidle/sysfs.c | 4 ++- include/linux/cpuidle.h | 13 +++++-- 8 files changed, 85 insertions(+), 54 deletions(-)
diff --git a/drivers/cpuidle/cpuidle-haltpoll.c b/drivers/cpuidle/cpuidle-haltpoll.c index 926a6bcfed9bd..fc351f093ce1a 100644 --- a/drivers/cpuidle/cpuidle-haltpoll.c +++ b/drivers/cpuidle/cpuidle-haltpoll.c @@ -22,7 +22,7 @@ static bool force __read_mostly; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Load unconditionally");
-static struct cpuidle_device __percpu *haltpoll_cpuidle_devices; +static struct cpuidle_device_wrapper __percpu *haltpoll_cpuidle_dev_wrap; static enum cpuhp_state haltpoll_hp_state;
static int default_enter_idle(struct cpuidle_device *dev, @@ -36,29 +36,34 @@ static int default_enter_idle(struct cpuidle_device *dev, return index; }
-static struct cpuidle_driver haltpoll_driver = { - .name = "haltpoll", - .governor = "haltpoll", - .states = { - { /* entry 0 is for polling */ }, - { - .enter = default_enter_idle, - .exit_latency = 1, - .target_residency = 1, - .power_usage = -1, - .name = "haltpoll idle", - .desc = "default architecture idle", +static struct cpuidle_driver_wrapper haltpoll_driver_wrapper = { + .drv = { + .name = "haltpoll", + .states = { + { /* entry 0 is for polling */ }, + { + .enter = default_enter_idle, + .exit_latency = 1, + .target_residency = 1, + .power_usage = -1, + .name = "haltpoll idle", + .desc = + "default architecture idle", + }, }, + .safe_state_index = 0, + .state_count = 2, }, - .safe_state_index = 0, - .state_count = 2, + .governor = "haltpoll", };
static int haltpoll_cpu_online(unsigned int cpu) { struct cpuidle_device *dev; + struct cpuidle_device_wrapper *devw;
- dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + devw = per_cpu_ptr(haltpoll_cpuidle_dev_wrap, cpu); + dev = &(devw->dev); if (!dev->registered) { dev->cpu = cpu; if (cpuidle_register_device(dev)) { @@ -74,8 +79,10 @@ static int haltpoll_cpu_online(unsigned int cpu) static int haltpoll_cpu_offline(unsigned int cpu) { struct cpuidle_device *dev; + struct cpuidle_device_wrapper *devw;
- dev = per_cpu_ptr(haltpoll_cpuidle_devices, cpu); + devw = per_cpu_ptr(haltpoll_cpuidle_dev_wrap, cpu); + dev = &(devw->dev); if (dev->registered) { arch_haltpoll_disable(cpu); cpuidle_unregister_device(dev); @@ -88,10 +95,10 @@ static void haltpoll_uninit(void) { if (haltpoll_hp_state) cpuhp_remove_state(haltpoll_hp_state); - cpuidle_unregister_driver(&haltpoll_driver); + cpuidle_unregister_driver(&(haltpoll_driver_wrapper.drv));
- free_percpu(haltpoll_cpuidle_devices); - haltpoll_cpuidle_devices = NULL; + free_percpu(haltpoll_cpuidle_dev_wrap); + haltpoll_cpuidle_dev_wrap = NULL; }
static bool haltpoll_want(void) @@ -102,7 +109,7 @@ static bool haltpoll_want(void) static int __init haltpoll_init(void) { int ret; - struct cpuidle_driver *drv = &haltpoll_driver; + struct cpuidle_driver *drv = &(haltpoll_driver_wrapper.drv);
cpuidle_poll_state_init(drv);
@@ -113,8 +120,8 @@ static int __init haltpoll_init(void) if (ret < 0) return ret;
- haltpoll_cpuidle_devices = alloc_percpu(struct cpuidle_device); - if (haltpoll_cpuidle_devices == NULL) { + haltpoll_cpuidle_dev_wrap = alloc_percpu(struct cpuidle_device_wrapper); + if (haltpoll_cpuidle_dev_wrap == NULL) { cpuidle_unregister_driver(drv); return -ENOMEM; } diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 55eb3d152521b..b7a7125f1cded 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -330,11 +330,13 @@ void cpuidle_reflect(struct cpuidle_device *dev, int index) u64 cpuidle_poll_time(struct cpuidle_driver *drv, struct cpuidle_device *dev) { + struct cpuidle_device_wrapper *devw = + container_of(dev, struct cpuidle_device_wrapper, dev); int i; u64 limit_ns;
- if (dev->poll_limit_ns) - return dev->poll_limit_ns; + if (devw->poll_limit_ns) + return devw->poll_limit_ns;
limit_ns = TICK_NSEC; for (i = 1; i < drv->state_count; i++) { @@ -346,9 +348,9 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv, break; }
- dev->poll_limit_ns = limit_ns; + devw->poll_limit_ns = limit_ns;
- return dev->poll_limit_ns; + return devw->poll_limit_ns; }
/** diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index 9db154224999c..47930a0ecb0f1 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -254,16 +254,18 @@ static void __cpuidle_unregister_driver(struct cpuidle_driver *drv) int cpuidle_register_driver(struct cpuidle_driver *drv) { struct cpuidle_governor *gov; + struct cpuidle_driver_wrapper *drvw; int ret;
spin_lock(&cpuidle_driver_lock); ret = __cpuidle_register_driver(drv); spin_unlock(&cpuidle_driver_lock); + drvw = container_of(drv, struct cpuidle_driver_wrapper, drv);
- if (!ret && !strlen(param_governor) && drv->governor && + if (!ret && !strlen(param_governor) && drvw->governor && (cpuidle_get_driver() == drv)) { mutex_lock(&cpuidle_lock); - gov = cpuidle_find_governor(drv->governor); + gov = cpuidle_find_governor(drvw->governor); if (gov) { cpuidle_prev_governor = cpuidle_curr_governor; if (cpuidle_switch_governor(gov) < 0) diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c index ce550ab98e665..3490ba7de1415 100644 --- a/drivers/cpuidle/governors/haltpoll.c +++ b/drivers/cpuidle/governors/haltpoll.c @@ -53,6 +53,8 @@ static int haltpoll_select(struct cpuidle_driver *drv, struct cpuidle_device *dev, bool *stop_tick) { + struct cpuidle_device_wrapper *devw = + container_of(dev, struct cpuidle_device_wrapper, dev); int latency_req = cpuidle_governor_latency_req(dev->cpu);
if (!drv->state_count || latency_req == 0) { @@ -60,11 +62,11 @@ static int haltpoll_select(struct cpuidle_driver *drv, return 0; }
- if (dev->poll_limit_ns == 0) + if (devw->poll_limit_ns == 0) return 1;
/* Last state was poll? */ - if (dev->last_state_idx == 0) { + if (devw->last_state_idx == 0) { /* Halt if no event occurred on poll window */ if (dev->poll_time_limit == true) return 1; @@ -81,31 +83,33 @@ static int haltpoll_select(struct cpuidle_driver *drv,
static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) { + struct cpuidle_device_wrapper *devw = + container_of(dev, struct cpuidle_device_wrapper, dev); unsigned int val; u64 block_ns = block_us*NSEC_PER_USEC;
/* Grow cpu_halt_poll_us if * cpu_halt_poll_us < block_ns < guest_halt_poll_us */ - if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) { - val = dev->poll_limit_ns * guest_halt_poll_grow; + if (block_ns > devw->poll_limit_ns && block_ns <= guest_halt_poll_ns) { + val = devw->poll_limit_ns * guest_halt_poll_grow;
if (val < guest_halt_poll_grow_start) val = guest_halt_poll_grow_start; if (val > guest_halt_poll_ns) val = guest_halt_poll_ns;
- dev->poll_limit_ns = val; + devw->poll_limit_ns = val; } else if (block_ns > guest_halt_poll_ns && guest_halt_poll_allow_shrink) { unsigned int shrink = guest_halt_poll_shrink;
- val = dev->poll_limit_ns; + val = devw->poll_limit_ns; if (shrink == 0) val = 0; else val /= shrink; - dev->poll_limit_ns = val; + devw->poll_limit_ns = val; } }
@@ -116,7 +120,9 @@ static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) */ static void haltpoll_reflect(struct cpuidle_device *dev, int index) { - dev->last_state_idx = index; + struct cpuidle_device_wrapper *devw = + container_of(dev, struct cpuidle_device_wrapper, dev); + devw->last_state_idx = index;
if (index != 0) adjust_poll_limit(dev, dev->last_residency); @@ -130,7 +136,10 @@ static void haltpoll_reflect(struct cpuidle_device *dev, int index) static int haltpoll_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { - dev->poll_limit_ns = 0; + struct cpuidle_device_wrapper *devw = + container_of(dev, struct cpuidle_device_wrapper, dev); + + devw->poll_limit_ns = 0;
return 0; } diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index ecf858e97648e..704880a6612a7 100644 --- a/drivers/cpuidle/governors/ladder.c +++ b/drivers/cpuidle/governors/ladder.c @@ -38,6 +38,7 @@ struct ladder_device_state {
struct ladder_device { struct ladder_device_state states[CPUIDLE_STATE_MAX]; + int last_state_idx; };
static DEFINE_PER_CPU(struct ladder_device, ladder_devices); @@ -48,13 +49,12 @@ static DEFINE_PER_CPU(struct ladder_device, ladder_devices); * @old_idx: the current state index * @new_idx: the new target state index */ -static inline void ladder_do_selection(struct cpuidle_device *dev, - struct ladder_device *ldev, +static inline void ladder_do_selection(struct ladder_device *ldev, int old_idx, int new_idx) { ldev->states[old_idx].stats.promotion_count = 0; ldev->states[old_idx].stats.demotion_count = 0; - dev->last_state_idx = new_idx; + ldev->last_state_idx = new_idx; }
/** @@ -68,13 +68,13 @@ static int ladder_select_state(struct cpuidle_driver *drv, { struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); struct ladder_device_state *last_state; - int last_residency, last_idx = dev->last_state_idx; + int last_residency, last_idx = ldev->last_state_idx; int first_idx = drv->states[0].flags & CPUIDLE_FLAG_POLLING ? 1 : 0; int latency_req = cpuidle_governor_latency_req(dev->cpu);
/* Special case when user has set very strict latency requirement */ if (unlikely(latency_req == 0)) { - ladder_do_selection(dev, ldev, last_idx, 0); + ladder_do_selection(ldev, last_idx, 0); return 0; }
@@ -91,7 +91,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.promotion_count++; last_state->stats.demotion_count = 0; if (last_state->stats.promotion_count >= last_state->threshold.promotion_count) { - ladder_do_selection(dev, ldev, last_idx, last_idx + 1); + ladder_do_selection(ldev, last_idx, last_idx + 1); return last_idx + 1; } } @@ -107,7 +107,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, if (drv->states[i].exit_latency <= latency_req) break; } - ladder_do_selection(dev, ldev, last_idx, i); + ladder_do_selection(ldev, last_idx, i); return i; }
@@ -116,7 +116,7 @@ static int ladder_select_state(struct cpuidle_driver *drv, last_state->stats.demotion_count++; last_state->stats.promotion_count = 0; if (last_state->stats.demotion_count >= last_state->threshold.demotion_count) { - ladder_do_selection(dev, ldev, last_idx, last_idx - 1); + ladder_do_selection(ldev, last_idx, last_idx - 1); return last_idx - 1; } } @@ -139,7 +139,7 @@ static int ladder_enable_device(struct cpuidle_driver *drv, struct ladder_device_state *lstate; struct cpuidle_state *state;
- dev->last_state_idx = first_idx; + ldev->last_state_idx = first_idx;
for (i = first_idx; i < drv->state_count; i++) { state = &drv->states[i]; @@ -167,8 +167,9 @@ static int ladder_enable_device(struct cpuidle_driver *drv, */ static void ladder_reflect(struct cpuidle_device *dev, int index) { + struct ladder_device *ldev = this_cpu_ptr(&ladder_devices); if (index > 0) - dev->last_state_idx = index; + ldev->last_state_idx = index; }
static struct cpuidle_governor ladder_governor = { diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index c447014d1d727..3a48e4e1c175d 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -119,6 +119,7 @@ */
struct menu_device { + int last_state_idx; int needs_update; int tick_wakeup;
@@ -464,7 +465,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) { struct menu_device *data = this_cpu_ptr(&menu_devices);
- dev->last_state_idx = index; + data->last_state_idx = index; data->needs_update = 1; data->tick_wakeup = tick_nohz_idle_got_tick(); } @@ -477,7 +478,7 @@ static void menu_reflect(struct cpuidle_device *dev, int index) static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = this_cpu_ptr(&menu_devices); - int last_idx = dev->last_state_idx; + int last_idx = data->last_state_idx; struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; unsigned int new_factor; diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 6799a938f536d..f966a343daa71 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -411,12 +411,14 @@ static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, struct cpuidle_state_usage *state_usage = kobj_to_state_usage(kobj); struct cpuidle_state_attr *cattr = attr_to_stateattr(attr); struct cpuidle_device *dev = kobj_to_device(kobj); + struct cpuidle_device_wrapper *devw = + container_of(dev, struct cpuidle_device_wrapper, dev);
if (cattr->store) ret = cattr->store(state, state_usage, buf, size);
/* reset poll time cache */ - dev->poll_limit_ns = 0; + devw->poll_limit_ns = 0;
return ret; } diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 43010919581ce..364e28be3155f 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -84,9 +84,7 @@ struct cpuidle_device { unsigned int poll_time_limit:1; unsigned int cpu;
- int last_state_idx; int last_residency; - u64 poll_limit_ns; struct cpuidle_state_usage states_usage[CPUIDLE_STATE_MAX]; struct cpuidle_state_kobj *kobjs[CPUIDLE_STATE_MAX]; struct cpuidle_driver_kobj *kobj_driver; @@ -99,6 +97,12 @@ struct cpuidle_device { #endif };
+struct cpuidle_device_wrapper { + struct cpuidle_device dev; + int last_state_idx; + u64 poll_limit_ns; +}; + DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices); DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
@@ -130,9 +134,12 @@ struct cpuidle_driver {
/* the driver handles the cpus in cpumask */ struct cpumask *cpumask; +};
+struct cpuidle_driver_wrapper { + struct cpuidle_driver drv; /* preferred governor to switch at register time */ - const char *governor; + const char *governor; };
#ifdef CONFIG_CPU_IDLE