Changelog: v1->v2: change __setup() to early_param(). v2->v3: fix the kabi breakage caused by #include <linux/sched/isolation.h>; modify defconfig. v3->v4: fix riscv compile error: move support_cpu0_nohz_full to tick.h.
Xiongfeng Wang (4): isolation: Do not check whether housekeeping CPUs are present isolation: Check whether all the housekeeping CPUs are online arm64: Kconfig: select ARCH_SUSPEND_NONZERO_CPU by default arm64: defconfig: Enable CONFIG_ARCH_SUSPEND_NONZERO_CPU
arch/arm64/Kconfig | 3 +++ arch/arm64/configs/openeuler_defconfig | 2 ++ include/linux/cpu.h | 3 ++- include/linux/sched/isolation.h | 2 ++ include/linux/tick.h | 2 ++ kernel/sched/isolation.c | 31 ++++++++++++++++++++++---- kernel/smp.c | 4 ++++ kernel/time/tick-sched.c | 11 ++++++++- 8 files changed, 52 insertions(+), 6 deletions(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9NR7Q CVE: NA
--------------------------------
If CPU0 is not among the housekeeping CPUs (i.e. 'isolcpus=' covers CPU0), the kernel prints the following error: [ 0.000000] Housekeeping: must include one present CPU, using boot CPU:0 This happens because, when the 'isolcpus=xxx' string is parsed, only CPU0 is marked as present on arm64.
Unlike X86 and PowerPC, arm64 sets cpu_present_mask rather late, in smp_prepare_cpus(). In the CPU hotplug case, some possible CPUs are not marked as present: only CPUs whose GICC entry is marked ACPI_MADT_ENABLED are marked as present. So we cannot set cpu_present_mask for all possible CPUs at an early stage.
Checking only cpu_present_mask cannot ensure that the CPUs will be brought online, as described in Link1 below. So do not check whether the housekeeping CPUs are present when parsing 'isolcpus'. A following commit adds a check after the CPU online process has finished, as suggested in Link2 below.
Link1: https://lore.kernel.org/all/20190504002733.GB19076@lenoir/ Link2: https://lore.kernel.org/all/1557186148.ocs72ssdjc.astroid@bobo.none/ Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com --- include/linux/tick.h | 2 ++ kernel/sched/isolation.c | 18 ++++++++++++++---- kernel/time/tick-sched.c | 9 +++++++++ 3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/include/linux/tick.h b/include/linux/tick.h index f34b7b779d44..46c75dccaf8e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -41,8 +41,10 @@ extern void tick_irq_enter(void); # ifndef arch_needs_cpu # define arch_needs_cpu() (0) # endif +extern bool support_cpu0_nohz_full; # else static inline void tick_irq_enter(void) { } +#define support_cpu0_nohz_full 0 #endif
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 785ef5201116..22f7ab8e9179 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -96,18 +96,28 @@ static int __init housekeeping_setup(char *str, enum hk_flags flags) alloc_bootmem_cpumask_var(&housekeeping_mask); cpumask_andnot(housekeeping_mask, cpu_possible_mask, non_housekeeping_mask); + if (support_cpu0_nohz_full && cpumask_empty(housekeeping_mask)) { + pr_warn("Housekeeping cpumask is NULL, using boot CPU\n"); + __cpumask_set_cpu(smp_processor_id(), housekeeping_mask); + /* update non_housekeeping_mask because it will be used below + in tick_nohz_full_setup() */ + cpumask_andnot(non_housekeeping_mask, + cpu_possible_mask, housekeeping_mask); + }
cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask); - if (cpumask_empty(tmp)) { + if (!support_cpu0_nohz_full && cpumask_empty(tmp)) { pr_warn("Housekeeping: must include one present CPU, " "using boot CPU:%d\n", smp_processor_id()); __cpumask_set_cpu(smp_processor_id(), housekeeping_mask); __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); } } else { - cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask); - if (cpumask_empty(tmp)) - __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); + if (!support_cpu0_nohz_full) { + cpumask_andnot(tmp, cpu_present_mask, non_housekeeping_mask); + if (cpumask_empty(tmp)) + __cpumask_clear_cpu(smp_processor_id(), non_housekeeping_mask); + } cpumask_andnot(tmp, cpu_possible_mask, non_housekeeping_mask); if (!cpumask_equal(tmp, housekeeping_mask)) { pr_warn("Housekeeping: nohz_full= must match isolcpus=\n"); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f50dc8f36707..d7bc1358af79 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -38,6 +38,8 @@ */ static DEFINE_PER_CPU(struct tick_sched, tick_cpu_sched);
+bool support_cpu0_nohz_full; + struct tick_sched *tick_get_tick_sched(int cpu) { return &per_cpu(tick_cpu_sched, cpu); @@ -1559,3 +1561,10 @@ int tick_check_oneshot_change(int allow_nohz) tick_nohz_switch_to_nohz(); return 0; } + +static int __init support_cpu0_nohz_full_setup(char *str) +{ + support_cpu0_nohz_full = true; + return 0; +} +early_param("support_cpu0_nohz_full", support_cpu0_nohz_full_setup);
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9NR7Q CVE: NA
--------------------------------
All the housekeeping CPUs need to be brought online, as described in the Link below. Add this check after the secondary CPUs have finished coming online.
Link: https://lore.kernel.org/all/20190504002733.GB19076@lenoir/ Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com --- include/linux/sched/isolation.h | 2 ++ kernel/sched/isolation.c | 13 +++++++++++++ kernel/smp.c | 4 ++++ 3 files changed, 19 insertions(+)
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 2f93081ad7a0..d618e58e1b01 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -26,6 +26,7 @@ extern bool housekeeping_enabled(enum hk_flags flags); extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); extern void __init housekeeping_init(void); +extern void check_housekeeping_cpus_online(void);
#else
@@ -48,6 +49,7 @@ static inline bool housekeeping_enabled(enum hk_flags flags) static inline void housekeeping_affine(struct task_struct *t, enum hk_flags flags) { } static inline void housekeeping_init(void) { } +static inline void check_housekeeping_cpus_online(void) { } #endif /* CONFIG_CPU_ISOLATION */
static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c index 22f7ab8e9179..9aaa6f196066 100644 --- a/kernel/sched/isolation.c +++ b/kernel/sched/isolation.c @@ -216,3 +216,16 @@ static int __init enhanced_isolcpus_setup(char *str) return 0; } __setup("enhanced_isolcpus", enhanced_isolcpus_setup); + +void check_housekeeping_cpus_online(void) +{ + if (!support_cpu0_nohz_full) + return; + if (!housekeeping_flags) + return; + if (!cpumask_subset(housekeeping_mask, cpu_online_mask)) { + pr_err("Not all the housekeeping CPUs are online, please modify the kernel parameter !\n"); + /* BUG_ON here, otherwise there may exist other potential error */ + BUG_ON(1); + } +} diff --git a/kernel/smp.c b/kernel/smp.c index 4b13a7ef6a31..a1017ff0af9d 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -20,6 +20,7 @@ #include <linux/cpu.h> #include <linux/sched.h> #include <linux/sched/idle.h> +#include <linux/sched/isolation.h> #include <linux/hypervisor.h> #include <linux/sched/clock.h> #include <linux/nmi.h> @@ -884,6 +885,9 @@ void __init smp_init(void)
/* Any cleanup work */ smp_cpus_done(setup_max_cpus); + + /* Check whether all the housekeeping CPUs are online */ + check_housekeeping_cpus_online(); }
/*
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9NR7Q CVE: NA
--------------------------------
Commit 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full") allows CPU0 to be a nohz_full (i.e. non-housekeeping) CPU. But this is only supported when CONFIG_PM_SLEEP_SMP is not configured, or when it is configured and the arch allows suspend on non-zero CPUs.
We cannot disable CONFIG_PM_SLEEP_SMP because freeze_secondary_cpus(), which is used by kexec, is built under that config. For arm64, there is no restriction in the kernel that prevents a non-zero CPU from entering suspend through the PSCI interface. When ARCH_SUSPEND_NONZERO_CPU is enabled, PM_SLEEP_SMP_NONZERO_CPU will also be enabled. The only influence is that suspend_disable_secondary_cpus() will use a housekeeping CPU to enter suspend. If PM sleep/hibernate is not involved, only kexec will use this function; this was tested on an arm64 server, and the vmcore can be created successfully. We add 'support_cpu0_nohz_full' to the CONFIG_PM_SLEEP_SMP_NONZERO_CPU check branch. When 'support_cpu0_nohz_full' is zero, there is no functional change.
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com --- arch/arm64/Kconfig | 3 +++ include/linux/cpu.h | 3 ++- kernel/time/tick-sched.c | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 273a58b74470..eb7334370cfe 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -2306,6 +2306,9 @@ config ARCH_HIBERNATION_HEADER config ARCH_SUSPEND_POSSIBLE def_bool y
+config ARCH_SUSPEND_NONZERO_CPU + def_bool y + endmenu
menu "CPU Power Management" diff --git a/include/linux/cpu.h b/include/linux/cpu.h index c282f6355aef..d66d1e0bec38 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -18,6 +18,7 @@ #include <linux/compiler.h> #include <linux/cpumask.h> #include <linux/cpuhotplug.h> +#include <linux/tick.h>
struct device; struct device_node; @@ -163,7 +164,7 @@ static inline int suspend_disable_secondary_cpus(void) { int cpu = 0;
- if (IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) + if (support_cpu0_nohz_full && IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) cpu = -1;
return freeze_secondary_cpus(cpu); diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d7bc1358af79..9e829ec93c2f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -559,7 +559,7 @@ void __init tick_nohz_init(void) }
if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) && - !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) { + (!support_cpu0_nohz_full || !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU))) { cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9NR7Q CVE: NA
--------------------------------
Enable CONFIG_PM_SLEEP_SMP_NONZERO_CPU and CONFIG_ARCH_SUSPEND_NONZERO_CPU.
Signed-off-by: Xiongfeng Wang wangxiongfeng2@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 34061d75a0d2..b404f64278f0 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -581,6 +581,7 @@ CONFIG_HIBERNATION_SNAPSHOT_DEV=y CONFIG_PM_STD_PARTITION="" CONFIG_PM_SLEEP=y CONFIG_PM_SLEEP_SMP=y +CONFIG_PM_SLEEP_SMP_NONZERO_CPU=y # CONFIG_PM_AUTOSLEEP is not set # CONFIG_PM_WAKELOCKS is not set CONFIG_PM=y @@ -599,6 +600,7 @@ CONFIG_CPU_PM=y CONFIG_ARCH_HIBERNATION_POSSIBLE=y CONFIG_ARCH_HIBERNATION_HEADER=y CONFIG_ARCH_SUSPEND_POSSIBLE=y +CONFIG_ARCH_SUSPEND_NONZERO_CPU=y # end of Power management options
#