[PATCH OLK-5.10 0/7] cpumask: cleanup nr_cpu_ids vs nr_cpumask_bits mess
 
            From: Zheng Zucheng <zhengzucheng@huawei.com> cpumask subsystem uses nr_cpu_ids and nr_cpumask_bits interchangeably despite that the variables have different meaning and purpose. It makes some cpumask functions broken. This series cleans that mess and adds new config FORCE_NR_CPUS that allows to optimize cpumask subsystem if the number of CPUs is known at compile-time. After some testing I found build broken when SMP is on and NR_CPUS == 1. This is addressed in a new patch #1, and in the following patch #2 that now declares set_nr_cpu_ids in cpumask.h (was in smp.h). Yury Norov (6): smp: don't declare nr_cpu_ids if NR_CPUS == 1 smp: add set_nr_cpu_ids() lib/cpumask: delete misleading comment lib/cpumask: deprecate nr_cpumask_bits lib/cpumask: add FORCE_NR_CPUS config option powerpc/64: don't refer nr_cpu_ids in asm code when it's undefined Zheng Zucheng (1): config: Add FORCE_NR_CPUS to openeuler_defconfig arch/arm64/configs/openeuler_defconfig | 1 + arch/loongarch/kernel/setup.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/powerpc/kernel/head_64.S | 4 ++++ arch/x86/configs/openeuler_defconfig | 1 + arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/xen/smp_pv.c | 2 +- include/linux/cpumask.h | 22 +++++++++++----------- kernel/smp.c | 6 ++++-- lib/Kconfig | 9 +++++++++ 10 files changed, 35 insertions(+), 18 deletions(-) -- 2.34.1
 
            From: Yury Norov <yury.norov@gmail.com> mainline inclusion from mainline-v6.1-rc1 commit 53fc190cc6771c5494d782210334d4ebb50c7103 category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- SMP and NR_CPUS are independent options, hence nr_cpu_ids may be declared even if NR_CPUS == 1, which is useless. Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- kernel/smp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/smp.c b/kernel/smp.c index 2023c022ad5f..bd8d63de3132 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -852,9 +852,11 @@ static int __init maxcpus(char *str) early_param("maxcpus", maxcpus); +#if (NR_CPUS > 1) /* Setup number of possible processor ids */ unsigned int nr_cpu_ids __read_mostly = NR_CPUS; EXPORT_SYMBOL(nr_cpu_ids); +#endif /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ void __init setup_nr_cpu_ids(void) -- 2.34.1
 
            From: Yury Norov <yury.norov@gmail.com> mainline inclusion from mainline-v6.1-rc1 commit 38bef8e57f2149acd2c910a98f57dd6291d2e0ec category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- In preparation to support compile-time nr_cpu_ids, add a setter for the variable. This is a no-op for all arches. Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- arch/loongarch/kernel/setup.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/x86/kernel/smpboot.c | 4 ++-- arch/x86/xen/smp_pv.c | 2 +- include/linux/cpumask.h | 5 +++++ kernel/smp.c | 4 ++-- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c index 85f597917c6f..5b6408296786 100644 --- a/arch/loongarch/kernel/setup.c +++ b/arch/loongarch/kernel/setup.c @@ -526,7 +526,7 @@ static void __init prefill_possible_map(void) for (; i < NR_CPUS; i++) set_cpu_possible(i, false); - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); } #endif diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index b7eb7dd96e17..fd5fb8f82898 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -756,7 +756,7 @@ static void __init prefill_possible_map(void) for (; i < NR_CPUS; i++) set_cpu_possible(i, false); - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); } #else static inline void prefill_possible_map(void) {} diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index ba5cf8ff7374..2b37b16685ed 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1284,7 +1284,7 @@ static void __init smp_sanity_check(void) nr++; } - nr_cpu_ids = 8; + set_nr_cpu_ids(8); } #endif @@ -1526,7 +1526,7 @@ __init void prefill_possible_map(void) possible = i; } - nr_cpu_ids = possible; + set_nr_cpu_ids(possible); pr_info("Allowing %d CPUs, %d hotplug CPUs\n", possible, max_t(int, possible - num_processors, 0)); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index 755e939db3ed..26babb8a1897 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -182,7 +182,7 @@ static void __init _get_smp_config(unsigned int early) * hypercall to expand the max number of VCPUs an already * running guest has. So cap it up to X. */ if (subtract) - nr_cpu_ids = nr_cpu_ids - subtract; + set_nr_cpu_ids(nr_cpu_ids - subtract); #endif } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7cdec529b1d9..7b3f83f452ec 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -37,6 +37,11 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; #define nr_cpu_ids 1U #else extern unsigned int nr_cpu_ids; + +static inline void set_nr_cpu_ids(unsigned int nr) +{ + nr_cpu_ids = nr; +} #endif #ifdef CONFIG_CPUMASK_OFFSTACK diff --git a/kernel/smp.c b/kernel/smp.c index bd8d63de3132..843c7e46389f 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -834,7 +834,7 @@ static int __init nrcpus(char *str) int nr_cpus; if (get_option(&str, &nr_cpus) && nr_cpus > 0 && nr_cpus < nr_cpu_ids) - nr_cpu_ids = nr_cpus; + set_nr_cpu_ids(nr_cpus); return 0; } @@ -861,7 +861,7 @@ EXPORT_SYMBOL(nr_cpu_ids); /* An arch may set nr_cpu_ids earlier if needed, so this would be redundant */ void __init setup_nr_cpu_ids(void) { - nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1; + set_nr_cpu_ids(find_last_bit(cpumask_bits(cpu_possible_mask), NR_CPUS) + 1); } /* Called by boot processor to activate the rest. */ -- 2.34.1
 
            From: Yury Norov <yury.norov@gmail.com> mainline inclusion from mainline-v6.1-rc1 commit 7102b3bb070fdf4580a05cbfc5ad3c0691dc4bf9 category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- The comment says that HOTPLUG config option enables all cpus in cpu_possible_mask up to NR_CPUs. This is wrong. Even if HOTPLUG is enabled, the mask is populated on boot with respect to ACPI/DT records. Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- include/linux/cpumask.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 7b3f83f452ec..192144b52b47 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -70,10 +70,6 @@ static inline void set_nr_cpu_ids(unsigned int nr) * cpu_online_mask is the dynamic subset of cpu_present_mask, * indicating those CPUs available for scheduling. * - * If HOTPLUG is enabled, then cpu_possible_mask is forced to have - * all NR_CPUS bits set, otherwise it is just the set of CPUs that - * ACPI reports present at boot. - * * If HOTPLUG is enabled, then cpu_present_mask varies dynamically, * depending on what ACPI reports as currently plugged in, otherwise * cpu_present_mask is just a copy of cpu_possible_mask. -- 2.34.1
 
            From: Yury Norov <yury.norov@gmail.com> mainline inclusion from mainline-v6.1-rc1 commit aa47a7c215e79a2ade6916f163c5a17b561bce4f category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Cpumask code is written in assumption that when CONFIG_CPUMASK_OFFSTACK is enabled, all cpumasks have boot-time defined size, otherwise the size is always NR_CPUS. The latter is wrong because the number of possible cpus is always calculated on boot, and it may be less than NR_CPUS. On my 4-cpu arm64 VM the nr_cpu_ids is 4, as expected, and nr_cpumask_bits is 256, which corresponds to NR_CPUS. This not only leads to useless traversing of cpumask bits greater than 4, this also makes some cpumask routines fail. For example, cpumask_full(0b1111000..000) would erroneously return false in the example above because tail bits in the mask are all unset. This patch deprecates nr_cpumask_bits and wires it to nr_cpu_ids unconditionally, so that cpumask routines will not waste time traversing unused part of cpu masks. It also fixes cpumask_full() and similar routines. As a side effect, because now a length of cpumasks is defined at run-time even if CPUMASK_OFFSTACK is disabled, compiler can't optimize corresponding functions. It increases kernel size by ~2.5KB if OFFSTACK is off. This is addressed in the following patch. Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- include/linux/cpumask.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 192144b52b47..cb8f29fd0477 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -44,13 +44,8 @@ static inline void set_nr_cpu_ids(unsigned int nr) } #endif -#ifdef CONFIG_CPUMASK_OFFSTACK -/* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, - * not all bits may be allocated. */ +/* Deprecated. Always use nr_cpu_ids. */ #define nr_cpumask_bits nr_cpu_ids -#else -#define nr_cpumask_bits ((unsigned int)NR_CPUS) -#endif /* * The following particular system cpumasks and operations manage -- 2.34.1
 
            From: Yury Norov <yury.norov@gmail.com> mainline inclusion from mainline-v6.1-rc1 commit 6f9c07be9d020489326098801f0661f754c7c865 category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- The size of cpumasks is hard-limited by compile-time parameter NR_CPUS, but defined at boot-time when kernel parses ACPI/DT tables, and stored in nr_cpu_ids. In many practical cases, number of CPUs for a target is known at compile time, and can be provided with NR_CPUS. In that case, compiler may be instructed to rely on NR_CPUS as on actual number of CPUs, not an upper limit. It allows to optimize many cpumask routines and significantly shrink size of the kernel image. This patch adds FORCE_NR_CPUS option to teach the compiler to rely on NR_CPUS and enable corresponding optimizations. If FORCE_NR_CPUS=y, kernel will not set nr_cpu_ids at boot, but only check that the actual number of possible CPUs is equal to NR_CPUS, and WARN if that doesn't hold. The new option is especially useful in embedded applications because kernel configurations are unique for each SoC, the number of CPUs is constant and known well, and memory limitations are typically harder. For my 4-CPU ARM64 build with NR_CPUS=4, FORCE_NR_CPUS=y saves 46KB: add/remove: 3/4 grow/shrink: 46/729 up/down: 652/-46952 (-46300) Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- include/linux/cpumask.h | 10 +++++++--- kernel/smp.c | 2 +- lib/Kconfig | 9 +++++++++ 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cb8f29fd0477..2e50162d9908 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -33,16 +33,20 @@ typedef struct cpumask { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t; */ #define cpumask_pr_args(maskp) nr_cpu_ids, cpumask_bits(maskp) -#if NR_CPUS == 1 -#define nr_cpu_ids 1U +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) +#define nr_cpu_ids ((unsigned int)NR_CPUS) #else extern unsigned int nr_cpu_ids; +#endif static inline void set_nr_cpu_ids(unsigned int nr) { +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) + WARN_ON(nr != nr_cpu_ids); +#else nr_cpu_ids = nr; -} #endif +} /* Deprecated. Always use nr_cpu_ids. */ #define nr_cpumask_bits nr_cpu_ids diff --git a/kernel/smp.c b/kernel/smp.c index 843c7e46389f..4b13a7ef6a31 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -852,7 +852,7 @@ static int __init maxcpus(char *str) early_param("maxcpus", maxcpus); -#if (NR_CPUS > 1) +#if (NR_CPUS > 1) && !defined(CONFIG_FORCE_NR_CPUS) /* Setup number of possible processor ids */ unsigned int nr_cpu_ids __read_mostly = NR_CPUS; EXPORT_SYMBOL(nr_cpu_ids); diff --git a/lib/Kconfig b/lib/Kconfig index 36326864249d..8026964596fd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -494,6 +494,15 @@ config CPUMASK_OFFSTACK them on the stack. This is a bit more expensive, but avoids stack overflow. +config FORCE_NR_CPUS + bool "NR_CPUS is set to an actual number of CPUs" + depends on SMP + help + Say Yes if you have NR_CPUS set to an actual number of possible + CPUs in your system, not to a default value. This forces the core + code to rely on compile-time value and optimize kernel routines + better. + config CPU_RMAP bool depends on SMP -- 2.34.1
 
            From: Yury Norov <yury.norov@gmail.com> mainline inclusion from mainline-v6.1-rc1 commit 546a073d628111e3338af689938407e77d5dc38f category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- generic_secondary_common_init() calls LOAD_REG_ADDR(r7, nr_cpu_ids) conditionally on CONFIG_SMP. However, if 'NR_CPUS == 1', kernel doesn't use the nr_cpu_ids, and in C code, it's just: #if NR_CPUS == 1 #define nr_cpu_ids ... This series makes declaration of nr_cpu_ids conditional on NR_CPUS == 1, and that reveals the issue, because compiler can't link the LOAD_REG_ADDR(r7, nr_cpu_ids) against nonexisting symbol. Current code looks unsafe for those who build kernel with CONFIG_SMP=y and NR_CPUS == 1. This is weird configuration, but not disallowed. Fix the linker error by replacing LOAD_REG_ADDR() with LOAD_REG_IMMEDIATE() conditionally on NR_CPUS == 1. As the following patch adds CONFIG_FORCE_NR_CPUS option that has the similar effect on nr_cpu_ids, make the generic_secondary_common_init() conditional on it too. Reported-by: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Yury Norov <yury.norov@gmail.com> Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- arch/powerpc/kernel/head_64.S | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 53def7b7f5ce..bc6d9f8f6bb7 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -398,8 +398,12 @@ generic_secondary_common_init: #else LOAD_REG_ADDR(r8, paca_ptrs) /* Load paca_ptrs pointe */ ld r8,0(r8) /* Get base vaddr of array */ +#if (NR_CPUS == 1) || defined(CONFIG_FORCE_NR_CPUS) + LOAD_REG_IMMEDIATE(r7, NR_CPUS) +#else LOAD_REG_ADDR(r7, nr_cpu_ids) /* Load nr_cpu_ids address */ lwz r7,0(r7) /* also the max paca allocated */ +#endif li r5,0 /* logical cpu id */ 1: sldi r9,r5,3 /* get paca_ptrs[] index from cpu id */ -- 2.34.1
 
            From: Zheng Zucheng <zhengzucheng@huawei.com> hulk inclusion category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/I8KQBZ CVE: NA -------------------------------- Update openeuler_defconfig for FORCE_NR_CPUS in ARM64 and x86 Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com> --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 12e4828c9cd1..a4928ab384b7 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -7074,6 +7074,7 @@ CONFIG_CMA_ALIGNMENT=8 CONFIG_DMA_MAP_BENCHMARK=y CONFIG_SGL_ALLOC=y CONFIG_CHECK_SIGNATURE=y +# CONFIG_FORCE_NR_CPUS is not set CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_GLOB=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 3db754e79811..4f912e543270 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -8135,6 +8135,7 @@ CONFIG_DMA_COHERENT_POOL=y CONFIG_SGL_ALLOC=y CONFIG_CHECK_SIGNATURE=y CONFIG_CPUMASK_OFFSTACK=y +# CONFIG_FORCE_NR_CPUS is not set CONFIG_CPU_RMAP=y CONFIG_DQL=y CONFIG_GLOB=y -- 2.34.1
 
            反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3374 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/S... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3374 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/S...
participants (2)
- 
                 Lu Jialin Lu Jialin
- 
                 patchwork bot patchwork bot