[PATCH OLK-6.6 0/5] Support SMT control on arm64

From: Hongye Lin <linhongye@h-partners.com> driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBZQQR ---------------------------------------------------------------------- The core CPU control framework supports runtime SMT control, which is not yet supported on arm64. Besides the general vulnerability concerns, we want this runtime control on our arm64 servers for: - better single CPU performance in some cases - saving overall power consumption This patchset implements it in the following aspects: - provide a default topology_is_primary_thread() - support retrieving the SMT thread number on OF based systems - support retrieving the SMT thread number on ACPI based systems - select HOTPLUG_SMT for arm64 Tests have been done on our ACPI based arm64 server and on ACPI/OF based QEMU VMs. Yicong Yang (5): Revert "arm64: Kconfig: Enable HOTPLUG_SMT" cpu/SMT: Provide a default topology_is_primary_thread() arch_topology: Support SMT control for OF based system arm64: topology: Support SMT control on ACPI based system arm64: Kconfig: Enable HOTPLUG_SMT arch/arm64/Kconfig | 2 +- arch/arm64/configs/openeuler_defconfig | 1 - arch/arm64/kernel/topology.c | 67 +++++++++++++++++++------- arch/powerpc/include/asm/topology.h | 1 + arch/x86/include/asm/topology.h | 3 +- drivers/base/arch_topology.c | 61 +++++++---------------- include/linux/arch_topology.h | 14 ------ include/linux/topology.h | 24 +++++++++ 8 files changed, 94 insertions(+), 79 deletions(-) -- 2.33.0

From: Yicong Yang <yangyicong@hisilicon.com> driver inclusion category: cleanup bugzilla: https://gitee.com/openeuler/kernel/issues/IBZQQR ---------------------------------------------------------------------- This reverts commit 0dc61bb65a49655c01310cbcf27ce9193b33bb72. This reverts commit 6085d50ad91a82545cffc9f9d9b17ff2781d1fa3. This reverts commit 4aaf7df8f542907c8486cb5e55e4ec28f2a7dfac. This reverts commit 44a2386e29fa29b99c71f672d5481a3408aeebe8. This reverts commit 6dd0f06a404f481ea58fb04b3006287dc3c5aea3. reverts PR:https://gitee.com/openeuler/kernel/pulls/3745 Reviewed-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> --- arch/arm64/Kconfig | 1 - arch/arm64/configs/openeuler_defconfig | 1 - arch/arm64/kernel/topology.c | 23 ------------- drivers/base/arch_topology.c | 45 -------------------------- include/linux/arch_topology.h | 14 -------- 5 files changed, 84 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9da9d58f1c02..df0635940ea0 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -242,7 +242,6 @@ config ARM64 select HAVE_KRETPROBES select HAVE_GENERIC_VDSO select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU - select HOTPLUG_SMT if (SMP && HOTPLUG_CPU) select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 3205dc763d99..94dd817d6536 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -774,7 +774,6 @@ CONFIG_CPU_MITIGATIONS=y # # General architecture-dependent options # -CONFIG_HOTPLUG_SMT=y CONFIG_HOTPLUG_CORE_SYNC=y CONFIG_HOTPLUG_CORE_SYNC_DEAD=y CONFIG_KPROBES=y diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 0dc360c32ec8..817d788cd866 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -17,7 +17,6 @@ 
#include <linux/cpufreq.h> #include <linux/init.h> #include <linux/percpu.h> -#include <linux/xarray.h> #include <asm/cpu.h> #include <asm/cputype.h> @@ -44,16 +43,11 @@ static bool __init acpi_cpu_is_threaded(int cpu) */ int __init parse_acpi_topology(void) { - int thread_num, max_smt_thread_num = 1; - struct xarray core_threads; int cpu, topology_id; - void *entry; if (acpi_disabled) return 0; - xa_init(&core_threads); - for_each_possible_cpu(cpu) { topology_id = find_acpi_cpu_topology(cpu, 0); if (topology_id < 0) @@ -63,20 +57,6 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].thread_id = topology_id; topology_id = find_acpi_cpu_topology(cpu, 1); cpu_topology[cpu].core_id = topology_id; - - entry = xa_load(&core_threads, topology_id); - if (!entry) { - xa_store(&core_threads, topology_id, - xa_mk_value(1), GFP_KERNEL); - } else { - thread_num = xa_to_value(entry); - thread_num++; - xa_store(&core_threads, topology_id, - xa_mk_value(thread_num), GFP_KERNEL); - - if (thread_num > max_smt_thread_num) - max_smt_thread_num = thread_num; - } } else { cpu_topology[cpu].thread_id = -1; cpu_topology[cpu].core_id = topology_id; @@ -87,9 +67,6 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].package_id = topology_id; } - topology_smt_set_num_threads(max_smt_thread_num); - - xa_destroy(&core_threads); return 0; } #endif diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 6c058cd64a8d..c5aada55d8fe 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -546,13 +546,6 @@ static int __init parse_core(struct device_node *core, int package_id, i++; } while (1); - /* - * We've already gotten threads number in this core, update the SMT - * threads number when necessary. 
- */ - if (i > topology_smt_get_num_threads()) - topology_smt_set_num_threads(i); - cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) { @@ -758,36 +751,6 @@ const struct cpumask *cpu_clustergroup_mask(int cpu) return &cpu_topology[cpu].cluster_sibling; } -#ifdef CONFIG_HOTPLUG_SMT - -/* Maximum threads number per-Core */ -static unsigned int topology_smt_num_threads = 1; - -void __init topology_smt_set_num_threads(unsigned int num_threads) -{ - topology_smt_num_threads = num_threads; -} - -unsigned int __init topology_smt_get_num_threads(void) -{ - return topology_smt_num_threads; -} - -/* - * On SMT Hotplug the primary thread of the SMT won't be disabled. For x86 they - * seem to have a primary thread for special purpose. For other arthitectures - * like arm64 there's no such restriction for a primary thread, so make the - * first thread in the SMT as the primary thread. - */ -bool topology_is_primary_thread(unsigned int cpu) -{ - if (cpu == cpumask_first(topology_sibling_cpumask(cpu))) - return true; - - return false; -} -#endif - void update_siblings_masks(unsigned int cpuid) { struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; @@ -900,14 +863,6 @@ void __init init_cpu_topology(void) reset_cpu_topology(); } - /* - * By this stage we get to know whether we support SMT or not, update - * the information for the core. We don't support - * CONFIG_SMT_NUM_THREADS_DYNAMIC so make the max_threads == num_threads. 
- */ - cpu_smt_set_num_threads(topology_smt_get_num_threads(), - topology_smt_get_num_threads()); - for_each_possible_cpu(cpu) { ret = fetch_cache_info(cpu); if (!ret) diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 0367f3a61838..a07b510e7dc5 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -92,20 +92,6 @@ void update_siblings_masks(unsigned int cpu); void remove_cpu_topology(unsigned int cpuid); void reset_cpu_topology(void); int parse_acpi_topology(void); - -#ifdef CONFIG_HOTPLUG_SMT -bool topology_is_primary_thread(unsigned int cpu); -void topology_smt_set_num_threads(unsigned int num_threads); -unsigned int topology_smt_get_num_threads(void); -#else -static inline bool topology_is_primary_thread(unsigned int cpu) { return false; } -static inline void topology_smt_set_num_threads(unsigned int num_threads) { } -static inline unsigned int topology_smt_get_num_threads(void) -{ - return 1; -} -#endif - #endif #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ -- 2.33.0

From: Yicong Yang <yangyicong@hisilicon.com> driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBZQQR ---------------------------------------------------------------------- Currently, if architectures want to support HOTPLUG_SMT they need to provide a topology_is_primary_thread() telling the framework which thread in the SMT cannot be offlined. However, arm64 doesn't have a restriction on which thread in the SMT cannot be offlined; the simplest choice is to just make the 1st thread the "primary" thread. So make this the default implementation in the framework and let architectures like x86 that have a special primary thread override this function (which they've already done). There's no need to provide a stub function if !CONFIG_SMP or !CONFIG_HOTPLUG_SMT. In such a case the CPU being tested is already the 1st CPU in the SMT, so it's always the primary thread. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Pierre Gondois <pierre.gondois@arm.com> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Reviewed-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> --- arch/powerpc/include/asm/topology.h | 1 + arch/x86/include/asm/topology.h | 3 ++- include/linux/topology.h | 24 ++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 16bacfe8c7a2..da15b5efe807 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -152,6 +152,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu) { return cpu == cpu_first_thread_sibling(cpu); } +#define topology_is_primary_thread topology_is_primary_thread static inline bool topology_smt_thread_allowed(unsigned int cpu) { diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 
3235ba1e5b06..61f6350b4ed1 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -153,6 +153,8 @@ static inline bool topology_is_primary_thread(unsigned int cpu) { return cpumask_test_cpu(cpu, cpu_primary_thread_mask); } +#define topology_is_primary_thread topology_is_primary_thread + #else /* CONFIG_SMP */ #define topology_max_packages() (1) static inline int @@ -162,7 +164,6 @@ topology_update_die_map(unsigned int dieid, unsigned int cpu) { return 0; } static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } -static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/include/linux/topology.h b/include/linux/topology.h index a2f15fd0e527..44465c56cbc1 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -241,6 +241,30 @@ static inline const struct cpumask *cpu_smt_mask(int cpu) } #endif +#ifndef topology_is_primary_thread + +#define topology_is_primary_thread topology_is_primary_thread + +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + /* + * When disabling SMT the primary thread of the SMT will remain + * enabled/active. Architectures do have a special primary thread + * (e.g. x86) needs to override this function. Otherwise can make + * the first thread in the SMT as the primary thread. + * + * The sibling cpumask of an offline CPU contains always the CPU + * itself for architectures using the implementation of + * CONFIG_GENERIC_ARCH_TOPOLOGY for building their topology. + * Other architectures not using CONFIG_GENERIC_ARCH_TOPOLOGY for + * building their topology have to check whether to use this default + * implementation or to override it. 
+ */ + return cpu == cpumask_first(topology_sibling_cpumask(cpu)); +} + +#endif + static inline const struct cpumask *cpu_cpu_mask(int cpu) { return cpumask_of_node(cpu_to_node(cpu)); -- 2.33.0

From: Yicong Yang <yangyicong@hisilicon.com> driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBZQQR ---------------------------------------------------------------------- When building the topology from the devicetree, we already know the SMT thread number of each core. Update the largest SMT thread number and enable the SMT control by the end of topology parsing. The framework's SMT control provides two interfaces to users [1] through /sys/devices/system/cpu/smt/control: 1) enable SMT by writing "on" and disable by "off" 2) enable SMT by writing max_thread_number or disable by writing 1 Both methods support completely disabling/enabling the SMT cores, so both work correctly for symmetric SMT platforms and for asymmetric platforms with non-SMT cores and one type of SMT core, like: core A: 1 thread core B: X (X!=1) threads Note that for a theoretically possible platform with multiple SMT-X (X>1) core types, the SMT control is also supported as expected, but only via the "on/off" method. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Docu... 
Reviewed-by: Pierre Gondois <pierre.gondois@arm.com> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Reviewed-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> --- drivers/base/arch_topology.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index c5aada55d8fe..1525d3e2f9d5 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -10,6 +10,7 @@ #include <linux/cacheinfo.h> #include <linux/cpu.h> #include <linux/cpufreq.h> +#include <linux/cpu_smt.h> #include <linux/device.h> #include <linux/of.h> #include <linux/slab.h> @@ -485,6 +486,10 @@ core_initcall(free_raw_capacity); #endif #if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) + +/* Used to enable the SMT control */ +static unsigned int max_smt_thread_num = 1; + /* * This function returns the logic cpu number of the node. * There are basically three kinds of return values: @@ -546,6 +551,8 @@ static int __init parse_core(struct device_node *core, int package_id, i++; } while (1); + max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i); + cpu = get_cpu_for_node(core); if (cpu >= 0) { if (!leaf) { @@ -658,6 +665,17 @@ static int __init parse_socket(struct device_node *socket) if (!has_socket) ret = parse_cluster(socket, 0, -1, 0); + /* + * Reset the max_smt_thread_num to 1 on failure. Since on failure + * we need to notify the framework the SMT is not supported, but + * max_smt_thread_num can be initialized to the SMT thread number + * of the cores which are successfully parsed. + */ + if (ret) + max_smt_thread_num = 1; + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + return ret; } -- 2.33.0

From: Yicong Yang <yangyicong@hisilicon.com> driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBZQQR ---------------------------------------------------------------------- For ACPI we build the topology from the PPTT and cannot directly get the SMT thread number of each core. Instead, use a temporary xarray to record the heterogeneous information (from ACPI_PPTT_ACPI_IDENTICAL) and the SMT information of the first core in its heterogeneous CPU cluster when building the topology. Then we can know the largest SMT thread number in the system. If a homogeneous system uses ACPI 6.2 or later, all the CPUs should be under the root node of the PPTT. There will be only one entry in the xarray and all the CPUs in the system will be assumed identical. The framework's SMT control provides two interfaces to users [1] through /sys/devices/system/cpu/smt/control: 1) enable SMT by writing "on" and disable by "off" 2) enable SMT by writing max_thread_number or disable by writing 1 Both methods support completely disabling/enabling the SMT cores, so both work correctly for symmetric SMT platforms and for asymmetric platforms with non-SMT cores and one type of SMT core, like: core A: 1 thread core B: X (X!=1) threads Note that for a theoretically possible platform with multiple SMT-X (X>1) core types, the SMT control is also supported as expected, but only via the "on/off" method. [1] https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Docu... 
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Hanjun Guo <guohanjun@huawei.com> Reviewed-by: Pierre Gondois <pierre.gondois@arm.com> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Reviewed-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> --- arch/arm64/kernel/topology.c | 54 ++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 817d788cd866..c595f2e1575c 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -15,8 +15,10 @@ #include <linux/arch_topology.h> #include <linux/cacheinfo.h> #include <linux/cpufreq.h> +#include <linux/cpu_smt.h> #include <linux/init.h> #include <linux/percpu.h> +#include <linux/xarray.h> #include <asm/cpu.h> #include <asm/cputype.h> @@ -37,17 +39,28 @@ static bool __init acpi_cpu_is_threaded(int cpu) return !!is_threaded; } +struct cpu_smt_info { + unsigned int thread_num; + int core_id; +}; + /* * Propagate the topology information of the processor_topology_node tree to the * cpu_topology array. */ int __init parse_acpi_topology(void) { + unsigned int max_smt_thread_num = 1; + struct cpu_smt_info *entry; + struct xarray hetero_cpu; + unsigned long hetero_id; int cpu, topology_id; if (acpi_disabled) return 0; + xa_init(&hetero_cpu); + for_each_possible_cpu(cpu) { topology_id = find_acpi_cpu_topology(cpu, 0); if (topology_id < 0) @@ -57,6 +70,34 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].thread_id = topology_id; topology_id = find_acpi_cpu_topology(cpu, 1); cpu_topology[cpu].core_id = topology_id; + + /* + * In the PPTT, CPUs below a node with the 'identical + * implementation' flag have the same number of threads. + * Count the number of threads for only one CPU (i.e. + * one core_id) among those with the same hetero_id. 
+ * See the comment of find_acpi_cpu_topology_hetero_id() + * for more details. + * + * One entry is created for each node having: + * - the 'identical implementation' flag + * - its parent not having the flag + */ + hetero_id = find_acpi_cpu_topology_hetero_id(cpu); + entry = xa_load(&hetero_cpu, hetero_id); + if (!entry) { + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + WARN_ON_ONCE(!entry); + + if (entry) { + entry->core_id = topology_id; + entry->thread_num = 1; + xa_store(&hetero_cpu, hetero_id, + entry, GFP_KERNEL); + } + } else if (entry->core_id == topology_id) { + entry->thread_num++; + } } else { cpu_topology[cpu].thread_id = -1; cpu_topology[cpu].core_id = topology_id; @@ -67,6 +108,19 @@ int __init parse_acpi_topology(void) cpu_topology[cpu].package_id = topology_id; } + /* + * This is a short loop since the number of XArray elements is the + * number of heterogeneous CPU clusters. On a homogeneous system + * there's only one entry in the XArray. + */ + xa_for_each(&hetero_cpu, hetero_id, entry) { + max_smt_thread_num = max(max_smt_thread_num, entry->thread_num); + xa_erase(&hetero_cpu, hetero_id); + kfree(entry); + } + + cpu_smt_set_num_threads(max_smt_thread_num, max_smt_thread_num); + xa_destroy(&hetero_cpu); return 0; } #endif -- 2.33.0

From: Yicong Yang <yangyicong@hisilicon.com> driver inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IBZQQR ---------------------------------------------------------------------- Enable HOTPLUG_SMT for SMT control. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Pierre Gondois <pierre.gondois@arm.com> Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com> Reviewed-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Hongye Lin <linhongye@h-partners.com> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com> --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index df0635940ea0..a4456cd7f0f8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -242,6 +242,7 @@ config ARM64 select HAVE_KRETPROBES select HAVE_GENERIC_VDSO select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU + select HOTPLUG_SMT if HOTPLUG_CPU select IRQ_DOMAIN select IRQ_FORCED_THREADING select KASAN_VMALLOC if KASAN -- 2.33.0

反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/15835 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/43Q... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/15835 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/43Q...
participants (2)
-
patchwork bot
-
Yushan Wang