[PATCH 01/10] arm64: Support AT_HWCAP3

From: Mark Brown <broonie@kernel.org>

commit ddadbcdaaed5c3c44cc6c36093f6bf02d942d71d upstream.

We have filled all 64 bits of AT_HWCAP2 so in order to support discovery
of further features provide the framework to use the already defined
AT_HWCAP3 for further CPU features.

Signed-off-by: Mark Brown <broonie@kernel.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Link: https://lore.kernel.org/r/20241004-arm64-elf-hwcap3-v2-2-799d1daad8b0@kernel...
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[conflicts with e218c611e32c8d4315a6d64790b9a86f3d4fa4dc]
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 Documentation/arch/arm64/elf_hwcaps.rst | 6 +++---
 arch/arm64/include/asm/cpufeature.h     | 3 ++-
 arch/arm64/include/asm/hwcap.h          | 5 ++++-
 arch/arm64/include/uapi/asm/hwcap.h     | 4 ++++
 arch/arm64/kernel/cpufeature.c          | 6 ++++++
 5 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
index 76ff9d7398fd..f01ce14bca65 100644
--- a/Documentation/arch/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -16,9 +16,9 @@ architected discovery mechanism available to userspace code at EL0. The
 kernel exposes the presence of these features to userspace through a set
 of flags called hwcaps, exposed in the auxiliary vector.
 
-Userspace software can test for features by acquiring the AT_HWCAP or
-AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
-flags are set, e.g.::
+Userspace software can test for features by acquiring the AT_HWCAP,
+AT_HWCAP2 or AT_HWCAP3 entry of the auxiliary vector, and testing
+whether the relevant flags are set, e.g.::
 
   bool floating_point_is_present(void)
   {
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index 5a7f5f3e052d..102bd1194d06 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
 #include <asm/hwcap.h>
 #include <asm/sysreg.h>
 
-#define MAX_CPU_FEATURES        128
+#define MAX_CPU_FEATURES        192
 #define cpu_feature(x)          KERNEL_HWCAP_ ## x
 
 #define ARM64_SW_FEATURE_OVERRIDE_NOKASLR       0
@@ -434,6 +434,7 @@ void cpu_set_feature(unsigned int num);
 bool cpu_have_feature(unsigned int num);
 unsigned long cpu_get_elf_hwcap(void);
 unsigned long cpu_get_elf_hwcap2(void);
+unsigned long cpu_get_elf_hwcap3(void);
 
 #define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
 #define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 521267478d18..6f044e5873a9 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -140,17 +140,20 @@
 #define KERNEL_HWCAP_MOPS       __khwcap2_feature(MOPS)
 #define KERNEL_HWCAP_HBC        __khwcap2_feature(HBC)
 
+#define __khwcap3_feature(x)    (const_ilog2(HWCAP3_ ## x) + 128)
+
 /*
  * This yields a mask that user programs can use to figure out what
  * instruction set this cpu supports.
  */
 #define ELF_HWCAP       cpu_get_elf_hwcap()
 #define ELF_HWCAP2      cpu_get_elf_hwcap2()
+#define ELF_HWCAP3      cpu_get_elf_hwcap3()
 
 #ifdef CONFIG_COMPAT
 #define COMPAT_ELF_HWCAP        (compat_elf_hwcap)
 #define COMPAT_ELF_HWCAP2       (compat_elf_hwcap2)
-extern unsigned int compat_elf_hwcap, compat_elf_hwcap2;
+extern unsigned int compat_elf_hwcap, compat_elf_hwcap2, compat_elf_hwcap3;
 #endif
 
 enum {
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 53026f45a509..a86fca74fdcf 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -105,4 +105,8 @@
 #define HWCAP2_MOPS     (1UL << 43)
 #define HWCAP2_HBC      (1UL << 44)
 
+/*
+ * HWCAP3 flags - for AT_HWCAP3
+ */
+
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 48750a38e455..4cd925bfc513 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -104,6 +104,7 @@ static DECLARE_BITMAP(elf_hwcap, MAX_CPU_FEATURES) __read_mostly;
                                  COMPAT_HWCAP_LPAE)
 unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT;
 unsigned int compat_elf_hwcap2 __read_mostly;
+unsigned int compat_elf_hwcap3 __read_mostly;
 #endif
 
 DECLARE_BITMAP(system_cpucaps, ARM64_NCAPS);
@@ -3507,6 +3508,11 @@ unsigned long cpu_get_elf_hwcap2(void)
         return elf_hwcap[1];
 }
 
+unsigned long cpu_get_elf_hwcap3(void)
+{
+        return elf_hwcap[2];
+}
+
 void __init setup_system_features(void)
 {
         /*
--
2.33.0
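
For reference, userspace consumes the new word the same way the
elf_hwcaps.rst example consumes AT_HWCAP. A minimal sketch, assuming a
libc that provides getauxval() and headers that define AT_HWCAP3 (the
value 29 is added by the auxvec patch later in this series):

  #include <stdbool.h>
  #include <sys/auxv.h>        /* getauxval() */

  #ifndef AT_HWCAP3
  #define AT_HWCAP3 29         /* from include/uapi/linux/auxvec.h in this series */
  #endif

  /*
   * Test one AT_HWCAP3 bit. On kernels without AT_HWCAP3 the entry is
   * absent, getauxval() returns 0 and the feature reads as not present.
   */
  static bool hwcap3_is_set(unsigned long mask)
  {
          return (getauxval(AT_HWCAP3) & mask) != 0;
  }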

From: Peter Bergner <bergner@linux.ibm.com>

commit 3281366a8e79a512956382885091565db1036b64 upstream.

Changes from v1:
  - Add Acked-by lines.

The powerpc toolchain keeps a copy of the HWCAP bit masks in our TCB
for fast access by the __builtin_cpu_supports built-in function.  The
TCB space for the HWCAP entries - which are created in pairs - is an
ABI extension, so waiting to create the space for HWCAP3 and HWCAP4
until they are needed is problematical.  Define AT_HWCAP3 and AT_HWCAP4
in the generic uapi header so they can be used in glibc to reserve
space in the powerpc TCB for their future use.

I scanned through the Linux and GLIBC source codes looking for unused
AT_* values and 29 and 30 did not seem to be used, so they are what I
went with.  This has received Acked-by's from both GLIBC and Linux
kernel developers and no reservations or Nacks from anyone.

Arnd, we seem to have consensus on the patch below.  Is this something
you could take and apply to your tree?

Peter

Signed-off-by: Peter Bergner <bergner@linux.ibm.com>
Acked-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
Acked-by: Szabolcs Nagy <szabolcs.nagy@arm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/a406b535-dc55-4856-8ae9-5a063644a1af@linux.ibm.com
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 include/uapi/linux/auxvec.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/auxvec.h b/include/uapi/linux/auxvec.h
index 6991c4b8ab18..cc61cb9b3e9a 100644
--- a/include/uapi/linux/auxvec.h
+++ b/include/uapi/linux/auxvec.h
@@ -32,6 +32,8 @@
 #define AT_HWCAP2 26    /* extension of AT_HWCAP */
 #define AT_RSEQ_FEATURE_SIZE    27      /* rseq supported feature size */
 #define AT_RSEQ_ALIGN           28      /* rseq allocation alignment */
+#define AT_HWCAP3 29    /* extension of AT_HWCAP */
+#define AT_HWCAP4 30    /* extension of AT_HWCAP */
 
 #define AT_EXECFN  31   /* filename of program */
--
2.33.0

From: Mark Brown <broonie@kernel.org>

commit 4e6e8c2b757f382684abc4765202cd25c221dea1 upstream.

AT_HWCAP3 and AT_HWCAP4 were recently defined for use on PowerPC in
commit 3281366a8e79 ("uapi/auxvec: Define AT_HWCAP3 and AT_HWCAP4 aux
vector, entries"). Since we want to start using AT_HWCAP3 on arm64 add
support for exposing both these new hwcaps via binfmt_elf.

Signed-off-by: Mark Brown <broonie@kernel.org>
Acked-by: Kees Cook <kees@kernel.org>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 fs/binfmt_elf.c        |  6 ++++++
 fs/binfmt_elf_fdpic.c  |  6 ++++++
 fs/compat_binfmt_elf.c | 10 ++++++++++
 3 files changed, 22 insertions(+)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index fb2c8d14327a..74d90a711647 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -276,6 +276,12 @@ create_elf_tables(struct linux_binprm *bprm, const struct elfhdr *exec,
         NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
 #ifdef ELF_HWCAP2
         NEW_AUX_ENT(AT_HWCAP2, ELF_HWCAP2);
+#endif
+#ifdef ELF_HWCAP3
+        NEW_AUX_ENT(AT_HWCAP3, ELF_HWCAP3);
+#endif
+#ifdef ELF_HWCAP4
+        NEW_AUX_ENT(AT_HWCAP4, ELF_HWCAP4);
 #endif
         NEW_AUX_ENT(AT_EXECFN, bprm->exec);
         if (k_platform) {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 96a8b13b57d9..7419c3a41db9 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -642,6 +642,12 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm,
         NEW_AUX_ENT(AT_HWCAP,   ELF_HWCAP);
 #ifdef ELF_HWCAP2
         NEW_AUX_ENT(AT_HWCAP2,  ELF_HWCAP2);
+#endif
+#ifdef ELF_HWCAP3
+        NEW_AUX_ENT(AT_HWCAP3,  ELF_HWCAP3);
+#endif
+#ifdef ELF_HWCAP4
+        NEW_AUX_ENT(AT_HWCAP4,  ELF_HWCAP4);
 #endif
         NEW_AUX_ENT(AT_PAGESZ,  PAGE_SIZE);
         NEW_AUX_ENT(AT_CLKTCK,  CLOCKS_PER_SEC);
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index 8f0af4f62631..d5ef5469e4e6 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -80,6 +80,16 @@
 #define ELF_HWCAP2              COMPAT_ELF_HWCAP2
 #endif
 
+#ifdef  COMPAT_ELF_HWCAP3
+#undef  ELF_HWCAP3
+#define ELF_HWCAP3              COMPAT_ELF_HWCAP3
+#endif
+
+#ifdef  COMPAT_ELF_HWCAP4
+#undef  ELF_HWCAP4
+#define ELF_HWCAP4              COMPAT_ELF_HWCAP4
+#endif
+
 #ifdef  COMPAT_ARCH_DLINFO
 #undef  ARCH_DLINFO
 #define ARCH_DLINFO             COMPAT_ARCH_DLINFO
--
2.33.0

From: Yicong Yang <yangyicong@hisilicon.com>

The instructions introduced by FEAT_{LS64, LS64_V} are controlled by
HCRX_EL2.{EnALS, EnASR}. Configure all of these to allow usage at
EL0/1.

This doesn't mean these instructions are always available in EL0/1 if
the features are provided; the hypervisor still has control at runtime.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 arch/arm64/include/asm/el2_setup.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index e7cae1452098..1da6a8eb496c 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -27,6 +27,17 @@
         ubfx    x0, x0, #ID_AA64MMFR1_EL1_HCX_SHIFT, #4
         cbz     x0, .Lskip_hcrx_\@
         mov_q   x0, HCRX_HOST_FLAGS
+
+        /* Enable LS64, LS64_V if supported */
+        mrs_s   x1, SYS_ID_AA64ISAR1_EL1
+        ubfx    x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4
+        cbz     x1, .Lset_hcrx_\@
+        orr     x0, x0, #HCRX_EL2_EnALS
+        cmp     x1, #ID_AA64ISAR1_EL1_LS64_LS64_V
+        b.lt    .Lset_hcrx_\@
+        orr     x0, x0, #HCRX_EL2_EnASR
+
+.Lset_hcrx_\@:
         msr_s   SYS_HCRX_EL2, x0
 .Lskip_hcrx_\@:
 .endm
--
2.33.0
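
For readers less used to the el2_setup assembly, a C rendering of the
check above may help. This is illustrative only (the isar1/ls64/hcrx
variables are made up for the sketch); the field and bit names are the
ones used by this series, where LS64 field value 0b0001 means FEAT_LS64
and 0b0010 adds FEAT_LS64_V:

  /* Illustrative C equivalent of the init_el2 fragment above. */
  u64 isar1 = read_sysreg_s(SYS_ID_AA64ISAR1_EL1);
  u64 ls64  = (isar1 >> ID_AA64ISAR1_EL1_LS64_SHIFT) & 0xf;

  if (ls64 >= ID_AA64ISAR1_EL1_LS64_LS64)         /* 0b0001: FEAT_LS64 */
          hcrx |= HCRX_EL2_EnALS;
  if (ls64 >= ID_AA64ISAR1_EL1_LS64_LS64_V)       /* 0b0010: FEAT_LS64_V */
          hcrx |= HCRX_EL2_EnASR;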

From: Yicong Yang <yangyicong@hisilicon.com>

Armv8.7 introduces single-copy atomic 64-byte load and store
instructions and their variants, named under FEAT_{LS64, LS64_V}.
These features are identified by ID_AA64ISAR1_EL1.LS64 and the use of
such instructions in userspace (EL0) can be trapped. In order to
support the use of the corresponding instructions in userspace:
- Make ID_AA64ISAR1_EL1.LS64 visible to userspace
- Add identifying and enabling in the cpufeature list
- Expose support for these features to userspace through HWCAP3 and
  cpuinfo

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 Documentation/arch/arm64/booting.rst    | 12 ++++++
 Documentation/arch/arm64/elf_hwcaps.rst |  6 +++
 arch/arm64/include/asm/hwcap.h          |  2 +
 arch/arm64/include/uapi/asm/hwcap.h     |  2 +
 arch/arm64/kernel/cpufeature.c          | 51 +++++++++++++++++++++++++
 arch/arm64/kernel/cpuinfo.c             |  2 +
 arch/arm64/tools/cpucaps                |  2 +
 7 files changed, 77 insertions(+)

diff --git a/Documentation/arch/arm64/booting.rst b/Documentation/arch/arm64/booting.rst
index 408d2e27b641..b540e0933dde 100644
--- a/Documentation/arch/arm64/booting.rst
+++ b/Documentation/arch/arm64/booting.rst
@@ -438,6 +438,18 @@ Before jumping into the kernel, the following conditions must be met:
 
     - HCRX_EL2.TALLINT must be initialised to 0b0.
 
+  For CPUs support for 64-byte loads and stores without status (FEAT_LS64):
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.EnALS (bit 1) must be initialised to 0b1.
+
+  For CPUs support for 64-byte loads and stores with status (FEAT_LS64_V):
+
+  - If the kernel is entered at EL1 and EL2 is present:
+
+    - HCRX_EL2.EnASR (bit 2) must be initialised to 0b1.
+
 The requirements described above for CPU mode, caches, MMUs, architected
 timers, coherency and system registers apply to all CPUs.  All CPUs must
 enter the kernel in the same exception level.  Where the values documented
diff --git a/Documentation/arch/arm64/elf_hwcaps.rst b/Documentation/arch/arm64/elf_hwcaps.rst
index f01ce14bca65..d220f18c06ce 100644
--- a/Documentation/arch/arm64/elf_hwcaps.rst
+++ b/Documentation/arch/arm64/elf_hwcaps.rst
@@ -308,6 +308,12 @@ HWCAP2_MOPS
 HWCAP2_HBC
     Functionality implied by ID_AA64ISAR2_EL1.BC == 0b0001.
 
+HWCAP3_LS64
+    Functionality implied by ID_AA64ISAR1_EL1.LS64 == 0b0001.
+
+HWCAP3_LS64_V
+    Functionality implied by ID_AA64ISAR1_EL1.LS64 == 0b0010.
+
 4. Unused AT_HWCAP bits
 -----------------------
 
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 6f044e5873a9..14de5d7ecc9c 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -141,6 +141,8 @@
 #define KERNEL_HWCAP_HBC        __khwcap2_feature(HBC)
 
 #define __khwcap3_feature(x)    (const_ilog2(HWCAP3_ ## x) + 128)
+#define KERNEL_HWCAP_LS64       __khwcap3_feature(LS64)
+#define KERNEL_HWCAP_LS64_V     __khwcap3_feature(LS64_V)
 
 /*
  * This yields a mask that user programs can use to figure out what
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index a86fca74fdcf..4cabccfd7d84 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -108,5 +108,7 @@
 /*
  * HWCAP3 flags - for AT_HWCAP3
  */
+#define HWCAP3_LS64     (1UL << 0)
+#define HWCAP3_LS64_V   (1UL << 1)
 
 #endif /* _UAPI__ASM_HWCAP_H */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 4cd925bfc513..84031be9df2a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -200,6 +200,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = {
 };
 
 static const struct arm64_ftr_bits ftr_id_aa64isar1[] = {
+        ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_LS64_SHIFT, 4, 0),
         ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_I8MM_SHIFT, 4, 0),
         ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_DGH_SHIFT, 4, 0),
         ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR1_EL1_BF16_SHIFT, 4, 0),
@@ -2168,6 +2169,38 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
 static bool enable_pseudo_nmi;
 #endif
 
+static bool has_ls64(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+        u64 ls64;
+
+        ls64 = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg),
+                                           entry->field_pos, entry->sign);
+
+        if (ls64 == ID_AA64ISAR1_EL1_LS64_NI ||
+            ls64 > ID_AA64ISAR1_EL1_LS64_LS64_ACCDATA)
+                return false;
+
+        if (entry->capability == ARM64_HAS_LS64 &&
+            ls64 >= ID_AA64ISAR1_EL1_LS64_LS64)
+                return true;
+
+        if (entry->capability == ARM64_HAS_LS64_V &&
+            ls64 >= ID_AA64ISAR1_EL1_LS64_LS64_V)
+                return true;
+
+        return false;
+}
+
+static void cpu_enable_ls64(struct arm64_cpu_capabilities const *cap)
+{
+        sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnALS, SCTLR_EL1_EnALS);
+}
+
+static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap)
+{
+        sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, SCTLR_EL1_EnASR);
+}
+
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 static int __init early_enable_pseudo_nmi(char *p)
 {
@@ -2896,6 +2929,22 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                 ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, TWED, IMP)
         },
 #endif
+        {
+                .desc = "LS64",
+                .capability = ARM64_HAS_LS64,
+                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+                .matches = has_ls64,
+                .cpu_enable = cpu_enable_ls64,
+                ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64)
+        },
+        {
+                .desc = "LS64_V",
+                .capability = ARM64_HAS_LS64_V,
+                .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+                .matches = has_ls64,
+                .cpu_enable = cpu_enable_ls64_v,
+                ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V)
+        },
         {},
 };
 
@@ -2990,6 +3039,8 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
         HWCAP_CAP(ID_AA64ISAR1_EL1, BF16, EBF16, CAP_HWCAP, KERNEL_HWCAP_EBF16),
         HWCAP_CAP(ID_AA64ISAR1_EL1, DGH, IMP, CAP_HWCAP, KERNEL_HWCAP_DGH),
         HWCAP_CAP(ID_AA64ISAR1_EL1, I8MM, IMP, CAP_HWCAP, KERNEL_HWCAP_I8MM),
+        HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64, CAP_HWCAP, KERNEL_HWCAP_LS64),
+        HWCAP_CAP(ID_AA64ISAR1_EL1, LS64, LS64_V, CAP_HWCAP, KERNEL_HWCAP_LS64_V),
         HWCAP_CAP(ID_AA64MMFR2_EL1, AT, IMP, CAP_HWCAP, KERNEL_HWCAP_USCAT),
 #ifdef CONFIG_ARM64_SVE
         HWCAP_CAP(ID_AA64PFR0_EL1, SVE, IMP, CAP_HWCAP, KERNEL_HWCAP_SVE),
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index 98fda8500535..21a18237fc4b 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -82,6 +82,8 @@ static const char *const hwcap_str[] = {
         [KERNEL_HWCAP_SB]       = "sb",
         [KERNEL_HWCAP_PACA]     = "paca",
         [KERNEL_HWCAP_PACG]     = "pacg",
+        [KERNEL_HWCAP_LS64]     = "ls64",
+        [KERNEL_HWCAP_LS64_V]   = "ls64_v",
         [KERNEL_HWCAP_DCPODP]   = "dcpodp",
         [KERNEL_HWCAP_SVE2]     = "sve2",
         [KERNEL_HWCAP_SVEAES]   = "sveaes",
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index e76b6f6475b6..445234988333 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -109,3 +109,5 @@ WORKAROUND_REPEAT_TLBI
 WORKAROUND_SPECULATIVE_AT
 WORKAROUND_SPECULATIVE_SSBS
 WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+HAS_LS64
+HAS_LS64_V
--
2.33.0
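
As a usage note, FEAT_LS64_V implies FEAT_LS64 (the ID field is an
unsigned value, so 0b0010 also satisfies the 0b0001 check), which the
HWCAP_CAP() entries above reflect: a CPU with LS64_V reports both
hwcaps. A sketch of the corresponding userspace gate, where the
AT_HWCAP3 and HWCAP3_* values come from this series and the fallback
defines exist only for building against older headers:

  #include <stdbool.h>
  #include <sys/auxv.h>

  #ifndef AT_HWCAP3
  #define AT_HWCAP3       29
  #endif
  #ifndef HWCAP3_LS64
  #define HWCAP3_LS64     (1UL << 0)
  #endif
  #ifndef HWCAP3_LS64_V
  #define HWCAP3_LS64_V   (1UL << 1)
  #endif

  /* 64-byte loads/stores without status (LD64B/ST64B). */
  static bool can_use_st64b(void)
  {
          return getauxval(AT_HWCAP3) & HWCAP3_LS64;
  }

  /* 64-byte stores with status return (ST64BV). */
  static bool can_use_st64bv(void)
  {
          return getauxval(AT_HWCAP3) & HWCAP3_LS64_V;
  }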

From: Yicong Yang <yangyicong@hisilicon.com>

The VHE code section must not exceed one page, a limit that would be
exceeded under the CONFIG_FUNCTION_ALIGNMENT_64B configuration. Avoid
this by not enabling LS64 when CONFIG_FUNCTION_ALIGNMENT_64B is
enabled.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 arch/arm64/Kconfig                 | 4 ++++
 arch/arm64/include/asm/el2_setup.h | 2 ++
 arch/arm64/kernel/cpufeature.c     | 4 ++++
 3 files changed, 10 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3829167e97fc..9c8600123457 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -116,6 +116,7 @@ config ARM64
         select ARM_GIC_V3
         select ARM_GIC_V3_ITS if PCI
         select ARM_PSCI_FW
+        select ARM64_LS64 if !FUNCTION_ALIGNMENT_64B
         select BUILDTIME_TABLE_SORT
         select CLONE_BACKWARDS
         select COMMON_CLK
@@ -2197,6 +2198,9 @@ config ARM64_TWED
         help
           Delayed Trapping of WFE (part of the ARMv8.6 Extensions)
 
+config ARM64_LS64
+        bool
+
 endmenu
 
 menu "ARMv8.7 architectural features"
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 1da6a8eb496c..6f65322e2fd0 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -28,6 +28,7 @@
         cbz     x0, .Lskip_hcrx_\@
         mov_q   x0, HCRX_HOST_FLAGS
 
+#ifdef CONFIG_ARM64_LS64
         /* Enable LS64, LS64_V if supported */
         mrs_s   x1, SYS_ID_AA64ISAR1_EL1
         ubfx    x1, x1, #ID_AA64ISAR1_EL1_LS64_SHIFT, #4
@@ -36,6 +37,7 @@
         cmp     x1, #ID_AA64ISAR1_EL1_LS64_LS64_V
         b.lt    .Lset_hcrx_\@
         orr     x0, x0, #HCRX_EL2_EnASR
+#endif
 
 .Lset_hcrx_\@:
         msr_s   SYS_HCRX_EL2, x0
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 84031be9df2a..021d0980d186 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2169,6 +2169,7 @@ static void cpu_enable_e0pd(struct arm64_cpu_capabilities const *cap)
 static bool enable_pseudo_nmi;
 #endif
 
+#ifdef CONFIG_ARM64_LS64
 static bool has_ls64(const struct arm64_cpu_capabilities *entry, int __unused)
 {
         u64 ls64;
@@ -2200,6 +2201,7 @@ static void cpu_enable_ls64_v(struct arm64_cpu_capabilities const *cap)
 {
         sysreg_clear_set(sctlr_el1, SCTLR_EL1_EnASR, SCTLR_EL1_EnASR);
 }
+#endif
 
 #ifdef CONFIG_ARM64_PSEUDO_NMI
 static int __init early_enable_pseudo_nmi(char *p)
@@ -2929,6 +2931,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                 ARM64_CPUID_FIELDS(ID_AA64MMFR1_EL1, TWED, IMP)
         },
 #endif
+#ifdef CONFIG_ARM64_LS64
         {
                 .desc = "LS64",
                 .capability = ARM64_HAS_LS64,
@@ -2945,6 +2948,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
                 .cpu_enable = cpu_enable_ls64_v,
                 ARM64_CPUID_FIELDS(ID_AA64ISAR1_EL1, LS64, LS64_V)
         },
+#endif
         {},
 };
--
2.33.0

From: Yicong Yang <yangyicong@hisilicon.com>

Use of the FEAT_{LS64, LS64_V} instructions in a guest is also
controlled by HCRX_EL2.{EnALS, EnASR}. Enable them if the guest has
the related features.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index c2a20617719b..2c14040ff832 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -212,6 +212,12 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
                         hcrx &= ~clr;
                 }
 
+                if (cpus_have_final_cap(ARM64_HAS_LS64))
+                        hcrx |= HCRX_EL2_EnALS;
+
+                if (cpus_have_final_cap(ARM64_HAS_LS64_V))
+                        hcrx |= HCRX_EL2_EnASR;
+
                 write_sysreg_s(hcrx, SYS_HCRX_EL2);
         }
--
2.33.0

From: Vincenzo Mezzela <vincenzo.mezzela@gmail.com>

commit 97b1974547c517d8b5cba1fa0cc7213399ff0d2c upstream.

Refactor do-while loops to move the break condition within the loop's
scope. This modification is in preparation to move the declaration of
the device_node directly within the loop and take advantage of the
automatic cleanup feature provided by the __free(device_node)
attribute.

Acked-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Vincenzo Mezzela <vincenzo.mezzela@gmail.com>
Link: https://lore.kernel.org/r/20240607163350.392971-2-vincenzo.mezzela@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 drivers/base/arch_topology.c | 107 ++++++++++++++++++-----------------
 1 file changed, 55 insertions(+), 52 deletions(-)

diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c
index 1b816703c2a0..764c25b7f204 100644
--- a/drivers/base/arch_topology.c
+++ b/drivers/base/arch_topology.c
@@ -536,23 +536,24 @@ static int __init parse_core(struct device_node *core, int package_id,
         do {
                 snprintf(name, sizeof(name), "thread%d", i);
                 t = of_get_child_by_name(core, name);
-                if (t) {
-                        leaf = false;
-                        cpu = get_cpu_for_node(t);
-                        if (cpu >= 0) {
-                                cpu_topology[cpu].package_id = package_id;
-                                cpu_topology[cpu].cluster_id = cluster_id;
-                                cpu_topology[cpu].core_id = core_id;
-                                cpu_topology[cpu].thread_id = i;
-                        } else if (cpu != -ENODEV) {
-                                pr_err("%pOF: Can't get CPU for thread\n", t);
-                                of_node_put(t);
-                                return -EINVAL;
-                        }
+                if (!t)
+                        break;
+
+                leaf = false;
+                cpu = get_cpu_for_node(t);
+                if (cpu >= 0) {
+                        cpu_topology[cpu].package_id = package_id;
+                        cpu_topology[cpu].cluster_id = cluster_id;
+                        cpu_topology[cpu].core_id = core_id;
+                        cpu_topology[cpu].thread_id = i;
+                } else if (cpu != -ENODEV) {
+                        pr_err("%pOF: Can't get CPU for thread\n", t);
                         of_node_put(t);
+                        return -EINVAL;
                 }
+                of_node_put(t);
                 i++;
-        } while (t);
+        } while (1);
 
         max_smt_thread_num = max_t(unsigned int, max_smt_thread_num, i);
@@ -594,48 +595,48 @@
         do {
                 snprintf(name, sizeof(name), "cluster%d", i);
                 c = of_get_child_by_name(cluster, name);
-                if (c) {
-                        leaf = false;
-                        ret = parse_cluster(c, package_id, i, depth + 1);
-                        if (depth > 0)
-                                pr_warn("Topology for clusters of clusters not yet supported\n");
-                        of_node_put(c);
-                        if (ret != 0)
-                                return ret;
-                }
+                if (!c)
+                        break;
+
+                leaf = false;
+                ret = parse_cluster(c, package_id, i, depth + 1);
+                if (depth > 0)
+                        pr_warn("Topology for clusters of clusters not yet supported\n");
+                of_node_put(c);
+                if (ret != 0)
+                        return ret;
                 i++;
-        } while (c);
+        } while (1);
 
         /* Now check for cores */
         i = 0;
         do {
                 snprintf(name, sizeof(name), "core%d", i);
                 c = of_get_child_by_name(cluster, name);
-                if (c) {
-                        has_cores = true;
-
-                        if (depth == 0) {
-                                pr_err("%pOF: cpu-map children should be clusters\n",
-                                       c);
-                                of_node_put(c);
-                                return -EINVAL;
-                        }
+                if (!c)
+                        break;
 
-                        if (leaf) {
-                                ret = parse_core(c, package_id, cluster_id,
-                                                 core_id++);
-                        } else {
-                                pr_err("%pOF: Non-leaf cluster with core %s\n",
-                                       cluster, name);
-                                ret = -EINVAL;
-                        }
+                has_cores = true;
+                if (depth == 0) {
+                        pr_err("%pOF: cpu-map children should be clusters\n", c);
                         of_node_put(c);
-                        if (ret != 0)
-                                return ret;
+                        return -EINVAL;
                 }
+
+                if (leaf) {
+                        ret = parse_core(c, package_id, cluster_id, core_id++);
+                } else {
+                        pr_err("%pOF: Non-leaf cluster with core %s\n",
+                               cluster, name);
+                        ret = -EINVAL;
+                }
+
+                of_node_put(c);
+                if (ret != 0)
+                        return ret;
                 i++;
-        } while (c);
+        } while (1);
 
         if (leaf && !has_cores)
                 pr_warn("%pOF: empty cluster\n", cluster);
@@ -653,15 +654,17 @@ static int __init parse_socket(struct device_node *socket)
         do {
                 snprintf(name, sizeof(name), "socket%d", package_id);
                 c = of_get_child_by_name(socket, name);
-                if (c) {
-                        has_socket = true;
-                        ret = parse_cluster(c, package_id, -1, 0);
-                        of_node_put(c);
-                        if (ret != 0)
-                                return ret;
-                }
+                if (!c)
+                        break;
+
+                has_socket = true;
+                ret = parse_cluster(c, package_id, -1, 0);
+                of_node_put(c);
+                if (ret != 0)
+                        return ret;
+
                 package_id++;
-        } while (c);
+        } while (1);
 
         if (!has_socket)
                 ret = parse_cluster(socket, 0, -1, 0);
--
2.33.0
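
As the commit message notes, this refactor prepares for declaring the
child node inside the loop so its reference can be dropped
automatically. A sketch of where the parse_core() thread loop could end
up, assuming the scope-based cleanup helper __free(device_node) from
<linux/of.h>/<linux/cleanup.h>, which is not part of this patch:

  do {
          snprintf(name, sizeof(name), "thread%d", i);
          struct device_node *t __free(device_node) =
                  of_get_child_by_name(core, name);
          if (!t)
                  break;

          leaf = false;
          cpu = get_cpu_for_node(t);
          if (cpu >= 0) {
                  cpu_topology[cpu].package_id = package_id;
                  cpu_topology[cpu].cluster_id = cluster_id;
                  cpu_topology[cpu].core_id = core_id;
                  cpu_topology[cpu].thread_id = i;
          } else if (cpu != -ENODEV) {
                  pr_err("%pOF: Can't get CPU for thread\n", t);
                  return -EINVAL;   /* __free() drops the reference */
          }
          i++;
  } while (1);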

From: Yicong Yang <yangyicong@hisilicon.com>

Architectures using perf events for hard lockup detection need to
convert watchdog_thresh to the event's period. Some architectures, for
example arm64, perform this conversion using the CPU's maximum
frequency, which is acquired from cpufreq. However, by the time the
lockup detector is initialized the cpufreq driver may not be
initialized yet, so the watchdog is launched with an inaccurate period.

Provide hardlockup_detector_perf_adjust_period() to allow adjusting the
event period, so an architecture can update it with a more accurate
value once cpufreq is initialized.

Fixes: 94946f9eaac1 ("arm64: add hw_nmi_get_sample_period for preparation of lockup detector")
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 include/linux/nmi.h    |  2 ++
 kernel/watchdog_perf.c | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 43dd3a79fdf2..95805d31d28d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs);
 extern void hardlockup_detector_perf_stop(void);
 extern void hardlockup_detector_perf_restart(void);
 extern void hardlockup_detector_perf_cleanup(void);
+extern void hardlockup_detector_perf_adjust_period(int cpu, u64 period);
 #else
 static inline void hardlockup_detector_perf_stop(void) { }
 static inline void hardlockup_detector_perf_restart(void) { }
 static inline void hardlockup_detector_perf_cleanup(void) { }
+static inline void hardlockup_detector_perf_adjust_period(int cpu, u64 period) { }
 #endif
 
 void watchdog_hardlockup_stop(void);
diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c
index 0052afe18b7f..989d30f80951 100644
--- a/kernel/watchdog_perf.c
+++ b/kernel/watchdog_perf.c
@@ -198,6 +198,29 @@ void hardlockup_detector_perf_cleanup(void)
         cpumask_clear(&dead_events_mask);
 }
 
+/**
+ * hardlockup_detector_perf_adjust_period - Adjust the event period due
+ *                                          to cpu frequency change
+ * @cpu: The CPU whose event period will be adjusted
+ * @period: The target period to be set
+ */
+void hardlockup_detector_perf_adjust_period(int cpu, u64 period)
+{
+        struct perf_event *event = per_cpu(watchdog_ev, cpu);
+
+        if (!(watchdog_enabled & WATCHDOG_HARDLOCKUP_ENABLED))
+                return;
+
+        if (!event)
+                return;
+
+        if (event->attr.sample_period == period)
+                return;
+
+        if (perf_event_period(event, period))
+                pr_err("failed to change period to %llu\n", period);
+}
+
 /**
  * hardlockup_detector_perf_stop - Globally stop watchdog events
  *
--
2.33.0
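
For the arm64 case this series cares about, the period being adjusted
is simply cycles per watchdog window: watchdog_thresh (seconds)
multiplied by the maximum CPU frequency in Hz. A small illustrative
helper (the function name is made up; the real arm64 code uses
cpufreq_get_hw_max_freq(), which reports kHz, as the next patch shows):

  /*
   * Example: watchdog_thresh = 10 s on a 2.5 GHz CPU gives
   * 10 * 2,500,000,000 = 25,000,000,000 cycles per period.
   */
  static u64 example_watchdog_period(unsigned int thresh_secs, u64 max_freq_khz)
  {
          return (u64)thresh_secs * max_freq_khz * 1000;
  }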

From: Yicong Yang <yangyicong@hisilicon.com>

arm64 depends on the cpufreq driver to obtain the maximum CPU frequency
used to convert watchdog_thresh to a perf event period. cpufreq drivers
such as cppc_cpufreq are initialized late, after the hard lockup
detector has already been initialized, so a safe fallback frequency is
used and the resulting period is inaccurate. Use a cpufreq notifier to
adjust the event's period to a more accurate one once the cpufreq
policy is created.

Fixes: 94946f9eaac1 ("arm64: add hw_nmi_get_sample_period for preparation of lockup detector")
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Hongye Lin <linhongye@h-partners.com>
Signed-off-by: Qi Xi <xiqi2@huawei.com>
---
 arch/arm64/kernel/watchdog_hld.c | 58 ++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/arch/arm64/kernel/watchdog_hld.c b/arch/arm64/kernel/watchdog_hld.c
index 817f0b7f6f92..ae0da38214fa 100644
--- a/arch/arm64/kernel/watchdog_hld.c
+++ b/arch/arm64/kernel/watchdog_hld.c
@@ -35,3 +35,61 @@ bool __init arch_perf_nmi_is_available(void)
          */
         return arm_pmu_irq_is_nmi();
 }
+
+static int watchdog_perf_update_period(void *data)
+{
+        int cpu = raw_smp_processor_id();
+        u64 max_cpu_freq, new_period;
+
+        max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
+        if (!max_cpu_freq)
+                return 0;
+
+        new_period = watchdog_thresh * max_cpu_freq;
+        hardlockup_detector_perf_adjust_period(cpu, new_period);
+
+        return 0;
+}
+
+static int watchdog_freq_notifier_callback(struct notifier_block *nb,
+                                           unsigned long val, void *data)
+{
+        struct cpufreq_policy *policy = data;
+        int cpu;
+
+        if (val != CPUFREQ_CREATE_POLICY)
+                return NOTIFY_DONE;
+
+        /*
+         * Let each online CPU related to the policy update the period by their
+         * own. This will serialize with the framework on start/stop the lockup
+         * detector (softlockup_{start,stop}_all) and avoid potential race
+         * condition. Otherwise we may have below theoretical race condition:
+         * (core 0/1 share the same policy)
+         * [core 0]                           [core 1]
+         * hardlockup_detector_event_create()
+         *   hw_nmi_get_sample_period()
+         * (cpufreq registered, notifier callback invoked)
+         *                                    watchdog_freq_notifier_callback()
+         *                                      watchdog_perf_update_period()
+         *                                      (since core 1's event's not yet
+         *                                       created, the period is not set)
+         * perf_event_create_kernel_counter()
+         *   (event's period is SAFE_MAX_CPU_FREQ)
+         */
+        for_each_cpu(cpu, policy->cpus)
+                smp_call_on_cpu(cpu, watchdog_perf_update_period, NULL, false);
+
+        return NOTIFY_DONE;
+}
+
+static struct notifier_block watchdog_freq_notifier = {
+        .notifier_call = watchdog_freq_notifier_callback,
+};
+
+static int __init init_watchdog_freq_notifier(void)
+{
+        return cpufreq_register_notifier(&watchdog_freq_notifier,
+                                         CPUFREQ_POLICY_NOTIFIER);
+}
+core_initcall(init_watchdog_freq_notifier);
--
2.33.0