From: Will Deacon <will@kernel.org>
mainline inclusion
from mainline-v5.11-rc1
commit 364a5a8ae8dc2dd457e2fefb4da3f3fd2c0ba8b1
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7LP2Z
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Armv8.3 introduced the LDAPR instruction, which provides weaker memory ordering semantics than LDAR (RCpc vs RCsc). Generally we opt for RCsc semantics when implementing the Linux memory model, but LDAPR can be used as a useful alternative to dependency ordering, particularly when the compiler is capable of breaking those dependencies.
Since LDAPR is not available on all CPUs, add a cpufeature to detect it at runtime and allow the instruction to be used with alternative code patching.
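For example, the capability can gate boot-time patching of an RCsc
load-acquire into its RCpc form. The helper below is an illustrative
sketch only (load_acquire_u32 is not part of this patch; the real user
is the READ_ONCE() rework later in this series):

	#include <asm/alternative.h>
	#include <asm/cpucaps.h>

	/* Sketch: a 32-bit load-acquire, patched to RCpc on capable CPUs */
	static inline u32 load_acquire_u32(const u32 *addr)
	{
		u32 val;

		asm volatile(ALTERNATIVE(
			/* Default: RCsc load-acquire, available everywhere */
			"ldar	%w0, [%1]",
			/* Patched in once ARM64_HAS_LDAPR is detected */
			".arch_extension rcpc\n"
			"ldapr	%w0, [%1]",
			ARM64_HAS_LDAPR)
			: "=r" (val) : "r" (addr) : "memory");
		return val;
	}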
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
---
 arch/arm64/Kconfig               |  5 ++++-
 arch/arm64/include/asm/cpucaps.h |  1 +
 arch/arm64/kernel/cpufeature.c   | 10 ++++++++++
 3 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 75da7a29aa19..e2c2219d5ec4 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -510,7 +510,7 @@ config ARM64_ERRATUM_1742098
 	  are only present if the cryptography extensions are present. All
 	  software should have a fallback implementation for CPUs that don't
 	  implement the cryptography extensions.
-
+
 	  If unsure, say Y.

 config ARM64_ERRATUM_845719
@@ -1767,6 +1767,9 @@ config ARM64_AMU_EXTN
 config AS_HAS_ARMV8_4
 	def_bool $(cc-option,-Wa$(comma)-march=armv8.4-a)

+config AS_HAS_LDAPR
+	def_bool $(as-instr,.arch_extension rcpc)
+
 config ARM64_TLB_RANGE
 	bool "Enable support for tlbi range feature"
 	default y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index b43f8e374114..c7fe08dce205 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -75,6 +75,7 @@
 #define ARM64_WORKAROUND_1742098		67
 #define ARM64_HAS_WFXT				68
 #define ARM64_WORKAROUND_HISILICON_ERRATUM_162100125	69
+#define ARM64_HAS_LDAPR				70

 #define ARM64_NCAPS				80

diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 57631fa553f6..d92ed6383f1a 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2063,6 +2063,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.min_field_value = 1,
 		.matches = has_cpuid_feature,
 	},
+	{
+		.desc = "RCpc load-acquire (LDAPR)",
+		.capability = ARM64_HAS_LDAPR,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.sys_reg = SYS_ID_AA64ISAR1_EL1,
+		.sign = FTR_UNSIGNED,
+		.field_pos = ID_AA64ISAR1_LRCPC_SHIFT,
+		.matches = has_cpuid_feature,
+		.min_field_value = 1,
+	},
 	{
 		.desc = "TLB range maintenance instructions",
 		.capability = ARM64_HAS_TLB_RANGE,
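Once cpufeature detection has finalised the system capabilities, the
new capability behaves like any other system-wide feature; for instance
(hypothetical usage, not added by this patch):

	if (cpus_have_const_cap(ARM64_HAS_LDAPR))
		pr_info("RCpc load-acquire (LDAPR) supported\n");

Note that the userspace-visible hwcap for LRCPC is reported separately
and is unaffected by this kernel-internal capability.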
From: Will Deacon <will@kernel.org>
mainline inclusion
from mainline-v5.11-rc1
commit 5af76fb4228701bd5377880b09b0216a5fd800ef
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7LP2Z
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
In preparation for patching the internals of READ_ONCE() itself, replace its usage on the alternatives patching path with a volatile variable instead.
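For context, the generic definition being bypassed boils down to a
volatile access (quoted approximately from asm-generic/rwonce.h as it
stood around v5.11):

	#define __READ_ONCE(x)	\
		(*(const volatile __unqual_scalar_typeof(x) *)&(x))

Making the flag itself volatile therefore preserves the single,
untorn-load behaviour on which the secondary CPUs' polling loop relies,
without routing through the macro that is about to be patched.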
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
---
 arch/arm64/kernel/alternative.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c
index 5f8e4c2df53c..34979a766fa5 100644
--- a/arch/arm64/kernel/alternative.c
+++ b/arch/arm64/kernel/alternative.c
@@ -21,7 +21,8 @@
 #define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
 #define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)

-static int all_alternatives_applied;
+/* Volatile, as we may be patching the guts of READ_ONCE() */
+static volatile int all_alternatives_applied;

 static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);

@@ -205,7 +206,7 @@ static int __apply_alternatives_multi_stop(void *unused)

 	/* We always have a CPU 0 at this point (__init) */
 	if (smp_processor_id()) {
-		while (!READ_ONCE(all_alternatives_applied))
+		while (!all_alternatives_applied)
 			cpu_relax();
 		isb();
 	} else {
@@ -217,7 +218,7 @@ static int __apply_alternatives_multi_stop(void *unused)
 		BUG_ON(all_alternatives_applied);
 		__apply_alternatives(&region, false, remaining_capabilities);
 		/* Barriers provided by the cache flushing */
-		WRITE_ONCE(all_alternatives_applied, 1);
+		all_alternatives_applied = 1;
 	}

 	return 0;
From: Will Deacon <will@kernel.org>
mainline inclusion
from mainline-v5.11-rc1
commit e35123d83ee35c31f64ecfbdfabbe5142d3025b8
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7LP2Z
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
When building with LTO, there is an increased risk of the compiler converting an address dependency headed by a READ_ONCE() invocation into a control dependency and consequently allowing for harmful reordering by the CPU.
Ensure that such transformations are harmless by overriding the generic READ_ONCE() definition with one that provides acquire semantics when building with LTO.
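As a hypothetical illustration (the identifiers below are invented for
the example, not taken from kernel code), consider:

	extern int x;
	extern int *gp;		/* in practice only ever set to &x */

	int reader(void)
	{
		int *p = READ_ONCE(gp);

		/*
		 * The address dependency from the load of gp to the
		 * load of *p is ordered by the arm64 CPU. But if LTO
		 * lets the compiler prove that gp always equals &x, it
		 * may rewrite this as "return x;", destroying the
		 * dependency and allowing the two loads to be
		 * reordered.
		 */
		return *p;
	}

With the acquire-based READ_ONCE() below, the ordering no longer
depends on the compiler preserving the dependency.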
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
---
 arch/arm64/include/asm/rwonce.h   | 73 +++++++++++++++++++++++++++++++
 arch/arm64/kernel/vdso/Makefile   |  2 +-
 arch/arm64/kernel/vdso32/Makefile |  2 +-
 arch/arm64/kernel/vmlinux.lds.S   |  2 +-
 4 files changed, 76 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/include/asm/rwonce.h
diff --git a/arch/arm64/include/asm/rwonce.h b/arch/arm64/include/asm/rwonce.h
new file mode 100644
index 000000000000..1bce62fa908a
--- /dev/null
+++ b/arch/arm64/include/asm/rwonce.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+#ifndef __ASM_RWONCE_H
+#define __ASM_RWONCE_H
+
+#ifdef CONFIG_LTO
+
+#include <linux/compiler_types.h>
+#include <asm/alternative-macros.h>
+
+#ifndef BUILD_VDSO
+
+#ifdef CONFIG_AS_HAS_LDAPR
+#define __LOAD_RCPC(sfx, regs...)					\
+	ALTERNATIVE(							\
+		"ldar" #sfx "\t" #regs,					\
+		".arch_extension rcpc\n"				\
+		"ldapr" #sfx "\t" #regs,				\
+		ARM64_HAS_LDAPR)
+#else
+#define __LOAD_RCPC(sfx, regs...)	"ldar" #sfx "\t" #regs
+#endif /* CONFIG_AS_HAS_LDAPR */
+
+/*
+ * When building with LTO, there is an increased risk of the compiler
+ * converting an address dependency headed by a READ_ONCE() invocation
+ * into a control dependency and consequently allowing for harmful
+ * reordering by the CPU.
+ *
+ * Ensure that such transformations are harmless by overriding the generic
+ * READ_ONCE() definition with one that provides RCpc acquire semantics
+ * when building with LTO.
+ */
+#define __READ_ONCE(x)							\
+({									\
+	typeof(&(x)) __x = &(x);					\
+	int atomic = 1;							\
+	union { __unqual_scalar_typeof(*__x) __val; char __c[1]; } __u;	\
+	switch (sizeof(x)) {						\
+	case 1:								\
+		asm volatile(__LOAD_RCPC(b, %w0, %1)			\
+			: "=r" (*(__u8 *)__u.__c)			\
+			: "Q" (*__x) : "memory");			\
+		break;							\
+	case 2:								\
+		asm volatile(__LOAD_RCPC(h, %w0, %1)			\
+			: "=r" (*(__u16 *)__u.__c)			\
+			: "Q" (*__x) : "memory");			\
+		break;							\
+	case 4:								\
+		asm volatile(__LOAD_RCPC(, %w0, %1)			\
+			: "=r" (*(__u32 *)__u.__c)			\
+			: "Q" (*__x) : "memory");			\
+		break;							\
+	case 8:								\
+		asm volatile(__LOAD_RCPC(, %0, %1)			\
+			: "=r" (*(__u64 *)__u.__c)			\
+			: "Q" (*__x) : "memory");			\
+		break;							\
+	default:							\
+		atomic = 0;						\
+	}								\
+	atomic ? (typeof(*__x))__u.__val : (*(volatile typeof(__x))__x);\
+})
+
+#endif	/* !BUILD_VDSO */
+#endif	/* CONFIG_LTO */
+
+#include <asm-generic/rwonce.h>
+
+#endif	/* __ASM_RWONCE_H */
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index d65f52264aba..a8f8e409e2bf 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -28,7 +28,7 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \
 	     $(btildflags-y) -T

 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
-ccflags-y += -DDISABLE_BRANCH_PROFILING
+ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO

 CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
 KASAN_SANITIZE			:= n
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index abad38c576e1..57b28f1e5d97 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -37,7 +37,7 @@ cc32-as-instr = $(call try-run,\
 # As a result we set our own flags here.

 # KBUILD_CPPFLAGS and NOSTDINC_FLAGS from top-level Makefile
-VDSO_CPPFLAGS := -D__KERNEL__ -nostdinc
+VDSO_CPPFLAGS := -DBUILD_VDSO -D__KERNEL__ -nostdinc
 VDSO_CPPFLAGS += -isystem $(shell $(CC_COMPAT) -print-file-name=include 2>/dev/null)
 VDSO_CPPFLAGS += $(LINUXINCLUDE)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 71f4b5f24d15..31fdb55cd4e2 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -199,7 +199,7 @@ SECTIONS
 		INIT_CALLS
 		CON_INITCALL
 		INIT_RAM_FS
-		*(.init.rodata.* .init.bss)	/* from the EFI stub */
+		*(.init.altinstructions .init.rodata.* .init.bss)	/* from the EFI stub */
 	}
 	.exit.data : {
 		EXIT_DATA
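To make the net effect concrete: under CONFIG_LTO and
CONFIG_AS_HAS_LDAPR, a 4-byte READ_ONCE() now expands to roughly the
following (a sketch of the macro expansion, not actual compiler
output; val and addr are placeholder names):

	u32 val;

	asm volatile(ALTERNATIVE(
		"ldar	%w0, %1",	/* default: RCsc acquire */
		".arch_extension rcpc\n"
		"ldapr	%w0, %1",	/* live after boot-time patching */
		ARM64_HAS_LDAPR)
		: "=r" (val) : "Q" (*(u32 *)addr) : "memory");

Without CONFIG_LTO, or when building the vDSO (hence the -DBUILD_VDSO
additions above), the plain-load definition from asm-generic/rwonce.h
is used unchanged.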