The lsmem command suffers from degraded performance on the ARM architecture. To address this, unsafe_copy_to_user(), unsafe_put_user(), and unsafe_get_user() are introduced so that the usercopy checks can be bypassed. This optimizes the data exchange between user space and kernel space, thereby improving performance.
v1->v2: -- Some commit message cleanup on patch 5.
Linus Torvalds (2): arm64: start using 'asm goto' for get_user() when available arm64: start using 'asm goto' for put_user()
Mark Rutland (2): init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND arm64: uaccess: correct thinko in __get_mem_asm()
Ze Zuo (1): openeuler_defconfig: remove GCC_ASM_GOTO_OUTPUT_WORKAROUND for x86 and arm64
arch/arm64/Kconfig | 4 + arch/arm64/configs/openeuler_defconfig | 1 - arch/arm64/include/asm/asm-extable.h | 3 + arch/arm64/include/asm/uaccess.h | 169 ++++++++++++++++++------- arch/arm64/kernel/mte.c | 12 +- arch/x86/configs/openeuler_defconfig | 1 - include/linux/compiler-gcc.h | 20 --- init/Kconfig | 22 ++-- 8 files changed, 147 insertions(+), 85 deletions(-)
From: Linus Torvalds torvalds@linux-foundation.org
mainline inclusion from mainline-v6.11-rc1 commit 86a6a68febfcf57b5c2a7ba33e6d6f1f78ca5197 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB81V6
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
This generates noticeably better code with compilers that support it, since we don't need to test the error register etc, the exception just jumps to the error handling directly.
Note that this also marks SW_TTBR0_PAN incompatible with KCSAN support, since KCSAN wants to save and restore the user access state.
KCSAN and SW_TTBR0_PAN were probably always incompatible, but it became obvious only when implementing the unsafe user access functions. At that point the default empty user_access_save/restore() functions weren't provided by the default fallback functions.
Note that, according to the description above, KCSAN must be disabled whenever ARM64_SW_TTBR0_PAN is enabled once this patch is applied. However, adding that dependency causes a circular (recursive) Kconfig dependency error at build time, so the dependency is temporarily removed in this backport.
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org
Conflicts: arch/arm64/Kconfig [Resolve conflicts due to recursive dependency detected in Kconfig] Signed-off-by: Ze Zuo zuoze1@huawei.com --- arch/arm64/Kconfig | 4 + arch/arm64/include/asm/uaccess.h | 121 +++++++++++++++++++++++++------ arch/arm64/kernel/mte.c | 12 ++- 3 files changed, 107 insertions(+), 30 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c11aa218c08e..10200f20e47c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1758,6 +1758,10 @@ config RODATA_FULL_DEFAULT_ENABLED
config ARM64_SW_TTBR0_PAN bool "Emulate Privileged Access Never using TTBR0_EL1 switching" + # KCSAN and SW_TTBR0_PAN were probably always incompatible, but + # it became obvious only when implementing the unsafe user access + # functions. So don't use it together with KCSAN. + # depends on !KCSAN help Enabling this option prevents the kernel from accessing user-space memory directly by pointing TTBR0_EL1 to a reserved diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 07c1aeaeb094..407ce2f6ef24 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -184,29 +184,40 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) * The "__xxx_error" versions set the third argument to -EFAULT if an error * occurs, and leave it unchanged on success. */ -#define __get_mem_asm(load, reg, x, addr, err, type) \ +#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +#define __get_mem_asm(load, reg, x, addr, label, type) \ + asm_goto_output( \ + "1: " load " " reg "0, [%1]\n" \ + _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + : "=r" (x) \ + : "r" (addr) : : label) +#else +#define __get_mem_asm(load, reg, x, addr, label, type) do { \ + int __gma_err = 0; \ asm volatile( \ "1: " load " " reg "1, [%2]\n" \ "2:\n" \ _ASM_EXTABLE_##type##ACCESS_ERR_ZERO(1b, 2b, %w0, %w1) \ - : "+r" (err), "=r" (x) \ - : "r" (addr)) + : "+r" (__gma_err), "=r" (x) \ + : "r" (addr)); \ + if (__gma_err) goto label; } while (0) +#endif
-#define __raw_get_mem(ldr, x, ptr, err, type) \ +#define __raw_get_mem(ldr, x, ptr, label, type) \ do { \ unsigned long __gu_val; \ switch (sizeof(*(ptr))) { \ case 1: \ - __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "b", "%w", __gu_val, (ptr), label, type); \ break; \ case 2: \ - __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr "h", "%w", __gu_val, (ptr), label, type); \ break; \ case 4: \ - __get_mem_asm(ldr, "%w", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%w", __gu_val, (ptr), label, type); \ break; \ case 8: \ - __get_mem_asm(ldr, "%x", __gu_val, (ptr), (err), type); \ + __get_mem_asm(ldr, "%x", __gu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -219,27 +230,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_get_user(x, ptr, err) \ +#define __raw_get_user(x, ptr, label) \ do { \ __typeof__(*(ptr)) __user *__rgu_ptr = (ptr); \ __typeof__(x) __rgu_val; \ __chk_user_ptr(ptr); \ - \ - uaccess_ttbr0_enable(); \ - __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ - \ - (x) = __rgu_val; \ + do { \ + __label__ __rgu_failed; \ + uaccess_ttbr0_enable(); \ + __raw_get_mem("ldtr", __rgu_val, __rgu_ptr, __rgu_failed, U); \ + uaccess_ttbr0_disable(); \ + (x) = __rgu_val; \ + break; \ + __rgu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0)
#define __get_user_error(x, ptr, err) \ do { \ + __label__ __gu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_get_user((x), __p, (err)); \ + __raw_get_user((x), __p, __gu_failed); \ } else { \ + __gu_failed: \ (x) = (__force __typeof__(x))0; (err) = -EFAULT; \ } \ } while (0) @@ -262,15 +280,18 @@ do { \ do { \ __typeof__(dst) __gkn_dst = (dst); \ __typeof__(src) __gkn_src = (src); \ - int __gkn_err = 0; \ - \ - __mte_enable_tco_async(); \ - __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ - (__force type *)(__gkn_src), __gkn_err, K); \ - __mte_disable_tco_async(); \ + do { \ + __label__ __gkn_label; \ \ - if (unlikely(__gkn_err)) \ + __mte_enable_tco_async(); \ + __raw_get_mem("ldr", *((type *)(__gkn_dst)), \ + (__force type *)(__gkn_src), __gkn_label, K); \ + __mte_disable_tco_async(); \ + break; \ + __gkn_label: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while (0)
#define __put_mem_asm(store, reg, x, addr, err, type) \ @@ -381,6 +402,60 @@ extern unsigned long __must_check __arch_copy_to_user(void __user *to, const voi __actu_ret; \ })
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) +{ + if (unlikely(!access_ok(ptr,len))) + return 0; + uaccess_ttbr0_enable(); + return 1; +} +#define user_access_begin(a,b) user_access_begin(a,b) +#define user_access_end() uaccess_ttbr0_disable() + +/* + * The arm64 inline asms should learn abut asm goto, and we should + * teach user_access_begin() about address masking. + */ +#define unsafe_put_user(x, ptr, label) do { \ + int __upu_err = 0; \ + __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), __upu_err, U); \ + if (__upu_err) goto label; \ +} while (0) + +#define unsafe_get_user(x, ptr, label) \ + __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U) + +/* + * KCSAN uses these to save and restore ttbr state. + * We do not support KCSAN with ARM64_SW_TTBR0_PAN, so + * they are no-ops. + */ +static inline unsigned long user_access_save(void) { return 0; } +static inline void user_access_restore(unsigned long enabled) { } + +/* + * We want the unsafe accessors to always be inlined and use + * the error labels - thus the macro games. + */ +#define unsafe_copy_loop(dst, src, len, type, label) \ + while (len >= sizeof(type)) { \ + unsafe_put_user(*(type *)(src),(type __user *)(dst),label); \ + dst += sizeof(type); \ + src += sizeof(type); \ + len -= sizeof(type); \ + } + +#define unsafe_copy_to_user(_dst,_src,_len,label) \ +do { \ + char __user *__ucu_dst = (_dst); \ + const char *__ucu_src = (_src); \ + size_t __ucu_len = (_len); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u64, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u32, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u16, label); \ + unsafe_copy_loop(__ucu_dst, __ucu_src, __ucu_len, u8, label); \ +} while (0) + #define INLINE_COPY_TO_USER #define INLINE_COPY_FROM_USER
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index cea96ee75d22..c4999d2d7ce8 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -582,12 +582,9 @@ subsys_initcall(register_mte_tcf_preferred_sysctl); size_t mte_probe_user_range(const char __user *uaddr, size_t size) { const char __user *end = uaddr + size; - int err = 0; char val;
- __raw_get_user(val, uaddr, err); - if (err) - return size; + __raw_get_user(val, uaddr, efault);
uaddr = PTR_ALIGN(uaddr, MTE_GRANULE_SIZE); while (uaddr < end) { @@ -595,12 +592,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size) * A read is sufficient for mte, the caller should have probed * for the pte write permission if required. */ - __raw_get_user(val, uaddr, err); - if (err) - return end - uaddr; + __raw_get_user(val, uaddr, efault); uaddr += MTE_GRANULE_SIZE; } (void)val;
return 0; + +efault: + return end - uaddr; }
From: Linus Torvalds torvalds@linux-foundation.org
mainline inclusion from mainline-v6.11-rc1 commit 7fd298d4b39d8d5fe99d56811a7ed78c7a5377d5 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB81V6
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
This generates noticeably better code since we don't need to test the error register etc, the exception just jumps to the error handling directly.
Unlike get_user(), there's no need to worry about old compilers. All supported compilers support the regular non-output 'asm goto', as pointed out by Nathan Chancellor.
Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Ze Zuo zuoze1@huawei.com --- arch/arm64/include/asm/asm-extable.h | 3 ++ arch/arm64/include/asm/uaccess.h | 70 ++++++++++++++-------------- 2 files changed, 39 insertions(+), 34 deletions(-)
diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h index 9c0664fe1eb1..b3adf0db6622 100644 --- a/arch/arm64/include/asm/asm-extable.h +++ b/arch/arm64/include/asm/asm-extable.h @@ -133,6 +133,9 @@ #define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err) \ _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
+#define _ASM_EXTABLE_KACCESS(insn, fixup) \ + _ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr) + #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr) \ __DEFINE_ASM_GPR_NUMS \ __ASM_EXTABLE_RAW(#insn, #fixup, \ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 407ce2f6ef24..5bdd0b877500 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -294,29 +294,28 @@ do { \ } while (0); \ } while (0)
-#define __put_mem_asm(store, reg, x, addr, err, type) \ - asm volatile( \ - "1: " store " " reg "1, [%2]\n" \ +#define __put_mem_asm(store, reg, x, addr, label, type) \ + asm goto( \ + "1: " store " " reg "0, [%1]\n" \ "2:\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0) \ - : "+r" (err) \ - : "rZ" (x), "r" (addr)) + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ + : : "rZ" (x), "r" (addr) : : label)
-#define __raw_put_mem(str, x, ptr, err, type) \ +#define __raw_put_mem(str, x, ptr, label, type) \ do { \ __typeof__(*(ptr)) __pu_val = (x); \ switch (sizeof(*(ptr))) { \ case 1: \ - __put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type); \ break; \ case 2: \ - __put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type); \ break; \ case 4: \ - __put_mem_asm(str, "%w", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%w", __pu_val, (ptr), label, type); \ break; \ case 8: \ - __put_mem_asm(str, "%x", __pu_val, (ptr), (err), type); \ + __put_mem_asm(str, "%x", __pu_val, (ptr), label, type); \ break; \ default: \ BUILD_BUG(); \ @@ -328,25 +327,34 @@ do { \ * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions, * we must evaluate these outside of the critical section. */ -#define __raw_put_user(x, ptr, err) \ +#define __raw_put_user(x, ptr, label) \ do { \ + __label__ __rpu_failed; \ __typeof__(*(ptr)) __user *__rpu_ptr = (ptr); \ __typeof__(*(ptr)) __rpu_val = (x); \ __chk_user_ptr(__rpu_ptr); \ \ - uaccess_ttbr0_enable(); \ - __raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U); \ - uaccess_ttbr0_disable(); \ + do { \ + uaccess_ttbr0_enable(); \ + __raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U); \ + uaccess_ttbr0_disable(); \ + break; \ + __rpu_failed: \ + uaccess_ttbr0_disable(); \ + goto label; \ + } while (0); \ } while (0)
#define __put_user_error(x, ptr, err) \ do { \ + __label__ __pu_failed; \ __typeof__(*(ptr)) __user *__p = (ptr); \ might_fault(); \ if (access_ok(__p, sizeof(*__p))) { \ __p = uaccess_mask_ptr(__p); \ - __raw_put_user((x), __p, (err)); \ + __raw_put_user((x), __p, __pu_failed); \ } else { \ + __pu_failed: \ (err) = -EFAULT; \ } \ } while (0) @@ -369,15 +377,18 @@ do { \ do { \ __typeof__(dst) __pkn_dst = (dst); \ __typeof__(src) __pkn_src = (src); \ - int __pkn_err = 0; \ - \ - __mte_enable_tco_async(); \ - __raw_put_mem("str", *((type *)(__pkn_src)), \ - (__force type *)(__pkn_dst), __pkn_err, K); \ - __mte_disable_tco_async(); \ \ - if (unlikely(__pkn_err)) \ + do { \ + __label__ __pkn_err; \ + __mte_enable_tco_async(); \ + __raw_put_mem("str", *((type *)(__pkn_src)), \ + (__force type *)(__pkn_dst), __pkn_err, K); \ + __mte_disable_tco_async(); \ + break; \ + __pkn_err: \ + __mte_disable_tco_async(); \ goto err_label; \ + } while (0); \ } while(0)
extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n); @@ -411,17 +422,8 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt } #define user_access_begin(a,b) user_access_begin(a,b) #define user_access_end() uaccess_ttbr0_disable() - -/* - * The arm64 inline asms should learn abut asm goto, and we should - * teach user_access_begin() about address masking. - */ -#define unsafe_put_user(x, ptr, label) do { \ - int __upu_err = 0; \ - __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), __upu_err, U); \ - if (__upu_err) goto label; \ -} while (0) - +#define unsafe_put_user(x, ptr, label) \ + __raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U) #define unsafe_get_user(x, ptr, label) \ __raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
From: Mark Rutland mark.rutland@arm.com
mainline inclusion from mainline-v6.11-rc1 commit f2f6a8e8871725035959b90bac048cde555aa0e9 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB81V6
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Several versions of GCC mis-compile asm goto with outputs. We try to work around this, but our workaround is demonstrably incomplete and liable to result in subtle bugs, especially on arm64 where get_user() has recently been moved over to using asm goto with outputs.
From discussion(s) with Linus at:
https://lore.kernel.org/linux-arm-kernel/Zpfv2tnlQ-gOLGac@J2N7QTR9R3.cambrid... https://lore.kernel.org/linux-arm-kernel/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambrid...
... it sounds like the best thing to do for now is to remove the workaround and make CC_HAS_ASM_GOTO_OUTPUT depend on working compiler versions.
The issue was originally reported to GCC by Sean Christopherson:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921
... and Jakub Jelinek fixed this for GCC 14, with the fix backported to 13.3.0, 12.4.0, and 11.5.0.
In the kernel, we tried to work around broken compilers in commits:
4356e9f841f7 ("work around gcc bugs with 'asm goto' with outputs") 68fb3ca0e408 ("update workarounds for gcc "asm goto" issue")
... but the workaround of adding an empty asm("") after the asm volatile goto(...) demonstrably does not always avoid the problem, as can be seen in the following test case:
| #define asm_goto_output(x...) \ | do { asm volatile goto(x); asm (""); } while (0) | | #define __good_or_bad(__val, __key) \ | do { \ | __label__ __failed; \ | unsigned long __tmp; \ | asm_goto_output( \ | " cbnz %[key], %l[__failed]\n" \ | " mov %[val], #0x900d\n" \ | : [val] "=r" (__tmp) \ | : [key] "r" (__key) \ | : \ | : __failed); \ | (__val) = __tmp; \ | break; \ | __failed: \ | (__val) = 0xbad; \ | } while (0) | | unsigned long get_val(unsigned long key); | unsigned long get_val(unsigned long key) | { | unsigned long val = 0xbad; | | __good_or_bad(val, key); | | return val; | }
GCC 13.2.0 (at -O2) compiles this to:
| cbnz x0, .Lfailed | mov x0, #0x900d | .Lfailed: | ret
GCC 14.1.0 (at -O2) compiles this to:
| cbnz x0, .Lfailed | mov x0, #0x900d | ret | .Lfailed: | mov x0, #0xbad | ret
Note that GCC 13.2.0 erroneously omits the assignment to 'val' in the error path (even though this does not depend on an output of the asm goto). GCC 14.1.0 correctly retains the assignment.
This problem can be seen within the kernel with the following test case:
| #include <linux/uaccess.h> | #include <linux/types.h> | | noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr); | noinline unsigned long test_unsafe_get_user(unsigned long __user *ptr) | { | unsigned long val; | | unsafe_get_user(val, ptr, Efault); | return val; | | Efault: | val = 0x900d; | return val; | }
GCC 13.2.0 (arm64 defconfig) compiles this to:
| and x0, x0, #0xff7fffffffffffff | ldtr x0, [x0] | .Lextable_fixup: | ret
GCC 13.2.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to:
| endbr64 | mov (%rdi),%rax | .Lextable_fixup: | ret
... omitting the assignment to 'val' in the error path, and leaving garbage in the result register returned by the function (which happens to contain the faulting address in the generated code).
GCC 14.1.0 (arm64 defconfig) compiles this to:
| and x0, x0, #0xff7fffffffffffff | ldtr x0, [x0] | ret | .Lextable_fixup: | mov x0, #0x900d // #36877 | ret
GCC 14.1.0 (x86_64 defconfig + MITIGATION_RETPOLINE=n) compiles this to:
| endbr64 | mov (%rdi),%rax | ret | .Lextable_fixup: | mov $0x900d,%eax | ret
... retaining the expected assignment to 'val' in the error path.
We don't have a complete and reasonable workaround. While placing empty asm("") blocks after each goto label *might* be sufficient, we don't know for certain, this is tedious and error-prone, and there doesn't seem to be a neat way to wrap this up (which is especially painful for cases with multiple goto labels).
Avoid this issue by disabling CONFIG_CC_HAS_ASM_GOTO_OUTPUT for known-broken compiler versions and removing the workaround (along with the CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND config option).
For the moment I've left the default implementation of asm_goto_output() unchanged. This should now be redundant since any compiler with the fix for the clobbering issue would also have a fix for the (earlier) volatile issue, but it's far less churny to leave it around, which makes it easier to backport this patch if necessary.
Signed-off-by: Mark Rutland mark.rutland@arm.com Cc: Alex Coplan alex.coplan@arm.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Jakub Jelinek jakub@gcc.gnu.org Cc: Peter Zijlstra peterz@infradead.org Cc: Sean Christopherson seanjc@google.com Cc: Szabolcs Nagy szabolcs.nagy@arm.com Cc: Will Deacon will@kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Ze Zuo zuoze1@huawei.com --- include/linux/compiler-gcc.h | 20 -------------------- init/Kconfig | 22 ++++++++++++---------- 2 files changed, 12 insertions(+), 30 deletions(-)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 9e6961078c2a..81e73e94eb7d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -66,26 +66,6 @@ __builtin_unreachable(); \ } while (0)
-/* - * GCC 'asm goto' with outputs miscompiles certain code sequences: - * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 - * - * Work around it via the same compiler barrier quirk that we used - * to use for the old 'asm goto' workaround. - * - * Also, always mark such 'asm goto' statements as volatile: all - * asm goto statements are supposed to be volatile as per the - * documentation, but some versions of gcc didn't actually do - * that for asms with outputs: - * - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98619 - */ -#ifdef CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND -#define asm_goto_output(x...) \ - do { asm volatile goto(x); asm (""); } while (0) -#endif - #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ diff --git a/init/Kconfig b/init/Kconfig index 2d6ae58a5bf7..1840935e919c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -81,23 +81,25 @@ config CC_CAN_LINK_STATIC default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m64-flag) -static) if 64BIT default $(success,$(srctree)/scripts/cc-can-link.sh $(CC) $(CLANG_FLAGS) $(USERCFLAGS) $(USERLDFLAGS) $(m32-flag) -static)
+# Fixed in GCC 14, 13.3, 12.4 and 11.5 +# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 +config GCC_ASM_GOTO_OUTPUT_BROKEN + bool + depends on CC_IS_GCC + default y if GCC_VERSION < 110500 + default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 + default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 + config CC_HAS_ASM_GOTO_OUTPUT - def_bool $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null) + def_bool y + depends on !GCC_ASM_GOTO_OUTPUT_BROKEN + depends on $(success,echo 'int foo(int x) { asm goto ("": "=r"(x) ::: bar); return x; bar: return 0; }' | $(CC) -x c - -c -o /dev/null)
config CC_HAS_ASM_GOTO_TIED_OUTPUT depends on CC_HAS_ASM_GOTO_OUTPUT # Detect buggy gcc and clang, fixed in gcc-11 clang-14. def_bool $(success,echo 'int foo(int *x) { asm goto (".long (%l[bar]) - .": "+m"(*x) ::: bar); return *x; bar: return 0; }' | $CC -x c - -c -o /dev/null)
-config GCC_ASM_GOTO_OUTPUT_WORKAROUND - bool - depends on CC_IS_GCC && CC_HAS_ASM_GOTO_OUTPUT - # Fixed in GCC 14, 13.3, 12.4 and 11.5 - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921 - default y if GCC_VERSION < 110500 - default y if GCC_VERSION >= 120000 && GCC_VERSION < 120400 - default y if GCC_VERSION >= 130000 && GCC_VERSION < 130300 - config TOOLS_SUPPORT_RELR def_bool $(success,env "CC=$(CC)" "LD=$(LD)" "NM=$(NM)" "OBJCOPY=$(OBJCOPY)" $(srctree)/scripts/tools-support-relr.sh)
From: Mark Rutland mark.rutland@arm.com
mainline inclusion from mainline-v6.11-rc4 commit f94511df53bb792e505c98662971434c7995388a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB81V6
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
In the CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y version of __get_mem_asm(), we incorrectly use _ASM_EXTABLE_##type##ACCESS_ERR() such that upon a fault the extable fixup handler writes -EFAULT into "%w0", which is the register containing 'x' (the result of the load).
This was a thinko in commit:
86a6a68febfcf57b ("arm64: start using 'asm goto' for get_user() when available")
Prior to that commit _ASM_EXTABLE_##type##ACCESS_ERR_ZERO() was used such that the extable fixup handler wrote -EFAULT into "%w0" (the register containing 'err'), and zero into "%w1" (the register containing 'x'). When the 'err' variable was removed, the extable entry was updated incorrectly.
Writing -EFAULT to the value register is unnecessary but benign:
* We never want -EFAULT in the value register, and previously this would have been zeroed in the extable fixup handler.
* In __get_user_error() the value is overwritten with zero explicitly in the error path.
* The asm goto outputs cannot be used when the goto label is taken, as older compilers (e.g. clang < 16.0.0) do not guarantee that asm goto outputs are usable in this path and may use a stale value rather than the value in an output register. Consequently, zeroing in the extable fixup handler is insufficient to ensure callers see zero in the error path.
* The expected usage of unsafe_get_user() and get_kernel_nofault() requires that the value is not consumed in the error path.
Some versions of GCC would mis-compile asm goto with outputs, and erroneously omit subsequent assignments, breaking the error path handling in __get_user_error(). This was discussed at:
https://lore.kernel.org/lkml/ZpfxLrJAOF2YNqCk@J2N7QTR9R3.cambridge.arm.com/
... and was fixed by removing support for asm goto with outputs on those broken compilers in commit:
f2f6a8e887172503 ("init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND")
With that out of the way, we can safely replace the usage of _ASM_EXTABLE_##type##ACCESS_ERR() with _ASM_EXTABLE_##type##ACCESS(), leaving the value register unchanged in the case a fault is taken, as was originally intended. This matches other architectures and matches our __put_mem_asm().
Signed-off-by: Mark Rutland mark.rutland@arm.com Cc: Will Deacon will@kernel.org Link: https://lore.kernel.org/r/20240807103731.2498893-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas catalin.marinas@arm.com Signed-off-by: Ze Zuo zuoze1@huawei.com --- arch/arm64/include/asm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 5bdd0b877500..dd0877a75922 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else
hulk inclusion category: other bugzilla: https://gitee.com/openeuler/kernel/issues/IB81V6
----------------------------------------
Remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y from openeuler_defconfig, because commit f2f6a8e88717 ("init/Kconfig: remove CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND") removed this config option.
Signed-off-by: Ze Zuo zuoze1@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 - arch/x86/configs/openeuler_defconfig | 1 - 2 files changed, 2 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 41fba69b84e2..9ba0e79f3c93 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -2,7 +2,6 @@ # Automatically generated file; DO NOT EDIT. # Linux/arm64 6.6.0 Kernel Configuration # -CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 6113ee0b4fcb..877ab0bead2a 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -2,7 +2,6 @@ # Automatically generated file; DO NOT EDIT. # Linux/x86 6.6.0 Kernel Configuration # -CONFIG_GCC_ASM_GOTO_OUTPUT_WORKAROUND=y CONFIG_TOOLS_SUPPORT_RELR=y CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/14005 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/14005 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...