
From: Yipeng Zou <zouyipeng@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IB6JLE

--------------------------------

This patch optimizes the performance of the SVC exception handler by
simplifying its operation, which shortens the syscall handling path.
The optimization comes with a trade-off: reduced functionality,
particularly in areas related to security, debugging, and maintenance.
When a task is executed with xcall, the following features are not
available on its syscall path, which may affect the system's ability
to perform essential tasks:

1. Memory Tagging Extension (MTE)
2. Process tracing (ptrace)
3. System call tracing (strace)
4. GNU Debugger (GDB)
5. Software single-stepping
6. Speculative Store Bypass Disable (SSBD)
7. Shadow Call Stack
8. Software PAN emulation via TTBR0 switching (SW_TTBR0_PAN)
9. Unmap Kernel at Exception Level 0 (UNMAP_KERNEL_AT_EL0)
10. ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
11. GCC plugin stack leak detection (GCC_PLUGIN_STACKLEAK)
12. Syscall trace points

In conclusion, the patch speeds up the SVC exception handler by
sacrificing several features that contribute to security, debugging,
and overall system stability. Developers and system administrators
must be aware of these trade-offs and plan for the potential effects
on their applications and operational workflows.

Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
Signed-off-by: Liao chen <liaochen4@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/Kconfig                       | 24 +++++++++++++
 arch/arm64/include/asm/exception.h |  3 ++
 arch/arm64/kernel/entry-common.c   | 20 +++++++++++
 arch/arm64/kernel/entry.S          | 51 ++++++++++++++++++++++++--
 arch/arm64/kernel/syscall.c        | 57 ++++++++++++++++++++++++++++++
 5 files changed, 152 insertions(+), 3 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 48ef789de86b..b80a22ed5545 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1205,4 +1205,28 @@ config FAST_SYSCALL
 	  exception handling path that only considers necessary features
 	  such as security, context saving, and recovery.
 
+config DEBUG_FEATURE_BYPASS
+	bool "Bypass debug features in fast syscall"
+	depends on FAST_SYSCALL
+	default y
+	help
+	  This option bypasses debug features in the fast syscall path.
+	  The svc exception handling process, which includes auxiliary
+	  functions for debug/trace and core functions like KPTI, has
+	  been identified as overly "lengthy"; the fast syscall path
+	  keeps only the necessary features.
+	  Disable this config to keep debug features in fast syscall.
+
+config SECURITY_FEATURE_BYPASS
+	bool "Bypass security features in fast syscall"
+	depends on FAST_SYSCALL
+	default y
+	help
+	  This option bypasses security features in the fast syscall path.
+	  The svc exception handling process, which includes auxiliary
+	  functions for debug/trace and core functions like KPTI, has
+	  been identified as overly "lengthy"; the fast syscall path
+	  keeps only the necessary features.
+	  Disable this config to keep security features in fast syscall.
+
 endmenu
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d38d526d084e..4b7994bd2b94 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -48,6 +48,9 @@ void do_el0_sys(unsigned long esr, struct pt_regs *regs);
 void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);
 void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr);
 void do_el0_cp15(unsigned long esr, struct pt_regs *regs);
+#ifdef CONFIG_FAST_SYSCALL
+void do_el0_xcall(struct pt_regs *regs);
+#endif
 void do_el0_svc(struct pt_regs *regs);
 void do_el0_svc_compat(struct pt_regs *regs);
 void do_el0_fpac(struct pt_regs *regs, unsigned long esr);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 02cd5d57edb6..e567484fc662 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -388,6 +388,26 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr)
 	do_el0_fpac(regs, esr);
 }
 
+#ifdef CONFIG_FAST_SYSCALL
+asmlinkage void noinstr fast_enter_from_user_mode(void)
+{
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	lockdep_hardirqs_off(CALLER_ADDR0);
+	CT_WARN_ON(ct_state() != CONTEXT_USER);
+#endif
+	user_exit_irqoff();
+#ifndef CONFIG_DEBUG_FEATURE_BYPASS
+	trace_hardirqs_off_finish();
+#endif
+}
+
+asmlinkage void noinstr el0_xcall_handler(struct pt_regs *regs)
+{
+	fast_enter_from_user_mode();
+	do_el0_xcall(regs);
+}
+#endif
+
 asmlinkage void noinstr el0_sync_handler(struct pt_regs *regs)
 {
 	unsigned long esr = read_sysreg(esr_el1);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index e49b5569bb97..e99a60fce105 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -182,7 +182,7 @@ alternative_else_nop_endif
 #endif
 	.endm
 
-	.macro	kernel_entry, el, regsize = 64
+	.macro	kernel_entry, el, regsize = 64, fast_mode = std
 	.if	\regsize == 32
 	mov	w0, w0			// zero upper 32 bits of x0
 	.endif
@@ -212,12 +212,19 @@ alternative_else_nop_endif
 	 * Ensure MDSCR_EL1.SS is clear, since we can unmask debug exceptions
 	 * when scheduling.
 	 */
+	.if	\fast_mode == std
 	ldr	x19, [tsk, #TSK_TI_FLAGS]
 	disable_step_tsk x19, x20
+	.endif
 
 	/* Check for asynchronous tag check faults in user space */
+	.if	\fast_mode == std
 	check_mte_async_tcf x22, x23
+	.endif
+
+	.if	\fast_mode == std
 	apply_ssbd 1, x22, x23
+	.endif
 
 	ptrauth_keys_install_kernel tsk, x20, x22, x23
 
@@ -243,9 +250,11 @@ alternative_else_nop_endif
 	add	x29, sp, #S_STACKFRAME
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
+.if	\fast_mode == std
 alternative_if_not ARM64_HAS_PAN
 	bl	__swpan_entry_el\el
 alternative_else_nop_endif
+.endif
 #endif
 
 	stp	x22, x23, [sp, #S_PC]
@@ -268,9 +277,11 @@ alternative_else_nop_endif
 
 	/* Re-enable tag checking (TCO set on exception entry) */
 #ifdef CONFIG_ARM64_MTE
+.if	\fast_mode == std
 alternative_if ARM64_MTE
 	SET_PSTATE_TCO(0)
 alternative_else_nop_endif
+.endif
 #endif
 
 	/*
@@ -283,7 +294,7 @@ alternative_else_nop_endif
 	 */
 	.endm
 
-	.macro	kernel_exit, el
+	.macro	kernel_exit, el, fast_mode = std
 	.if	\el != 0
 	disable_daif
 	.endif
@@ -303,14 +314,18 @@ alternative_else_nop_endif
 	ldp	x21, x22, [sp, #S_PC]		// load ELR, SPSR
 
 #ifdef CONFIG_ARM64_SW_TTBR0_PAN
+.if	\fast_mode == std
 alternative_if_not ARM64_HAS_PAN
 	bl	__swpan_exit_el\el
 alternative_else_nop_endif
+.endif
 #endif
 
 	.if	\el == 0
 	ldr	x23, [sp, #S_SP]		// load return stack pointer
 	msr	sp_el0, x23
+
+	.if	\fast_mode == std
 	tst	x22, #PSR_MODE32_BIT		// native task?
 	b.eq	3f
 
@@ -325,13 +340,17 @@ alternative_if ARM64_WORKAROUND_845719
 alternative_else_nop_endif
 #endif
 3:
+	.endif
+
 	scs_save tsk, x0
 
 	/* No kernel C function calls after this as user keys are set. */
 	ptrauth_keys_install_user tsk, x0, x1, x2
 
+	.if	\fast_mode == std
 	apply_ssbd 0, x0, x1
 	.endif
+	.endif
 
 	msr	elr_el1, x21			// set up the return data
 	msr	spsr_el1, x22
@@ -352,6 +371,11 @@ alternative_else_nop_endif
 	ldp	x28, x29, [sp, #16 * 14]
 
 	.if	\el == 0
+	.if	\fast_mode != std
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #S_FRAME_SIZE		// restore sp
+	eret
+	.endif
 alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
 	ldr	lr, [sp, #S_LR]
 	add	sp, sp, #S_FRAME_SIZE		// restore sp
@@ -717,10 +741,31 @@ SYM_CODE_END(el1_irq)
 	beq	.Lskip_xcall\@
 	ldp	x20, x21, [sp, #0]
 	/* do xcall */
+#ifdef CONFIG_SECURITY_FEATURE_BYPASS
+	kernel_entry 0, 64, xcall
+#else
 	kernel_entry 0, 64
+#endif
 	mov	x0, sp
-	bl	el0t_64_sync_handler
+	bl	el0_xcall_handler
+#ifdef CONFIG_SECURITY_FEATURE_BYPASS
+	disable_daif
+	gic_prio_kentry_setup tmp=x3
+	ldr	x19, [tsk, #TSK_TI_FLAGS]
+	and	x2, x19, #_TIF_WORK_MASK
+	cbnz	x2, fast_work_pending\@
+fast_finish_ret_to_user\@:
+	user_enter_irqoff
+	kernel_exit 0 xcall
+fast_work_pending\@:
+	mov	x0, sp				// 'regs'
+	mov	x1, x19
+	bl	do_notify_resume
+	ldr	x19, [tsk, #TSK_TI_FLAGS]	// re-check for single-step
+	b	fast_finish_ret_to_user\@
+#else
 	b	ret_to_user
+#endif
 .Lskip_xcall\@:
 	ldp	x20, x21, [sp, #0]
 	.endm
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 9bd304568d90..2d73eaaf9bc2 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -106,6 +106,46 @@ static void cortex_a76_erratum_1463225_svc_handler(void)
 static void cortex_a76_erratum_1463225_svc_handler(void) { }
 #endif /* CONFIG_ARM64_ERRATUM_1463225 */
 
+#ifdef CONFIG_FAST_SYSCALL
+static void el0_xcall_common(struct pt_regs *regs, int scno, int sc_nr,
+			     const syscall_fn_t syscall_table[])
+{
+	unsigned long flags = read_thread_flags();
+
+	regs->orig_x0 = regs->regs[0];
+	regs->syscallno = scno;
+
+#ifndef CONFIG_SECURITY_FEATURE_BYPASS
+	cortex_a76_erratum_1463225_svc_handler();
+#endif
+	local_daif_restore(DAIF_PROCCTX);
+
+	if (system_supports_mte() && (flags & _TIF_MTE_ASYNC_FAULT)) {
+		syscall_set_return_value(current, regs, -ERESTARTNOINTR, 0);
+		return;
+	}
+
+	if (has_syscall_work(flags)) {
+		if (scno == NO_SYSCALL)
+			syscall_set_return_value(current, regs, -ENOSYS, 0);
+		scno = syscall_trace_enter(regs);
+		if (scno == NO_SYSCALL)
+			goto trace_exit;
+	}
+
+	invoke_syscall(regs, scno, sc_nr, syscall_table);
+
+	if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
+		flags = read_thread_flags();
+		if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
+			return;
+	}
+
+trace_exit:
+	syscall_trace_exit(regs);
+}
+#endif
+
 static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
 			   const syscall_fn_t syscall_table[])
 {
@@ -237,6 +277,23 @@ static inline void delouse_pt_regs(struct pt_regs *regs)
 }
 #endif
 
+#ifdef CONFIG_FAST_SYSCALL
+void do_el0_xcall(struct pt_regs *regs)
+{
+	const syscall_fn_t *t = sys_call_table;
+
+#ifdef CONFIG_ARM64_ILP32
+	if (is_ilp32_compat_task()) {
+		t = ilp32_sys_call_table;
+		delouse_pt_regs(regs);
+	}
+#endif
+
+	fp_user_discard();
+	el0_xcall_common(regs, regs->regs[8], __NR_syscalls, t);
+}
+#endif
+
 void do_el0_svc(struct pt_regs *regs)
 {
 	const syscall_fn_t *t = sys_call_table;
-- 
2.34.1
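
Note for testers: both new options default to y when FAST_SYSCALL is
enabled; set CONFIG_DEBUG_FEATURE_BYPASS=n or
CONFIG_SECURITY_FEATURE_BYPASS=n to retain the corresponding features
on the fast path. The ptrace impact listed in the commit message can
be observed from user space with a minimal tracer like the sketch
below. It is illustrative only and not part of this patch: it assumes
the child runs on the ordinary SVC path and therefore delivers
PTRACE_SYSCALL stops; per the commit message, a task executed with
xcall would not deliver these stops. How a task opts into xcall is
outside the scope of this patch.

#include <signal.h>
#include <stdio.h>
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int status;
	pid_t child = fork();

	if (child == 0) {
		/* Child: opt into tracing, then issue a syscall. */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);		/* hand control to the tracer */
		getpid();		/* a syscall the tracer should see */
		_exit(0);
	}

	waitpid(child, &status, 0);	/* child stopped by SIGSTOP */

	/* Resume to the next syscall boundary; an xcall task would not stop here. */
	ptrace(PTRACE_SYSCALL, child, NULL, NULL);
	waitpid(child, &status, 0);

	if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP)
		printf("syscall stop observed: task is traceable\n");
	else
		printf("no syscall stop observed\n");

	ptrace(PTRACE_CONT, child, NULL, NULL);	/* let the child finish */
	waitpid(child, &status, 0);
	return 0;
}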