From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Replace the default SYNC exception vector taken from EL0 with a
simplified version, "sync_ventry", defined in
arch/arm64/kernel/xcall/entry.S. It uses the ESR to select the
corresponding entry in a jump table that holds the entry code address
for each SYNC exception cause, for example svc, data abort,
instruction abort, brk, etc. Most importantly, these entry stubs do
not increase the number of branch instructions compared with the
default vector. The gain comes from bypassing el0t_64_sync_handler
and the complex 'switch (...) case' statements inside it; this kind
of optimization minimizes the overhead of the exception type filter.

Separate the SVC exception entry from the generic SYNC exception
entry, so that it is possible to redirect the syscall handler to a
customized implementation through 'svc 0xffff', or to reduce the
context switch latency for the syscalls specified via
'/proc/pid/xcall'.

In order to simplify the checking, in assembly code, of the syscall
groups passed via '/proc/pid/xcall', this patch also replaces the old
bitmap in the xcall_info data with a byte array and adapts the
related code accordingly.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/include/asm/xcall.h   |   3 +-
 arch/arm64/kernel/entry-common.c |  39 ++++++++-
 arch/arm64/kernel/entry.S        |  16 +++-
 arch/arm64/kernel/process.c      |   5 ++
 arch/arm64/kernel/xcall/entry.S  | 140 ++++++++++++++++++++++++++++---
 arch/arm64/kernel/xcall/xcall.c  |  18 ++--
 fs/proc/proc_xcall.c             |  77 ++++++++---------
 7 files changed, 232 insertions(+), 66 deletions(-)

diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h
index 5cea7f81a857..121fccc86ef3
--- a/arch/arm64/include/asm/xcall.h
+++ b/arch/arm64/include/asm/xcall.h
@@ -89,11 +89,12 @@ DECLARE_STATIC_KEY_FALSE(xcall_enable);
 struct xcall_info {
	/* Must be first! */
-	DECLARE_BITMAP(xcall_enable, __NR_syscalls);
+	u8 xcall_enable[__NR_syscalls + 1];
 };
 
 #define TASK_XINFO(p) ((struct xcall_info *)p->xinfo)
 
 int xcall_init_task(struct task_struct *p, struct task_struct *orig);
 void xcall_task_free(struct task_struct *p);
+void xcall_info_switch(struct task_struct *p);
 
 #endif /* __ASM_XCALL_H */
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index a405ccc185f1..985ac6bb88b5
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -844,7 +844,44 @@ asmlinkage void noinstr el0_fast_syscall(struct pt_regs *regs)
 	fast_exit_to_user_mode(regs);
 }
 
-asmlinkage void noinstr el0_slow_syscall(struct pt_regs *regs) __alias(el0_svc);
+asmlinkage void el0_slow_syscall(struct pt_regs *regs) __alias(el0_svc);
+asmlinkage void __alias_el0_da(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_da);
+asmlinkage void __alias_el0_ia(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_ia);
+asmlinkage void __alias_el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_fpsimd_acc);
+asmlinkage void __alias_el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sve_acc);
+asmlinkage void __alias_el0_sme_acc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sme_acc);
+asmlinkage void __alias_el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_fpsimd_exc);
+asmlinkage void __alias_el0_sys(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sys);
+asmlinkage void __alias_el0_wfx(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sys);
+asmlinkage void __alias_el0_sp(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sp);
+asmlinkage void __alias_el0_pc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_pc);
+asmlinkage void __alias_el0_undef(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_undef);
+asmlinkage void __alias_el0_bti(struct pt_regs *regs) __alias(el0_bti);
+asmlinkage void __alias_el0_mops(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_mops);
+asmlinkage void __alias_el0_breakpt(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_softstp(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_watchpt(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_brk64(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_fpac(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_fpac);
+asmlinkage void __alias_el0_inv(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_inv);
 #endif /* CONFIG_FAST_SYSCALL */
 
 asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 48584f3b454d..cceb4526745f
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -552,6 +552,10 @@ tsk	.req	x28		// current thread_info
 
 	.text
 
+#ifdef CONFIG_FAST_SYSCALL
+#include "xcall/entry.S"
+#endif
+
 /*
  * Exception vectors.
 */
@@ -569,7 +573,11 @@ SYM_CODE_START(vectors)
 	kernel_ventry	1, h, 64, fiq		// FIQ EL1h
 	kernel_ventry	1, h, 64, error		// Error EL1h
 
+#ifdef CONFIG_FAST_SYSCALL
+	sync_ventry				// Synchronous 64-bit EL0
+#else
 	kernel_ventry	0, t, 64, sync		// Synchronous 64-bit EL0
+#endif
 	kernel_ventry	0, t, 64, irq		// IRQ 64-bit EL0
 	kernel_ventry	0, t, 64, fiq		// FIQ 64-bit EL0
 	kernel_ventry	0, t, 64, error		// Error 64-bit EL0
@@ -581,8 +589,6 @@ SYM_CODE_START(vectors)
 SYM_CODE_END(vectors)
 
 #ifdef CONFIG_ACTLR_XCALL_XINT
-#include "xcall/entry.S"
-
 	.align	11
 SYM_CODE_START(vectors_xcall_xint)
 	kernel_ventry	1, t, 64, sync		// Synchronous EL1t
@@ -595,7 +601,11 @@ SYM_CODE_START(vectors_xcall_xint)
 	kernel_ventry	1, h, 64, fiq		// FIQ EL1h
 	kernel_ventry	1, h, 64, error		// Error EL1h
 
+#ifdef CONFIG_FAST_SYSCALL
+	sync_ventry				// Synchronous 64-bit EL0
+#else
 	kernel_ventry	0, t, 64, sync		// Synchronous 64-bit EL0
+#endif
 	kernel_ventry	0, t, 64, irq		// IRQ 64-bit EL0
 	kernel_ventry	0, t, 64, fiq		// FIQ 64-bit EL0
 	kernel_ventry	0, t, 64, error		// Error 64-bit EL0
@@ -605,7 +615,7 @@ SYM_CODE_START(vectors_xcall_xint)
 	kernel_ventry	0, t, 32, fiq		// FIQ 32-bit EL0
 	kernel_ventry	0, t, 32, error		// Error 32-bit EL0
 SYM_CODE_END(vectors_xcall_xint)
-#endif
+#endif /* CONFIG_ACTLR_XCALL_XINT */
 
 #ifdef CONFIG_VMAP_STACK
 SYM_CODE_START_LOCAL(__bad_stack)
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fe3f89445fcb..e9e5ce956f15
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -55,6 +55,7 @@
 #include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 #include <asm/system_misc.h>
+#include <asm/xcall.h>
 
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
@@ -472,6 +473,10 @@ DEFINE_PER_CPU(struct task_struct *, __entry_task);
 static void entry_task_switch(struct task_struct *next)
 {
 	__this_cpu_write(__entry_task, next);
+#ifdef CONFIG_FAST_SYSCALL
+	if (static_branch_unlikely(&xcall_enable))
+		xcall_info_switch(next);
+#endif
 }
 
 /*
diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S
index abd5f97d5525..b8238663c820
--- a/arch/arm64/kernel/xcall/entry.S
+++ b/arch/arm64/kernel/xcall/entry.S
@@ -151,28 +151,30 @@ alternative_else_nop_endif
 	sb
 	.endm /* .macro hw_xcal_restore_base_regs */
 
-#define __NR_fast_syscalls	512
-
 SYM_CODE_START_LOCAL(el0t_64_svc_entry)
+alternative_if_not ARM64_HAS_HW_XCALL_XINT
 	/* Hijack SVC to dynamically load syscalls via '/proc/xcall/comm' */
-	ldr	x20, [sp, #S_SYSCALLNO]		// ESR.bits[25,18]
-	cmp	x20, 0
-	bne	el0t_64_xcall_entry
+	ldr	x20, [sp, #S_SYSCALLNO]		// ESR.bits[15,0]
+	cmp	x20, 0xfff
+	b.ge	el0t_64_xcall_entry
+alternative_else_nop_endif
 	/* Hijack SVC to low overhead syscalls via '/proc/[pid]/xcall' */
-	ldr_this_cpu x21, __cpu_fast_syscall, x20	// per_cpu table
-	and	x20, x8, #__NR_fast_syscalls - 1	// trunk syscno less than 512
-	ldrb	w20, [x21, x20]			// memory overhead is 512(B)
+	cmp	x8, __NR_syscalls
+	b.ge	.slow_syscall
+	ldr_this_cpu x21, __xcall_info, x20
+	ldrb	w20, [x21, x8]
 	cmp	x20, 0
 	bne	el0t_fast_syscall
+.slow_syscall:
 	ldp	x20, x21, [sp, #16 * 10]
 	kernel_entry 0, 64
 	mov	x0, sp
 	bl	el0_slow_syscall
 	b	ret_to_user
 
SYM_INNER_LABEL(el0t_64_xcall_entry, SYM_L_GLOBAL)
-	lsr	x20, x20, #4
+	lsr	x20, x20, #12
 	adr	x21, .xcall_func_table
 	ldr	w20, [x21, x20, lsl #2]
 	add	x20, x20, x21
@@ -180,7 +182,7 @@ SYM_INNER_LABEL(el0t_64_xcall_entry, SYM_L_GLOBAL)
 
 /* ISS==0F~FF: Entry to optimized and customized syscalls */
 .xcall_func_table:
-	.rept	15
+	.rept	16
 	.word	el0t_xcall_syscall - .xcall_func_table
 	.endr
 SYM_CODE_END(el0t_64_svc_entry)
@@ -201,6 +203,89 @@ SYM_CODE_START_LOCAL(el0t_fast_syscall)
 	hw_xcal_restore_base_regs
 SYM_CODE_END(el0t_fast_syscall)
 
+	.macro el0t_64_entry sym:req
+SYM_CODE_START_LOCAL(\sym\()_entry)
+	ldp	x20, x21, [sp, #16 * 10]
+	kernel_entry 0, 64
+	mov	x0, sp
+	ldr	x1, [sp, #(S_SYSCALLNO - 8)]	// ESR
+	bl	__alias_\sym
+	b	ret_to_user
+SYM_CODE_END(\sym\()_entry)
+	.endm
+
+	.macro el0t_64_entry_offset sym:req, num:req
+	.rept	\num
+	.word	el0t_64_sync_table - \sym\()_entry
+	.endr
+	.endm
+
+	/*
+	 * Early exception handlers
+	 */
+	el0t_64_entry	el0_undef
+	el0t_64_entry	el0_wfx
+	el0t_64_entry	el0_fpsimd_acc
+	el0t_64_entry	el0_bti
+	el0t_64_entry	el0_sys
+	el0t_64_entry	el0_sve_acc
+	el0t_64_entry	el0_fpac
+	el0t_64_entry	el0_sme_acc
+	el0t_64_entry	el0_ia
+	el0t_64_entry	el0_pc
+	el0t_64_entry	el0_da
+	el0t_64_entry	el0_sp
+	el0t_64_entry	el0_mops
+	el0t_64_entry	el0_fpsimd_exc
+	el0t_64_entry	el0_breakpt
+	el0t_64_entry	el0_softstp
+	el0t_64_entry	el0_watchpt
+	el0t_64_entry	el0_brk64
+	el0t_64_entry	el0_inv
+
+SYM_CODE_START_LOCAL(el0t_64_sync_table)
+	el0t_64_entry_offset el0_undef, 1	// 0x00
+	el0t_64_entry_offset el0_wfx, 1		// 0x01
+	el0t_64_entry_offset el0_inv, 5
+	el0t_64_entry_offset el0_fpsimd_acc, 1	// 0x07
+	el0t_64_entry_offset el0_inv, 5
+	el0t_64_entry_offset el0_bti, 1		// 0x0D
+	el0t_64_entry_offset el0_inv, 7
+	el0t_64_entry_offset el0t_64_svc, 1	// 0x15
+	el0t_64_entry_offset el0_inv, 2
+	el0t_64_entry_offset el0_sys, 1		// 0x18
+	el0t_64_entry_offset el0_sve_acc, 1	// 0x19
+	el0t_64_entry_offset el0_inv, 2
+	el0t_64_entry_offset el0_fpac, 1	// 0x1C
+	el0t_64_entry_offset el0_sme_acc, 1	// 0x1D
+	el0t_64_entry_offset el0_inv, 2
+	el0t_64_entry_offset el0_ia, 1		// 0x20
+	el0t_64_entry_offset el0_inv, 1
+	el0t_64_entry_offset el0_pc, 1		// 0x22
+	el0t_64_entry_offset el0_inv, 1
+	el0t_64_entry_offset el0_da, 1		// 0x24
+	el0t_64_entry_offset el0_inv, 1
+	el0t_64_entry_offset el0_sp, 1		// 0x26
+	el0t_64_entry_offset el0_mops, 1	// 0x27
+	el0t_64_entry_offset el0_inv, 4
+	el0t_64_entry_offset el0_fpsimd_exc, 1	// 0x2C
+	el0t_64_entry_offset el0_inv, 3
+	el0t_64_entry_offset el0_breakpt, 1	// 0x30
+	el0t_64_entry_offset el0_inv, 1
+	el0t_64_entry_offset el0_softstp, 1	// 0x32
+	el0t_64_entry_offset el0_inv, 1
+	el0t_64_entry_offset el0_watchpt, 1	// 0x34
+	el0t_64_entry_offset el0_inv, 7
+	el0t_64_entry_offset el0_brk64, 1	// 0x3C
+	el0t_64_entry_offset el0_inv, 3
+SYM_CODE_END(el0t_64_sync_table)
+
+/***********************************************
+ *                                             *
+ *  Xcall exception entry code for 920G CPU    *
+ *                                             *
+ ***********************************************/
+#ifdef CONFIG_ACTLR_XCALL_XINT
 	.macro xcall_ventry
 	.align 7
 .Lventry_start\@:
@@ -214,9 +299,42 @@ SYM_CODE_END(el0t_fast_syscall)
 .Lskip_tramp_vectors_cleanup\@:
 	sub	sp, sp, #PT_REGS_SIZE
 	stp	x20, x21, [sp, #16 * 10]
-	/* Decode ESR.ICC bits[25,18] for use later */
+	/* Decode ESR.ICC bits[24,17] for use later */
 	mrs	x21, esr_el1
 	ubfx	w20, w21, #16, #8
 	b	el0t_64_xcall_entry
 .org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 	.endm
+#endif /* CONFIG_ACTLR_XCALL_XINT */
+
+/****************************************************************
+ *                                                              *
+ *    Sync exception entry code for early CPUs before 920G     *
+ *                                                              *
+ ****************************************************************/
+	.macro sync_ventry
+	.align 7
+.Lventry_start\@:
+	/*
+	 * This must be the first instruction of the EL0 vector entries. It is
It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ + mrs x30, tpidrro_el0 + msr tpidrro_el0, xzr +.Lskip_tramp_vectors_cleanup\@: + sub sp, sp, #PT_REGS_SIZE + + /* Save ESR and ICC.bits[15,0] for use later */ + stp x20, x21, [sp, #16 * 10] + mrs x20, esr_el1 + uxth w21, w20 + stp x20, x21, [sp, #(S_SYSCALLNO - 8)] + /* Using jump table for different exception causes */ + lsr w21, w20, #ESR_ELx_EC_SHIFT + adr x20, el0t_64_sync_table + ldr w21, [x20, x21, lsl #2] + sub x20, x20, x21 + br x20 +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? + .endm diff --git a/arch/arm64/kernel/xcall/xcall.c b/arch/arm64/kernel/xcall/xcall.c index c0907c01c9c7..96e6274571d3 100644 --- a/arch/arm64/kernel/xcall/xcall.c +++ b/arch/arm64/kernel/xcall/xcall.c @@ -17,8 +17,9 @@ static inline int sw_xcall_init_task(struct task_struct *p, struct task_struct * return -ENOMEM; if (orig->xinfo) { - bitmap_copy(TASK_XINFO(p)->xcall_enable, TASK_XINFO(orig)->xcall_enable, - __NR_syscalls); + memcpy(TASK_XINFO(p)->xcall_enable, + TASK_XINFO(orig)->xcall_enable, + (__NR_syscalls + 1) * sizeof(u8)); } return 0; @@ -38,8 +39,13 @@ void xcall_task_free(struct task_struct *p) kfree(p->xinfo); } -#define __NR_fast_syscalls 512 -static u8 fast_syscall_enabled[__NR_fast_syscalls + 1] = { - [0 ... __NR_fast_syscalls] = 0, +static u8 default_xcall_info[__NR_syscalls + 1] = { + [0 ... __NR_syscalls] = 0, }; -asmlinkage DEFINE_PER_CPU(u8*, __cpu_fast_syscall) = fast_syscall_enabled; +DEFINE_PER_CPU(u8*, __xcall_info) = default_xcall_info; + +void xcall_info_switch(struct task_struct *task) +{ + if (TASK_XINFO(task)->xcall_enable) + __this_cpu_write(__xcall_info, TASK_XINFO(task)->xcall_enable); +} diff --git a/fs/proc/proc_xcall.c b/fs/proc/proc_xcall.c index 5f45d0799b33..7a08d4b18af3 100644 --- a/fs/proc/proc_xcall.c +++ b/fs/proc/proc_xcall.c @@ -12,9 +12,9 @@ static int xcall_show(struct seq_file *m, void *v) { struct inode *inode = m->private; - struct task_struct *p; - unsigned int rs, re; struct xcall_info *xinfo; + struct task_struct *p; + int l = 0, r = 1; if (!static_key_enabled(&xcall_enable)) return -EACCES; @@ -27,14 +27,25 @@ static int xcall_show(struct seq_file *m, void *v) if (!xinfo) goto out; - for (rs = 0, bitmap_next_set_region(xinfo->xcall_enable, &rs, &re, __NR_syscalls); - rs < re; rs = re + 1, - bitmap_next_set_region(xinfo->xcall_enable, &rs, &re, __NR_syscalls)) { - if (rs == (re - 1)) - seq_printf(m, "%d,", rs); - else - seq_printf(m, "%d-%d,", rs, re - 1); + while (r < __NR_syscalls) { + if (!xinfo->xcall_enable[l]) { + l++; + r = l + 1; + continue; + } + + if (!xinfo->xcall_enable[r]) { + if (r == (l + 1)) + seq_printf(m, "%d,", l); + else + seq_printf(m, "%d-%d,", l, r - 1); + l = r + 1; + r = l + 1; + continue; + } + r++; } + seq_puts(m, "\n"); out: put_task_struct(p); @@ -47,45 +58,28 @@ static int xcall_open(struct inode *inode, struct file *filp) return single_open(filp, xcall_show, inode); } -static int xcall_enable_one(struct xcall_info *xinfo, unsigned int sc_no) -{ - test_and_set_bit(sc_no, xinfo->xcall_enable); - return 0; -} - -static int xcall_disable_one(struct xcall_info *xinfo, unsigned int sc_no) -{ - test_and_clear_bit(sc_no, xinfo->xcall_enable); - return 0; -} - -static ssize_t xcall_write(struct file *file, const char __user *buf, +static ssize_t xcall_write(struct file *file, const char __user *ubuf, size_t count, loff_t *offset) { - struct inode *inode = file_inode(file); - struct task_struct *p; - char 
-	const size_t maxlen = sizeof(buffer) - 1;
 	unsigned int sc_no = __NR_syscalls;
+	struct task_struct *p;
+	char buf[5];
 	int ret = 0;
-	int is_clear = 0;
-	struct xcall_info *xinfo;
 
 	if (!static_key_enabled(&xcall_enable))
 		return -EACCES;
 
-	memset(buffer, 0, sizeof(buffer));
-	if (!count || copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
-		return -EFAULT;
-
-	p = get_proc_task(inode);
-	if (!p || !p->xinfo)
+	p = get_proc_task(file_inode(file));
+	if (!p || !TASK_XINFO(p))
 		return -ESRCH;
 
-	if (buffer[0] == '!')
-		is_clear = 1;
+	memset(buf, '\0', 5);
+	if (!count || (count > 4) || copy_from_user(buf, ubuf, count)) {
+		ret = -EFAULT;
+		goto out;
+	}
 
-	if (kstrtouint(buffer + is_clear, 10, &sc_no)) {
+	if (kstrtouint((buf + (int)(buf[0] == '!')), 10, &sc_no)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -95,13 +89,8 @@ static ssize_t xcall_write(struct file *file, const char __user *buf,
 		goto out;
 	}
 
-	xinfo = TASK_XINFO(p);
-	if (!is_clear && !test_bit(sc_no, xinfo->xcall_enable))
-		ret = xcall_enable_one(xinfo, sc_no);
-	else if (is_clear && test_bit(sc_no, xinfo->xcall_enable))
-		ret = xcall_disable_one(xinfo, sc_no);
-	else
-		ret = -EINVAL;
+	(TASK_XINFO(p))->xcall_enable[sc_no] = (int)(buf[0] != '!');
+	ret = 0;
 
 out:
 	put_task_struct(p);
-- 
2.34.1
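
P.S. For reviewers, a minimal user-space sketch of the '/proc/[pid]/xcall'
interface exercised by the xcall_write() hunk above. It is not part of the
patch; the helper name and the syscall number (64 == __NR_write on arm64)
are only illustrative, and error handling is trimmed.

	/*
	 * Writing "<nr>" marks syscall <nr> for the fast path,
	 * writing "!<nr>" clears it again (see xcall_write()).
	 */
	#include <stdio.h>
	#include <unistd.h>
	#include <fcntl.h>
	#include <sys/types.h>

	static int xcall_set(pid_t pid, int nr, int enable)
	{
		char path[64], buf[8];
		int fd, len, ret;

		snprintf(path, sizeof(path), "/proc/%d/xcall", pid);
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return -1;

		/* The kernel side accepts at most 4 characters per write. */
		len = snprintf(buf, sizeof(buf), "%s%d", enable ? "" : "!", nr);
		ret = (write(fd, buf, len) == len) ? 0 : -1;
		close(fd);
		return ret;
	}

	int main(void)
	{
		/* Enable the fast path for write(2) in the current task. */
		return xcall_set(getpid(), 64, 1);
	}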