
From: Yipeng Zou <zouyipeng@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IB6JLE

--------------------------------

The svc exception handling process in ARM64, which includes auxiliary
functions for debug/trace and core functions like KPTI, has been
identified as overly "lengthy". This inefficiency is particularly
noticeable in short syscalls such as lseek() and getpid(), where the
syscall function itself comprises a small percentage of the total
instructions executed.

To address this, we introduce the concept of xcall, a fast svc
exception handling path that retains only the necessary features, such
as security, context saving, and context recovery. This approach can be
seen as a high-speed syscall processing mechanism bridging the gap
between vdso and traditional syscalls.

We've implemented a per-task bitmap to enable or disable xcall for
specific syscalls. Users can enable xcall for a syscall with the
following command:

	echo $syscall_nr > /proc/$PID/xcall

To disable a syscall, use:

	echo \!$syscall_nr > /proc/$PID/xcall

The set of currently enabled syscalls can be viewed with:

	cat /proc/$PID/xcall

Finally, we've added a kernel boot parameter to control the xcall
feature. To enable xcall, include "xcall" in the kernel boot command
line. By default, xcall is disabled.

This patch only introduces the basic framework; the regular syscall
path is not modified, it is merely duplicated for xcall.

Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
---
 arch/Kconfig                           |  19 +++++
 arch/arm64/Kconfig                     |   1 +
 arch/arm64/configs/openeuler_defconfig |   2 +
 arch/arm64/include/asm/cpucaps.h       |   1 +
 arch/arm64/kernel/asm-offsets.c        |   3 +
 arch/arm64/kernel/cpufeature.c         |  28 +++++++
 arch/arm64/kernel/entry.S              |  59 +++++++++++++
 fs/proc/base.c                         | 112 +++++++++++++++++++++++++
 include/linux/sched.h                  |   4 +
 kernel/fork.c                          |  20 +++++
 10 files changed, 249 insertions(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 0fc9c6d591b8..48ef789de86b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1186,4 +1186,23 @@ source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
 
+config ARCH_SUPPORTS_FAST_SYSCALL
+	bool
+
+config FAST_SYSCALL
+	bool "Fast Syscall support"
+	depends on ARCH_SUPPORTS_FAST_SYSCALL
+	default n
+	help
+	  This enables support for the fast syscall feature.
+	  The svc exception handling process, which includes auxiliary
+	  functions for debug/trace and core functions like KPTI, has
+	  been identified as overly "lengthy". This inefficiency is
+	  particularly noticeable in short syscalls such as lseek() and
+	  getpid(), where the syscall function itself comprises a small
+	  percentage of the total instructions executed. To address
+	  this, we introduce the concept of fast syscall, a fast svc
+	  exception handling path that retains only necessary features
+	  such as security, context saving, and recovery.
+
 endmenu
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 93ced97f8c6c..d19304745fee 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -219,6 +219,7 @@ config ARM64
 	select THREAD_INFO_IN_TASK
 	select HAVE_LIVEPATCH_WO_FTRACE
 	select THP_NUMA_CONTROL if ARM64_64K_PAGES && NUMA_BALANCING && TRANSPARENT_HUGEPAGE
+	select ARCH_SUPPORTS_FAST_SYSCALL if !ARM64_MTE && !KASAN_HW_TAGS
 	help
 	  ARM 64-bit (AArch64) Linux support.
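
As an illustration of the interface described in the commit message, a
minimal userspace sketch in C (not part of the patch; xcall_set() is a
hypothetical helper, and 172 is __NR_getpid on arm64). Writes only
succeed when the kernel was booted with "xcall"; otherwise the file
returns -EACCES:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Enable (enable=1) or disable (enable=0) xcall for one syscall. */
	static int xcall_set(pid_t pid, int sc_no, int enable)
	{
		char path[64], buf[16];
		int fd, len, ret;

		snprintf(path, sizeof(path), "/proc/%d/xcall", (int)pid);
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return -1;

		/* "<nr>" enables a syscall, "!<nr>" disables it again. */
		len = snprintf(buf, sizeof(buf), "%s%d", enable ? "" : "!", sc_no);
		ret = ((int)write(fd, buf, len) == len) ? 0 : -1;
		close(fd);
		return ret;
	}

	int main(void)
	{
		if (xcall_set(getpid(), 172, 1))	/* enable getpid() */
			perror("enable xcall");
		if (xcall_set(getpid(), 172, 0))	/* disable it again */
			perror("disable xcall");
		return 0;
	}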
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 5449de73fbbc..8f649fdedfea 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -921,6 +921,8 @@ CONFIG_ARCH_HAS_GCOV_PROFILE_ALL=y
 # end of GCOV-based kernel profiling
 
 CONFIG_HAVE_GCC_PLUGINS=y
+CONFIG_ARCH_SUPPORTS_FAST_SYSCALL=y
+# CONFIG_FAST_SYSCALL is not set
 # end of General architecture-dependent options
 
 CONFIG_RT_MUTEXES=y
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index ce9fbf260a3c..5c4e78ffa264 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -81,6 +81,7 @@
 #define ARM64_HAS_PBHA_STAGE2		73
 #define ARM64_SME			74
 #define ARM64_SME_FA64			75
+#define ARM64_HAS_XCALL			76
 
 #define ARM64_NCAPS			80
 
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index c247e11130db..7c6ad4b1667b 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -26,6 +26,9 @@
 int main(void)
 {
+#ifdef CONFIG_FAST_SYSCALL
+  DEFINE(TSK_XCALL,	offsetof(struct task_struct, xcall_enable));
+#endif
   DEFINE(TSK_ACTIVE_MM,	offsetof(struct task_struct, active_mm));
   BLANK();
   DEFINE(TSK_TI_CPU,	offsetof(struct task_struct, thread_info.cpu));
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index dee049d27c74..f5ef453593ff 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2155,6 +2155,26 @@ static bool can_clearpage_use_stnp(const struct arm64_cpu_capabilities *entry,
 	return use_clearpage_stnp && has_mor_nontemporal(entry);
 }
 
+#ifdef CONFIG_FAST_SYSCALL
+static bool is_xcall_support;
+static int __init xcall_setup(char *str)
+{
+	is_xcall_support = true;
+	return 1;
+}
+__setup("xcall", xcall_setup);
+
+bool fast_syscall_enabled(void)
+{
+	return is_xcall_support;
+}
+
+static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __unused)
+{
+	return is_xcall_support;
+}
+#endif
+
 static const struct arm64_cpu_capabilities arm64_features[] = {
 	{
 		.desc = "GIC system register CPU interface",
@@ -2701,6 +2721,14 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
 		.cpu_enable = fa64_kernel_enable,
 	},
 #endif /* CONFIG_ARM64_SME */
+#ifdef CONFIG_FAST_SYSCALL
+	{
+		.desc = "Xcall Support",
+		.capability = ARM64_HAS_XCALL,
+		.type = ARM64_CPUCAP_SYSTEM_FEATURE,
+		.matches = has_xcall_support,
+	},
+#endif
 	{},
 };
 
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1290f36c8371..e49b5569bb97 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -675,11 +675,70 @@ SYM_CODE_START_LOCAL_NOALIGN(el1_irq)
 	kernel_exit 1
 SYM_CODE_END(el1_irq)
 
+#ifdef CONFIG_FAST_SYSCALL
+	.macro	check_esr_el1_ec_svc64
+	/* Only support SVC64 for now */
+	mrs	x20, esr_el1
+	lsr	w20, w20, #ESR_ELx_EC_SHIFT
+	cmp	x20, #ESR_ELx_EC_SVC64
+	.endm
+
+	.macro	check_syscall_nr
+	cmp	x8, __NR_syscalls
+	.endm
+
+	.macro	check_xcall_enable
+	/* x21 = task_struct->xcall_enable */
+	ldr_this_cpu x20, __entry_task, x21
+	ldr	x21, [x20, #TSK_XCALL]
+	/* x20 = sc_no / 8 */
+	lsr	x20, x8, 3
+	ldr	x21, [x21, x20]
+	/* x8 = sc_no % 8 */
+	and	x8, x8, 7
+	mov	x20, 1
+	lsl	x20, x20, x8
+	and	x21, x21, x20
+	cmp	x21, 0
+	.endm
+
+	.macro	check_xcall_pre_kernel_entry
+	stp	x20, x21, [sp, #0]
+	/* is ESR_ELx_EC_SVC64 */
+	check_esr_el1_ec_svc64
+	bne	.Lskip_xcall\@
+	/* x8 >= __NR_syscalls */
+	check_syscall_nr
+	bhs	.Lskip_xcall\@
+	str	x8, [sp, #16]
+	/* is xcall enabled */
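+	/*
+	 * check_xcall_enable below tests the per-task bitmap: it loads
+	 * eight bytes at byte offset x8 / 8 of task->xcall_enable and
+	 * checks bit x8 % 8, which on a little-endian kernel is the same
+	 * test as test_bit(x8, current->xcall_enable). Z is set when the
+	 * bit is clear, so the following beq skips the xcall path.
+	 */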
+	check_xcall_enable
+	ldr	x8, [sp, #16]
+	beq	.Lskip_xcall\@
+	ldp	x20, x21, [sp, #0]
+	/* do xcall */
+	kernel_entry 0, 64
+	mov	x0, sp
+	bl	el0t_64_sync_handler
+	b	ret_to_user
+.Lskip_xcall\@:
+	ldp	x20, x21, [sp, #0]
+	.endm
+#endif
+
 /*
  * EL0 mode handlers.
  */
 	.align 6
 SYM_CODE_START_LOCAL_NOALIGN(el0_sync)
+#ifdef CONFIG_FAST_SYSCALL
+	/* Only support el0 aarch64 sync exception */
+alternative_if_not ARM64_HAS_XCALL
+	b	.Lret_to_kernel_entry
+alternative_else_nop_endif
+	check_xcall_pre_kernel_entry
+.Lret_to_kernel_entry:
+#endif
 	kernel_entry 0
 	mov	x0, sp
 	bl	el0_sync_handler
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4e0054a37c4c..3206960c4bd7 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -3589,6 +3589,115 @@ static const struct file_operations proc_pid_sg_level_operations = {
 };
 #endif
 
+#ifdef CONFIG_FAST_SYSCALL
+bool fast_syscall_enabled(void);
+
+static int xcall_show(struct seq_file *m, void *v)
+{
+	struct inode *inode = m->private;
+	struct task_struct *p;
+	unsigned int rs, re;
+
+	if (!fast_syscall_enabled())
+		return -EACCES;
+
+	p = get_proc_task(inode);
+	if (!p)
+		return -ESRCH;
+
+	if (!p->xcall_enable)
+		goto out;
+
+	seq_printf(m, "Enabled Total[%d/%d]:", bitmap_weight(p->xcall_enable, __NR_syscalls),
+		   __NR_syscalls);
+
+	for (rs = 0, bitmap_next_set_region(p->xcall_enable, &rs, &re, __NR_syscalls);
+	     rs < re; rs = re + 1,
+	     bitmap_next_set_region(p->xcall_enable, &rs, &re, __NR_syscalls)) {
+		rs == (re - 1) ? seq_printf(m, "%d,", rs) :
+				 seq_printf(m, "%d-%d,", rs, re - 1);
+	}
+	seq_puts(m, "\n");
+out:
+	put_task_struct(p);
+
+	return 0;
+}
+
+static int xcall_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, xcall_show, inode);
+}
+
+static int xcall_enable_one(struct task_struct *p, unsigned int sc_no)
+{
+	bitmap_set(p->xcall_enable, sc_no, 1);
+	return 0;
+}
+
+static int xcall_disable_one(struct task_struct *p, unsigned int sc_no)
+{
+	bitmap_clear(p->xcall_enable, sc_no, 1);
+	return 0;
+}
+
+static ssize_t xcall_write(struct file *file, const char __user *buf,
+			   size_t count, loff_t *offset)
+{
+	struct inode *inode = file_inode(file);
+	struct task_struct *p;
+	char buffer[TASK_COMM_LEN];
+	const size_t maxlen = sizeof(buffer) - 1;
+	unsigned int sc_no = __NR_syscalls;
+	int ret = 0;
+	int is_clear = 0;
+
+	if (!fast_syscall_enabled())
+		return -EACCES;
+
+	memset(buffer, 0, sizeof(buffer));
+	if (!count || copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
+		return -EFAULT;
+
+	p = get_proc_task(inode);
+	if (!p || !p->xcall_enable)
+		return -ESRCH;
+
+	if (buffer[0] == '!')
+		is_clear = 1;
+
+	if (kstrtouint(buffer + is_clear, 10, &sc_no)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (sc_no >= __NR_syscalls) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (!is_clear && !test_bit(sc_no, p->xcall_enable))
+		ret = xcall_enable_one(p, sc_no);
+	else if (is_clear && test_bit(sc_no, p->xcall_enable))
+		ret = xcall_disable_one(p, sc_no);
+	else
+		ret = -EINVAL;
+
+out:
+	put_task_struct(p);
+
+	return ret ? ret : count;
+}
+
+static const struct file_operations proc_pid_xcall_operations = {
+	.open		= xcall_open,
+	.read		= seq_read,
+	.write		= xcall_write,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
 /*
  * Thread groups
  */
@@ -3615,6 +3724,9 @@ static const struct pid_entry tgid_base_stuff[] = {
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 	REG("smart_grid_level", 0644, proc_pid_sg_level_operations),
 #endif
+#ifdef CONFIG_FAST_SYSCALL
+	REG("xcall", 0644, proc_pid_xcall_operations),
+#endif
 #ifdef CONFIG_SCHED_AUTOGROUP
 	REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations),
 #endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e3170b7f81fa..18361e35a377 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1477,7 +1477,11 @@ struct task_struct {
 #else
 	KABI_RESERVE(14)
 #endif
+#if defined(CONFIG_FAST_SYSCALL)
+	KABI_USE(15, unsigned long *xcall_enable)
+#else
 	KABI_RESERVE(15)
+#endif
 	KABI_RESERVE(16)
 
 	KABI_AUX_PTR(task_struct)
diff --git a/kernel/fork.c b/kernel/fork.c
index 9b1ea79deaa5..bd7afeb364ab 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -479,6 +479,12 @@ void free_task(struct task_struct *tsk)
 #endif
 	if (task_relationship_used())
 		sched_relationship_free(tsk);
+
+#ifdef CONFIG_FAST_SYSCALL
+	if (tsk->xcall_enable)
+		bitmap_free(tsk->xcall_enable);
+#endif
+
 	free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -1007,6 +1013,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #ifdef CONFIG_MEMCG
 	tsk->active_memcg = NULL;
 #endif
+
+#ifdef CONFIG_FAST_SYSCALL
+	tsk->xcall_enable = NULL;
+#endif
+
 	return tsk;
 
 free_stack:
@@ -2085,6 +2096,15 @@ static __latent_entropy struct task_struct *copy_process(
 
 	rt_mutex_init_task(p);
 
+#ifdef CONFIG_FAST_SYSCALL
+	p->xcall_enable = bitmap_zalloc(__NR_syscalls, GFP_KERNEL);
+	if (!p->xcall_enable)
+		goto bad_fork_free;
+
+	if (current->xcall_enable)
+		bitmap_copy(p->xcall_enable, current->xcall_enable, __NR_syscalls);
+#endif
+
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 	retval = sched_prefer_cpus_fork(p, current->prefer_cpus);
 	if (retval)
-- 
2.34.1
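
For reference, the read side (xcall_show() above) prints the count of
enabled syscalls and the enabled set, folding consecutive syscall
numbers into ranges; the format string leaves a trailing comma. A
sketch of the expected output after enabling syscalls 172 and 173,
assuming a kernel with 441 syscalls (the total shown is whatever
__NR_syscalls is on the running kernel):

	$ cat /proc/$PID/xcall
	Enabled Total[2/441]:172-173,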