Add xcall 2.0 support. It is compatible with xcall 1.0 and uses dynamic
instruction replacement for hardware xcall. Compilation works normally
whether hardware xcall or xcall 2.0 is selected, and hardware xcall has
been tested successfully using both xcall 2.0 and the dynamic library
method.

Changes in v4:
- Reuse uprobe_write_opcode() instead of using __replace_page() and
  copy_to_page().
- Handle errors from uprobe_write_opcode().
- Support xcall 1.0 before 920G.
- Change the xcall 1.0 bitmap to a byte array to save check
  instructions.
- Use a percpu variable to save check instructions for xcall 1.0.
- A little cleanup.

Changes in v3:
- Add validity checks during xcall registration to prevent
  out-of-bounds accesses and NULL pointer dereferences.
- Extract __replace_page() and copy_to_page() from uprobe to avoid
  code duplication.
- Replace with the new, compatible, refactored code.
- Remove unnecessary members, such as old_name.

Changes in v2:
- Remove ~300 LOC of duplicated or unnecessary code, such as link_slot
  and init_task.
- Fix KABI.
- Some cleanups.

Jinjie Ruan (1):
  xcall2.0: Support xcall1.0 for hardware xcall

Liao Chang (8):
  xcall2.0: Add userspace proc interface
  xcall2.0: Add xcall module register interface
  xcall2.0: Add xcall_area
  xcall2.0: Hijack syscall with dynamic instruction replacement
  xcall: Refactor the early exception entry for ACTLR.xcall
  arm64: Revert the hack to the early entry of SYNC exception
  xcall: Refactor the early entry for SYNC exception
  xcall2.0: Add a basic testcase

Yuntao Liu (1):
  xcall2.0: Introduce xcall2.0 prefetch kernel module

 arch/arm64/Kconfig.turbo                   |  13 +
 arch/arm64/include/asm/exception.h         |   4 +-
 arch/arm64/include/asm/mmu_context.h       |   7 -
 arch/arm64/include/asm/xcall.h             | 142 ++++----
 arch/arm64/kernel/cpufeature.c             |  71 ++--
 arch/arm64/kernel/entry-common.c           |  65 +++-
 arch/arm64/kernel/entry.S                  |  86 +----
 arch/arm64/kernel/probes/uprobes.c         |   6 +
 arch/arm64/kernel/process.c                |   5 +
 arch/arm64/kernel/syscall.c                |  14 +
 arch/arm64/kernel/xcall/Makefile           |   3 +-
 arch/arm64/kernel/xcall/core.c             | 396 +++++++++++++++++++++
 arch/arm64/kernel/xcall/entry.S            | 185 ++++++++--
 arch/arm64/kernel/xcall/proc.c             | 204 +++++++++++
 arch/arm64/kernel/xcall/xcall.c            |  56 +--
 arch/arm64/kvm/sys_regs.c                  |   1 +
 drivers/staging/Kconfig                    |   2 +
 drivers/staging/Makefile                   |   1 +
 drivers/staging/xcall/Kconfig              |  19 +
 drivers/staging/xcall/Makefile             |   1 +
 drivers/staging/xcall/dynamic_xcall_test.c |  97 +++++
 drivers/staging/xcall/prefetch.c           | 270 ++++++++++++++
 fs/proc/proc_xcall.c                       | 141 ++------
 include/linux/mm_types.h                   |   4 +
 include/linux/xcall.h                      |  52 +++
 kernel/events/uprobes.c                    |  19 +
 kernel/fork.c                              |   2 +
 mm/mmap.c                                  |  14 +-
 28 files changed, 1512 insertions(+), 368 deletions(-)
 create mode 100644 arch/arm64/kernel/xcall/core.c
 create mode 100644 arch/arm64/kernel/xcall/proc.c
 create mode 100644 drivers/staging/xcall/Kconfig
 create mode 100644 drivers/staging/xcall/Makefile
 create mode 100644 drivers/staging/xcall/dynamic_xcall_test.c
 create mode 100644 drivers/staging/xcall/prefetch.c
 create mode 100644 include/linux/xcall.h

-- 
2.34.1
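For orientation: the "dynamic instruction replacement" above rewrites a
process's 'svc #0' call sites to 'svc #0xffff', so the entry code can tell
the two apart by the SVC immediate recorded in ESR_EL1.ISS (see the hijack
patch below). A minimal sketch of the encoding involved; aarch64_svc_insn()
is an illustrative helper, not part of this series:

  /* AArch64 'svc #imm16' encodes as 0xd4000001 | (imm16 << 5). */
  static inline u32 aarch64_svc_insn(u16 imm16)
  {
          return 0xd4000001u | ((u32)imm16 << 5);
  }

  /*
   * aarch64_svc_insn(0)      == 0xd4000001  -> SVC_0000 in this series
   * aarch64_svc_insn(0xffff) == 0xd41fffe1  -> SVC_FFFF in this series
   */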
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Add the "xcall" and "xcall_comm" key structs and provide the procfs
interface to userspace. Add the '/proc/xcall/comm' interface; this
file is used to attach xcall programs to an executable.

Argument syntax:
 +:COMM BINARY KERNEL_MODULE : Attach a xcall
 -:COMM                      : Detach a xcall

 COMM:          Unique string identifying the attached xcall.
 BINARY:        Path to an executable.
 KERNEL_MODULE: Name of a module listed in /proc/modules that provides
                the xcall program.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/Kconfig.turbo         |  12 ++
 arch/arm64/include/asm/xcall.h   |  26 +++-
 arch/arm64/kernel/xcall/Makefile |   3 +-
 arch/arm64/kernel/xcall/core.c   | 163 ++++++++++++++++++++++
 arch/arm64/kernel/xcall/proc.c   | 204 +++++++++++++++++++++++++++
 include/linux/xcall.h            |  27 ++++
 6 files changed, 433 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kernel/xcall/core.c
 create mode 100644 arch/arm64/kernel/xcall/proc.c
 create mode 100644 include/linux/xcall.h

diff --git a/arch/arm64/Kconfig.turbo b/arch/arm64/Kconfig.turbo
index c4a8e4e889aa..cfefbdb605f8 100644
--- a/arch/arm64/Kconfig.turbo
+++ b/arch/arm64/Kconfig.turbo
@@ -71,4 +71,16 @@ config ACTLR_XCALL_XINT
	  Use the 0x680 as the offset to the exception vector base address for
	  the Armv8.8 NMI taken from EL0.
 
+config DYNAMIC_XCALL
+	bool "Support dynamically replacing and loading system calls"
+	depends on FAST_SYSCALL
+	default n
+	help
+	  Xcall 2.0 adds the "/proc/xcall/comm" interface to
+	  attach xcall programs to an executable, and supports
+	  customized syscall implementations by dynamically
+	  replacing the trapping instruction with 'svc 0xffff'
+	  and loading a kernel module which provides the
+	  customized implementation.
+ endmenu # "Turbo features selection" diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h index 5765a96eed53..a35e3803efad 100644 --- a/arch/arm64/include/asm/xcall.h +++ b/arch/arm64/include/asm/xcall.h @@ -7,10 +7,34 @@ #include <linux/percpu.h> #include <linux/sched.h> #include <linux/types.h> +#include <linux/xcall.h> #include <asm/actlr.h> #include <asm/cpufeature.h> +struct xcall_comm { + char *name; + char *binary; + char *module; + struct list_head list; +}; + +struct xcall { + /* used for xcall_attach */ + struct list_head list; + refcount_t ref; + /* file attached xcall */ + struct path binary_path; + struct inode *binary; + struct xcall_prog *program; + char *name; +}; + +#ifdef CONFIG_DYNAMIC_XCALL +extern int xcall_attach(struct xcall_comm *info); +extern int xcall_detach(struct xcall_comm *info); +#endif /* CONFIG_DYNAMIC_XCALL */ + DECLARE_STATIC_KEY_FALSE(xcall_enable); struct xcall_info { @@ -93,4 +117,4 @@ static inline void cpu_switch_xcall_entry(struct task_struct *tsk) } #endif /* CONFIG_ACTLR_XCALL_XINT */ -#endif /*__ASM_XCALL_H*/ +#endif /* __ASM_XCALL_H */ diff --git a/arch/arm64/kernel/xcall/Makefile b/arch/arm64/kernel/xcall/Makefile index 0168bd190793..4a9c8eedcba9 100644 --- a/arch/arm64/kernel/xcall/Makefile +++ b/arch/arm64/kernel/xcall/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += xcall.o +obj-y += xcall.o +obj-$(CONFIG_DYNAMIC_XCALL) += core.o proc.o diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c new file mode 100644 index 000000000000..5d0a401335c4 --- /dev/null +++ b/arch/arm64/kernel/xcall/core.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Huawei Limited. + */ + +#define pr_fmt(fmt) "xcall: " fmt + +#include <linux/namei.h> +#include <linux/slab.h> +#include <linux/xcall.h> + +#include <asm/xcall.h> + +static DEFINE_SPINLOCK(xcall_list_lock); +static LIST_HEAD(xcalls_list); +static DEFINE_SPINLOCK(prog_list_lock); +static LIST_HEAD(progs_list); + +/* + * Travel the list of all registered xcall_prog during module installation + * to find the xcall_prog. 
+ */ +static struct xcall_prog *get_xcall_prog(const char *module) +{ + struct xcall_prog *p; + + spin_lock(&prog_list_lock); + list_for_each_entry(p, &progs_list, list) { + if (!strcmp(module, p->name)) { + spin_unlock(&prog_list_lock); + return p; + } + } + spin_unlock(&prog_list_lock); + return NULL; +} + + +static struct xcall *get_xcall(struct xcall *xcall) +{ + refcount_inc(&xcall->ref); + return xcall; +} + +static void put_xcall(struct xcall *xcall) +{ + if (!refcount_dec_and_test(&xcall->ref)) + return; + + pr_info("free xcall resource.\n"); + kfree(xcall->name); + if (xcall->program) + module_put(xcall->program->owner); + + path_put(&xcall->binary_path); + kfree(xcall); +} + +static struct xcall *find_xcall(const char *name, struct inode *binary) +{ + struct xcall *xcall; + + list_for_each_entry(xcall, &xcalls_list, list) { + if ((name && !strcmp(name, xcall->name)) || + (binary && xcall->binary == binary)) + return get_xcall(xcall); + } + return NULL; +} + +static struct xcall *find_xcall_by_name_locked(const char *name) +{ + struct xcall *ret = NULL; + + spin_lock(&xcall_list_lock); + ret = find_xcall(name, NULL); + spin_unlock(&xcall_list_lock); + return ret; +} + +static struct xcall *insert_xcall_locked(struct xcall *xcall) +{ + struct xcall *ret = NULL; + + spin_lock(&xcall_list_lock); + ret = find_xcall(NULL, xcall->binary); + if (!ret) + list_add(&xcall->list, &xcalls_list); + else + put_xcall(ret); + spin_unlock(&xcall_list_lock); + return ret; +} + +static void delete_xcall(struct xcall *xcall) +{ + spin_lock(&xcall_list_lock); + list_del(&xcall->list); + spin_unlock(&xcall_list_lock); + + put_xcall(xcall); +} + +/* Init xcall with a given inode */ +static int init_xcall(struct xcall *xcall, struct xcall_comm *comm) +{ + struct xcall_prog *program = get_xcall_prog(comm->module); + + if (!program || !try_module_get(program->owner)) + return -EINVAL; + + if (kern_path(comm->binary, LOOKUP_FOLLOW, &xcall->binary_path)) + return -EINVAL; + + xcall->binary = d_real_inode(xcall->binary_path.dentry); + xcall->program = program; + refcount_set(&xcall->ref, 1); + INIT_LIST_HEAD(&xcall->list); + + return 0; +} + +int xcall_attach(struct xcall_comm *comm) +{ + struct xcall *xcall; + int ret; + + xcall = kzalloc(sizeof(struct xcall), GFP_KERNEL); + if (!xcall) + return -ENOMEM; + + ret = init_xcall(xcall, comm); + if (ret) { + kfree(xcall); + return ret; + } + + xcall->name = kstrdup(comm->name, GFP_KERNEL); + if (!xcall->name) { + delete_xcall(xcall); + return -ENOMEM; + } + + if (insert_xcall_locked(xcall)) { + delete_xcall(xcall); + return -EINVAL; + } + + return 0; +} + +int xcall_detach(struct xcall_comm *comm) +{ + struct xcall *xcall; + + xcall = find_xcall_by_name_locked(comm->name); + if (!xcall) + return -EINVAL; + + put_xcall(xcall); + delete_xcall(xcall); + return 0; +} diff --git a/arch/arm64/kernel/xcall/proc.c b/arch/arm64/kernel/xcall/proc.c new file mode 100644 index 000000000000..1738000afcc7 --- /dev/null +++ b/arch/arm64/kernel/xcall/proc.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Huawei Limited. 
+ */
+#include <linux/slab.h>
+#include <linux/xcall.h>
+#include <linux/string.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+
+#include <asm/xcall.h>
+
+static LIST_HEAD(comm_list);
+static DECLARE_RWSEM(comm_rwsem);
+
+static void free_xcall_comm(struct xcall_comm *info)
+{
+	if (!info)
+		return;
+	kfree(info->name);
+	kfree(info->binary);
+	kfree(info->module);
+	kfree(info);
+}
+
+static struct xcall_comm *find_xcall_comm(struct xcall_comm *comm)
+{
+	struct xcall_comm *temp;
+
+	list_for_each_entry(temp, &comm_list, list) {
+		if (!strcmp(comm->name, temp->name))
+			return temp;
+	}
+
+	return NULL;
+}
+
+static void delete_xcall_comm_locked(struct xcall_comm *info)
+{
+	struct xcall_comm *ret;
+
+	down_write(&comm_rwsem);
+	ret = find_xcall_comm(info);
+	if (ret)
+		list_del(&ret->list);
+	up_write(&comm_rwsem);
+	free_xcall_comm(ret);
+}
+
+static void insert_xcall_comm_locked(struct xcall_comm *info)
+{
+	down_write(&comm_rwsem);
+	if (!find_xcall_comm(info))
+		list_add(&info->list, &comm_list);
+	up_write(&comm_rwsem);
+}
+
+static int parse_xcall_command(int argc, char **argv,
+			       struct xcall_comm *info)
+{
+	if (strlen(argv[0]) < 3)
+		return -ECANCELED;
+
+	if (argv[0][0] != '+' && argv[0][0] != '-')
+		return -ECANCELED;
+
+	if (argv[0][1] != ':')
+		return -ECANCELED;
+
+	if (argv[0][0] == '+' && argc != 3)
+		return -ECANCELED;
+
+	if (argv[0][0] == '-' && argc != 1)
+		return -ECANCELED;
+
+	info->name = kstrdup(&argv[0][2], GFP_KERNEL);
+	if (!info->name)
+		return -ENOMEM;
+
+	if (argv[0][0] == '-')
+		return '-';
+
+	/*
+	 * On allocation failure, leave the already-duplicated strings in
+	 * place: the caller frees everything via free_xcall_comm(), so
+	 * freeing them here as well would be a double free.
+	 */
+	info->binary = kstrdup(argv[1], GFP_KERNEL);
+	if (!info->binary)
+		return 'x';
+
+	info->module = kstrdup(argv[2], GFP_KERNEL);
+	if (!info->module)
+		return 'x';
+
+	return argv[0][0];
+}
+
+/*
+ * /proc/xcall/comm
+ * Argument syntax:
+ *  +:COMM ELF_FILE [KERNEL_MODULE] : Attach a xcall
+ *  -:COMM                          : Detach a xcall
+ *
+ *  COMM:         : Unique string identifying the attached xcall.
+ *  ELF_FILE      : Path to an executable or library.
+ *  KERNEL_MODULE : Name of a module listed in /proc/modules that
+ *                  provides the xcall program.
+ */ +int proc_xcall_command(int argc, char **argv) +{ + struct xcall_comm *info; + int ret, op; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + INIT_LIST_HEAD(&info->list); + + op = parse_xcall_command(argc, argv, info); + switch (op) { + case '+': + ret = xcall_attach(info); + if (!ret) + insert_xcall_comm_locked(info); + else + free_xcall_comm(info); + break; + case '-': + ret = xcall_detach(info); + if (!ret) + delete_xcall_comm_locked(info); + free_xcall_comm(info); + break; + default: + free_xcall_comm(info); + return -ECANCELED; + } + + return ret; +} + +static int xcall_comm_show(struct seq_file *m, void *v) +{ + struct xcall_comm *info; + + down_read(&comm_rwsem); + list_for_each_entry(info, &comm_list, list) { + seq_printf(m, "+:%s %s %s\n", + info->name, info->binary, + info->module); + } + seq_puts(m, "\n"); + up_read(&comm_rwsem); + return 0; +} + +static int xcall_comm_open(struct inode *inode, struct file *file) +{ + return single_open(file, xcall_comm_show, NULL); +} + +static ssize_t xcall_comm_write(struct file *file, + const char __user *user_buf, + size_t nbytes, loff_t *ppos) +{ + int argc = 0, ret = 0; + char *raw_comm; + char **argv; + + raw_comm = memdup_user_nul(user_buf, nbytes - 1); + if (IS_ERR(raw_comm)) + return PTR_ERR(raw_comm); + + argv = argv_split(GFP_KERNEL, raw_comm, &argc); + if (!argv) { + kfree(raw_comm); + return -ENOMEM; + } + + ret = proc_xcall_command(argc, argv); + + argv_free(argv); + + kfree(raw_comm); + + return ret ? ret : nbytes; +} + +static const struct proc_ops xcall_comm_ops = { + .proc_open = xcall_comm_open, + .proc_read = seq_read, + .proc_lseek = seq_lseek, + .proc_write = xcall_comm_write, +}; + +static int __init xcall_proc_init(void) +{ + proc_mkdir("xcall", NULL); + proc_create("xcall/comm", 0644, NULL, &xcall_comm_ops); + return 0; +} +module_init(xcall_proc_init); diff --git a/include/linux/xcall.h b/include/linux/xcall.h new file mode 100644 index 000000000000..6b67253a3623 --- /dev/null +++ b/include/linux/xcall.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2025 Huawei. + */ + +#ifndef _LINUX_XCALL_H +#define _LINUX_XCALL_H + +#include <linux/module.h> +#include <linux/path.h> + +struct xcall_prog_object { + unsigned long scno; + unsigned long func; +}; + +#define PROG_NAME_LEN 64 +#define MAX_NR_SCNO 32 + +struct xcall_prog { + char name[PROG_NAME_LEN]; + struct module *owner; + struct list_head list; + struct xcall_prog_object objs[MAX_NR_SCNO]; + unsigned int nr_scno; +}; +#endif /* _LINUX_XCALL_H */ -- 2.34.1
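To make the argument syntax concrete, a hedged userspace sketch of
attaching and detaching an xcall through this interface; the COMM, BINARY
and KERNEL_MODULE values are made-up placeholders. Note the trailing
newline: xcall_comm_write() drops the last byte of the write
(memdup_user_nul(user_buf, nbytes - 1)), just as `echo` would supply one.

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
          /* Attach: "+:COMM BINARY KERNEL_MODULE", detach: "-:COMM" */
          const char *attach = "+:myapp /usr/bin/myapp my_xcall_prog\n";
          const char *detach = "-:myapp\n";
          int fd = open("/proc/xcall/comm", O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return 1;
          }
          if (write(fd, attach, strlen(attach)) < 0)
                  perror("attach");
          if (write(fd, detach, strlen(detach)) < 0)
                  perror("detach");
          close(fd);
          return 0;
  }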
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Add the xcall_prog_register()/xcall_prog_unregister() interfaces, which
kernel modules use to register xcall syscall implementations.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/kernel/xcall/core.c | 59 ++++++++++++++++++++++++++++++----
 include/linux/xcall.h          | 12 +++++++
 2 files changed, 65 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c
index 5d0a401335c4..ccfc8df323aa 100644
--- a/arch/arm64/kernel/xcall/core.c
+++ b/arch/arm64/kernel/xcall/core.c
@@ -24,17 +24,23 @@ static struct xcall_prog *get_xcall_prog(const char *module)
 {
 	struct xcall_prog *p;
 
-	spin_lock(&prog_list_lock);
 	list_for_each_entry(p, &progs_list, list) {
-		if (!strcmp(module, p->name)) {
-			spin_unlock(&prog_list_lock);
+		if (!strcmp(module, p->name))
 			return p;
-		}
 	}
-	spin_unlock(&prog_list_lock);
 	return NULL;
 }
 
+static struct xcall_prog *get_xcall_prog_locked(const char *module)
+{
+	struct xcall_prog *ret;
+
+	spin_lock(&prog_list_lock);
+	ret = get_xcall_prog(module);
+	spin_unlock(&prog_list_lock);
+
+	return ret;
+}
 
 static struct xcall *get_xcall(struct xcall *xcall)
 {
@@ -104,7 +110,7 @@ static void delete_xcall(struct xcall *xcall)
 /* Init xcall with a given inode */
 static int init_xcall(struct xcall *xcall, struct xcall_comm *comm)
 {
-	struct xcall_prog *program = get_xcall_prog(comm->module);
+	struct xcall_prog *program = get_xcall_prog_locked(comm->module);
 
 	if (!program || !try_module_get(program->owner))
 		return -EINVAL;
@@ -161,3 +167,44 @@ int xcall_detach(struct xcall_comm *comm)
 	delete_xcall(xcall);
 	return 0;
 }
+
+static int check_prog(struct xcall_prog *prog)
+{
+	struct xcall_prog_object *obj = prog->objs;
+
+	prog->nr_scno = 0;
+	while (prog->nr_scno < MAX_NR_SCNO && obj->func) {
+		if (obj->scno >= __NR_syscalls)
+			return -EINVAL;
+
+		prog->nr_scno++;
+		obj++;
+	}
+
+	pr_info("Successfully registered %d syscalls\n", prog->nr_scno);
+	return 0;
+}
+
+int xcall_prog_register(struct xcall_prog *prog)
+{
+	if (check_prog(prog))
+		return -EINVAL;
+
+	spin_lock(&prog_list_lock);
+	if (get_xcall_prog(prog->name)) {
+		spin_unlock(&prog_list_lock);
+		return -EBUSY;
+	}
+	list_add(&prog->list, &progs_list);
+	spin_unlock(&prog_list_lock);
+	return 0;
+}
+EXPORT_SYMBOL(xcall_prog_register);
+
+void xcall_prog_unregister(struct xcall_prog *prog)
+{
+	spin_lock(&prog_list_lock);
+	list_del(&prog->list);
+	spin_unlock(&prog_list_lock);
+}
+EXPORT_SYMBOL(xcall_prog_unregister);
diff --git a/include/linux/xcall.h b/include/linux/xcall.h
index 6b67253a3623..b7110d02c6bd 100644
--- a/include/linux/xcall.h
+++ b/include/linux/xcall.h
@@ -24,4 +24,16 @@ struct xcall_prog {
 	struct xcall_prog_object objs[MAX_NR_SCNO];
 	unsigned int nr_scno;
 };
+
+#ifdef CONFIG_DYNAMIC_XCALL
+extern int xcall_prog_register(struct xcall_prog *prog);
+extern void xcall_prog_unregister(struct xcall_prog *prog);
+#else /* !CONFIG_DYNAMIC_XCALL */
+static inline int xcall_prog_register(struct xcall_prog *prog)
+{
+	return -EINVAL;
+}
+static inline void xcall_prog_unregister(struct xcall_prog *prog) {}
+#endif /* CONFIG_DYNAMIC_XCALL */
+
 #endif /* _LINUX_XCALL_H */
-- 
2.34.1
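A minimal sketch of a module-side user of this interface; the module name
and the my_sys_getpid() replacement are hypothetical, but the struct
layout and the zero-func terminator follow check_prog() above. The
replacement has the syscall_fn_t shape, long (*)(struct pt_regs *):

  #include <linux/module.h>
  #include <linux/sched.h>
  #include <linux/xcall.h>
  #include <asm/ptrace.h>
  #include <asm/unistd.h>

  static long my_sys_getpid(struct pt_regs *regs)
  {
          return task_tgid_vnr(current);
  }

  static struct xcall_prog my_prog = {
          .name = "my_xcall_prog",  /* matched against KERNEL_MODULE */
          .owner = THIS_MODULE,
          .objs = {
                  { .scno = __NR_getpid, .func = (unsigned long)my_sys_getpid },
                  { },              /* .func == 0 terminates the list */
          },
  };

  static int __init my_prog_init(void)
  {
          return xcall_prog_register(&my_prog);
  }

  static void __exit my_prog_exit(void)
  {
          xcall_prog_unregister(&my_prog);
  }

  module_init(my_prog_init);
  module_exit(my_prog_exit);
  MODULE_LICENSE("GPL");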
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

In xcall 2.0, each process is associated with a unique xcall area.
During mmap, associate an xcall area with every matching executable
file and populate its system call table, to prepare for hijacking and
replacing the custom system calls.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/include/asm/xcall.h |  17 ++++
 arch/arm64/kernel/xcall/core.c | 106 +++++++++++++++++++++
 include/linux/mm_types.h       |   4 ++
 include/linux/xcall.h          |  13 ++++
 kernel/fork.c                  |   2 +
 mm/mmap.c                      |  14 ++++-
 6 files changed, 153 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h
index a35e3803efad..27aaf4344d0f 100644
--- a/arch/arm64/include/asm/xcall.h
+++ b/arch/arm64/include/asm/xcall.h
@@ -4,13 +4,16 @@
 #include <linux/atomic.h>
 #include <linux/jump_label.h>
+#include <linux/mm_types.h>
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/types.h>
 #include <linux/xcall.h>
+#include <linux/refcount.h>
 
 #include <asm/actlr.h>
 #include <asm/cpufeature.h>
+#include <asm/syscall.h>
 
 struct xcall_comm {
 	char *name;
@@ -30,9 +33,23 @@ struct xcall {
 	char *name;
 };
 
+struct xcall_area {
+	/*
+	 * 0...NR_syscalls - 1: function pointers to hijack default syscalls
+	 * NR_syscalls...NR_syscalls * 2 - 1: function pointers in the kernel module
+	 */
+	unsigned long sys_call_table[NR_syscalls * 2];
+	refcount_t ref;
+	struct xcall *xcall;
+};
+
 #ifdef CONFIG_DYNAMIC_XCALL
 extern int xcall_attach(struct xcall_comm *info);
 extern int xcall_detach(struct xcall_comm *info);
+
+#define mm_xcall_area(mm) ((struct xcall_area *)((mm)->xcall))
+#else
+#define mm_xcall_area(mm) (NULL)
 #endif /* CONFIG_DYNAMIC_XCALL */
 
 DECLARE_STATIC_KEY_FALSE(xcall_enable);
diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c
index ccfc8df323aa..15fb2d4424ef 100644
--- a/arch/arm64/kernel/xcall/core.c
+++ b/arch/arm64/kernel/xcall/core.c
@@ -42,6 +42,13 @@ static struct xcall_prog *get_xcall_prog_locked(const char *module)
 	return ret;
 }
 
+static long inv_xcall(struct pt_regs *regs)
+{
+	return -ENOSYS;
+}
+
+#define inv_xcall_syscall ((unsigned long)inv_xcall)
+
 static struct xcall *get_xcall(struct xcall *xcall)
 {
 	refcount_inc(&xcall->ref);
@@ -126,6 +133,105 @@ static int init_xcall(struct xcall *xcall, struct xcall_comm *comm)
 	return 0;
 }
 
+static int fill_xcall_syscall(struct xcall_area *area, struct xcall *xcall)
+{
+	struct xcall_prog_object *obj;
+	unsigned int scno_offset;
+
+	obj = xcall->program->objs;
+	while (obj->func) {
+		scno_offset = NR_syscalls + obj->scno;
+		if (area->sys_call_table[scno_offset] != inv_xcall_syscall)
+			return -EINVAL;
+
+		area->sys_call_table[scno_offset] = obj->func;
+		obj += 1;
+	}
+
+	return 0;
+}
+
+static struct xcall_area *create_xcall_area(struct mm_struct *mm)
+{
+	struct xcall_area *area;
+	int i;
+
+	area = kzalloc(sizeof(*area), GFP_KERNEL);
+	if (!area)
+		return NULL;
+
+	refcount_set(&area->ref, 1);
+
+	for (i = 0; i < NR_syscalls; i++) {
+		area->sys_call_table[i] = inv_xcall_syscall;
+		area->sys_call_table[i + NR_syscalls] = inv_xcall_syscall;
+	}
+
+	smp_store_release(&mm->xcall, area);
+	return area;
+}
+
+/*
+ * Initialize the xcall data of the mm_struct.
+ * And register xcall into one address space, which includes create + * the mm_struct associated xcall_area data + */ +int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm) +{ + struct xcall_area *area; + struct xcall *xcall; + + if (list_empty(&xcalls_list)) + return 0; + + spin_lock(&xcall_list_lock); + xcall = find_xcall(NULL, file_inode(vma->vm_file)); + if (!xcall || !xcall->program) { + spin_unlock(&xcall_list_lock); + return -EINVAL; + } + spin_unlock(&xcall_list_lock); + + area = mm_xcall_area(mm); + if (!area && !create_xcall_area(mm)) { + put_xcall(xcall); + return -ENOMEM; + } + + area = (struct xcall_area *)READ_ONCE(mm->xcall); + // Each process is allowed to be associated with only one xcall. + if (!cmpxchg(&area->xcall, NULL, xcall) && !fill_xcall_syscall(area, xcall)) + return 0; + + put_xcall(xcall); + return -EINVAL; +} + +void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p) +{ + struct xcall_area *area = mm_xcall_area(mm); + + if (area) + refcount_inc(&area->ref); +} + +void clear_xcall_area(struct mm_struct *mm) +{ + struct xcall_area *area = mm_xcall_area(mm); + + if (!area) + return; + + if (!refcount_dec_and_test(&area->ref)) + return; + + if (area->xcall) + put_xcall(area->xcall); + + kfree(area); + mm->xcall = NULL; +} + int xcall_attach(struct xcall_comm *comm) { struct xcall *xcall; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 64c38b09e18d..633283dce0a7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1016,7 +1016,11 @@ struct mm_struct { #else KABI_RESERVE(1) #endif +#ifdef CONFIG_DYNAMIC_XCALL + KABI_USE(2, void *xcall) +#else KABI_RESERVE(2) +#endif KABI_RESERVE(3) KABI_RESERVE(4) KABI_RESERVE(5) diff --git a/include/linux/xcall.h b/include/linux/xcall.h index b7110d02c6bd..5b0242f5a6a6 100644 --- a/include/linux/xcall.h +++ b/include/linux/xcall.h @@ -9,6 +9,10 @@ #include <linux/module.h> #include <linux/path.h> +struct vm_area_struct; +struct mm_struct; +struct inode; + struct xcall_prog_object { unsigned long scno; unsigned long func; @@ -28,12 +32,21 @@ struct xcall_prog { #ifdef CONFIG_DYNAMIC_XCALL extern int xcall_prog_register(struct xcall_prog *prog); extern void xcall_prog_unregister(struct xcall_prog *prog); +extern void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p); +extern void clear_xcall_area(struct mm_struct *mm); +extern int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm); #else /* !CONFIG_DYNAMIC_XCALL */ static inline int xcall_prog_register(struct xcall_prog *prog) { return -EINVAL; } static inline void xcall_prog_unregister(struct xcall_prog *prog) {} +static inline void mm_init_xcall_area(struct mm_struct *mm, struct task_struct *p) {} +static inline void clear_xcall_area(struct mm_struct *mm) {} +static inline int xcall_mmap(struct vm_area_struct *vma, struct mm_struct *mm) +{ + return 0; +} #endif /* CONFIG_DYNAMIC_XCALL */ #endif /* _LINUX_XCALL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 78663ca68160..f659f24b9ba2 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1367,6 +1367,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, #if defined(CONFIG_DAMON_MEM_SAMPLING) mm->damon_fifo = NULL; #endif + mm_init_xcall_area(mm, p); mm_init_uprobes_state(mm); hugetlb_count_init(mm); @@ -1420,6 +1421,7 @@ static inline void __mmput(struct mm_struct *mm) { VM_BUG_ON(atomic_read(&mm->mm_users)); + clear_xcall_area(mm); uprobe_clear_state(mm); exit_aio(mm); ksm_exit(mm); diff --git a/mm/mmap.c 
b/mm/mmap.c index fb54df419ea2..27f8e4dd8d72 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -48,6 +48,7 @@ #include <linux/sched/mm.h> #include <linux/ksm.h> #include <linux/share_pool.h> +#include <linux/xcall.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> @@ -590,9 +591,12 @@ static inline void vma_complete(struct vma_prepare *vp, if (!vp->skip_vma_uprobe) { uprobe_mmap(vp->vma); + xcall_mmap(vp->vma, mm); - if (vp->adj_next) + if (vp->adj_next) { uprobe_mmap(vp->adj_next); + xcall_mmap(vp->adj_next, mm); + } } } @@ -622,8 +626,10 @@ static inline void vma_complete(struct vma_prepare *vp, goto again; } } - if (vp->insert && vp->file) + if (vp->insert && vp->file) { uprobe_mmap(vp->insert); + xcall_mmap(vp->insert, mm); + } validate_mm(mm); } @@ -2943,8 +2949,10 @@ static unsigned long __mmap_region(struct mm_struct *mm, struct file *file, mm->locked_vm += (len >> PAGE_SHIFT); } - if (file) + if (file) { uprobe_mmap(vma); + xcall_mmap(vma, mm); + } /* * New (or expanded) vma always get soft dirty status. -- 2.34.1
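Since the double-length sys_call_table is the central data structure of
this patch, a short restatement of its layout contract may help;
xcall_entry_for() is an illustrative helper, not part of the series:

  /*
   * xcall_area->sys_call_table layout:
   *   [0, NR_syscalls)               - dispatch/hijack entries hit on 'svc #0'
   *   [NR_syscalls, 2 * NR_syscalls) - replacement functions from the module
   */
  static unsigned long xcall_entry_for(struct xcall_area *area,
                                       unsigned int scno, bool module_impl)
  {
          unsigned int idx = module_impl ? NR_syscalls + scno : scno;

          return area->sys_call_table[idx];
  }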
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Hijack syscalls with dynamic instruction replacement. With xcall 2.0,
hardware xcall can directly modify the SVC instruction through dynamic
instruction replacement, which avoids unnecessary system call number
checks at the exception entry.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/Kconfig.turbo           |  1 +
 arch/arm64/include/asm/exception.h |  3 ++
 arch/arm64/include/asm/xcall.h     | 38 ++++++++++++++
 arch/arm64/kernel/entry-common.c   |  2 +-
 arch/arm64/kernel/probes/uprobes.c |  6 +++
 arch/arm64/kernel/syscall.c        | 14 +++++
 arch/arm64/kernel/xcall/core.c     | 84 +++++++++++++++++++++++++++++-
 arch/arm64/kernel/xcall/entry.S    | 15 +-----
 kernel/events/uprobes.c            | 19 +++++++
 9 files changed, 165 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/Kconfig.turbo b/arch/arm64/Kconfig.turbo
index cfefbdb605f8..778ea1025c2c 100644
--- a/arch/arm64/Kconfig.turbo
+++ b/arch/arm64/Kconfig.turbo
@@ -74,6 +74,7 @@ config ACTLR_XCALL_XINT
 config DYNAMIC_XCALL
 	bool "Support dynamically replacing and loading system calls"
 	depends on FAST_SYSCALL
+	depends on UPROBES
 	default n
 	help
 	  Xcall 2.0 adds the "/proc/xcall/comm" interface to
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d69f0e6d53f8..94338104a18c 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -75,6 +75,9 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
 void do_el0_mops(struct pt_regs *regs, unsigned long esr);
 void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
+#ifdef CONFIG_FAST_SYSCALL
+void do_el0_xcall(struct pt_regs *regs);
+#endif
 
 void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigned long far);
diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h
index 27aaf4344d0f..ee526a520704 100644
--- a/arch/arm64/include/asm/xcall.h
+++ b/arch/arm64/include/asm/xcall.h
@@ -15,6 +15,9 @@
 #include <asm/cpufeature.h>
 #include <asm/syscall.h>
 
+#define SVC_0000	0xd4000001
+#define SVC_FFFF	0xd41fffe1
+
 struct xcall_comm {
 	char *name;
 	char *binary;
@@ -43,13 +46,48 @@ struct xcall_area {
 	struct xcall *xcall;
 };
 
+extern const syscall_fn_t *default_sys_call_table(void);
 #ifdef CONFIG_DYNAMIC_XCALL
 extern int xcall_attach(struct xcall_comm *info);
 extern int xcall_detach(struct xcall_comm *info);
+extern int xcall_pre_sstep_check(struct pt_regs *regs);
+extern int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr,
+			  uprobe_opcode_t opcode);
 
 #define mm_xcall_area(mm) ((struct xcall_area *)((mm)->xcall))
+
+static inline long hijack_syscall(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+	syscall_fn_t syscall_fn;
+
+	if (likely(!area))
+		return -ENOSYS;
+
+	if (unlikely(scno >= __NR_syscalls))
+		return -ENOSYS;
+
+	syscall_fn = (syscall_fn_t)area->sys_call_table[scno];
+	return syscall_fn(regs);
+}
+
+static inline const syscall_fn_t *real_syscall_table(void)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+
+	if (likely(!area))
+		return default_sys_call_table();
+
+	return (syscall_fn_t *)(&(area->sys_call_table[__NR_syscalls]));
+}
 #else
 #define mm_xcall_area(mm)	(NULL)
+#define hijack_syscall(regs)	(-ENOSYS)
+static inline const syscall_fn_t *real_syscall_table(void)
+{
+	return sys_call_table;
+}
 #endif /* CONFIG_DYNAMIC_XCALL */
 
 DECLARE_STATIC_KEY_FALSE(xcall_enable);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 1e8171c1efe7..f4a21c66856a 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -827,7 +827,7 @@ static void noinstr el0_xcall(struct pt_regs *regs)
 #endif
 	fp_user_discard();
 	local_daif_restore(DAIF_PROCCTX);
-	do_el0_svc(regs);
+	do_el0_xcall(regs);
 	fast_exit_to_user_mode(regs);
 }
 
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index a2f137a595fc..677a9589f9ca 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -6,6 +6,7 @@
 #include <linux/ptrace.h>
 #include <linux/uprobes.h>
 #include <asm/cacheflush.h>
+#include <asm/xcall.h>
 
 #include "decode-insn.h"
 
@@ -171,6 +172,11 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
 	if (uprobe_pre_sstep_notifier(regs))
 		return DBG_HOOK_HANDLED;
 
+#ifdef CONFIG_DYNAMIC_XCALL
+	if (xcall_pre_sstep_check(regs))
+		return DBG_HOOK_HANDLED;
+#endif
+
 	return DBG_HOOK_ERROR;
 }
 
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 558e9c9da8a4..ff40e51634b0 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -14,6 +14,7 @@
 #include <asm/syscall.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
+#include <asm/xcall.h>
 
 long a32_arm_syscall(struct pt_regs *regs, int scno);
 long sys_ni_syscall(void);
@@ -162,6 +163,15 @@ static inline void delouse_pt_regs(struct pt_regs *regs)
 }
 #endif
 
+#ifdef CONFIG_FAST_SYSCALL
+void do_el0_xcall(struct pt_regs *regs)
+{
+	const syscall_fn_t *t = real_syscall_table();
+
+	el0_svc_common(regs, regs->regs[8], __NR_syscalls, t);
+}
+#endif
+
 void do_el0_svc(struct pt_regs *regs)
 {
 	const syscall_fn_t *t = sys_call_table;
@@ -173,6 +183,10 @@ void do_el0_svc(struct pt_regs *regs)
 	}
 #endif
 
+#ifdef CONFIG_DYNAMIC_XCALL
+	if (!hijack_syscall(regs))
+		return;
+#endif
 	el0_svc_common(regs, regs->regs[8], __NR_syscalls, t);
 }
 
diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c
index 15fb2d4424ef..c331e3d253c5 100644
--- a/arch/arm64/kernel/xcall/core.c
+++ b/arch/arm64/kernel/xcall/core.c
@@ -5,6 +5,7 @@
 
 #define pr_fmt(fmt) "xcall: " fmt
 
+#include <linux/mmap_lock.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/xcall.h>
@@ -49,6 +50,77 @@ static long inv_xcall(struct pt_regs *regs)
 
 #define inv_xcall_syscall ((unsigned long)inv_xcall)
 
+static long patch_syscall(struct pt_regs *regs);
+
+static long filter_ksyscall(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+
+	/*
+	 * Currently, some syscalls issue 'svc 0' from two or more different
+	 * addresses, so all of these 'svc 0' sites need to be hijacked.
+	 */
+	if (regs->syscallno & ESR_ELx_ISS_MASK)
+		return -ENOSYS;
+
+	cmpxchg(&(area->sys_call_table[scno]), filter_ksyscall, patch_syscall);
+	regs->pc -= AARCH64_INSN_SIZE;
+	return 0;
+}
+
+static long replay_syscall(struct pt_regs *regs)
+{
+	regs->pc -= AARCH64_INSN_SIZE;
+	return 0;
+}
+
+static long patch_syscall(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+	syscall_fn_t syscall_fn;
+	unsigned long old;
+	int ret;
+
+	old = cmpxchg(&(area->sys_call_table[scno]), patch_syscall, replay_syscall);
+	if (old != (unsigned long)patch_syscall) {
+		syscall_fn = (syscall_fn_t)area->sys_call_table[scno];
+		return syscall_fn(regs);
+	}
+
+	regs->pc -= AARCH64_INSN_SIZE;
+
+	mmap_write_lock(current->mm);
+	ret = set_xcall_insn(current->mm, regs->pc, SVC_FFFF);
+	mmap_write_unlock(current->mm);
+
+	if (!ret) {
+		xchg(&(area->sys_call_table[scno]), filter_ksyscall);
+		pr_debug("patch svc ffff for scno %u\n", scno);
+		return 0;
+	}
+
+	/*
+	 * If patching 'svc 0xffff' failed, use the functions defined in
+	 * sys_call_table to handle the syscall this time, and try to do
+	 * the patching next time.
+	 */
+	set_xcall_insn(current->mm, regs->pc, SVC_0000);
+	regs->pc += AARCH64_INSN_SIZE;
+	xchg(&(area->sys_call_table[scno]), patch_syscall);
+	return ret;
+}
+
+int xcall_pre_sstep_check(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+
+	return area && (scno < NR_syscalls) &&
+	       (area->sys_call_table[scno] != (unsigned long)inv_xcall);
+}
+
 static struct xcall *get_xcall(struct xcall *xcall)
 {
 	refcount_inc(&xcall->ref);
@@ -135,17 +207,19 @@ static int init_xcall(struct xcall *xcall, struct xcall_comm *comm)
 
 static int fill_xcall_syscall(struct xcall_area *area, struct xcall *xcall)
 {
+	unsigned int scno_offset, scno_count = 0;
 	struct xcall_prog_object *obj;
-	unsigned int scno_offset;
 
 	obj = xcall->program->objs;
-	while (obj->func) {
+	while (scno_count < xcall->program->nr_scno && obj->func) {
 		scno_offset = NR_syscalls + obj->scno;
 		if (area->sys_call_table[scno_offset] != inv_xcall_syscall)
 			return -EINVAL;
 
 		area->sys_call_table[scno_offset] = obj->func;
+		area->sys_call_table[obj->scno] = (unsigned long)patch_syscall;
 		obj += 1;
+		scno_count++;
 	}
 
 	return 0;
@@ -314,3 +388,9 @@ void xcall_prog_unregister(struct xcall_prog *prog)
 	spin_unlock(&prog_list_lock);
 }
 EXPORT_SYMBOL(xcall_prog_unregister);
+
+const syscall_fn_t *default_sys_call_table(void)
+{
+	return sys_call_table;
+}
+EXPORT_SYMBOL(default_sys_call_table);
diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S
index 401be46f4fc2..7728f32ee962 100644
--- a/arch/arm64/kernel/xcall/entry.S
+++ b/arch/arm64/kernel/xcall/entry.S
@@ -152,7 +152,6 @@ alternative_else_nop_endif
 	.endm /* .macro hw_xcal_restore_base_regs */
 
 SYM_CODE_START(no_xcall_entry)
-	ldp	x20, x21, [sp, #0]
 	kernel_entry 0, 64
 	mov	x0, sp
 	bl	el0t_64_sync_handler
@@ -160,24 +159,12 @@ SYM_CODE_START(no_xcall_entry)
 SYM_CODE_END(no_xcall_entry)
 
 SYM_CODE_START(xcall_entry)
-	ldp	x20, x21, [sp, #0]
 	hw_xcall_save_base_regs
 	mov	x0, sp
 	bl	el0t_64_xcall_handler
 	hw_xcal_restore_base_regs
 SYM_CODE_END(xcall_entry)
 
-SYM_CODE_START_LOCAL(el0t_64_hw_xcall)
-	stp	x20, x21, [sp, #0]
-	ldr_this_cpu x21, __cpu_xcall_entry, x20
-	mov	x20, __NR_syscalls
-	/* x8 >= __NR_syscalls */
-	cmp	x8, __NR_syscalls
-	csel	x20, x8, x20, lt
-	ldr	x21, [x21, x20, lsl #3]
-	br	x21
-SYM_CODE_END(el0t_64_hw_xcall)
-
 .macro xcall_ventry
	.align 7
.Lventry_start\@:
@@ -190,6 +177,6 @@ SYM_CODE_END(el0t_64_hw_xcall)
 	msr	tpidrro_el0, xzr
 .Lskip_tramp_vectors_cleanup\@:
 	sub	sp, sp, #PT_REGS_SIZE
-	b	el0t_64_hw_xcall
+	b	xcall_entry
 .org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 	.endm
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 609e48784f77..2d77f20f1474 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -595,6 +595,25 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
 			*(uprobe_opcode_t *)&auprobe->insn);
 }
 
+#ifdef CONFIG_DYNAMIC_XCALL
+/*
+ * Force-patch the instruction at @vaddr without checking that the old
+ * instruction is UPROBE_BRK.
+ */
+int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode)
+{
+	struct uprobe uprobe = { .ref_ctr_offset = 0 };
+	int ret;
+
+	/* Write UPROBE_SWBP_INSN first to occupy @vaddr, so uprobe cannot write it */
+	ret = uprobe_write_opcode(&uprobe.arch, mm, vaddr, UPROBE_SWBP_INSN);
+	if (ret)
+		return ret;
+
+	return uprobe_write_opcode(&uprobe.arch, mm, vaddr, opcode);
+}
+#endif
+
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
 {
 	refcount_inc(&uprobe->ref);
-- 
2.34.1
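The first half of the area's table behaves as a small per-syscall state
machine; the enum below is purely illustrative, while the states and
transitions are the functions from core.c above:

  enum xcall_slot_state {         /* illustrative names only */
          SLOT_INVALID,   /* inv_xcall: scno not provided, -ENOSYS      */
          SLOT_PATCH,     /* patch_syscall: back regs->pc up one insn,  */
                          /* rewrite 'svc #0' -> 'svc #0xffff' via      */
                          /* set_xcall_insn(), then replay              */
          SLOT_REPLAY,    /* replay_syscall: a racing thread saw the    */
                          /* slot mid-patch; just replay the insn       */
          SLOT_FILTER,    /* filter_ksyscall: libc may issue 'svc #0'   */
                          /* for the same scno from several addresses,  */
                          /* so flip back to SLOT_PATCH and replay      */
  };

Once a call site has been rewritten, 'svc #0xffff' produces a non-zero
ESR_EL1.ISS, and the entry code can index the second half of the table
directly, which is the point of the whole exercise.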
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Because the default HCR_EL2.TACR value is 1, enable ACTLR_XCALL
system-wide by default, to avoid the overhead of the vCPU trapping out
whenever ACTLR_XCALL is accessed during a scheduling context switch.

Also, separate the userspace control interface of xcall into two parts.
The first one aims to register xcall for an individual TASK via
/proc/[pid]/xcall. The second one aims to register xcall for an
individual BINARY file via /proc/xcall/comm. Implementing the first one
requires some cleanup of the code.

Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/include/asm/mmu_context.h |  7 ---
 arch/arm64/include/asm/xcall.h       | 76 ----------------
 arch/arm64/kernel/cpufeature.c       | 71 +++++++-------
 arch/arm64/kernel/xcall/entry.S      |  7 ---
 arch/arm64/kernel/xcall/xcall.c      | 40 +--------
 arch/arm64/kvm/sys_regs.c            |  1 +
 fs/proc/proc_xcall.c                 | 64 +-------------
 7 files changed, 39 insertions(+), 227 deletions(-)

diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index 39595fa03491..a6fb325424e7 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -24,9 +24,6 @@
 #include <asm/cputype.h>
 #include <asm/sysreg.h>
 #include <asm/tlbflush.h>
-#ifdef CONFIG_ACTLR_XCALL_XINT
-#include <asm/xcall.h>
-#endif
 
 extern bool rodata_full;
 
@@ -267,10 +264,6 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	if (prev != next)
 		__switch_mm(next);
 
-#ifdef CONFIG_ACTLR_XCALL_XINT
-	cpu_switch_xcall_entry(tsk);
-#endif
-
 	/*
 	 * Update the saved TTBR0_EL1 of the scheduled-in task as the previous
 	 * value may have not been initialised yet (activate_mm caller) or the
diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h
index ee526a520704..5cea7f81a857 100644
--- a/arch/arm64/include/asm/xcall.h
+++ b/arch/arm64/include/asm/xcall.h
@@ -2,17 +2,12 @@
 #ifndef __ASM_XCALL_H
 #define __ASM_XCALL_H
 
-#include <linux/atomic.h>
 #include <linux/jump_label.h>
 #include <linux/mm_types.h>
-#include <linux/percpu.h>
 #include <linux/sched.h>
-#include <linux/types.h>
 #include <linux/xcall.h>
 #include <linux/refcount.h>
 
-#include <asm/actlr.h>
-#include <asm/cpufeature.h>
 #include <asm/syscall.h>
 
 #define SVC_0000	0xd4000001
@@ -101,75 +96,4 @@ struct xcall_info {
 
 int xcall_init_task(struct task_struct *p, struct task_struct *orig);
 void xcall_task_free(struct task_struct *p);
-
-#ifdef CONFIG_ACTLR_XCALL_XINT
-struct hw_xcall_info {
-	/* Must be first!
*/ - void *xcall_entry[__NR_syscalls + 1]; - atomic_t xcall_scno_count; - /* keep xcall_entry and xcall scno count consistent */ - spinlock_t lock; -}; - -#define TASK_HW_XINFO(p) ((struct hw_xcall_info *)p->xinfo) -#define XCALL_ENTRY_SIZE (sizeof(unsigned long) * (__NR_syscalls + 1)) - -DECLARE_PER_CPU(void *, __cpu_xcall_entry); -extern void xcall_entry(void); -extern void no_xcall_entry(void); - -static inline bool is_xcall_entry(struct hw_xcall_info *xinfo, unsigned int sc_no) -{ - return xinfo->xcall_entry[sc_no] == xcall_entry; -} - -static inline int set_hw_xcall_entry(struct hw_xcall_info *xinfo, - unsigned int sc_no, bool enable) -{ - spin_lock(&xinfo->lock); - if (enable && !is_xcall_entry(xinfo, sc_no)) { - xinfo->xcall_entry[sc_no] = xcall_entry; - atomic_inc(&xinfo->xcall_scno_count); - } - - if (!enable && is_xcall_entry(xinfo, sc_no)) { - xinfo->xcall_entry[sc_no] = no_xcall_entry; - atomic_dec(&xinfo->xcall_scno_count); - } - spin_unlock(&xinfo->lock); - - return 0; -} - -static inline void cpu_set_arch_xcall(bool enable) -{ - u64 el = read_sysreg(CurrentEL); - u64 val; - - if (el == CurrentEL_EL2) { - val = read_sysreg(actlr_el2); - val = enable ? (val | ACTLR_ELx_XCALL) : (val & ~ACTLR_ELx_XCALL); - write_sysreg(val, actlr_el2); - } else { - val = read_sysreg(actlr_el1); - val = enable ? (val | ACTLR_ELx_XCALL) : (val & ~ACTLR_ELx_XCALL); - write_sysreg(val, actlr_el1); - } -} - -static inline void cpu_switch_xcall_entry(struct task_struct *tsk) -{ - struct hw_xcall_info *xinfo = tsk->xinfo; - - if (!system_uses_xcall_xint() || !tsk->xinfo) - return; - - if (unlikely(atomic_read(&xinfo->xcall_scno_count) > 0)) { - __this_cpu_write(__cpu_xcall_entry, xinfo->xcall_entry); - cpu_set_arch_xcall(true); - } else - cpu_set_arch_xcall(false); -} -#endif /* CONFIG_ACTLR_XCALL_XINT */ - #endif /* __ASM_XCALL_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 480af6df8364..625a77760126 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2443,6 +2443,39 @@ static void mpam_extra_caps(void) #include <asm/xcall.h> DEFINE_STATIC_KEY_FALSE(xcall_enable); +static int __init xcall_setup(char *str) +{ + static_branch_enable(&xcall_enable); + + return 1; +} +__setup("xcall", xcall_setup); + +static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return static_key_enabled(&xcall_enable); +} +#endif + +#ifdef CONFIG_FAST_IRQ +bool is_xint_support; +static int __init xint_setup(char *str) +{ + if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS)) + return 1; + + is_xint_support = true; + return 1; +} +__setup("xint", xint_setup); + +static bool has_xint_support(const struct arm64_cpu_capabilities *entry, int __unused) +{ + return is_xint_support; +} +#endif + +#ifdef CONFIG_ACTLR_XCALL_XINT #define AIDR_ELx_XCALL_SHIFT 32 #define AIDR_ELx_XCALL (UL(1) << AIDR_ELx_XCALL_SHIFT) @@ -2477,40 +2510,6 @@ static bool is_arch_xcall_xint_support(void) return false; } -static int __init xcall_setup(char *str) -{ - if (!is_arch_xcall_xint_support()) - static_branch_enable(&xcall_enable); - - return 1; -} -__setup("xcall", xcall_setup); - -static bool has_xcall_support(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return static_key_enabled(&xcall_enable); -} -#endif - -#ifdef CONFIG_FAST_IRQ -bool is_xint_support; -static int __init xint_setup(char *str) -{ - if (!cpus_have_cap(ARM64_HAS_GIC_CPUIF_SYSREGS)) - return 1; - - is_xint_support = true; - return 1; -} -__setup("xint", 
xint_setup); - -static bool has_xint_support(const struct arm64_cpu_capabilities *entry, int __unused) -{ - return is_xint_support; -} -#endif - -#ifdef CONFIG_ACTLR_XCALL_XINT static bool has_arch_xcall_xint_support(const struct arm64_cpu_capabilities *entry, int scope) { return is_arch_xcall_xint_support(); @@ -2554,14 +2553,14 @@ static void cpu_enable_arch_xcall_xint(const struct arm64_cpu_capabilities *__un el = read_sysreg(CurrentEL); if (el == CurrentEL_EL2) { actlr_el2 = read_sysreg(actlr_el2); - actlr_el2 |= ACTLR_ELx_XINT; + actlr_el2 |= (ACTLR_ELx_XINT | ACTLR_ELx_XCALL); write_sysreg(actlr_el2, actlr_el2); isb(); actlr_el2 = read_sysreg(actlr_el2); pr_info("actlr_el2: %llx, cpu:%d\n", actlr_el2, cpu); } else { actlr_el1 = read_sysreg(actlr_el1); - actlr_el1 |= ACTLR_ELx_XINT; + actlr_el1 |= (ACTLR_ELx_XINT | ACTLR_ELx_XCALL); write_sysreg(actlr_el1, actlr_el1); isb(); actlr_el1 = read_sysreg(actlr_el1); diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S index 7728f32ee962..7b75e8651a2a 100644 --- a/arch/arm64/kernel/xcall/entry.S +++ b/arch/arm64/kernel/xcall/entry.S @@ -151,13 +151,6 @@ alternative_else_nop_endif sb .endm /* .macro hw_xcal_restore_base_regs */ -SYM_CODE_START(no_xcall_entry) - kernel_entry 0, 64 - mov x0, sp - bl el0t_64_sync_handler - b ret_to_user -SYM_CODE_END(no_xcall_entry) - SYM_CODE_START(xcall_entry) hw_xcall_save_base_regs mov x0, sp diff --git a/arch/arm64/kernel/xcall/xcall.c b/arch/arm64/kernel/xcall/xcall.c index d8eaec7e4637..31072c0402f4 100644 --- a/arch/arm64/kernel/xcall/xcall.c +++ b/arch/arm64/kernel/xcall/xcall.c @@ -6,7 +6,6 @@ */ #include <linux/bitmap.h> -#include <linux/percpu.h> #include <linux/sched.h> #include <linux/slab.h> #include <asm/xcall.h> @@ -25,45 +24,8 @@ static inline int sw_xcall_init_task(struct task_struct *p, struct task_struct * return 0; } -#ifdef CONFIG_ACTLR_XCALL_XINT -static const void *default_syscall_table[__NR_syscalls + 1] = { - [0 ... 
__NR_syscalls] = no_xcall_entry, -}; - -asmlinkage DEFINE_PER_CPU(void *, __cpu_xcall_entry) = default_syscall_table; -static inline int hw_xcall_init_task(struct task_struct *p, struct task_struct *orig) -{ - struct hw_xcall_info *p_xinfo, *orig_xinfo; - - p->xinfo = kzalloc(sizeof(struct hw_xcall_info), GFP_KERNEL); - if (!p->xinfo) - return -ENOMEM; - - p_xinfo = TASK_HW_XINFO(p); - spin_lock_init(&p_xinfo->lock); - - if (!orig->xinfo) { - memcpy(p->xinfo, default_syscall_table, XCALL_ENTRY_SIZE); - atomic_set(&p_xinfo->xcall_scno_count, 0); - } else { - orig_xinfo = TASK_HW_XINFO(orig); - spin_lock(&orig_xinfo->lock); - memcpy(p->xinfo, orig->xinfo, XCALL_ENTRY_SIZE); - atomic_set(&p_xinfo->xcall_scno_count, - atomic_read(&orig_xinfo->xcall_scno_count)); - spin_unlock(&orig_xinfo->lock); - } - - return 0; -} -#endif - int xcall_init_task(struct task_struct *p, struct task_struct *orig) { -#ifdef CONFIG_ACTLR_XCALL_XINT - if (system_uses_xcall_xint()) - return hw_xcall_init_task(p, orig); -#endif if (static_branch_unlikely(&xcall_enable)) return sw_xcall_init_task(p, orig); @@ -72,6 +34,6 @@ int xcall_init_task(struct task_struct *p, struct task_struct *orig) void xcall_task_free(struct task_struct *p) { - if (system_uses_xcall_xint() || static_branch_unlikely(&xcall_enable)) + if (static_branch_unlikely(&xcall_enable)) kfree(p->xinfo); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 98af8358296a..14eaa56f2e9d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -17,6 +17,7 @@ #include <linux/printk.h> #include <linux/uaccess.h> +#include <asm/actlr.h> #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/debug-monitors.h> diff --git a/fs/proc/proc_xcall.c b/fs/proc/proc_xcall.c index 5a417bc7cb0a..5f45d0799b33 100644 --- a/fs/proc/proc_xcall.c +++ b/fs/proc/proc_xcall.c @@ -4,57 +4,11 @@ * * Copyright (C) 2025 Huawei Ltd. */ -#include <linux/cpufeature.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <asm/xcall.h> #include "internal.h" -#ifdef CONFIG_ACTLR_XCALL_XINT -static void proc_hw_xcall_show(struct task_struct *p, struct seq_file *m) -{ - struct hw_xcall_info *hw_xinfo = TASK_HW_XINFO(p); - unsigned int i, start = 0, end = 0; - bool in_range = false; - - if (!hw_xinfo) - return; - - for (i = 0; i < __NR_syscalls; i++) { - bool scno_xcall_enable = is_xcall_entry(hw_xinfo, i); - - if (scno_xcall_enable && !in_range) { - in_range = true; - start = i; - } - - if ((!scno_xcall_enable || i == __NR_syscalls - 1) && in_range) { - in_range = false; - end = scno_xcall_enable ? 
i : i - 1; - if (i == start + 1) - seq_printf(m, "%u,", start); - else - seq_printf(m, "%u-%u,", start, end); - } - } - seq_puts(m, "\n"); -} - -static int proc_set_hw_xcall(struct task_struct *p, unsigned int sc_no, - bool is_clear) -{ - struct hw_xcall_info *hw_xinfo = TASK_HW_XINFO(p); - - if (!is_clear) - return set_hw_xcall_entry(hw_xinfo, sc_no, true); - - if (is_clear) - return set_hw_xcall_entry(hw_xinfo, sc_no, false); - - return -EINVAL; -} -#endif - static int xcall_show(struct seq_file *m, void *v) { struct inode *inode = m->private; @@ -62,20 +16,13 @@ static int xcall_show(struct seq_file *m, void *v) unsigned int rs, re; struct xcall_info *xinfo; - if (!system_uses_xcall_xint() && !static_key_enabled(&xcall_enable)) + if (!static_key_enabled(&xcall_enable)) return -EACCES; p = get_proc_task(inode); if (!p) return -ESRCH; -#ifdef CONFIG_ACTLR_XCALL_XINT - if (system_uses_xcall_xint()) { - proc_hw_xcall_show(p, m); - goto out; - } -#endif - xinfo = TASK_XINFO(p); if (!xinfo) goto out; @@ -124,7 +71,7 @@ static ssize_t xcall_write(struct file *file, const char __user *buf, int is_clear = 0; struct xcall_info *xinfo; - if (!system_uses_xcall_xint() && !static_key_enabled(&xcall_enable)) + if (!static_key_enabled(&xcall_enable)) return -EACCES; memset(buffer, 0, sizeof(buffer)); @@ -148,13 +95,6 @@ static ssize_t xcall_write(struct file *file, const char __user *buf, goto out; } -#ifdef CONFIG_ACTLR_XCALL_XINT - if (system_uses_xcall_xint()) { - ret = proc_set_hw_xcall(p, sc_no, is_clear); - goto out; - } -#endif - xinfo = TASK_XINFO(p); if (!is_clear && !test_bit(sc_no, xinfo->xcall_enable)) ret = xcall_enable_one(xinfo, sc_no); -- 2.34.1
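For reference, a sketch of how the hardware capability check in this
patch might read AIDR_EL1; bit 32 mirrors the AIDR_ELx_XCALL definition
above, sys_reg(3, 1, 0, 0, 7) is the architectural encoding of AIDR_EL1,
and the fallback define is only needed if the tree lacks SYS_AIDR_EL1:

  #ifndef SYS_AIDR_EL1
  #define SYS_AIDR_EL1	sys_reg(3, 1, 0, 0, 7)
  #endif

  static bool this_cpu_has_hw_xcall(void)
  {
          /* AIDR_ELx_XCALL == UL(1) << 32, per the patch above */
          return read_sysreg_s(SYS_AIDR_EL1) & AIDR_ELx_XCALL;
  }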
From: Liao Chang <liaochang1@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Use the symbol el0t_64_svc_entry as the dispatcher of svc exception handler: - el0_slow_syscall: use kernel_entry and ret_to_user to do exception context switch, additionally use el0_svc to invoke syscall functions. - el0_fast_syscall: use hw_xcall_save_base_regs and hw_xcall_restore_base_regs to do low-overhead context switch, additionally use el0_svc to invoke syscall functions. - el0_xcall_syscall: use hw_xcall_save_base_regs and hw_xcall_restore_base_regs to do low-overhead context switch, additionally use el0_xcall to invoke dynamically load syscall functions. Signed-off-by: Liao Chang <liaochang1@huawei.com> --- arch/arm64/include/asm/exception.h | 1 - arch/arm64/kernel/entry-common.c | 26 ++++++++------ arch/arm64/kernel/entry.S | 3 +- arch/arm64/kernel/xcall/entry.S | 55 +++++++++++++++++++++++++++--- arch/arm64/kernel/xcall/xcall.c | 6 ++++ 5 files changed, 74 insertions(+), 17 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 94338104a18c..1d87f724719d 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -83,6 +83,5 @@ void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, unsigne #ifdef CONFIG_ACTLR_XCALL_XINT asmlinkage void el0t_64_xint_handler(struct pt_regs *regs); -asmlinkage void el0t_64_xcall_handler(struct pt_regs *regs); #endif #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index f4a21c66856a..a405ccc185f1 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -207,7 +207,7 @@ static __always_inline void fast_enter_from_user_mode(struct pt_regs *regs) mte_disable_tco_entry(current); #endif } -#endif +#endif /* CONFIG_FAST_SYSCALL || CONFIG_FAST_IRQ */ /* * Handle IRQ/context state management when entering an NMI from user/kernel @@ -818,8 +818,8 @@ static void noinstr el0_fpac(struct pt_regs *regs, unsigned long esr) } #ifdef CONFIG_FAST_SYSCALL -/* Copy from el0_sync */ -static void noinstr el0_xcall(struct pt_regs *regs) +/* dynamically load syscall handler */ +asmlinkage void noinstr el0_xcall_syscall(struct pt_regs *regs) { fast_enter_from_user_mode(regs); #ifndef CONFIG_SECURITY_FEATURE_BYPASS @@ -831,11 +831,21 @@ static void noinstr el0_xcall(struct pt_regs *regs) fast_exit_to_user_mode(regs); } -asmlinkage void noinstr el0t_64_fast_syscall_handler(struct pt_regs *regs) +/* low-overhead syscall handler */ +asmlinkage void noinstr el0_fast_syscall(struct pt_regs *regs) { - el0_xcall(regs); -} + fast_enter_from_user_mode(regs); +#ifndef CONFIG_SECURITY_FEATURE_BYPASS + cortex_a76_erratum_1463225_svc_handler(); #endif + fp_user_discard(); + local_daif_restore(DAIF_PROCCTX); + do_el0_svc(regs); + fast_exit_to_user_mode(regs); +} + +asmlinkage void noinstr el0_slow_syscall(struct pt_regs *regs) __alias(el0_svc); +#endif /* CONFIG_FAST_SYSCALL */ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs) { @@ -1052,10 +1062,6 @@ UNHANDLED(el0t, 32, error) #endif /* CONFIG_AARCH32_EL0 */ #ifdef CONFIG_ACTLR_XCALL_XINT -asmlinkage void noinstr el0t_64_xcall_handler(struct pt_regs *regs) -{ - el0_xcall(regs); -} asmlinkage void noinstr el0t_64_xint_handler(struct pt_regs *regs) { el0_interrupt(regs, ISR_EL1_IS, handle_arch_irq, handle_arch_nmi_irq); diff --git 
a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5648a3119f90..cd39e17af8b0 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -685,7 +685,7 @@ SYM_CODE_END(__bad_stack)
 	kernel_entry 0, 64
 #endif
 	mov	x0, sp
-	bl	el0t_64_fast_syscall_handler
+	bl	el0_fast_syscall
 #ifdef CONFIG_SECURITY_FEATURE_BYPASS
 	kernel_exit 0, xcall
 #else
@@ -797,7 +797,6 @@ SYM_CODE_END(el\el\ht\()_\regsize\()_\label)
 	entry_handler	0, t, 64, error
 
 #ifdef CONFIG_ACTLR_XCALL_XINT
-	entry_handler	0, t, 64, xcall
 	entry_handler	0, t, 64, xint
 #endif
 	entry_handler	0, t, 32, sync
diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S
index 7b75e8651a2a..abd5f97d5525 100644
--- a/arch/arm64/kernel/xcall/entry.S
+++ b/arch/arm64/kernel/xcall/entry.S
@@ -151,12 +151,55 @@ alternative_else_nop_endif
 	sb
 	.endm /* .macro hw_xcal_restore_base_regs */
 
-SYM_CODE_START(xcall_entry)
+#define __NR_fast_syscalls 512
+
+SYM_CODE_START_LOCAL(el0t_64_svc_entry)
+	/* Hijack SVC to dynamically loaded syscalls via '/proc/xcall/comm' */
+	ldr	x20, [sp, #S_SYSCALLNO]		// ESR bits[25:18]
+	cmp	x20, 0
+	bne	el0t_64_xcall_entry
+
+	/* Hijack SVC to low-overhead syscalls via '/proc/[pid]/xcall' */
+	ldr_this_cpu x21, __cpu_fast_syscall, x20	// per-cpu table
+	and	x20, x8, #__NR_fast_syscalls - 1	// truncate scno to below 512
+	ldrb	w20, [x21, x20]			// table memory overhead is 512 bytes
+	cmp	x20, 0
+	bne	el0t_fast_syscall
+
+	ldp	x20, x21, [sp, #16 * 10]
+	kernel_entry 0, 64
+	mov	x0, sp
+	bl	el0_slow_syscall
+	b	ret_to_user
+SYM_INNER_LABEL(el0t_64_xcall_entry, SYM_L_GLOBAL)
+	lsr	x20, x20, #4
+	adr	x21, .xcall_func_table
+	ldr	w20, [x21, x20, lsl #2]
+	add	x20, x20, x21
+	br	x20
+	/* ISS == 0x0F..0xFF: entries to optimized and customized syscalls */
+.xcall_func_table:
+	.rept	15
+	.word	el0t_xcall_syscall - .xcall_func_table
+	.endr
+SYM_CODE_END(el0t_64_svc_entry)
+
+SYM_CODE_START_LOCAL(el0t_xcall_syscall)
+	ldp	x20, x21, [sp, #16 * 10]
 	hw_xcall_save_base_regs
 	mov	x0, sp
-	bl	el0t_64_xcall_handler
+	bl	el0_xcall_syscall
 	hw_xcal_restore_base_regs
-SYM_CODE_END(xcall_entry)
+SYM_CODE_END(el0t_xcall_syscall)
+
+SYM_CODE_START_LOCAL(el0t_fast_syscall)
+	ldp	x20, x21, [sp, #16 * 10]
+	hw_xcall_save_base_regs
+	mov	x0, sp
+	bl	el0_fast_syscall
+	hw_xcal_restore_base_regs
+SYM_CODE_END(el0t_fast_syscall)
 
 .macro xcall_ventry
 	.align 7
@@ -170,6 +213,10 @@ SYM_CODE_END(xcall_entry)
 	msr	tpidrro_el0, xzr
 .Lskip_tramp_vectors_cleanup\@:
 	sub	sp, sp, #PT_REGS_SIZE
-	b	xcall_entry
+	stp	x20, x21, [sp, #16 * 10]
+	/* Decode ESR_EL1 bits[25:18] for use later */
+	mrs	x21, esr_el1
+	ubfx	w20, w21, #16, #8
+	b	el0t_64_xcall_entry
 .org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 	.endm
diff --git a/arch/arm64/kernel/xcall/xcall.c b/arch/arm64/kernel/xcall/xcall.c
index 31072c0402f4..c0907c01c9c7 100644
--- a/arch/arm64/kernel/xcall/xcall.c
+++ b/arch/arm64/kernel/xcall/xcall.c
@@ -37,3 +37,9 @@ void xcall_task_free(struct task_struct *p)
 	if (static_branch_unlikely(&xcall_enable))
 		kfree(p->xinfo);
 }
+
+#define __NR_fast_syscalls 512
+static u8 fast_syscall_enabled[__NR_fast_syscalls + 1] = {
+	[0 ... __NR_fast_syscalls] = 0,
+};
+asmlinkage DEFINE_PER_CPU(u8*, __cpu_fast_syscall) = fast_syscall_enabled;
-- 
2.34.1
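In C terms, the dispatcher above makes a three-way decision; the model
below is an illustrative restatement of the assembly, not actual kernel
code (the masking mirrors 'and x20, x8, #__NR_fast_syscalls - 1'):

  static void el0t_64_svc_entry_model(struct pt_regs *regs, unsigned int iss,
                                      const u8 *fast_table)
  {
          unsigned int scno = regs->regs[8] & (__NR_fast_syscalls - 1);

          if (iss)                        /* patched 'svc #0xffff' site   */
                  el0_xcall_syscall(regs);
          else if (fast_table[scno])      /* set via /proc/[pid]/xcall    */
                  el0_fast_syscall(regs);
          else                            /* default kernel_entry path    */
                  el0_slow_syscall(regs);
  }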
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Revert the hack to the early entry of SYNC exception.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/kernel/entry.S | 69 ---------------------------------------
 1 file changed, 69 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cd39e17af8b0..48584f3b454d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -637,65 +637,6 @@ SYM_CODE_START_LOCAL(__bad_stack)
 SYM_CODE_END(__bad_stack)
 #endif /* CONFIG_VMAP_STACK */

-#ifdef CONFIG_FAST_SYSCALL
-	.macro	check_esr_el1_ec_svc64
-	/* Only support SVC64 for now */
-	mrs	x20, esr_el1
-	lsr	w20, w20, #ESR_ELx_EC_SHIFT
-	cmp	x20, #ESR_ELx_EC_SVC64
-	.endm
-
-	.macro	check_syscall_nr
-	cmp	x8, __NR_syscalls
-	.endm
-
-	.macro	check_xcall_enable
-	/* x21 = task_struct->xinfo->xcall_enable */
-	ldr_this_cpu x20, __entry_task, x21
-	ldr	x21, [x20, #TSK_XCALL]
-	/* x20 = sc_no / 8 */
-	lsr	x20, x8, 3
-	ldr	x21, [x21, x20]
-	/* x8 = sc_no % 8 */
-	and	x8, x8, 7
-	mov	x20, 1
-	lsl	x20, x20, x8
-	and	x21, x21, x20
-	cmp	x21, 0
-	.endm
-
-	.macro	check_xcall_pre_kernel_entry
-	stp	x20, x21, [sp, #0]
-	/* is ESR_ELx_EC_SVC64 */
-	check_esr_el1_ec_svc64
-	bne	.Lskip_xcall\@
-	/* x8 >= __NR_syscalls */
-	check_syscall_nr
-	bhs	.Lskip_xcall\@
-	str	x8, [sp, #16]
-	/* is xcall enabled */
-	check_xcall_enable
-	ldr	x8, [sp, #16]
-	beq	.Lskip_xcall\@
-	ldp	x20, x21, [sp, #0]
-	/* do xcall */
-#ifdef CONFIG_SECURITY_FEATURE_BYPASS
-	kernel_entry 0, 64, xcall
-#else
-	kernel_entry 0, 64
-#endif
-	mov	x0, sp
-	bl	el0_fast_syscall
-#ifdef CONFIG_SECURITY_FEATURE_BYPASS
-	kernel_exit 0, xcall
-#else
-	b	ret_to_user
-#endif
-.Lskip_xcall\@:
-	ldp	x20, x21, [sp, #0]
-	.endm
-#endif
-
 #ifdef CONFIG_FAST_IRQ
 	.macro check_xint_pre_kernel_entry
 	stp	x0, x1, [sp, #0]
@@ -748,16 +689,6 @@ SYM_CODE_END(__bad_stack)

 	.macro entry_handler el:req, ht:req, regsize:req, label:req
 SYM_CODE_START_LOCAL(el\el\ht\()_\regsize\()_\label)
-#ifdef CONFIG_FAST_SYSCALL
-	.if \el == 0 && \regsize == 64 && \label == sync
-	/* Only support el0 aarch64 sync exception */
-	alternative_if_not ARM64_HAS_XCALL
-	b	.Lret_to_kernel_entry\@
-	alternative_else_nop_endif
-	check_xcall_pre_kernel_entry
-	.Lret_to_kernel_entry\@:
-	.endif
-#endif
 #ifdef CONFIG_FAST_IRQ
 	.if \regsize == 64 && \label == irq && \el == 0 && \ht == t
 	alternative_if_not ARM64_HAS_XINT
-- 
2.34.1
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Replace the default SYNC exception vector for exceptions taken from EL0
with a simplified version, "sync_ventry", defined in
arch/arm64/kernel/xcall/entry.S. It uses the ESR to select the
corresponding entry in a jump table that holds the entry code addresses
for the different SYNC exception causes, for example svc, data abort,
instruction abort, brk, etc.

The key property of these entry stubs is that they do not increase the
number of branch instructions compared with the default path: they
bypass el0t_64_sync_handler and the complex 'switch (...) case'
statements inside it. This kind of optimization minimizes the overhead
of the exception type filter.

Separate the SVC exception entry from the generic SYNC exception entry,
so that the syscall handler can be redirected to a customized
implementation through 'svc 0xffff', or the context switch latency can
be reduced for the syscalls specified via '/proc/[pid]/xcall'.

To simplify the checking of the syscall set passed via
'/proc/[pid]/xcall' in assembly code, this patch replaces the old
bitmap in xcall_info with a byte array; the related code is updated
accordingly.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/include/asm/xcall.h   |   3 +-
 arch/arm64/kernel/entry-common.c |  39 ++++++++-
 arch/arm64/kernel/entry.S        |  16 +++-
 arch/arm64/kernel/process.c      |   5 ++
 arch/arm64/kernel/xcall/entry.S  | 140 ++++++++++++++++++++++++++++---
 arch/arm64/kernel/xcall/xcall.c  |  18 ++--
 fs/proc/proc_xcall.c             |  77 ++++++++---------
 7 files changed, 232 insertions(+), 66 deletions(-)

diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h
index 5cea7f81a857..121fccc86ef3 100644
--- a/arch/arm64/include/asm/xcall.h
+++ b/arch/arm64/include/asm/xcall.h
@@ -89,11 +89,12 @@ DECLARE_STATIC_KEY_FALSE(xcall_enable);

 struct xcall_info {
 	/* Must be first! */
-	DECLARE_BITMAP(xcall_enable, __NR_syscalls);
+	u8 xcall_enable[__NR_syscalls + 1];
 };

 #define TASK_XINFO(p) ((struct xcall_info *)p->xinfo)

 int xcall_init_task(struct task_struct *p, struct task_struct *orig);
 void xcall_task_free(struct task_struct *p);
+void xcall_info_switch(struct task_struct *p);
 #endif /* __ASM_XCALL_H */
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index a405ccc185f1..985ac6bb88b5 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -844,7 +844,44 @@ asmlinkage void noinstr el0_fast_syscall(struct pt_regs *regs)
 	fast_exit_to_user_mode(regs);
 }

-asmlinkage void noinstr el0_slow_syscall(struct pt_regs *regs) __alias(el0_svc);
+asmlinkage void el0_slow_syscall(struct pt_regs *regs) __alias(el0_svc);
+asmlinkage void __alias_el0_da(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_da);
+asmlinkage void __alias_el0_ia(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_ia);
+asmlinkage void __alias_el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_fpsimd_acc);
+asmlinkage void __alias_el0_sve_acc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sve_acc);
+asmlinkage void __alias_el0_sme_acc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sme_acc);
+asmlinkage void __alias_el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_fpsimd_exc);
+asmlinkage void __alias_el0_sys(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sys);
+asmlinkage void __alias_el0_wfx(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sys);
+asmlinkage void __alias_el0_sp(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_sp);
+asmlinkage void __alias_el0_pc(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_pc);
+asmlinkage void __alias_el0_undef(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_undef);
+asmlinkage void __alias_el0_bti(struct pt_regs *regs) __alias(el0_bti);
+asmlinkage void __alias_el0_mops(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_mops);
+asmlinkage void __alias_el0_breakpt(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_softstp(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_watchpt(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_brk64(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_dbg);
+asmlinkage void __alias_el0_fpac(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_fpac);
+asmlinkage void __alias_el0_inv(struct pt_regs *regs, unsigned long esr)
+	__alias(el0_inv);
 #endif /* CONFIG_FAST_SYSCALL */

 asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 48584f3b454d..cceb4526745f 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -552,6 +552,10 @@ tsk	.req	x28		// current thread_info

 	.text

+#ifdef CONFIG_FAST_SYSCALL
+#include "xcall/entry.S"
+#endif
+
 /*
  * Exception vectors.
  */
@@ -569,7 +573,11 @@ SYM_CODE_START(vectors)
 	kernel_ventry	1, h, 64, fiq		// FIQ EL1h
 	kernel_ventry	1, h, 64, error		// Error EL1h

+#ifdef CONFIG_FAST_SYSCALL
+	sync_ventry				// Synchronous 64-bit EL0
+#else
 	kernel_ventry	0, t, 64, sync		// Synchronous 64-bit EL0
+#endif
 	kernel_ventry	0, t, 64, irq		// IRQ 64-bit EL0
 	kernel_ventry	0, t, 64, fiq		// FIQ 64-bit EL0
 	kernel_ventry	0, t, 64, error		// Error 64-bit EL0
@@ -581,8 +589,6 @@ SYM_CODE_START(vectors)
 SYM_CODE_END(vectors)

 #ifdef CONFIG_ACTLR_XCALL_XINT
-#include "xcall/entry.S"
-
 	.align	11
 SYM_CODE_START(vectors_xcall_xint)
 	kernel_ventry	1, t, 64, sync		// Synchronous EL1t
@@ -595,7 +601,11 @@ SYM_CODE_START(vectors_xcall_xint)
 	kernel_ventry	1, h, 64, fiq		// FIQ EL1h
 	kernel_ventry	1, h, 64, error		// Error EL1h

+#ifdef CONFIG_FAST_SYSCALL
+	sync_ventry				// Synchronous 64-bit EL0
+#else
 	kernel_ventry	0, t, 64, sync		// Synchronous 64-bit EL0
+#endif
 	kernel_ventry	0, t, 64, irq		// IRQ 64-bit EL0
 	kernel_ventry	0, t, 64, fiq		// FIQ 64-bit EL0
 	kernel_ventry	0, t, 64, error		// Error 64-bit EL0
@@ -605,7 +615,7 @@ SYM_CODE_START(vectors_xcall_xint)
 	kernel_ventry	0, t, 32, fiq		// FIQ 32-bit EL0
 	kernel_ventry	0, t, 32, error		// Error 32-bit EL0
 SYM_CODE_END(vectors_xcall_xint)
-#endif
+#endif /* CONFIG_ACTLR_XCALL_XINT */

diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fe3f89445fcb..e9e5ce956f15 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -55,6 +55,7 @@
 #include <asm/stacktrace.h>
 #include <asm/switch_to.h>
 #include <asm/system_misc.h>
+#include <asm/xcall.h>

 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
@@ -472,6 +473,10 @@ DEFINE_PER_CPU(struct task_struct *, __entry_task);
 static void entry_task_switch(struct task_struct *next)
 {
 	__this_cpu_write(__entry_task, next);
+#ifdef CONFIG_FAST_SYSCALL
+	if (static_branch_unlikely(&xcall_enable))
+		xcall_info_switch(next);
+#endif
 }

 /*
diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S
index abd5f97d5525..b8238663c820 100644
--- a/arch/arm64/kernel/xcall/entry.S
+++ b/arch/arm64/kernel/xcall/entry.S
@@ -151,28 +151,30 @@ alternative_else_nop_endif
 	sb
.endm /* .macro hw_xcal_restore_base_regs */

-#define __NR_fast_syscalls 512
-
 SYM_CODE_START_LOCAL(el0t_64_svc_entry)
+alternative_if_not ARM64_HAS_HW_XCALL_XINT
 	/* Hijack SVC to dynamically loaded syscalls via '/proc/xcall/comm' */
-	ldr	x20, [sp, #S_SYSCALLNO]		// ESR.bits[25,18]
-	cmp	x20, 0
-	bne	el0t_64_xcall_entry
+	ldr	x20, [sp, #S_SYSCALLNO]		// ESR.bits[15,0]
+	cmp	x20, 0xfff
+	b.ge	el0t_64_xcall_entry
+alternative_else_nop_endif

 	/* Hijack SVC to low overhead syscalls via '/proc/[pid]/xcall' */
-	ldr_this_cpu x21, __cpu_fast_syscall, x20	// per_cpu table
-	and	x20, x8, #__NR_fast_syscalls - 1	// truncate syscall no below 512
-	ldrb	w20, [x21, x20]			// memory overhead is 512(B)
+	cmp	x8, __NR_syscalls
+	b.ge	.slow_syscall
+	ldr_this_cpu x21, __xcall_info, x20
+	ldrb	w20, [x21, x8]
 	cmp	x20, 0
 	bne	el0t_fast_syscall

+.slow_syscall:
 	ldp	x20, x21, [sp, #16 * 10]
 	kernel_entry 0, 64
 	mov	x0, sp
 	bl	el0_slow_syscall
 	b	ret_to_user
 SYM_INNER_LABEL(el0t_64_xcall_entry, SYM_L_GLOBAL)
-	lsr	x20, x20, #4
+	lsr	x20, x20, #12
 	adr	x21, .xcall_func_table
 	ldr	w20, [x21, x20, lsl #2]
 	add	x20, x20, x21
@@ -180,7 +182,7 @@ SYM_INNER_LABEL(el0t_64_xcall_entry, SYM_L_GLOBAL)
 	/* ISS==0F~FF: Entry to optimized and customized syscalls */
 .xcall_func_table:
-	.rept	15
+	.rept	16
 	.word	el0t_xcall_syscall - .xcall_func_table
 	.endr
 SYM_CODE_END(el0t_64_svc_entry)
@@ -201,6 +203,89 @@ SYM_CODE_START_LOCAL(el0t_fast_syscall)
 	hw_xcal_restore_base_regs
 SYM_CODE_END(el0t_fast_syscall)

+	.macro el0t_64_entry sym:req
+SYM_CODE_START_LOCAL(\sym\()_entry)
+	ldp	x20, x21, [sp, #16 * 10]
+	kernel_entry 0, 64
+	mov	x0, sp
+	ldr	x1, [sp, #(S_SYSCALLNO - 8)]	// ESR
+	bl	__alias_\sym
+	b	ret_to_user
+SYM_CODE_END(\sym\()_entry)
+	.endm
+
+	.macro el0t_64_entry_offset sym:req, num:req
+	.rept	\num
+	.word	el0t_64_sync_table - \sym\()_entry
+	.endr
+	.endm
+
+	/*
+	 * Early exception handlers
+	 */
+	el0t_64_entry	el0_undef
+	el0t_64_entry	el0_wfx
+	el0t_64_entry	el0_fpsimd_acc
+	el0t_64_entry	el0_bti
+	el0t_64_entry	el0_sys
+	el0t_64_entry	el0_sve_acc
+	el0t_64_entry	el0_fpac
+	el0t_64_entry	el0_sme_acc
+	el0t_64_entry	el0_ia
+	el0t_64_entry	el0_pc
+	el0t_64_entry	el0_da
+	el0t_64_entry	el0_sp
+	el0t_64_entry	el0_mops
+	el0t_64_entry	el0_fpsimd_exc
+	el0t_64_entry	el0_breakpt
+	el0t_64_entry	el0_softstp
+	el0t_64_entry	el0_watchpt
+	el0t_64_entry	el0_brk64
+	el0t_64_entry	el0_inv
+
+SYM_CODE_START_LOCAL(el0t_64_sync_table)
+	el0t_64_entry_offset	el0_undef, 1		// 0x00
+	el0t_64_entry_offset	el0_wfx, 1		// 0x01
+	el0t_64_entry_offset	el0_inv, 5
+	el0t_64_entry_offset	el0_fpsimd_acc, 1	// 0x07
+	el0t_64_entry_offset	el0_inv, 5
+	el0t_64_entry_offset	el0_bti, 1		// 0x0D
+	el0t_64_entry_offset	el0_inv, 7
+	el0t_64_entry_offset	el0t_64_svc, 1		// 0x15
+	el0t_64_entry_offset	el0_inv, 2
+	el0t_64_entry_offset	el0_sys, 1		// 0x18
+	el0t_64_entry_offset	el0_sve_acc, 1		// 0x19
+	el0t_64_entry_offset	el0_inv, 2
+	el0t_64_entry_offset	el0_fpac, 1		// 0x1C
+	el0t_64_entry_offset	el0_sme_acc, 1		// 0x1D
+	el0t_64_entry_offset	el0_inv, 2
+	el0t_64_entry_offset	el0_ia, 1		// 0x20
+	el0t_64_entry_offset	el0_inv, 1
+	el0t_64_entry_offset	el0_pc, 1		// 0x22
+	el0t_64_entry_offset	el0_inv, 1
+	el0t_64_entry_offset	el0_da, 1		// 0x24
+	el0t_64_entry_offset	el0_inv, 1
+	el0t_64_entry_offset	el0_sp, 1		// 0x26
+	el0t_64_entry_offset	el0_mops, 1		// 0x27
+	el0t_64_entry_offset	el0_inv, 4
+	el0t_64_entry_offset	el0_fpsimd_exc, 1	// 0x2C
+	el0t_64_entry_offset	el0_inv, 3
+	el0t_64_entry_offset	el0_breakpt, 1		// 0x30
+	el0t_64_entry_offset	el0_inv, 1
+	el0t_64_entry_offset	el0_softstp, 1		// 0x32
+	el0t_64_entry_offset	el0_inv, 1
+	el0t_64_entry_offset	el0_watchpt, 1		// 0x34
+	el0t_64_entry_offset	el0_inv, 7
+	el0t_64_entry_offset	el0_brk64, 1		// 0x3C
+	el0t_64_entry_offset	el0_inv, 3
+SYM_CODE_END(el0t_64_sync_table)
+
+/***********************************************
+ *                                             *
+ *   Xcall exception entry code for 920G CPU   *
+ *                                             *
+ ***********************************************/
+#ifdef CONFIG_ACTLR_XCALL_XINT
 .macro xcall_ventry
 	.align 7
 .Lventry_start\@:
@@ -214,9 +299,42 @@ SYM_CODE_END(el0t_fast_syscall)
 .Lskip_tramp_vectors_cleanup\@:
 	sub	sp, sp, #PT_REGS_SIZE
 	stp	x20, x21, [sp, #16 * 10]
-	/* Decode ESR.ISS bits[25,18] for use later */
+	/* Decode ESR.ISS bits[24,17] for use later */
 	mrs	x21, esr_el1
 	ubfx	w20, w21, #16, #8
 	b	el0t_64_xcall_entry
 	.org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 .endm
+#endif /* CONFIG_ACTLR_XCALL_XINT */
+
+/****************************************************************
+ *                                                              *
+ *     Sync exception entry code for early CPUs before 920G     *
+ *                                                              *
+ ****************************************************************/
+	.macro sync_ventry
+	.align 7
+.Lventry_start\@:
+	/*
+	 * This must be the first instruction of the EL0 vector entries. It is
+	 * skipped by the trampoline vectors, to trigger the cleanup.
+	 */
+	b	.Lskip_tramp_vectors_cleanup\@
+	mrs	x30, tpidrro_el0
+	msr	tpidrro_el0, xzr
+.Lskip_tramp_vectors_cleanup\@:
+	sub	sp, sp, #PT_REGS_SIZE
+
+	/* Save ESR and ISS bits[15,0] for use later */
+	stp	x20, x21, [sp, #16 * 10]
+	mrs	x20, esr_el1
+	uxth	w21, w20
+	stp	x20, x21, [sp, #(S_SYSCALLNO - 8)]
+	/* Use a jump table for the different exception causes */
+	lsr	w21, w20, #ESR_ELx_EC_SHIFT
+	adr	x20, el0t_64_sync_table
+	ldr	w21, [x20, x21, lsl #2]
+	sub	x20, x20, x21
+	br	x20
+.org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
+	.endm
diff --git a/arch/arm64/kernel/xcall/xcall.c b/arch/arm64/kernel/xcall/xcall.c
index c0907c01c9c7..96e6274571d3 100644
--- a/arch/arm64/kernel/xcall/xcall.c
+++ b/arch/arm64/kernel/xcall/xcall.c
@@ -17,8 +17,9 @@ static inline int sw_xcall_init_task(struct task_struct *p, struct task_struct *
 		return -ENOMEM;

 	if (orig->xinfo) {
-		bitmap_copy(TASK_XINFO(p)->xcall_enable, TASK_XINFO(orig)->xcall_enable,
-			    __NR_syscalls);
+		memcpy(TASK_XINFO(p)->xcall_enable,
+		       TASK_XINFO(orig)->xcall_enable,
+		       (__NR_syscalls + 1) * sizeof(u8));
 	}

 	return 0;
@@ -38,8 +39,13 @@ void xcall_task_free(struct task_struct *p)
 		kfree(p->xinfo);
 }

-#define __NR_fast_syscalls 512
-static u8 fast_syscall_enabled[__NR_fast_syscalls + 1] = {
-	[0 ... __NR_fast_syscalls] = 0,
+static u8 default_xcall_info[__NR_syscalls + 1] = {
+	[0 ... __NR_syscalls] = 0,
 };
-asmlinkage DEFINE_PER_CPU(u8*, __cpu_fast_syscall) = fast_syscall_enabled;
+DEFINE_PER_CPU(u8*, __xcall_info) = default_xcall_info;
+
+void xcall_info_switch(struct task_struct *task)
+{
+	if (TASK_XINFO(task)->xcall_enable)
+		__this_cpu_write(__xcall_info, TASK_XINFO(task)->xcall_enable);
+}
diff --git a/fs/proc/proc_xcall.c b/fs/proc/proc_xcall.c
index 5f45d0799b33..7a08d4b18af3 100644
--- a/fs/proc/proc_xcall.c
+++ b/fs/proc/proc_xcall.c
@@ -12,9 +12,9 @@
 static int xcall_show(struct seq_file *m, void *v)
 {
 	struct inode *inode = m->private;
-	struct task_struct *p;
-	unsigned int rs, re;
 	struct xcall_info *xinfo;
+	struct task_struct *p;
+	int l = 0, r = 1;

 	if (!static_key_enabled(&xcall_enable))
 		return -EACCES;
@@ -27,14 +27,25 @@ static int xcall_show(struct seq_file *m, void *v)
 	if (!xinfo)
 		goto out;

-	for (rs = 0, bitmap_next_set_region(xinfo->xcall_enable, &rs, &re, __NR_syscalls);
-	     rs < re; rs = re + 1,
-	     bitmap_next_set_region(xinfo->xcall_enable, &rs, &re, __NR_syscalls)) {
-		if (rs == (re - 1))
-			seq_printf(m, "%d,", rs);
-		else
-			seq_printf(m, "%d-%d,", rs, re - 1);
+	while (r < __NR_syscalls) {
+		if (!xinfo->xcall_enable[l]) {
+			l++;
+			r = l + 1;
+			continue;
+		}
+
+		if (!xinfo->xcall_enable[r]) {
+			if (r == (l + 1))
+				seq_printf(m, "%d,", l);
+			else
+				seq_printf(m, "%d-%d,", l, r - 1);
+			l = r + 1;
+			r = l + 1;
+			continue;
+		}
+		r++;
 	}
+
 	seq_puts(m, "\n");
 out:
 	put_task_struct(p);
@@ -47,45 +58,28 @@ static int xcall_open(struct inode *inode, struct file *filp)
 	return single_open(filp, xcall_show, inode);
 }

-static int xcall_enable_one(struct xcall_info *xinfo, unsigned int sc_no)
-{
-	test_and_set_bit(sc_no, xinfo->xcall_enable);
-	return 0;
-}
-
-static int xcall_disable_one(struct xcall_info *xinfo, unsigned int sc_no)
-{
-	test_and_clear_bit(sc_no, xinfo->xcall_enable);
-	return 0;
-}
-
-static ssize_t xcall_write(struct file *file, const char __user *buf,
+static ssize_t xcall_write(struct file *file, const char __user *ubuf,
 			   size_t count, loff_t *offset)
 {
-	struct inode *inode = file_inode(file);
-	struct task_struct *p;
-	char buffer[5];
-	const size_t maxlen = sizeof(buffer) - 1;
 	unsigned int sc_no = __NR_syscalls;
+	struct task_struct *p;
+	char buf[5];
 	int ret = 0;
-	int is_clear = 0;
-	struct xcall_info *xinfo;

 	if (!static_key_enabled(&xcall_enable))
 		return -EACCES;

-	memset(buffer, 0, sizeof(buffer));
-	if (!count || copy_from_user(buffer, buf, count > maxlen ? maxlen : count))
-		return -EFAULT;
-
-	p = get_proc_task(inode);
-	if (!p || !p->xinfo)
+	p = get_proc_task(file_inode(file));
+	if (!p || !TASK_XINFO(p))
 		return -ESRCH;

-	if (buffer[0] == '!')
-		is_clear = 1;
+	memset(buf, '\0', 5);
+	if (!count || (count > 4) || copy_from_user(buf, ubuf, count)) {
+		ret = -EFAULT;
+		goto out;
+	}

-	if (kstrtouint(buffer + is_clear, 10, &sc_no)) {
+	if (kstrtouint((buf + (int)(buf[0] == '!')), 10, &sc_no)) {
 		ret = -EINVAL;
 		goto out;
 	}
@@ -95,13 +89,8 @@ static ssize_t xcall_write(struct file *file, const char __user *buf,
 		goto out;
 	}

-	xinfo = TASK_XINFO(p);
-	if (!is_clear && !test_bit(sc_no, xinfo->xcall_enable))
-		ret = xcall_enable_one(xinfo, sc_no);
-	else if (is_clear && test_bit(sc_no, xinfo->xcall_enable))
-		ret = xcall_disable_one(xinfo, sc_no);
-	else
-		ret = -EINVAL;
+	(TASK_XINFO(p))->xcall_enable[sc_no] = (int)(buf[0] != '!');
+	ret = 0;

 out:
 	put_task_struct(p);
-- 
2.34.1
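
For context, here is a minimal userspace sketch of driving the
'/proc/[pid]/xcall' interface changed above. The PID and syscall number are
examples only and error handling is kept minimal. Per the handler, writing the
decimal syscall number enables the low-overhead path for that syscall, a '!'
prefix disables it, and at most four characters are accepted per write.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    /* Example only: enable syscall 63 (read on arm64) for a hypothetical PID. */
    int main(void)
    {
            const char *path = "/proc/1234/xcall";   /* hypothetical target PID */
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            /* "63" enables the fast path; "!63" would disable it again. */
            if (write(fd, "63", 2) != 2)
                    perror("write");
            close(fd);
            return 0;
    }

Reading the same file back prints the enabled syscall numbers as the
comma-separated ranges produced by xcall_show().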
From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Add a basic xcall2.0 testcase. This module can be combined with the
syscall sub-item of Unixbench to evaluate the baseline noise of
xcall2.0's "Dynamic Instruction Replacement" mechanism. Users can also
use this module as a reference to implement custom system calls.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 drivers/staging/Kconfig                    |  2 +
 drivers/staging/Makefile                   |  1 +
 drivers/staging/xcall/Kconfig              | 19 +++++
 drivers/staging/xcall/Makefile             |  1 +
 drivers/staging/xcall/dynamic_xcall_test.c | 97 ++++++++++++++++++++++
 5 files changed, 120 insertions(+)
 create mode 100644 drivers/staging/xcall/Kconfig
 create mode 100644 drivers/staging/xcall/Makefile
 create mode 100644 drivers/staging/xcall/dynamic_xcall_test.c

diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig
index f9aef39cac2e..702216e0ddd2 100644
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -78,4 +78,6 @@ source "drivers/staging/qlge/Kconfig"

 source "drivers/staging/vme_user/Kconfig"

+source "drivers/staging/xcall/Kconfig"
+
 endif # STAGING
diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile
index ffa70dda481d..3df57d6ab9b2 100644
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -28,3 +28,4 @@ obj-$(CONFIG_PI433)		+= pi433/
 obj-$(CONFIG_XIL_AXIS_FIFO)	+= axis-fifo/
 obj-$(CONFIG_FIELDBUS_DEV)	+= fieldbus/
 obj-$(CONFIG_QLGE)		+= qlge/
+obj-$(CONFIG_DYNAMIC_XCALL)	+= xcall/
diff --git a/drivers/staging/xcall/Kconfig b/drivers/staging/xcall/Kconfig
new file mode 100644
index 000000000000..bf7421fa8a14
--- /dev/null
+++ b/drivers/staging/xcall/Kconfig
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+menu "Xcall"
+
+if ARM64
+
+config DYNAMIC_XCALL_TESTCASE
+	tristate "xcall2.0 test case"
+	depends on DYNAMIC_XCALL
+	help
+	  A simple example of using the xcall2.0 kernel module.
+	  This module can be combined with the syscall sub-item of
+	  Unixbench to evaluate the baseline noise of xcall2.0's
+	  "Dynamic Instruction Replacement" mechanism. Users can
+	  also use this module as a reference to implement custom
+	  system calls.
+
+endif # if ARM64
+
+endmenu
diff --git a/drivers/staging/xcall/Makefile b/drivers/staging/xcall/Makefile
new file mode 100644
index 000000000000..668ac4f3b471
--- /dev/null
+++ b/drivers/staging/xcall/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_DYNAMIC_XCALL_TESTCASE) += dynamic_xcall_test.o
diff --git a/drivers/staging/xcall/dynamic_xcall_test.c b/drivers/staging/xcall/dynamic_xcall_test.c
new file mode 100644
index 000000000000..3805d914a067
--- /dev/null
+++ b/drivers/staging/xcall/dynamic_xcall_test.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * A simple dummy xcall for syscall testing
+ *
+ * The data struct and functions marked as MANDATORY have to
+ * be included in all kernel xcall modules.
+ *
+ * Copyright (C) 2025 Huawei Limited.
+ */
+
+#define pr_fmt(fmt) "dummy_xcall: " fmt
+
+#include <linux/module.h>
+#include <linux/xcall.h>
+#include <linux/unistd.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/hash.h>
+#include <linux/mmu_notifier.h>
+#include <linux/miscdevice.h>
+#include <uapi/linux/futex.h>
+
+#include <asm/barrier.h>
+#include <asm/xcall.h>
+
+static long __do_sys_close(struct pt_regs *regs)
+{
+	return default_sys_call_table()[__NR_close](regs);
+}
+
+static long __do_sys_getpid(struct pt_regs *regs)
+{
+	return default_sys_call_table()[__NR_getpid](regs);
+}
+
+static long __do_sys_getuid(struct pt_regs *regs)
+{
+	return default_sys_call_table()[__NR_getuid](regs);
+}
+
+static long __do_sys_umask(struct pt_regs *regs)
+{
+	return default_sys_call_table()[__NR_umask](regs);
+}
+
+static long __do_sys_dup(struct pt_regs *regs)
+{
+	return default_sys_call_table()[__NR_dup](regs);
+}
+
+/* MANDATORY */
+static struct xcall_prog dummy_xcall_prog = {
+	.name = "dummy_xcall",
+	.owner = THIS_MODULE,
+	.objs = {
+		{
+			.scno = (unsigned long)__NR_getpid,
+			.func = (unsigned long)__do_sys_getpid,
+		},
+		{
+			.scno = (unsigned long)__NR_getuid,
+			.func = (unsigned long)__do_sys_getuid,
+		},
+		{
+			.scno = (unsigned long)__NR_close,
+			.func = (unsigned long)__do_sys_close,
+		},
+		{
+			.scno = (unsigned long)__NR_umask,
+			.func = (unsigned long)__do_sys_umask,
+		},
+		{
+			.scno = (unsigned long)__NR_dup,
+			.func = (unsigned long)__do_sys_dup,
+		},
+		{}
+	}
+};
+
+/* MANDATORY */
+static int __init dummy_xcall_init(void)
+{
+	INIT_LIST_HEAD(&dummy_xcall_prog.list);
+	return xcall_prog_register(&dummy_xcall_prog);
+}
+
+/* MANDATORY */
+static void __exit dummy_xcall_exit(void)
+{
+	xcall_prog_unregister(&dummy_xcall_prog);
+}
+
+module_init(dummy_xcall_init);
+module_exit(dummy_xcall_exit);
+MODULE_AUTHOR("Liao Chang <liaochang1@huawei.com>");
+MODULE_DESCRIPTION("Dummy Xcall");
+MODULE_LICENSE("GPL");
-- 
2.34.1
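
Since the commit message offers this module as a template, here is a hedged
sketch of what a custom implementation might look like: it wraps one syscall
with extra bookkeeping before delegating to the stock handler, using only
interfaces visible in this series (default_sys_call_table() and a struct
xcall_prog .objs slot). The counter is hypothetical and not part of the
series.

    #include <linux/atomic.h>
    #include <linux/unistd.h>
    #include <linux/xcall.h>

    #include <asm/xcall.h>

    /* Hypothetical statistic, for illustration only. */
    static atomic64_t my_getpid_calls = ATOMIC64_INIT(0);

    /* Count invocations, then delegate to the default implementation. */
    static long __do_sys_getpid_counted(struct pt_regs *regs)
    {
            atomic64_inc(&my_getpid_calls);
            return default_sys_call_table()[__NR_getpid](regs);
    }

Such a handler would then be wired up through a struct xcall_prog .objs
entry, e.g. { .scno = __NR_getpid, .func = (unsigned long)__do_sys_getpid_counted },
exactly as dummy_xcall_prog does above.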
From: Yuntao Liu <liuyuntao12@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Introduce the xcall2.0 Redis async prefetch kernel module. When
epoll_pwait() reports EPOLLIN on a tracked fd, the module prefetches the
pending data into a per-fd cache from a workqueue, so that a subsequent
read() can be served from that cache; hit/miss statistics are exposed
via /proc/xcall_stat/prefetch.

Signed-off-by: Yuntao Liu <liuyuntao12@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 drivers/staging/xcall/Makefile   |   2 +-
 drivers/staging/xcall/prefetch.c | 270 +++++++++++++++++++++++++++++++
 2 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 drivers/staging/xcall/prefetch.c

diff --git a/drivers/staging/xcall/Makefile b/drivers/staging/xcall/Makefile
index 668ac4f3b471..d8c6137e2945 100644
--- a/drivers/staging/xcall/Makefile
+++ b/drivers/staging/xcall/Makefile
@@ -1 +1 @@
-obj-$(CONFIG_DYNAMIC_XCALL_TESTCASE) += dynamic_xcall_test.o
+obj-$(CONFIG_DYNAMIC_XCALL_TESTCASE) += dynamic_xcall_test.o prefetch.o
diff --git a/drivers/staging/xcall/prefetch.c b/drivers/staging/xcall/prefetch.c
new file mode 100644
index 000000000000..81ebe0ebf5fc
--- /dev/null
+++ b/drivers/staging/xcall/prefetch.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * A simple xcall prefetch module for epoll/read syscall testing
+ *
+ * The data struct and functions marked as MANDATORY have to
+ * be included in all kernel xcall modules.
+ *
+ * Copyright (C) 2025 Huawei Limited.
+ */
+
+#define pr_fmt(fmt) "xcall_prefetch: " fmt
+
+#include <linux/module.h>
+#include <linux/xcall.h>
+#include <linux/unistd.h>
+#include <linux/fs.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/mm.h>
+#include <linux/hash.h>
+#include <linux/mmu_notifier.h>
+#include <linux/miscdevice.h>
+#include <linux/file.h>
+#include <linux/eventpoll.h>
+#include <linux/socket.h>	// struct sock and basic socket types
+#include <net/sock.h>		// full definition of struct sock
+#include <net/tcp_states.h>	// TCP state macros such as TCP_ESTABLISHED
+#include <uapi/linux/futex.h>
+
+#include <asm/barrier.h>
+#include <asm/xcall.h>
+
+#define MAX_FD 100
+
+static unsigned long xcall_cache_hit;
+static unsigned long xcall_cache_miss;
+
+static struct proc_dir_entry *xcall_proc_dir;
+
+enum cache_state {
+	XCALL_CACHE_NONE = 0,
+	XCALL_CACHE_PREFETCH,
+	XCALL_CACHE_READY,
+	XCALL_CACHE_CANCEL
+};
+
+struct prefetch_item {
+	int fd;
+	int cpu;
+	int pos;
+	int len;
+	atomic_t state;
+	struct file *file;
+	struct work_struct work;
+	char cache[PAGE_SIZE];
+};
+
+static struct epoll_event events[MAX_FD] = {0};
+static struct prefetch_item prefetch_items[MAX_FD] = {0};
+static struct workqueue_struct *rc_work;
+
+static inline bool transition_state(struct prefetch_item *pfi,
+				    enum cache_state old, enum cache_state new)
+{
+	return atomic_cmpxchg(&pfi->state, old, new) == old;
+}
+
+static void prefetch_work_fn(struct work_struct *work)
+{
+	struct prefetch_item *pfi = container_of(work, struct prefetch_item, work);
+
+	if (!transition_state(pfi, XCALL_CACHE_NONE, XCALL_CACHE_PREFETCH))
+		return;
+
+	pfi->pos = 0;
+	pfi->len = kernel_read(pfi->file, pfi->cache, PAGE_SIZE, &pfi->file->f_pos);
+
+	transition_state(pfi, XCALL_CACHE_PREFETCH, XCALL_CACHE_READY);
+}
+
+static long __do_sys_epoll_pwait(struct pt_regs *regs)
+{
+	struct prefetch_item *pfi;
+	int i, fd, err, n;
+	long ret;
+
+	ret = default_sys_call_table()[__NR_epoll_pwait](regs);
+	if (ret > 0) {
+		/* Cap at MAX_FD so the local events array cannot overflow */
+		n = min_t(long, ret, MAX_FD);
+		err = copy_from_user(events, (void __user *)regs->regs[1],
+				     n * sizeof(struct epoll_event));
+		if (err)
+			return -EFAULT;
+
+		for (i = 0; i < n; i++) {
+			fd = events[i].data;
+			/* Only fds inside the tracked range are prefetched */
+			if (fd < 0 || fd >= MAX_FD)
+				continue;
+			if (events[i].events & EPOLLIN) {
+				pfi = &prefetch_items[fd];
+				if (!pfi->file)
+					pfi->file = fget(fd);
+
+				queue_work_on(250 + (fd % 4), rc_work, &pfi->work);
+			}
+		}
+	}
+	return ret;
+}
+
+static long __do_sys_read(struct pt_regs *regs)
+{
+	int fd = regs->regs[0];
+	void *user_buf = (void *)regs->regs[1];
+	int count = regs->regs[2];
+	struct prefetch_item *pfi;
+	int copy_len;
+	long ret;
+
+	/* fds outside the tracked range take the default path */
+	if (fd < 0 || fd >= MAX_FD)
+		goto not_epoll_fd;
+
+	pfi = &prefetch_items[fd];
+	if (pfi->file) {
+		while (!transition_state(pfi, XCALL_CACHE_READY, XCALL_CACHE_CANCEL)) {
+			if (transition_state(pfi, XCALL_CACHE_NONE, XCALL_CACHE_CANCEL))
+				goto slow_read;
+		}
+
+		xcall_cache_hit++;
+		copy_len = pfi->len;
+
+		if (copy_len == 0) {
+			transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE);
+			return 0;
+		}
+
+		copy_len = (copy_len >= count) ? count : copy_len;
+		copy_len -= copy_to_user(user_buf, (void *)(pfi->cache + pfi->pos), copy_len);
+		pfi->len -= copy_len;
+		pfi->pos += copy_len;
+
+		if (pfi->len == 0)
+			transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE);
+		else
+			transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_READY);
+		return copy_len;
+	}
+
+	goto not_epoll_fd;
+
+slow_read:
+	xcall_cache_miss++;
+	pfi->len = 0;
+	pfi->pos = 0;
+	cancel_work_sync(&pfi->work);
+	transition_state(pfi, XCALL_CACHE_CANCEL, XCALL_CACHE_NONE);
+not_epoll_fd:
+	ret = default_sys_call_table()[__NR_read](regs);
+	return ret;
+}
+
+static long __do_sys_close(struct pt_regs *regs)
+{
+	int fd = regs->regs[0];
+	struct prefetch_item *pfi;
+	long ret;
+
+	if (fd >= 0 && fd < MAX_FD) {
+		pfi = &prefetch_items[fd];
+		if (pfi->file) {
+			fput(pfi->file);
+			pfi->file = NULL;
+		}
+	}
+
+	ret = default_sys_call_table()[__NR_close](regs);
+	return ret;
+}
+
+/* MANDATORY */
+static struct xcall_prog xcall_prefetch_prog = {
+	.name = "xcall_prefetch",
+	.owner = THIS_MODULE,
+	.objs = {
+		{
+			.scno = (unsigned long)__NR_epoll_pwait,
+			.func = (unsigned long)__do_sys_epoll_pwait,
+		},
+		{
+			.scno = (unsigned long)__NR_read,
+			.func = (unsigned long)__do_sys_read,
+		},
+		{
+			.scno = (unsigned long)__NR_close,
+			.func = (unsigned long)__do_sys_close,
+		},
+		{}
+	}
+};
+
+static ssize_t xcall_prefetch_reset(struct file *file, const char __user *buf,
+				    size_t count, loff_t *pos)
+{
+	xcall_cache_hit = 0;
+	xcall_cache_miss = 0;
+
+	return count;
+}
+
+static int xcall_prefetch_show(struct seq_file *m, void *v)
+{
+	u64 total = xcall_cache_hit + xcall_cache_miss;
+	u64 percent;
+
+	/* Guard against dividing by zero before the first tracked read */
+	percent = total ? DIV_ROUND_CLOSEST(xcall_cache_hit * 100ULL, total) : 0;
+	seq_printf(m, "epoll cache_{hit,miss}: %lu,%lu, hit ratio: %llu%%\n",
+		   xcall_cache_hit, xcall_cache_miss, percent);
+	return 0;
+}
+
+static int xcall_prefetch_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, xcall_prefetch_show, NULL);
+}
+
+static const struct proc_ops xcall_prefetch_fops = {
+	.proc_open	= xcall_prefetch_open,
+	.proc_read	= seq_read,
+	.proc_write	= xcall_prefetch_reset,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release
+};
+
+static int __init init_xcall_prefetch_procfs(void)
+{
+	struct proc_dir_entry *prefetch_dir;
+
+	xcall_proc_dir = proc_mkdir("xcall_stat", NULL);
+	if (!xcall_proc_dir)
+		return -ENOMEM;
+	prefetch_dir = proc_create("prefetch", 0640, xcall_proc_dir, &xcall_prefetch_fops);
+	if (!prefetch_dir)
+		goto rm_xcall_proc_dir;
+
+	return 0;
+
+rm_xcall_proc_dir:
+	proc_remove(xcall_proc_dir);
+	return -ENOMEM;
+}
+
+/* MANDATORY */
+static int __init xcall_prefetch_init(void)
+{
+	int i;
+
+	rc_work = alloc_workqueue("eventpoll_rc", 0, 0);
+	if (!rc_work) {
+		pr_warn("alloc eventpoll_rc workqueue failed.\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < MAX_FD; i++)
+		INIT_WORK(&prefetch_items[i].work, prefetch_work_fn);
+
+	init_xcall_prefetch_procfs();
+
+	INIT_LIST_HEAD(&xcall_prefetch_prog.list);
+	return xcall_prog_register(&xcall_prefetch_prog);
+}
+
+/* MANDATORY */
+static void __exit xcall_prefetch_exit(void)
+{
+	proc_remove(xcall_proc_dir);
+	xcall_prog_unregister(&xcall_prefetch_prog);
+}
+
+module_init(xcall_prefetch_init);
+module_exit(xcall_prefetch_exit);
+MODULE_AUTHOR("Liao Chang <liaochang1@huawei.com>");
+MODULE_DESCRIPTION("Xcall prefetch");
+MODULE_LICENSE("GPL");
-- 
2.34.1
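
One contract in __do_sys_epoll_pwait above is easy to miss: the module reads
events[i].data back as a file descriptor, so it only cooperates with
applications that register fds with the integer epoll_data.fd field (as
Redis's event loop does) rather than a pointer. A sketch of a compatible
userspace registration, with names chosen for illustration:

    #include <sys/epoll.h>

    /* Register sockfd so the prefetch module can map the event back to it. */
    static int watch_fd(int epfd, int sockfd)
    {
            struct epoll_event ev = {
                    .events = EPOLLIN,
                    .data.fd = sockfd,  /* the module reads this back as the fd */
            };

            return epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd, &ev);
    }

If an application stored a pointer in epoll_data instead, the decoded value
would be meaningless as an index, which is what the fd range check in
__do_sys_epoll_pwait guards against.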
Feedback: The patch(es) you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/18847
Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/O3K...