From: Liao Chang <liaochang1@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS

--------------------------------

Hijack syscalls with dynamic instruction replacement.

With xcall 2.0, hardware xcall can directly modify the SVC instruction
through dynamic instruction replacement, which avoids unnecessary
system call number checks at the exception entry.

Signed-off-by: Liao Chang <liaochang1@huawei.com>
Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com>
Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
---
 arch/arm64/Kconfig.turbo           |  1 +
 arch/arm64/include/asm/exception.h |  3 ++
 arch/arm64/include/asm/xcall.h     | 38 ++++++++++++++
 arch/arm64/kernel/entry-common.c   |  2 +-
 arch/arm64/kernel/probes/uprobes.c |  6 +++
 arch/arm64/kernel/syscall.c        | 14 +++++
 arch/arm64/kernel/xcall/core.c     | 84 +++++++++++++++++++++++++++++-
 arch/arm64/kernel/xcall/entry.S    | 15 +----
 kernel/events/uprobes.c            | 19 +++++++
 9 files changed, 165 insertions(+), 17 deletions(-)
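[ Reviewer note, kept below the "---" so git am ignores it: each
  hijacked syscall number walks a small state machine stored in the
  per-mm sys_call_table copy.  An entry starts at patch_syscall();
  its first caller rewrites the originating "svc #0" to "svc #0xffff"
  and advances the entry, while concurrent callers of the same scno
  briefly observe replay_syscall(), which only rewinds the PC so the
  now-patched SVC is re-executed.  filter_ksyscall() then stays
  installed to catch the same scno issued from other, still unpatched
  "svc #0" sites.  Below is a minimal userspace sketch of the cmpxchg
  hand-off, assuming a hypothetical patch_insn() in place of the real
  set_xcall_insn() and collapsing filter_ksyscall() into a terminal
  fast_handler():

  /* Simplified model of the xcall patching state machine. */
  #include <stdatomic.h>
  #include <stdio.h>

  typedef long (*handler_t)(void);

  static _Atomic(handler_t) entry;           /* one sys_call_table slot */

  static long fast_handler(void) { return 0; }    /* patched state */
  static long replay_syscall(void) { return 1; }  /* "rewind PC, retry" */
  static long patch_syscall(void);

  static int patch_insn(void)      /* stand-in: svc #0 -> svc #0xffff */
  {
          return 0;                /* pretend the text patch succeeded */
  }

  static long patch_syscall(void)
  {
          handler_t expected = patch_syscall;

          /* Only one caller wins the right to patch; losers replay. */
          if (!atomic_compare_exchange_strong(&entry, &expected,
                                              replay_syscall))
                  return expected();  /* state advanced by someone else */

          if (patch_insn() == 0) {
                  atomic_store(&entry, fast_handler);   /* patch done */
                  return 0;
          }

          atomic_store(&entry, patch_syscall);  /* roll back, retry later */
          return -1;
  }

  int main(void)
  {
          atomic_store(&entry, patch_syscall);
          for (int i = 0; i < 3; i++) {
                  long ret;

                  do {             /* 1 == replay: re-issue the "SVC" */
                          ret = atomic_load(&entry)();
                  } while (ret == 1);
                  printf("call %d -> %ld\n", i, ret);
          }
          return 0;
  }

  The rollback mirrors the error path of patch_syscall() in this patch:
  when the text patch fails, the entry returns to patch_syscall() so
  the next invocation of the same scno retries the patching. ]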
diff --git a/arch/arm64/Kconfig.turbo b/arch/arm64/Kconfig.turbo
index cfefbdb605f8..778ea1025c2c 100644
--- a/arch/arm64/Kconfig.turbo
+++ b/arch/arm64/Kconfig.turbo
@@ -74,6 +74,7 @@ config ACTLR_XCALL_XINT
 config DYNAMIC_XCALL
 	bool "Support dynamically replace and load system call"
 	depends on FAST_SYSCALL
+	depends on UPROBES
 	default n
 	help
 	  Xcall 2.0 add "/proc/xcall/comm" interface to
diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h
index d69f0e6d53f8..94338104a18c 100644
--- a/arch/arm64/include/asm/exception.h
+++ b/arch/arm64/include/asm/exception.h
@@ -75,6 +75,9 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr);
 void do_el0_mops(struct pt_regs *regs, unsigned long esr);
 void do_serror(struct pt_regs *regs, unsigned long esr);
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags);
+#ifdef CONFIG_FAST_SYSCALL
+void do_el0_xcall(struct pt_regs *regs);
+#endif
 
 void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr,
 				unsigned long far);
diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h
index 27aaf4344d0f..ee526a520704 100644
--- a/arch/arm64/include/asm/xcall.h
+++ b/arch/arm64/include/asm/xcall.h
@@ -15,6 +15,9 @@
 #include <asm/cpufeature.h>
 #include <asm/syscall.h>
 
+#define SVC_0000	0xd4000001
+#define SVC_FFFF	0xd41fffe1
+
 struct xcall_comm {
 	char *name;
 	char *binary;
@@ -43,13 +46,48 @@ struct xcall_area {
 	struct xcall *xcall;
 };
 
+extern const syscall_fn_t *default_sys_call_table(void);
+
 #ifdef CONFIG_DYNAMIC_XCALL
 extern int xcall_attach(struct xcall_comm *info);
 extern int xcall_detach(struct xcall_comm *info);
+extern int xcall_pre_sstep_check(struct pt_regs *regs);
+extern int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr,
+			  uprobe_opcode_t opcode);
 
 #define mm_xcall_area(mm) ((struct xcall_area *)((mm)->xcall))
+
+static inline long hijack_syscall(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+	syscall_fn_t syscall_fn;
+
+	if (likely(!area))
+		return -ENOSYS;
+
+	if (unlikely(scno >= __NR_syscalls))
+		return -ENOSYS;
+
+	syscall_fn = (syscall_fn_t)area->sys_call_table[scno];
+	return syscall_fn(regs);
+}
+
+static inline const syscall_fn_t *real_syscall_table(void)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+
+	if (likely(!area))
+		return default_sys_call_table();
+
+	return (syscall_fn_t *)(&(area->sys_call_table[__NR_syscalls]));
+}
 #else
 #define mm_xcall_area(mm) (NULL)
+#define hijack_syscall(regs) (NULL)
+static inline const syscall_fn_t *real_syscall_table(void)
+{
+	return sys_call_table;
+}
 #endif /* CONFIG_DYNAMIC_XCALL */
 
 DECLARE_STATIC_KEY_FALSE(xcall_enable);
diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c
index 1e8171c1efe7..f4a21c66856a 100644
--- a/arch/arm64/kernel/entry-common.c
+++ b/arch/arm64/kernel/entry-common.c
@@ -827,7 +827,7 @@ static void noinstr el0_xcall(struct pt_regs *regs)
 #endif
 	fp_user_discard();
 	local_daif_restore(DAIF_PROCCTX);
-	do_el0_svc(regs);
+	do_el0_xcall(regs);
 	fast_exit_to_user_mode(regs);
 }
 
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index a2f137a595fc..677a9589f9ca 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -6,6 +6,7 @@
 #include <linux/ptrace.h>
 #include <linux/uprobes.h>
 #include <asm/cacheflush.h>
+#include <asm/xcall.h>
 
 #include "decode-insn.h"
 
@@ -171,6 +172,11 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs,
 	if (uprobe_pre_sstep_notifier(regs))
 		return DBG_HOOK_HANDLED;
 
+#ifdef CONFIG_DYNAMIC_XCALL
+	if (xcall_pre_sstep_check(regs))
+		return DBG_HOOK_HANDLED;
+#endif
+
 	return DBG_HOOK_ERROR;
 }
diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c
index 558e9c9da8a4..ff40e51634b0 100644
--- a/arch/arm64/kernel/syscall.c
+++ b/arch/arm64/kernel/syscall.c
@@ -14,6 +14,7 @@
 #include <asm/syscall.h>
 #include <asm/thread_info.h>
 #include <asm/unistd.h>
+#include <asm/xcall.h>
 
 long a32_arm_syscall(struct pt_regs *regs, int scno);
 long sys_ni_syscall(void);
@@ -162,6 +163,15 @@ static inline void delouse_pt_regs(struct pt_regs *regs)
 }
 #endif
 
+#ifdef CONFIG_FAST_SYSCALL
+void do_el0_xcall(struct pt_regs *regs)
+{
+	const syscall_fn_t *t = real_syscall_table();
+
+	el0_svc_common(regs, regs->regs[8], __NR_syscalls, t);
+}
+#endif
+
 void do_el0_svc(struct pt_regs *regs)
 {
 	const syscall_fn_t *t = sys_call_table;
@@ -173,6 +183,10 @@ void do_el0_svc(struct pt_regs *regs)
 	}
 #endif
 
+#ifdef CONFIG_DYNAMIC_XCALL
+	if (!hijack_syscall(regs))
+		return;
+#endif
 	el0_svc_common(regs, regs->regs[8], __NR_syscalls, t);
 }
diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c
index 15fb2d4424ef..c331e3d253c5 100644
--- a/arch/arm64/kernel/xcall/core.c
+++ b/arch/arm64/kernel/xcall/core.c
@@ -5,6 +5,7 @@
 
 #define pr_fmt(fmt) "xcall: " fmt
 
+#include <linux/mmap_lock.h>
 #include <linux/namei.h>
 #include <linux/slab.h>
 #include <linux/xcall.h>
@@ -49,6 +50,77 @@ static long inv_xcall(struct pt_regs *regs)
 
 #define inv_xcall_syscall ((unsigned long)inv_xcall)
 
+static long patch_syscall(struct pt_regs *regs);
+
+static long filter_ksyscall(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+
+	/*
+	 * Currently, some syscalls issue svc #0 from two or more different
+	 * addresses, so all of these svc #0 sites need to be hijacked.
+	 */
+	if (regs->syscallno & ESR_ELx_ISS_MASK)
+		return -ENOSYS;
+
+	cmpxchg(&(area->sys_call_table[scno]), filter_ksyscall, patch_syscall);
+	regs->pc -= AARCH64_INSN_SIZE;
+	return 0;
+}
+
+static long replay_syscall(struct pt_regs *regs)
+{
+	regs->pc -= AARCH64_INSN_SIZE;
+	return 0;
+}
+
+static long patch_syscall(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+	syscall_fn_t syscall_fn;
+	unsigned long old;
+	int ret;
+
+	old = cmpxchg(&(area->sys_call_table[scno]), patch_syscall, replay_syscall);
+	if (old != (unsigned long)patch_syscall) {
+		syscall_fn = (syscall_fn_t)area->sys_call_table[scno];
+		return syscall_fn(regs);
+	}
+
+	regs->pc -= AARCH64_INSN_SIZE;
+
+	mmap_write_lock(current->mm);
+	ret = set_xcall_insn(current->mm, regs->pc, SVC_FFFF);
+	mmap_write_unlock(current->mm);
+
+	if (!ret) {
+		xchg(&(area->sys_call_table[scno]), filter_ksyscall);
+		pr_debug("patch svc ffff for scno %u\n", scno);
+		return 0;
+	}
+
+	/*
+	 * If patching svc #0xffff fails, fall back to the handlers in
+	 * sys_call_table for this invocation and retry the patching on
+	 * the next one.
+	 */
+	set_xcall_insn(current->mm, regs->pc, SVC_0000);
+	regs->pc += AARCH64_INSN_SIZE;
+	xchg(&(area->sys_call_table[scno]), patch_syscall);
+	return ret;
+}
+
+int xcall_pre_sstep_check(struct pt_regs *regs)
+{
+	struct xcall_area *area = mm_xcall_area(current->mm);
+	unsigned int scno = (unsigned int)regs->regs[8];
+
+	return area && (scno < NR_syscalls) &&
+	       (area->sys_call_table[scno] != (unsigned long)inv_xcall);
+}
+
 static struct xcall *get_xcall(struct xcall *xcall)
 {
 	refcount_inc(&xcall->ref);
@@ -135,17 +207,19 @@
 static int fill_xcall_syscall(struct xcall_area *area, struct xcall *xcall)
 {
+	unsigned int scno_offset, scno_count = 0;
 	struct xcall_prog_object *obj;
-	unsigned int scno_offset;
 
 	obj = xcall->program->objs;
-	while (obj->func) {
+	while (scno_count < xcall->program->nr_scno && obj->func) {
 		scno_offset = NR_syscalls + obj->scno;
 		if (area->sys_call_table[scno_offset] != inv_xcall_syscall)
 			return -EINVAL;
 
 		area->sys_call_table[scno_offset] = obj->func;
+		area->sys_call_table[obj->scno] = (unsigned long)patch_syscall;
 		obj += 1;
+		scno_count++;
 	}
 
 	return 0;
@@ -314,3 +388,9 @@
 	spin_unlock(&prog_list_lock);
 }
 EXPORT_SYMBOL(xcall_prog_unregister);
+
+const syscall_fn_t *default_sys_call_table(void)
+{
+	return sys_call_table;
+}
+EXPORT_SYMBOL(default_sys_call_table);
diff --git a/arch/arm64/kernel/xcall/entry.S b/arch/arm64/kernel/xcall/entry.S
index 401be46f4fc2..7728f32ee962 100644
--- a/arch/arm64/kernel/xcall/entry.S
+++ b/arch/arm64/kernel/xcall/entry.S
@@ -152,7 +152,6 @@ alternative_else_nop_endif
 .endm /* .macro hw_xcal_restore_base_regs */
 
 SYM_CODE_START(no_xcall_entry)
-	ldp	x20, x21, [sp, #0]
 	kernel_entry 0, 64
 	mov	x0, sp
 	bl	el0t_64_sync_handler
@@ -160,24 +159,12 @@ SYM_CODE_START(no_xcall_entry)
 SYM_CODE_END(no_xcall_entry)
 
 SYM_CODE_START(xcall_entry)
-	ldp	x20, x21, [sp, #0]
 	hw_xcall_save_base_regs
 	mov	x0, sp
 	bl	el0t_64_xcall_handler
 	hw_xcal_restore_base_regs
 SYM_CODE_END(xcall_entry)
 
-SYM_CODE_START_LOCAL(el0t_64_hw_xcall)
-	stp	x20, x21, [sp, #0]
-	ldr_this_cpu x21, __cpu_xcall_entry, x20
-	mov	x20, __NR_syscalls
-	/* x8 >= __NR_syscalls */
-	cmp	x8, __NR_syscalls
-	csel	x20, x8, x20, lt
-	ldr	x21, [x21, x20, lsl #3]
-	br	x21
-SYM_CODE_END(el0t_64_hw_xcall)
-
 .macro xcall_ventry
 	.align 7
 .Lventry_start\@:
@@ -190,6 +177,6 @@ SYM_CODE_END(el0t_64_hw_xcall)
 	msr	tpidrro_el0, xzr
 .Lskip_tramp_vectors_cleanup\@:
 	sub	sp, sp, #PT_REGS_SIZE
-	b	el0t_64_hw_xcall
+	b	xcall_entry
 	.org .Lventry_start\@ + 128	// Did we overflow the ventry slot?
 .endm
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 609e48784f77..2d77f20f1474 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -595,6 +595,25 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v
 			*(uprobe_opcode_t *)&auprobe->insn);
 }
 
+#ifdef CONFIG_DYNAMIC_XCALL
+/*
+ * Forcibly patch the instruction at @vaddr without checking that the
+ * old instruction is UPROBE_BRK.
+ */
+int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode)
+{
+	struct uprobe uprobe = { .ref_ctr_offset = 0 };
+	int ret;
+
+	/* Occupy @vaddr with UPROBE_SWBP_INSN so that uprobe does not write it. */
+	ret = uprobe_write_opcode(&uprobe.arch, mm, vaddr, UPROBE_SWBP_INSN);
+	if (ret)
+		return ret;
+
+	return uprobe_write_opcode(&uprobe.arch, mm, vaddr, opcode);
+}
+#endif
+
 static struct uprobe *get_uprobe(struct uprobe *uprobe)
 {
 	refcount_inc(&uprobe->ref);
-- 
2.34.1