From: Liao Chang <liaochang1@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/release-management/issues/ID5CMS -------------------------------- Hijack syscall with dynamic instruction replacement. With xcall 2.0, hardware xcall can directly modify the SVC instruction through dynamic instruction replacement, which avoids unnecessary system call number checks at the exception entry. Signed-off-by: Liao Chang <liaochang1@huawei.com> Signed-off-by: Zheng Xinyu <zhengxinyu6@huawei.com> Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com> --- arch/arm64/Kconfig.turbo | 1 + arch/arm64/include/asm/exception.h | 3 ++ arch/arm64/include/asm/xcall.h | 38 +++++++++++++++++ arch/arm64/kernel/entry-common.c | 2 +- arch/arm64/kernel/probes/uprobes.c | 6 +++ arch/arm64/kernel/syscall.c | 14 ++++++ arch/arm64/kernel/xcall/core.c | 68 ++++++++++++++++++++++++++++++ kernel/events/uprobes.c | 23 ++++++++++ 8 files changed, 154 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig.turbo b/arch/arm64/Kconfig.turbo index cfefbdb605f8..778ea1025c2c 100644 --- a/arch/arm64/Kconfig.turbo +++ b/arch/arm64/Kconfig.turbo @@ -74,6 +74,7 @@ config ACTLR_XCALL_XINT config DYNAMIC_XCALL bool "Support dynamically replace and load system call" depends on FAST_SYSCALL + depends on UPROBES default n help Xcall 2.0 add "/proc/xcall/comm" interface to diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index d69f0e6d53f8..94338104a18c 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -75,6 +75,9 @@ void do_el1_fpac(struct pt_regs *regs, unsigned long esr); void do_el0_mops(struct pt_regs *regs, unsigned long esr); void do_serror(struct pt_regs *regs, unsigned long esr); void do_notify_resume(struct pt_regs *regs, unsigned long thread_flags); +#ifdef CONFIG_FAST_SYSCALL +void do_el0_xcall(struct pt_regs *regs); +#endif void __noreturn panic_bad_stack(struct pt_regs *regs, unsigned long esr, 
unsigned long far); diff --git a/arch/arm64/include/asm/xcall.h b/arch/arm64/include/asm/xcall.h index fd0232d5bb99..449c1ad4d50f 100644 --- a/arch/arm64/include/asm/xcall.h +++ b/arch/arm64/include/asm/xcall.h @@ -15,6 +15,9 @@ #include <asm/cpufeature.h> #include <asm/syscall.h> +#define SVC_0000 0xd4000001 +#define SVC_FFFF 0xd41fffe1 + struct xcall_comm { char *name; char *binary; @@ -43,13 +46,48 @@ struct xcall_area { struct xcall *xcall; }; +extern const syscall_fn_t *default_sys_call_table(void); #ifdef CONFIG_DYNAMIC_XCALL extern int xcall_attach(struct xcall_comm *info); extern int xcall_detach(struct xcall_comm *info); +extern int xcall_pre_sstep_check(struct pt_regs *regs); +extern int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr, + uprobe_opcode_t opcode); #define mm_xcall_area(mm) ((struct xcall_area *)((mm)->xcall)) + +static inline long hijack_syscall(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + syscall_fn_t syscall_fn; + + if (likely(!area)) + return -EINVAL; + + if (unlikely(scno >= __NR_syscalls)) + return -EINVAL; + + syscall_fn = (syscall_fn_t)area->sys_call_table[scno]; + return syscall_fn(regs); +} + +static inline const syscall_fn_t *real_syscall_table(void) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + + if (likely(!area)) + return default_sys_call_table(); + + return (syscall_fn_t *)(&(area->sys_call_table[__NR_syscalls])); +} #else #define mm_xcall_area(mm) (NULL) +#define hijack_syscall(regs) (NULL) +static inline const syscall_fn_t *real_syscall_table(void) +{ + return sys_call_table; +} #endif /* CONFIG_DYNAMIC_XCALL */ DECLARE_STATIC_KEY_FALSE(xcall_enable); diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 1e8171c1efe7..f4a21c66856a 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -827,7 +827,7 @@ static void noinstr el0_xcall(struct pt_regs 
*regs) #endif fp_user_discard(); local_daif_restore(DAIF_PROCCTX); - do_el0_svc(regs); + do_el0_xcall(regs); fast_exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index a2f137a595fc..677a9589f9ca 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -6,6 +6,7 @@ #include <linux/ptrace.h> #include <linux/uprobes.h> #include <asm/cacheflush.h> +#include <asm/xcall.h> #include "decode-insn.h" @@ -171,6 +172,11 @@ static int uprobe_breakpoint_handler(struct pt_regs *regs, if (uprobe_pre_sstep_notifier(regs)) return DBG_HOOK_HANDLED; +#ifdef CONFIG_DYNAMIC_XCALL + if (xcall_pre_sstep_check(regs)) + return DBG_HOOK_HANDLED; +#endif + return DBG_HOOK_ERROR; } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 558e9c9da8a4..ff40e51634b0 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -14,6 +14,7 @@ #include <asm/syscall.h> #include <asm/thread_info.h> #include <asm/unistd.h> +#include <asm/xcall.h> long a32_arm_syscall(struct pt_regs *regs, int scno); long sys_ni_syscall(void); @@ -162,6 +163,15 @@ static inline void delouse_pt_regs(struct pt_regs *regs) } #endif +#ifdef CONFIG_FAST_SYSCALL +void do_el0_xcall(struct pt_regs *regs) +{ + const syscall_fn_t *t = real_syscall_table(); + + el0_svc_common(regs, regs->regs[8], __NR_syscalls, t); +} +#endif + void do_el0_svc(struct pt_regs *regs) { const syscall_fn_t *t = sys_call_table; @@ -173,6 +183,10 @@ void do_el0_svc(struct pt_regs *regs) } #endif +#ifdef CONFIG_DYNAMIC_XCALL + if (!hijack_syscall(regs)) + return; +#endif el0_svc_common(regs, regs->regs[8], __NR_syscalls, t); } diff --git a/arch/arm64/kernel/xcall/core.c b/arch/arm64/kernel/xcall/core.c index cb3bef28eb11..9ba4c8de7112 100644 --- a/arch/arm64/kernel/xcall/core.c +++ b/arch/arm64/kernel/xcall/core.c @@ -5,6 +5,7 @@ #define pr_fmt(fmt) "xcall: " fmt +#include <linux/mmap_lock.h> #include <linux/slab.h> 
#include <linux/syscalls.h> #include <linux/xcall.h> @@ -44,6 +45,66 @@ static struct xcall_prog *get_xcall_prog_locked(const char *module) #define inv_xcall_syscall ((unsigned long)__arm64_sys_ni_syscall) +static long patch_syscall(struct pt_regs *regs); + +static long filter_ksyscall(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + + cmpxchg(&(area->sys_call_table[scno]), filter_ksyscall, patch_syscall); + regs->pc -= AARCH64_INSN_SIZE; + return 0; +} + +static long replay_syscall(struct pt_regs *regs) +{ + regs->pc -= AARCH64_INSN_SIZE; + return 0; +} + +static long patch_syscall(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + syscall_fn_t syscall_fn; + unsigned long old; + int ret; + + old = cmpxchg(&(area->sys_call_table[scno]), patch_syscall, replay_syscall); + if (old != (unsigned long)patch_syscall) { + syscall_fn = (syscall_fn_t)area->sys_call_table[scno]; + return syscall_fn(regs); + } + + regs->pc -= AARCH64_INSN_SIZE; + + mmap_write_lock(current->mm); + ret = set_xcall_insn(current->mm, regs->pc, SVC_FFFF); + mmap_write_unlock(current->mm); + + if (!ret) { + xchg(&(area->sys_call_table[scno]), filter_ksyscall); + return 0; + } + + regs->pc += AARCH64_INSN_SIZE; + xchg(&(area->sys_call_table[scno]), patch_syscall); + pr_info("patch xcall insn failed for scno %u at %s.\n", + scno, ret > 0 ? 
"UPROBE_BRK" : "SVC_FFFF"); + + return ret; +} + +int xcall_pre_sstep_check(struct pt_regs *regs) +{ + struct xcall_area *area = mm_xcall_area(current->mm); + unsigned int scno = (unsigned int)regs->regs[8]; + + return area && (scno < NR_syscalls) && + (area->sys_call_table[scno] != inv_xcall_syscall); +} + static struct xcall *get_xcall(struct xcall *xcall) { refcount_inc(&xcall->ref); @@ -138,6 +199,7 @@ static int fill_xcall_syscall(struct xcall_area *area, struct xcall *xcall) } area->sys_call_table[scno_offset] = obj->func; + area->sys_call_table[obj->scno] = (unsigned long)patch_syscall; obj += 1; scno_count++; } @@ -314,3 +376,9 @@ void xcall_prog_unregister(struct xcall_prog *prog) spin_unlock(&prog_list_lock); } EXPORT_SYMBOL(xcall_prog_unregister); + +const syscall_fn_t *default_sys_call_table(void) +{ + return sys_call_table; +} +EXPORT_SYMBOL(default_sys_call_table); diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 609e48784f77..e382f7e4d5d9 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -595,6 +595,29 @@ set_orig_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long v *(uprobe_opcode_t *)&auprobe->insn); } +#ifdef CONFIG_DYNAMIC_XCALL +/* + * Force to patch any instruction without checking the old instruction + * is UPROBE_BRK. + */ +int set_xcall_insn(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_t opcode) +{ + struct uprobe uprobe = { .ref_ctr_offset = 0 }; + int ret; + + /* Use the UPROBE_SWBP_INSN to occupy the vaddr avoid uprobe writes it */ + ret = uprobe_write_opcode(&uprobe.arch, mm, vaddr, UPROBE_SWBP_INSN); + if (ret) + return 1; + + ret = uprobe_write_opcode(&uprobe.arch, mm, vaddr, opcode); + if (ret) + return -1; + + return 0; +} +#endif + static struct uprobe *get_uprobe(struct uprobe *uprobe) { refcount_inc(&uprobe->ref); -- 2.34.1