From: Masami Hiramatsu mhiramat@kernel.org
mainline inclusion from mainline-v5.16-rc1 commit 19138af1bd880d52318bbb164de72a482e59a45c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9R2TB
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
------------------------------------------------------
Since the kretprobe replaces the function return address with the kretprobe_trampoline on the stack, x86 unwinders can not continue the stack unwinding at that point, or record kretprobe_trampoline instead of correct return address.
To fix this issue, find the correct return address from task's kretprobe_instances as like as function-graph tracer does.
With this fix, the unwinder can correctly unwind the stack from kretprobe event on x86, as below.
<...>-135 [003] ...1 6.722338: r_full_proxy_read_0: (vfs_read+0xab/0x1a0 <- full_proxy_read) <...>-135 [003] ...1 6.722377: <stack trace> => kretprobe_trace_func+0x209/0x2f0 => kretprobe_dispatcher+0x4a/0x70 => __kretprobe_trampoline_handler+0xca/0x150 => trampoline_handler+0x44/0x70 => kretprobe_trampoline+0x2a/0x50 => vfs_read+0xab/0x1a0 => ksys_read+0x5f/0xe0 => do_syscall_64+0x33/0x40 => entry_SYSCALL_64_after_hwframe+0x44/0xae
Link: https://lkml.kernel.org/r/163163055130.489837.5161749078833497255.stgit@devn...
Reported-by: Daniel Xu dxu@dxuuu.xyz Signed-off-by: Masami Hiramatsu mhiramat@kernel.org Suggested-by: Josh Poimboeuf jpoimboe@redhat.com Tested-by: Andrii Nakryiko andrii@kernel.org Acked-by: Josh Poimboeuf jpoimboe@redhat.com Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Conflicts: arch/x86/include/asm/unwind.h [Adapt for calling kretprobe_find_ret_addr()] Signed-off-by: Zheng Yejian zhengyejian1@huawei.com --- arch/x86/include/asm/unwind.h | 22 ++++++++++++++++++++++ arch/x86/kernel/unwind_frame.c | 3 +-- arch/x86/kernel/unwind_guess.c | 3 +-- arch/x86/kernel/unwind_orc.c | 21 +++++++++++++++++---- 4 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h index 70fc159ebe69..1139b3cab818 100644 --- a/arch/x86/include/asm/unwind.h +++ b/arch/x86/include/asm/unwind.h @@ -4,6 +4,7 @@
#include <linux/sched.h> #include <linux/ftrace.h> +#include <linux/kprobes.h> #include <asm/ptrace.h> #include <asm/stacktrace.h>
@@ -99,6 +100,27 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size, void *orc, size_t orc_size) {} #endif
+static inline +unsigned long unwind_recover_kretprobe(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + return is_kretprobe_trampoline(addr) ? + kretprobe_find_ret_addr(state->task, addr_p) : + addr; +} + +/* Recover the return address modified by kretprobe and ftrace_graph. */ +static inline +unsigned long unwind_recover_ret_addr(struct unwind_state *state, + unsigned long addr, unsigned long *addr_p) +{ + unsigned long ret; + + ret = ftrace_graph_ret_addr(state->task, &state->graph_idx, + addr, addr_p); + return unwind_recover_kretprobe(state, ret, addr_p); +} + /* * This disables KASAN checking when reading a value from another task's stack, * since the other task could be running on another CPU and could have poisoned diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index d7c44b257f7f..8e1c50c86e5d 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -240,8 +240,7 @@ static bool update_stack_state(struct unwind_state *state, else { addr_p = unwind_get_return_address_ptr(state); addr = READ_ONCE_TASK_STACK(state->task, *addr_p); - state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - addr, addr_p); + state->ip = unwind_recover_ret_addr(state, addr, addr_p); }
/* Save the original stack pointer for unwind_dump(): */ diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index c49f10ffd8cd..884d68a6e714 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -15,8 +15,7 @@ unsigned long unwind_get_return_address(struct unwind_state *state)
addr = READ_ONCE_NOCHECK(*state->sp);
- return ftrace_graph_ret_addr(state->task, &state->graph_idx, - addr, state->sp); + return unwind_recover_ret_addr(state, addr, state->sp); } EXPORT_SYMBOL_GPL(unwind_get_return_address);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 9452ba625f67..e1105f169bcc 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -579,9 +579,8 @@ bool unwind_next_frame(struct unwind_state *state) if (!deref_stack_reg(state, ip_p, &state->ip)) goto err;
- state->ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, - state->ip, (void *)ip_p); - + state->ip = unwind_recover_ret_addr(state, state->ip, + (unsigned long *)ip_p); state->sp = sp; state->regs = NULL; state->prev_regs = NULL; @@ -594,7 +593,18 @@ bool unwind_next_frame(struct unwind_state *state) (void *)orig_ip); goto err; } - + /* + * There is a small chance to interrupt at the entry of + * __kretprobe_trampoline() where the ORC info doesn't exist. + * That point is right after the RET to __kretprobe_trampoline() + * which was modified return address. + * At that point, the @addr_p of the unwind_recover_kretprobe() + * (this has to point the address of the stack entry storing + * the modified return address) must be "SP - (a stack entry)" + * because SP is incremented by the RET. + */ + state->ip = unwind_recover_kretprobe(state, state->ip, + (unsigned long *)(state->sp - sizeof(long))); state->regs = (struct pt_regs *)sp; state->prev_regs = NULL; state->full_regs = true; @@ -607,6 +617,9 @@ bool unwind_next_frame(struct unwind_state *state) (void *)orig_ip); goto err; } + /* See UNWIND_HINT_TYPE_REGS case comment. */ + state->ip = unwind_recover_kretprobe(state, state->ip, + (unsigned long *)(state->sp - sizeof(long)));
if (state->full_regs) state->prev_regs = state->regs;