Alexander Duyck (1):
      bpf, arm64: Fix BTI type used for freplace attached functions

Nathan Chancellor (1):
      bpf, arm64: Mark dummy_tramp as global

Peter Zijlstra (1):
      x86/ibt,ftrace: Search for __fentry__ location

Xu Kuohai (12):
      arm64, insn: Add ldr/str with immediate offset
      bpf, arm64: Optimize BPF store/load using arm64 str/ldr(immediate offset)
      bpf, arm64: Adjust the offset of str/ldr(immediate) to positive number
      arm64: Add LDR (literal) instruction
      bpf, arm64: Implement bpf_arch_text_poke() for arm64
      bpf, arm64: Add bpf trampoline for arm64
      bpf, arm64: Fix compile error in dummy_tramp()
      bpf, arm64: Fix bpf trampoline instruction endianness
      ftrace: Allow users to disable ftrace direct call
      arm64: ftrace: Support long jump for ftrace direct call
      arm64: ftrace: Add ftrace direct call support
      arm64: ftrace: Support direct call for no literal module functions
 arch/arm64/Kconfig               |   2 +
 arch/arm64/Makefile              |   4 +
 arch/arm64/include/asm/ftrace.h  |  31 +-
 arch/arm64/include/asm/insn.h    |  14 +
 arch/arm64/kernel/asm-offsets.c  |   1 +
 arch/arm64/kernel/entry-ftrace.S |  39 +-
 arch/arm64/kernel/ftrace.c       | 248 ++++-
 arch/arm64/kernel/insn.c         | 111 +++-
 arch/arm64/net/bpf_jit.h         |  21 +
 arch/arm64/net/bpf_jit_comp.c    | 874 +++++++++++++++++++++++++++++--
 arch/x86/kernel/kprobes/core.c   |  11 +-
 include/linux/ftrace.h           |   3 +
 kernel/bpf/trampoline.c          |  20 +-
 kernel/kprobes.c                 |   8 +-
 kernel/trace/Kconfig             |   7 +-
 kernel/trace/ftrace.c            |  65 ++-
 16 files changed, 1343 insertions(+), 116 deletions(-)
From: Peter Zijlstra peterz@infradead.org
mainline inclusion
from mainline-v5.18-rc1
commit aebfd12521d9c7d0b502cf6d06314cfbcdccfe3b
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/aebfd12521d9
--------------------------------
Currently a lot of ftrace code assumes __fentry__ is at sym+0. However with Intel IBT enabled the first instruction of a function will most likely be ENDBR.
Change ftrace_location() to not only return the __fentry__ location when called for the __fentry__ location, but also when called for the sym+0 location.
Then audit/update all callsites of this function to consistently use these new semantics.
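A minimal user-space sketch of the semantic change may help; the record table, addresses and sizes below are invented purely for illustration and are not kernel code:

/* Stand-alone model of the new ftrace_location() semantics. */
#include <stdio.h>

struct rec {
	unsigned long sym_start;   /* sym+0, possibly an ENDBR with IBT */
	unsigned long sym_size;    /* function size from kallsyms */
	unsigned long fentry_ip;   /* actual __fentry__ patch site */
};

static const struct rec recs[] = {
	/* with IBT, __fentry__ sits a few bytes after sym+0 */
	{ 0x1000, 0x80, 0x1004 },
	{ 0x2000, 0x40, 0x2004 },
};

/* old behaviour: only an exact __fentry__ address was recognized */
static unsigned long ftrace_location_old(unsigned long ip)
{
	for (size_t i = 0; i < sizeof(recs) / sizeof(recs[0]); i++)
		if (ip == recs[i].fentry_ip)
			return ip;
	return 0;
}

/* new behaviour: sym+0 is also mapped to the function's __fentry__ */
static unsigned long ftrace_location_new(unsigned long ip)
{
	for (size_t i = 0; i < sizeof(recs) / sizeof(recs[0]); i++) {
		if (ip == recs[i].fentry_ip)
			return ip;
		if (ip == recs[i].sym_start)	/* sym+0 */
			return recs[i].fentry_ip;
	}
	return 0;
}

int main(void)
{
	printf("old(0x1000) = %#lx\n", ftrace_location_old(0x1000)); /* 0 */
	printf("new(0x1000) = %#lx\n", ftrace_location_new(0x1000)); /* 0x1004 */
	printf("new(0x1004) = %#lx\n", ftrace_location_new(0x1004)); /* 0x1004 */
	return 0;
}

Callers are then expected to canonicalize with ip = ftrace_location(ip) and treat 0 as "not a traced location", which is what the call sites below are converted to do.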
Suggested-by: Steven Rostedt rostedt@goodmis.org
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org
Acked-by: Masami Hiramatsu mhiramat@kernel.org
Acked-by: Josh Poimboeuf jpoimboe@redhat.com
Link: https://lore.kernel.org/r/20220308154318.227581603@infradead.org
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/x86/kernel/kprobes/core.c | 11 ++------
 kernel/bpf/trampoline.c        | 20 +++-----------
 kernel/kprobes.c               |  8 ++----
 kernel/trace/ftrace.c          | 48 ++++++++++++++++++++++++++++------
 4 files changed, 48 insertions(+), 39 deletions(-)
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 352cf0a264d6..ef86b3f02898 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -215,17 +215,10 @@ static unsigned long __recover_probed_insn(kprobe_opcode_t *buf, unsigned long addr) { struct kprobe *kp; - unsigned long faddr; + bool faddr;
kp = get_kprobe((void *)addr); - faddr = ftrace_location(addr); - /* - * Addresses inside the ftrace location are refused by - * arch_check_ftrace_location(). Something went terribly wrong - * if such an address is checked here. - */ - if (WARN_ON(faddr && faddr != addr)) - return 0UL; + faddr = ftrace_location(addr) == addr; /* * Use the current code if it is not modified by Kprobe * and it cannot be modified by ftrace. diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 4c5594266fb2..af9fea93b488 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -87,18 +87,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) return tr; }
-static int is_ftrace_location(void *ip) -{ - long addr; - - addr = ftrace_location((long)ip); - if (!addr) - return 0; - if (WARN_ON_ONCE(addr != (long)ip)) - return -EFAULT; - return 1; -} - static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr) { void *ip = tr->func.addr; @@ -127,12 +115,12 @@ static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_ad static int register_fentry(struct bpf_trampoline *tr, void *new_addr) { void *ip = tr->func.addr; + unsigned long faddr; int ret;
- ret = is_ftrace_location(ip); - if (ret < 0) - return ret; - tr->func.ftrace_managed = ret; + faddr = ftrace_location((unsigned long)ip); + if (faddr) + tr->func.ftrace_managed = true;
if (tr->func.ftrace_managed) ret = register_ftrace_direct((long)ip, (long)new_addr); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index c34949d44984..6fd66a47b476 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1612,14 +1612,10 @@ static inline int check_kprobe_rereg(struct kprobe *p)
int __weak arch_check_ftrace_location(struct kprobe *p) { - unsigned long ftrace_addr; + unsigned long addr = (unsigned long)p->addr;
- ftrace_addr = ftrace_location((unsigned long)p->addr); - if (ftrace_addr) { + if (ftrace_location(addr) == addr) { #ifdef CONFIG_KPROBES_ON_FTRACE - /* Given address is not on the instruction boundary */ - if ((unsigned long)p->addr != ftrace_addr) - return -EILSEQ; p->flags |= KPROBE_FLAG_FTRACE; #else /* !CONFIG_KPROBES_ON_FTRACE */ return -EINVAL; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 6f91431bd03b..837c24dcbafd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1576,17 +1576,34 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) }
/** - * ftrace_location - return true if the ip giving is a traced location + * ftrace_location - return the ftrace location * @ip: the instruction pointer to check * - * Returns rec->ip if @ip given is a pointer to a ftrace location. - * That is, the instruction that is either a NOP or call to - * the function tracer. It checks the ftrace internal tables to - * determine if the address belongs or not. + * If @ip matches the ftrace location, return @ip. + * If @ip matches sym+0, return sym's ftrace location. + * Otherwise, return 0. */ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + struct dyn_ftrace *rec; + unsigned long offset; + unsigned long size; + + rec = lookup_rec(ip, ip); + if (!rec) { + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) + goto out; + + /* map sym+0 to __fentry__ */ + if (!offset) + rec = lookup_rec(ip, ip + size - 1); + } + + if (rec) + return rec->ip; + +out: + return 0; }
/** @@ -4949,7 +4966,8 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) { struct ftrace_func_entry *entry;
- if (!ftrace_location(ip)) + ip = ftrace_location(ip); + if (!ip) return -EINVAL;
if (remove) { @@ -5097,11 +5115,16 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) struct ftrace_func_entry *entry; struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; - int ret = -EBUSY; + int ret = -ENODEV;
mutex_lock(&direct_mutex);
+ ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + /* See if there's a direct function at @ip already */ + ret = -EBUSY; if (ftrace_find_rec_direct(ip)) goto out_unlock;
@@ -5230,6 +5253,10 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr)
mutex_lock(&direct_mutex);
+ ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, NULL); if (!entry) goto out_unlock; @@ -5361,6 +5388,11 @@ int modify_ftrace_direct(unsigned long ip, mutex_lock(&direct_mutex);
mutex_lock(&ftrace_lock); + + ip = ftrace_location(ip); + if (!ip) + goto out_unlock; + entry = find_direct_entry(&ip, &rec); if (!entry) goto out_unlock;
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion
from mainline-v5.19-rc1
commit 30c90f6757a7b38bc95069725657a647873e2ab3
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/30c90f6757a7
--------------------------------
This patch introduces ldr/str with immediate offset support to simplify the JIT implementation of BPF LDX/STX instructions on arm64. Although arm64 ldr/str immediate is available in pre-index, post-index and unsigned offset forms, the unsigned offset form is sufficient for BPF, so this patch only adds this type.
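For reference, the resulting "unsigned offset" encoding can be modelled in a few lines of stand-alone C. This is only an illustrative sketch (register numbers and the sample offset are arbitrary), not the kernel helper added below:

/* Sketch of the AArch64 LDR/STR (unsigned immediate) encoding. */
#include <stdint.h>
#include <stdio.h>

/* scale: 0=byte, 1=half, 2=word, 3=doubleword; load=1 for LDR, 0 for STR */
static uint32_t ls_imm(int load, int scale, unsigned rt, unsigned rn,
		       unsigned offset)
{
	uint32_t insn = load ? 0x39400000u : 0x39000000u; /* base opcodes */
	unsigned imm12 = offset >> scale;

	/* offset must be a multiple of the access size and fit in 12 bits */
	if (offset & ((1u << scale) - 1) || imm12 > 0xfff)
		return 0;	/* caller falls back to the register-offset form */

	insn |= (uint32_t)scale << 30;	/* size field */
	insn |= imm12 << 10;		/* imm12 */
	insn |= rn << 5;		/* base register */
	insn |= rt;			/* target register */
	return insn;
}

int main(void)
{
	/* ldr x1, [x0, #8]  ->  f9400401 */
	printf("%08x\n", ls_imm(1, 3, 1, 0, 8));
	return 0;
}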
Signed-off-by: Xu Kuohai xukuohai@huawei.com
Signed-off-by: Daniel Borkmann daniel@iogearbox.net
Link: https://lore.kernel.org/bpf/20220321152852.2334294-2-xukuohai@huawei.com
Conflicts:
	arch/arm64/lib/insn.c
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/arm64/include/asm/insn.h |  9 +++++
 arch/arm64/kernel/insn.c      | 67 +++++++++++++++++++++++++++--------
 2 files changed, 62 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index c0973345e6e1..8f2b8239ea8b 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -200,6 +200,8 @@ enum aarch64_insn_size_type { enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_REG_OFFSET, AARCH64_INSN_LDST_STORE_REG_OFFSET, + AARCH64_INSN_LDST_LOAD_IMM_OFFSET, + AARCH64_INSN_LDST_STORE_IMM_OFFSET, AARCH64_INSN_LDST_LOAD_PAIR_PRE_INDEX, AARCH64_INSN_LDST_STORE_PAIR_PRE_INDEX, AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, @@ -296,8 +298,10 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000) __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) +__AARCH64_INSN_FUNCS(str_imm, 0x3FC00000, 0x39000000) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) __AARCH64_INSN_FUNCS(exclusive, 0x3F800000, 0x08000000) @@ -407,6 +411,11 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, enum aarch64_insn_register offset, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + unsigned int imm, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type); u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 7d4fdf974542..74ac1d86e9ce 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -442,29 +442,24 @@ static u32 aarch64_insn_encode_register(enum aarch64_insn_register_type type, return insn; }
+static const u32 aarch64_insn_ldst_size[] = { + [AARCH64_INSN_SIZE_8] = 0, + [AARCH64_INSN_SIZE_16] = 1, + [AARCH64_INSN_SIZE_32] = 2, + [AARCH64_INSN_SIZE_64] = 3, +}; + static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, u32 insn) { u32 size;
- switch (type) { - case AARCH64_INSN_SIZE_8: - size = 0; - break; - case AARCH64_INSN_SIZE_16: - size = 1; - break; - case AARCH64_INSN_SIZE_32: - size = 2; - break; - case AARCH64_INSN_SIZE_64: - size = 3; - break; - default: + if (type < AARCH64_INSN_SIZE_8 || type > AARCH64_INSN_SIZE_64) { pr_err("%s: unknown size encoding %d\n", __func__, type); return AARCH64_BREAK_FAULT; }
+ size = aarch64_insn_ldst_size[type]; insn &= ~GENMASK(31, 30); insn |= size << 30;
@@ -647,6 +642,50 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, offset); }
+u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, + enum aarch64_insn_register base, + unsigned int imm, + enum aarch64_insn_size_type size, + enum aarch64_insn_ldst_type type) +{ + u32 insn; + u32 shift; + + if (size < AARCH64_INSN_SIZE_8 || size > AARCH64_INSN_SIZE_64) { + pr_err("%s: unknown size encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + shift = aarch64_insn_ldst_size[size]; + if (imm & ~(BIT(12 + shift) - BIT(shift))) { + pr_err("%s: invalid imm: %d\n", __func__, imm); + return AARCH64_BREAK_FAULT; + } + + imm >>= shift; + + switch (type) { + case AARCH64_INSN_LDST_LOAD_IMM_OFFSET: + insn = aarch64_insn_get_ldr_imm_value(); + break; + case AARCH64_INSN_LDST_STORE_IMM_OFFSET: + insn = aarch64_insn_get_str_imm_value(); + break; + default: + pr_err("%s: unknown load/store encoding %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + base); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); +} + u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base,
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion
from mainline-v5.19-rc1
commit 7db6c0f1d8ee051e0a7d8c58c5982990e4491f39
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/7db6c0f1d8ee
--------------------------------
The current BPF store/load instruction is translated by the JIT into two instructions. The first instruction moves the immediate offset into a temporary register. The second instruction uses this temporary register to do the real store/load.
In fact, arm64 supports addressing with immediate offsets, so this patch introduces an optimization that uses the arm64 str/ldr instructions with an immediate offset when the offset fits.

Example of the generated instructions for r2 = *(u64 *)(r1 + 0):

without optimization:

mov x10, 0
ldr x1, [x0, x10]

with optimization:

ldr x1, [x0, 0]

If the offset is negative, not correctly aligned, or exceeds the maximum value, fall back to using a temporary register.
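The qualifying offsets can be summarized with a small stand-alone check that mirrors the is_lsi_offset() test added below (the function name here is only illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Decide between the immediate and the register-offset form. */
static bool fits_unsigned_imm(int offset, int scale)
{
	if (offset < 0)				/* negative: needs a register */
		return false;
	if (offset > (0xfff << scale))		/* beyond the imm12 range */
		return false;
	if (offset & ((1 << scale) - 1))	/* not size-aligned */
		return false;
	return true;
}

int main(void)
{
	/* BPF_DW accesses (scale 3): 0..32760 in steps of 8 qualify */
	printf("%d %d %d %d\n",
	       fits_unsigned_imm(0, 3),	  /* 1 */
	       fits_unsigned_imm(8, 3),	  /* 1 */
	       fits_unsigned_imm(-8, 3),  /* 0: negative */
	       fits_unsigned_imm(12, 3)); /* 0: not 8-byte aligned */
	return 0;
}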
Signed-off-by: Xu Kuohai xukuohai@huawei.com
Signed-off-by: Daniel Borkmann daniel@iogearbox.net
Link: https://lore.kernel.org/bpf/20220321152852.2334294-3-xukuohai@huawei.com
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/arm64/net/bpf_jit.h      |  14 ++++
 arch/arm64/net/bpf_jit_comp.c | 128 ++++++++++++++++++++++++++++++----
 2 files changed, 127 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cc0cf0f5c7c3..af4f5b6d1242 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -66,6 +66,20 @@ #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD)
+/* Load/store register (immediate offset) */ +#define A64_LS_IMM(Rt, Rn, imm, size, type) \ + aarch64_insn_gen_load_store_imm(Rt, Rn, imm, \ + AARCH64_INSN_SIZE_##size, \ + AARCH64_INSN_LDST_##type##_IMM_OFFSET) +#define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) +#define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) +#define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) +#define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) +#define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) +#define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) +#define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) +#define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) + /* Load/store register pair */ #define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \ aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 18627cbd6da4..2c161b9bc4bd 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -178,6 +178,47 @@ static bool is_addsub_imm(u32 imm) return !(imm & ~0xfff) || !(imm & ~0xfff000); }
+/* + * There are 3 types of AArch64 LDR/STR (immediate) instruction: + * Post-index, Pre-index, Unsigned offset. + * + * For BPF ldr/str, the "unsigned offset" type is sufficient. + * + * "Unsigned offset" type LDR(immediate) format: + * + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 1| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * "Unsigned offset" type STR(immediate) format: + * 3 2 1 0 + * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |x x|1 1 1 0 0 1 0 0| imm12 | Rn | Rt | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * scale + * + * The offset is calculated from imm12 and scale in the following way: + * + * offset = (u64)imm12 << scale + */ +static bool is_lsi_offset(s16 offset, int scale) +{ + if (offset < 0) + return false; + + if (offset > (0xFFF << scale)) + return false; + + if (offset & ((1 << scale) - 1)) + return false; + + return true; +} + /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
@@ -808,19 +849,38 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: - emit_a64_mov_i(1, tmp, off, ctx); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_LDR32(dst, src, tmp), ctx); + if (is_lsi_offset(off, 2)) { + emit(A64_LDR32I(dst, src, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR32(dst, src, tmp), ctx); + } break; case BPF_H: - emit(A64_LDRH(dst, src, tmp), ctx); + if (is_lsi_offset(off, 1)) { + emit(A64_LDRHI(dst, src, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDRH(dst, src, tmp), ctx); + } break; case BPF_B: - emit(A64_LDRB(dst, src, tmp), ctx); + if (is_lsi_offset(off, 0)) { + emit(A64_LDRBI(dst, src, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDRB(dst, src, tmp), ctx); + } break; case BPF_DW: - emit(A64_LDR64(dst, src, tmp), ctx); + if (is_lsi_offset(off, 3)) { + emit(A64_LDR64I(dst, src, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_LDR64(dst, src, tmp), ctx); + } break; }
@@ -848,20 +908,39 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: /* Load imm to a register then store it */ - emit_a64_mov_i(1, tmp2, off, ctx); emit_a64_mov_i(1, tmp, imm, ctx); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off, 2)) { + emit(A64_STR32I(tmp, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR32(tmp, dst, tmp2), ctx); + } break; case BPF_H: - emit(A64_STRH(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off, 1)) { + emit(A64_STRHI(tmp, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRH(tmp, dst, tmp2), ctx); + } break; case BPF_B: - emit(A64_STRB(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off, 0)) { + emit(A64_STRBI(tmp, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STRB(tmp, dst, tmp2), ctx); + } break; case BPF_DW: - emit(A64_STR64(tmp, dst, tmp2), ctx); + if (is_lsi_offset(off, 3)) { + emit(A64_STR64I(tmp, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp2, off, ctx); + emit(A64_STR64(tmp, dst, tmp2), ctx); + } break; } break; @@ -871,19 +950,38 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: - emit_a64_mov_i(1, tmp, off, ctx); switch (BPF_SIZE(code)) { case BPF_W: - emit(A64_STR32(src, dst, tmp), ctx); + if (is_lsi_offset(off, 2)) { + emit(A64_STR32I(src, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR32(src, dst, tmp), ctx); + } break; case BPF_H: - emit(A64_STRH(src, dst, tmp), ctx); + if (is_lsi_offset(off, 1)) { + emit(A64_STRHI(src, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRH(src, dst, tmp), ctx); + } break; case BPF_B: - emit(A64_STRB(src, dst, tmp), ctx); + if (is_lsi_offset(off, 0)) { + emit(A64_STRBI(src, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STRB(src, dst, tmp), ctx); + } break; case BPF_DW: - emit(A64_STR64(src, dst, tmp), ctx); + if (is_lsi_offset(off, 3)) { + emit(A64_STR64I(src, dst, off), ctx); + } else { + emit_a64_mov_i(1, tmp, off, ctx); + emit(A64_STR64(src, dst, tmp), ctx); + } break; } break;
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion
from mainline-v5.19-rc1
commit 5b3d19b9bd4080d7f5e260f91ce8f639e19eb499
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/5b3d19b9bd40
--------------------------------
The BPF STX/LDX instructions use an offset relative to the FP to address stack space. Since BPF_FP is located at the top of the frame, the offset is usually a negative number. However, the arm64 str/ldr immediate instructions require the offset to be a positive number. Therefore, this patch tries to convert the offsets.

The method is to first find the negative offset furthest from the FP, add it to the FP to calculate a bottom position, called FPB, and then make the offsets in the other STX/LDX instructions relative to FPB.

FPB is held in x27, an arm64 callee-saved register that is not otherwise used.

Before adjusting the offsets, the patch checks every instruction to ensure that the FP does not change at run time. If the FP may change, no offsets are adjusted.
For example, for the following bpftrace command:
bpftrace -e 'kprobe:do_sys_open { printf("opening: %s\n", str(arg1)); }'
Without this patch, jited code(fragment):
  0: bti c
  4: stp x29, x30, [sp, #-16]!
  8: mov x29, sp
  c: stp x19, x20, [sp, #-16]!
 10: stp x21, x22, [sp, #-16]!
 14: stp x25, x26, [sp, #-16]!
 18: mov x25, sp
 1c: mov x26, #0x0 // #0
 20: bti j
 24: sub sp, sp, #0x90
 28: add x19, x0, #0x0
 2c: mov x0, #0x0 // #0
 30: mov x10, #0xffffffffffffff78 // #-136
 34: str x0, [x25, x10]
 38: mov x10, #0xffffffffffffff80 // #-128
 3c: str x0, [x25, x10]
 40: mov x10, #0xffffffffffffff88 // #-120
 44: str x0, [x25, x10]
 48: mov x10, #0xffffffffffffff90 // #-112
 4c: str x0, [x25, x10]
 50: mov x10, #0xffffffffffffff98 // #-104
 54: str x0, [x25, x10]
 58: mov x10, #0xffffffffffffffa0 // #-96
 5c: str x0, [x25, x10]
 60: mov x10, #0xffffffffffffffa8 // #-88
 64: str x0, [x25, x10]
 68: mov x10, #0xffffffffffffffb0 // #-80
 6c: str x0, [x25, x10]
 70: mov x10, #0xffffffffffffffb8 // #-72
 74: str x0, [x25, x10]
 78: mov x10, #0xffffffffffffffc0 // #-64
 7c: str x0, [x25, x10]
 80: mov x10, #0xffffffffffffffc8 // #-56
 84: str x0, [x25, x10]
 88: mov x10, #0xffffffffffffffd0 // #-48
 8c: str x0, [x25, x10]
 90: mov x10, #0xffffffffffffffd8 // #-40
 94: str x0, [x25, x10]
 98: mov x10, #0xffffffffffffffe0 // #-32
 9c: str x0, [x25, x10]
 a0: mov x10, #0xffffffffffffffe8 // #-24
 a4: str x0, [x25, x10]
 a8: mov x10, #0xfffffffffffffff0 // #-16
 ac: str x0, [x25, x10]
 b0: mov x10, #0xfffffffffffffff8 // #-8
 b4: str x0, [x25, x10]
 b8: mov x10, #0x8 // #8
 bc: ldr x2, [x19, x10]
 [...]
With this patch, jited code(fragment):
  0: bti c
  4: stp x29, x30, [sp, #-16]!
  8: mov x29, sp
  c: stp x19, x20, [sp, #-16]!
 10: stp x21, x22, [sp, #-16]!
 14: stp x25, x26, [sp, #-16]!
 18: stp x27, x28, [sp, #-16]!
 1c: mov x25, sp
 20: sub x27, x25, #0x88
 24: mov x26, #0x0 // #0
 28: bti j
 2c: sub sp, sp, #0x90
 30: add x19, x0, #0x0
 34: mov x0, #0x0 // #0
 38: str x0, [x27]
 3c: str x0, [x27, #8]
 40: str x0, [x27, #16]
 44: str x0, [x27, #24]
 48: str x0, [x27, #32]
 4c: str x0, [x27, #40]
 50: str x0, [x27, #48]
 54: str x0, [x27, #56]
 58: str x0, [x27, #64]
 5c: str x0, [x27, #72]
 60: str x0, [x27, #80]
 64: str x0, [x27, #88]
 68: str x0, [x27, #96]
 6c: str x0, [x27, #104]
 70: str x0, [x27, #112]
 74: str x0, [x27, #120]
 78: str x0, [x27, #128]
 7c: ldr x2, [x19, #8]
 [...]
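For reference, the offset arithmetic behind the listing above can be reproduced with a short stand-alone C snippet (a simplified model that only considers FP-relative BPF_MEM accesses):

#include <stdio.h>

#define ALIGN_DOWN(x, a)	((x) / (a) * (a))

int main(void)
{
	/* most negative FP-relative offset used by the program above */
	int min_off = -136;
	/* FPB = FP - fpb_offset, emitted as "sub x27, x25, #0x88" */
	int fpb_offset = ALIGN_DOWN(-min_off, 8);	/* 136 == 0x88 */

	/* every FP-relative offset becomes a non-negative FPB offset */
	for (int off = min_off; off <= -8; off += 8)
		printf("[fp, #%d] -> [fpb, #%d]\n", off, off + fpb_offset);
	return 0;
}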
Signed-off-by: Xu Kuohai xukuohai@huawei.com
Signed-off-by: Daniel Borkmann daniel@iogearbox.net
Link: https://lore.kernel.org/bpf/20220321152852.2334294-4-xukuohai@huawei.com
Conflicts:
	arch/arm64/net/bpf_jit_comp.c
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/arm64/net/bpf_jit_comp.c | 159 ++++++++++++++++++++++++++++------
 1 file changed, 132 insertions(+), 27 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 2c161b9bc4bd..cdc5d9a2d9fa 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -24,6 +24,7 @@ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) #define TCALL_CNT (MAX_BPF_JIT_REG + 2) #define TMP_REG_3 (MAX_BPF_JIT_REG + 3) +#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
/* Map BPF registers to A64 registers */ static const int bpf2a64[] = { @@ -50,6 +51,7 @@ static const int bpf2a64[] = { [TCALL_CNT] = A64_R(26), /* temporary register for blinding constants */ [BPF_REG_AX] = A64_R(9), + [FP_BOTTOM] = A64_R(27), };
struct jit_ctx { @@ -60,6 +62,7 @@ struct jit_ctx { int exentry_idx; __le32 *image; u32 stack_size; + int fpb_offset; };
static inline void emit(const u32 insn, struct jit_ctx *ctx) @@ -205,7 +208,7 @@ static bool is_addsub_imm(u32 imm) * * offset = (u64)imm12 << scale */ -static bool is_lsi_offset(s16 offset, int scale) +static bool is_lsi_offset(int offset, int scale) { if (offset < 0) return false; @@ -224,9 +227,9 @@ static bool is_lsi_offset(s16 offset, int scale)
/* Tail call offset to jump into */ #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) -#define PROLOGUE_OFFSET 8 +#define PROLOGUE_OFFSET 9 #else -#define PROLOGUE_OFFSET 7 +#define PROLOGUE_OFFSET 8 #endif
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) @@ -238,6 +241,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; const u8 tcc = bpf2a64[TCALL_CNT]; + const u8 fpb = bpf2a64[FP_BOTTOM]; const int idx0 = ctx->idx; int cur_offset;
@@ -276,6 +280,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_PUSH(r6, r7, A64_SP), ctx); emit(A64_PUSH(r8, r9, A64_SP), ctx); emit(A64_PUSH(fp, tcc, A64_SP), ctx); + emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
/* Set up BPF prog stack base register */ emit(A64_MOV(1, fp, A64_SP), ctx); @@ -296,6 +301,8 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) emit(A64_BTI_J, ctx); }
+ emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); + ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);
/* Set up function call stack */ @@ -377,10 +384,13 @@ static void build_epilogue(struct jit_ctx *ctx) const u8 r8 = bpf2a64[BPF_REG_8]; const u8 r9 = bpf2a64[BPF_REG_9]; const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 fpb = bpf2a64[FP_BOTTOM];
/* We're done with BPF stack */ emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
+ /* Restore x27 and x28 */ + emit(A64_POP(fpb, A64_R(28), A64_SP), ctx); /* Restore fs (x25) and x26 */ emit(A64_POP(fp, A64_R(26), A64_SP), ctx);
@@ -473,6 +483,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const u8 tmp = bpf2a64[TMP_REG_1]; const u8 tmp2 = bpf2a64[TMP_REG_2]; const u8 tmp3 = bpf2a64[TMP_REG_3]; + const u8 fp = bpf2a64[BPF_REG_FP]; + const u8 fpb = bpf2a64[FP_BOTTOM]; const s16 off = insn->off; const s32 imm = insn->imm; const int i = insn - ctx->prog->insnsi; @@ -482,6 +494,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 jmp_cond, reg; s32 jmp_offset; u32 a64_insn; + u8 src_adj; + u8 dst_adj; + int off_adj; int ret;
#define check_imm(bits, imm) do { \ @@ -849,34 +864,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + if (ctx->fpb_offset > 0 && src == fp) { + src_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + src_adj = src; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - if (is_lsi_offset(off, 2)) { - emit(A64_LDR32I(dst, src, off), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_LDR32(dst, src, tmp), ctx); } break; case BPF_H: - if (is_lsi_offset(off, 1)) { - emit(A64_LDRHI(dst, src, off), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_LDRH(dst, src, tmp), ctx); } break; case BPF_B: - if (is_lsi_offset(off, 0)) { - emit(A64_LDRBI(dst, src, off), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_LDRB(dst, src, tmp), ctx); } break; case BPF_DW: - if (is_lsi_offset(off, 3)) { - emit(A64_LDR64I(dst, src, off), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_LDR64I(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_LDR64(dst, src, tmp), ctx); @@ -907,36 +929,43 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_ST | BPF_MEM | BPF_H: case BPF_ST | BPF_MEM | BPF_B: case BPF_ST | BPF_MEM | BPF_DW: + if (ctx->fpb_offset > 0 && dst == fp) { + dst_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + dst_adj = dst; + off_adj = off; + } /* Load imm to a register then store it */ emit_a64_mov_i(1, tmp, imm, ctx); switch (BPF_SIZE(code)) { case BPF_W: - if (is_lsi_offset(off, 2)) { - emit(A64_STR32I(tmp, dst, off), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp2, off, ctx); emit(A64_STR32(tmp, dst, tmp2), ctx); } break; case BPF_H: - if (is_lsi_offset(off, 1)) { - emit(A64_STRHI(tmp, dst, off), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp2, off, ctx); emit(A64_STRH(tmp, dst, tmp2), ctx); } break; case BPF_B: - if (is_lsi_offset(off, 0)) { - emit(A64_STRBI(tmp, dst, off), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp2, off, ctx); emit(A64_STRB(tmp, dst, tmp2), ctx); } break; case BPF_DW: - if (is_lsi_offset(off, 3)) { - emit(A64_STR64I(tmp, dst, off), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp2, off, ctx); emit(A64_STR64(tmp, dst, tmp2), ctx); @@ -950,34 +979,41 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, case BPF_STX | BPF_MEM | BPF_H: case BPF_STX | BPF_MEM | BPF_B: case BPF_STX | BPF_MEM | BPF_DW: + if (ctx->fpb_offset > 0 && dst == fp) { + dst_adj = fpb; + off_adj = off + ctx->fpb_offset; + } else { + dst_adj = dst; + off_adj = off; + } switch (BPF_SIZE(code)) { case BPF_W: - if (is_lsi_offset(off, 2)) { - emit(A64_STR32I(src, dst, off), ctx); + if (is_lsi_offset(off_adj, 2)) { + emit(A64_STR32I(src, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_STR32(src, dst, tmp), ctx); } break; case BPF_H: - if (is_lsi_offset(off, 1)) { - emit(A64_STRHI(src, dst, 
off), ctx); + if (is_lsi_offset(off_adj, 1)) { + emit(A64_STRHI(src, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_STRH(src, dst, tmp), ctx); } break; case BPF_B: - if (is_lsi_offset(off, 0)) { - emit(A64_STRBI(src, dst, off), ctx); + if (is_lsi_offset(off_adj, 0)) { + emit(A64_STRBI(src, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_STRB(src, dst, tmp), ctx); } break; case BPF_DW: - if (is_lsi_offset(off, 3)) { - emit(A64_STR64I(src, dst, off), ctx); + if (is_lsi_offset(off_adj, 3)) { + emit(A64_STR64I(src, dst_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); emit(A64_STR64(src, dst, tmp), ctx); @@ -1017,6 +1053,73 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, return 0; }
+/* + * Return 0 if FP may change at runtime, otherwise find the minimum negative + * offset to FP, converts it to positive number, and align down to 8 bytes. + */ +static int find_fpb_offset(struct bpf_prog *prog) +{ + int i; + int offset = 0; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + const u8 class = BPF_CLASS(insn->code); + const u8 mode = BPF_MODE(insn->code); + const u8 src = insn->src_reg; + const u8 dst = insn->dst_reg; + const s16 off = insn->off; + + switch (class) { + case BPF_STX: + case BPF_ST: + /* fp holds atomic operation result */ + if (class == BPF_STX && mode == BPF_XADD && + src == BPF_REG_FP) + return 0; + + if (mode == BPF_MEM && dst == BPF_REG_FP && + off < offset) + offset = insn->off; + break; + + case BPF_JMP32: + case BPF_JMP: + break; + + case BPF_LDX: + case BPF_LD: + /* fp holds load result */ + if (dst == BPF_REG_FP) + return 0; + + if (class == BPF_LDX && mode == BPF_MEM && + src == BPF_REG_FP && off < offset) + offset = off; + break; + + case BPF_ALU: + case BPF_ALU64: + default: + /* fp holds ALU result */ + if (dst == BPF_REG_FP) + return 0; + } + } + + if (offset < 0) { + /* + * safely be converted to a positive 'int', since insn->off + * is 's16' + */ + offset = -offset; + /* align down to 8 bytes */ + offset = ALIGN_DOWN(offset, 8); + } + + return offset; +} + static int build_body(struct jit_ctx *ctx, bool extra_pass) { const struct bpf_prog *prog = ctx->prog; @@ -1138,6 +1241,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_off; }
+ ctx.fpb_offset = find_fpb_offset(prog); + /* * 1. Initial fake pass to compute ctx->idx and ctx->offset. *
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion
from mainline-v6.0-rc1
commit f1e8a24ed2cab1c907bb47ca5f8dee684896456e
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/f1e8a24ed2ca
--------------------------------
Add LDR (literal) instruction to load data from address relative to PC. This instruction will be used to implement long jump from bpf prog to bpf trampoline in the follow-up patch.
The instruction encoding:
   3       2 2     2                                  0        0
   1       7 6     4                                  5        0
  +-----+-------+---+-----+-------------------------------------+--------+
  | 0 x | 0 1 1 | 0 | 0 0 |                imm19                |   Rt   |
  +-----+-------+---+-----+-------------------------------------+--------+
for 32-bit, variant x == 0; for 64-bit, x == 1.
branch_imm_common() is used to check the distance between the PC and the target address. Since it is reused by this patch and LDR (literal) is not a branch instruction, rename it to label_imm_common().
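For reference, the encoding above can be modelled with a small stand-alone C function. This is an illustrative sketch, not the kernel implementation; the example register and offset happen to match the plt emitted by a later patch in this series:

#include <stdint.h>
#include <stdio.h>

/* offset is relative to the instruction's own address (PC), in bytes */
static uint32_t ldr_lit(int is64, unsigned rt, long offset)
{
	uint32_t insn = 0x18000000u;		/* LDR (literal), 32-bit */

	if (offset & 3 || offset < -(1L << 20) || offset >= (1L << 20))
		return 0;			/* word aligned, within +/-1MB */
	if (is64)
		insn |= 1u << 30;		/* x == 1: 64-bit variant */
	insn |= ((uint32_t)(offset >> 2) & 0x7ffff) << 5;	/* imm19 */
	insn |= rt;
	return insn;
}

int main(void)
{
	/* ldr x10, #8  ->  5800004a */
	printf("%08x\n", ldr_lit(1, 10, 8));
	return 0;
}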
Signed-off-by: Xu Kuohai xukuohai@huawei.com
Signed-off-by: Daniel Borkmann daniel@iogearbox.net
Reviewed-by: Jean-Philippe Brucker jean-philippe@linaro.org
Acked-by: Will Deacon will@kernel.org
Link: https://lore.kernel.org/bpf/20220711150823.2128542-3-xukuohai@huawei.com
Conflicts:
	arch/arm64/lib/insn.c
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/arm64/include/asm/insn.h |  3 +++
 arch/arm64/kernel/insn.c      | 30 ++++++++++++++++++++++++++----
 2 files changed, 29 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 8f2b8239ea8b..1dec70e8fa4c 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -416,6 +416,9 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, unsigned int imm, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); +u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit); u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 74ac1d86e9ce..e56f409c179b 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -466,7 +466,7 @@ static u32 aarch64_insn_encode_ldst_size(enum aarch64_insn_size_type type, return insn; }
-static inline long branch_imm_common(unsigned long pc, unsigned long addr, +static inline long label_imm_common(unsigned long pc, unsigned long addr, long range) { long offset; @@ -497,7 +497,7 @@ u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, * ARM64 virtual address arrangement guarantees all kernel and module * texts are within +/-128M. */ - offset = branch_imm_common(pc, addr, SZ_128M); + offset = label_imm_common(pc, addr, SZ_128M); if (offset >= SZ_128M) return AARCH64_BREAK_FAULT;
@@ -525,7 +525,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset;
- offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M); if (offset >= SZ_1M) return AARCH64_BREAK_FAULT;
@@ -564,7 +564,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, u32 insn; long offset;
- offset = branch_imm_common(pc, addr, SZ_1M); + offset = label_imm_common(pc, addr, SZ_1M);
insn = aarch64_insn_get_bcond_value();
@@ -686,6 +686,28 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_12, insn, imm); }
+u32 aarch64_insn_gen_load_literal(unsigned long pc, unsigned long addr, + enum aarch64_insn_register reg, + bool is64bit) +{ + u32 insn; + long offset; + + offset = label_imm_common(pc, addr, SZ_1M); + if (offset >= SZ_1M) + return AARCH64_BREAK_FAULT; + + insn = aarch64_insn_get_ldr_lit_value(); + + if (is64bit) + insn |= BIT(30); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, reg); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_19, insn, + offset >> 2); +} + u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1, enum aarch64_insn_register reg2, enum aarch64_insn_register base,
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion
from mainline-v6.0-rc1
commit b2ad54e1533e91449cb2a371e034942bd7882b58
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/b2ad54e1533e
--------------------------------
Implement bpf_arch_text_poke() for arm64, so bpf prog or bpf trampoline can be patched with it.
When the target address is NULL, the original instruction is patched to a NOP.
When the target address and the source address are within the branch range, the original instruction is patched to a bl instruction to the target address directly.
To support attaching bpf trampoline to both regular kernel function and bpf prog, we follow the ftrace patchsite way for bpf prog. That is, two instructions are inserted at the beginning of bpf prog, the first one saves the return address to x9, and the second is a nop which will be patched to a bl instruction when a bpf trampoline is attached.
However, when a bpf trampoline is attached to bpf prog, the distance between target address and source address may exceed 128MB, the maximum branch range, because bpf trampoline and bpf prog are allocated separately with vmalloc. So long jump should be handled.
When a bpf prog is constructed, a plt pointing to empty trampoline dummy_tramp is placed at the end:
bpf_prog:
      mov x9, lr
      nop // patchsite
      ...
      ret

plt:
      ldr x10, target
      br x10
target:
      .quad dummy_tramp // plt target
This is also the state when no trampoline is attached.
When a short-jump bpf trampoline is attached, the patchsite is patched to a bl instruction to the trampoline directly:
bpf_prog:
      mov x9, lr
      bl <short-jump bpf trampoline address> // patchsite
      ...
      ret

plt:
      ldr x10, target
      br x10
target:
      .quad dummy_tramp // plt target
When a long-jump bpf trampoline is attached, the plt target is filled with the trampoline address and the patchsite is patched to a bl instruction to the plt:
bpf_prog:
      mov x9, lr
      bl plt // patchsite
      ...
      ret

plt:
      ldr x10, target
      br x10
target:
      .quad <long-jump bpf trampoline address>
dummy_tramp is used to prevent another CPU from jumping to an unknown location during the patching process, making the patching process easier.
The patching process is as follows:
1. when neither the old address nor the new address is a long jump, the patchsite is replaced with a bl to the new address, or a nop if the new address is NULL;
2. when the old address is not long jump but the new one is, the branch target address is written to plt first, then the patchsite is replaced with a bl instruction to the plt;
3. when the old address is long jump but the new one is not, the address of dummy_tramp is written to plt first, then the patchsite is replaced with a bl to the new address, or a nop if the new address is NULL;
4. when both the old address and the new address are long jump, the new address is written to plt and the patchsite is not changed.
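The four cases can be condensed into a small decision sketch (stand-alone C; the names are illustrative and not the kernel's):

#include <stdbool.h>
#include <stdio.h>

struct patch_plan {
	const char *plt_write;	/* what gets written to the plt target */
	const char *insn_write;	/* what the patchsite becomes */
};

static struct patch_plan plan(bool old_is_long, bool new_is_long, bool new_is_null)
{
	struct patch_plan p = { NULL, NULL };

	if (new_is_long)			/* cases 2 and 4 */
		p.plt_write = "new trampoline address";
	else if (old_is_long)			/* case 3 */
		p.plt_write = "dummy_tramp address";

	if (old_is_long && new_is_long)		/* case 4: patchsite untouched */
		return p;

	p.insn_write = new_is_null ? "nop"
		     : new_is_long ? "bl plt" : "bl <new address>";
	return p;
}

int main(void)
{
	/* case 3: detach a long-jump trampoline */
	struct patch_plan p = plan(true, false, true);

	printf("plt: %s, patchsite: %s\n",
	       p.plt_write ? p.plt_write : "-",
	       p.insn_write ? p.insn_write : "-");
	return 0;
}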
Signed-off-by: Xu Kuohai xukuohai@huawei.com
Signed-off-by: Daniel Borkmann daniel@iogearbox.net
Reviewed-by: Jakub Sitnicki jakub@cloudflare.com
Reviewed-by: KP Singh kpsingh@kernel.org
Reviewed-by: Jean-Philippe Brucker jean-philippe@linaro.org
Acked-by: Song Liu songliubraving@fb.com
Link: https://lore.kernel.org/bpf/20220711150823.2128542-4-xukuohai@huawei.com
Conflicts:
	arch/arm64/net/bpf_jit_comp.c
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/arm64/net/bpf_jit.h      |   7 +
 arch/arm64/net/bpf_jit_comp.c | 324 ++++++++++++++++++++++++++++++++--
 2 files changed, 318 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index af4f5b6d1242..864677bf08e2 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -80,6 +80,12 @@ #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD)
+/* LDR (literal) */ +#define A64_LDR32LIT(Wt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Wt, false) +#define A64_LDR64LIT(Xt, offset) \ + aarch64_insn_gen_load_literal(0, offset, Xt, true) + /* Load/store register pair */ #define A64_LS_PAIR(Rt, Rt2, Rn, offset, ls, type) \ aarch64_insn_gen_load_store_pair(Rt, Rt2, Rn, offset, \ @@ -232,5 +238,6 @@ #define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) #define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) #define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC) +#define A64_NOP A64_HINT(AARCH64_INSN_HINT_NOP)
#endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc5d9a2d9fa..288620308ae7 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -10,6 +10,7 @@ #include <linux/bitfield.h> #include <linux/bpf.h> #include <linux/filter.h> +#include <linux/memory.h> #include <linux/printk.h> #include <linux/slab.h>
@@ -65,6 +66,15 @@ struct jit_ctx { int fpb_offset; };
+struct bpf_plt { + u32 insn_ldr; /* load target */ + u32 insn_br; /* branch to target */ + u64 target; /* target value */ +}; + +#define PLT_TARGET_SIZE sizeof_field(struct bpf_plt, target) +#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target) + static inline void emit(const u32 insn, struct jit_ctx *ctx) { if (ctx->image != NULL) @@ -127,6 +137,12 @@ static inline void emit_a64_mov_i64(const int reg, const u64 val, } }
+static inline void emit_bti(u32 insn, struct jit_ctx *ctx) +{ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(insn, ctx); +} + /* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. So we can compose the address @@ -222,15 +238,31 @@ static bool is_lsi_offset(int offset, int scale) return true; }
+/* generated prologue: + * bti c // if CONFIG_ARM64_BTI_KERNEL + * mov x9, lr + * nop // POKE_OFFSET + * stp x29, lr, [sp, #-16]! + * mov x29, sp + * stp x19, x20, [sp, #-16]! + * stp x21, x22, [sp, #-16]! + * stp x25, x26, [sp, #-16]! + * stp x27, x28, [sp, #-16]! + * mov x25, sp + * mov tcc, #0 + * // PROLOGUE_OFFSET + */ + +#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0) + +/* Offset of nop instruction in bpf prog entry to be poked */ +#define POKE_OFFSET (BTI_INSNS + 1) + /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15)
/* Tail call offset to jump into */ -#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) -#define PROLOGUE_OFFSET 9 -#else -#define PROLOGUE_OFFSET 8 -#endif +#define PROLOGUE_OFFSET (BTI_INSNS + 2 + 8)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -268,9 +300,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */
- /* BTI landing pad */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_C, ctx); + emit_bti(A64_BTI_C, ctx); + + emit(A64_MOV(1, A64_R(9), A64_LR), ctx); + emit(A64_NOP, ctx);
/* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); @@ -297,8 +330,7 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) }
/* BTI landing pad for the tail call, done with a BR */ - if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) - emit(A64_BTI_J, ctx); + emit_bti(A64_BTI_J, ctx); }
emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx); @@ -376,6 +408,52 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) #undef jmp_offset }
+void dummy_tramp(void); + +asm ( +" .pushsection .text, "ax", @progbits\n" +" .type dummy_tramp, %function\n" +"dummy_tramp:" +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +" bti j\n" /* dummy_tramp is called via "br x10" */ +#endif +" mov x10, lr\n" +" mov lr, x9\n" +" ret x10\n" +" .size dummy_tramp, .-dummy_tramp\n" +" .popsection\n" +); + +/* build a plt initialized like this: + * + * plt: + * ldr tmp, target + * br tmp + * target: + * .quad dummy_tramp + * + * when a long jump trampoline is attached, target is filled with the + * trampoline address, and when the trampoline is removed, target is + * restored to dummy_tramp address. + */ +static void build_plt(struct jit_ctx *ctx) +{ + const u8 tmp = bpf2a64[TMP_REG_1]; + struct bpf_plt *plt = NULL; + + /* make sure target is 64-bit aligned */ + if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2) + emit(A64_NOP, ctx); + + plt = (struct bpf_plt *)(ctx->image + ctx->idx); + /* plt is called via bl, no BTI needed here */ + emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx); + emit(A64_BR(tmp), ctx); + + if (ctx->image) + plt->target = (u64)&dummy_tramp; +} + static void build_epilogue(struct jit_ctx *ctx) { const u8 r0 = bpf2a64[BPF_REG_0]; @@ -1191,7 +1269,7 @@ struct arm64_jit_data {
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { - int image_size, prog_size, extable_size; + int image_size, prog_size, extable_size, extable_align, extable_offset; struct bpf_prog *tmp, *orig_prog = prog; struct bpf_binary_header *header; struct arm64_jit_data *jit_data; @@ -1261,13 +1339,17 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.epilogue_offset = ctx.idx; build_epilogue(&ctx); + build_plt(&ctx);
+ extable_align = __alignof__(struct exception_table_entry); extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
/* Now we know the actual image size. */ prog_size = sizeof(u32) * ctx.idx; - image_size = prog_size + extable_size; + /* also allocate space for plt target */ + extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); + image_size = extable_offset + extable_size; header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); if (header == NULL) { @@ -1279,7 +1361,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
ctx.image = (__le32 *)image_ptr; if (extable_size) - prog->aux->extable = (void *)image_ptr + prog_size; + prog->aux->extable = (void *)image_ptr + extable_offset; skip_init_ctx: ctx.idx = 0; ctx.exentry_idx = 0; @@ -1293,6 +1375,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) }
build_epilogue(&ctx); + build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */ if (validate_code(&ctx)) { @@ -1363,3 +1446,218 @@ void bpf_jit_free_exec(void *addr) { return vfree(addr); } + +static bool is_long_jump(void *ip, void *target) +{ + long offset; + + /* NULL target means this is a NOP */ + if (!target) + return false; + + offset = (long)target - (long)ip; + return offset < -SZ_128M || offset >= SZ_128M; +} + +static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, + void *addr, void *plt, u32 *insn) +{ + void *target; + + if (!addr) { + *insn = aarch64_insn_gen_nop(); + return 0; + } + + if (is_long_jump(ip, addr)) + target = plt; + else + target = addr; + + *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, + (unsigned long)target, + type); + + return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; +} + +/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf + * trampoline with the branch instruction from @ip to @new_addr. If @old_addr + * or @new_addr is NULL, the old or new instruction is NOP. + * + * When @ip is the bpf prog entry, a bpf trampoline is being attached or + * detached. Since bpf trampoline and bpf prog are allocated separately with + * vmalloc, the address distance may exceed 128MB, the maximum branch range. + * So long jump should be handled. + * + * When a bpf prog is constructed, a plt pointing to empty trampoline + * dummy_tramp is placed at the end: + * + * bpf_prog: + * mov x9, lr + * nop // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * This is also the state when no trampoline is attached. + * + * When a short-jump bpf trampoline is attached, the patchsite is patched + * to a bl instruction to the trampoline directly: + * + * bpf_prog: + * mov x9, lr + * bl <short-jump bpf trampoline address> // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad dummy_tramp // plt target + * + * When a long-jump bpf trampoline is attached, the plt target is filled with + * the trampoline address and the patchsite is patched to a bl instruction to + * the plt: + * + * bpf_prog: + * mov x9, lr + * bl plt // patchsite + * ... + * ret + * + * plt: + * ldr x10, target + * br x10 + * target: + * .quad <long-jump bpf trampoline address> // plt target + * + * The dummy_tramp is used to prevent another CPU from jumping to unknown + * locations during the patching process, making the patching process easier. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + int ret; + u32 old_insn; + u32 new_insn; + u32 replaced; + struct bpf_plt *plt = NULL; + unsigned long size = 0UL; + unsigned long offset = ~0UL; + enum aarch64_insn_branch_type branch_type; + char namebuf[KSYM_NAME_LEN]; + void *image = NULL; + u64 plt_target = 0ULL; + bool poking_bpf_entry; + + if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) + /* Only poking bpf text is supported. Since kernel function + * entry is set up by ftrace, we reply on ftrace to poke kernel + * functions. 
+ */ + return -ENOTSUPP; + + image = ip - offset; + /* zero offset means we're poking bpf prog entry */ + poking_bpf_entry = (offset == 0UL); + + /* bpf prog entry, find plt and the real patchsite */ + if (poking_bpf_entry) { + /* plt locates at the end of bpf prog */ + plt = image + size - PLT_TARGET_OFFSET; + + /* skip to the nop instruction in bpf prog entry: + * bti c // if BTI enabled + * mov x9, x30 + * nop + */ + ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; + } + + /* long jump is only possible at bpf prog entry */ + if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && + !poking_bpf_entry)) + return -EINVAL; + + if (poke_type == BPF_MOD_CALL) + branch_type = AARCH64_INSN_BRANCH_LINK; + else + branch_type = AARCH64_INSN_BRANCH_NOLINK; + + if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) + return -EFAULT; + + if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) + return -EFAULT; + + if (is_long_jump(ip, new_addr)) + plt_target = (u64)new_addr; + else if (is_long_jump(ip, old_addr)) + /* if the old target is a long jump and the new target is not, + * restore the plt target to dummy_tramp, so there is always a + * legal and harmless address stored in plt target, and we'll + * never jump from plt to an unknown place. + */ + plt_target = (u64)&dummy_tramp; + + if (plt_target) { + /* non-zero plt_target indicates we're patching a bpf prog, + * which is read only. + */ + if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) + return -EFAULT; + WRITE_ONCE(plt->target, plt_target); + set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); + /* since plt target points to either the new trampoline + * or dummy_tramp, even if another CPU reads the old plt + * target value before fetching the bl instruction to plt, + * it will be brought back by dummy_tramp, so no barrier is + * required here. + */ + } + + /* if the old target and the new target are both long jumps, no + * patching is required + */ + if (old_insn == new_insn) + return 0; + + mutex_lock(&text_mutex); + if (aarch64_insn_read(ip, &replaced)) { + ret = -EFAULT; + goto out; + } + + if (replaced != old_insn) { + ret = -EFAULT; + goto out; + } + + /* We call aarch64_insn_patch_text_nosync() to replace instruction + * atomically, so no other CPUs will fetch a half-new and half-old + * instruction. But there is chance that another CPU executes the + * old instruction after the patching operation finishes (e.g., + * pipeline not flushed, or icache not synchronized yet). + * + * 1. when a new trampoline is attached, it is not a problem for + * different CPUs to jump to different trampolines temporarily. + * + * 2. when an old trampoline is freed, we should wait for all other + * CPUs to exit the trampoline and make sure the trampoline is no + * longer reachable, since bpf_tramp_image_put() function already + * uses percpu_ref and task-based rcu to do the sync, no need to call + * the sync version here, see bpf_tramp_image_put() for details. + */ + ret = aarch64_insn_patch_text_nosync(ip, new_insn); +out: + mutex_unlock(&text_mutex); + + return ret; +}
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion
from mainline-v6.0-rc1
commit efc9909fdce00a827a37609628223cd45bf95d0b
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/efc9909fdce0
--------------------------------
This is arm64 version of commit fec56f5890d9 ("bpf: Introduce BPF trampoline"). A bpf trampoline converts native calling convention to bpf calling convention and is used to implement various bpf features, such as fentry, fexit, fmod_ret and struct_ops.
This patch does essentially the same thing that bpf trampoline does on x86.
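Conceptually, the generated trampoline performs the sequence modelled by the following stand-alone C sketch (illustrative only; the fentry/fmod_ret/fexit handlers are plain C functions here, and a non-zero fmod_ret return skips the original call, matching BPF semantics):

#include <stdio.h>

static long traced_func(long a, long b)		/* the patched function */
{
	return a + b;
}

static void fentry_prog(long *args)
{
	printf("fentry: %ld %ld\n", args[0], args[1]);
}

static long fmod_ret_prog(long *args)
{
	(void)args;
	return 0;	/* non-zero would skip the original call */
}

static void fexit_prog(long *args, long ret)
{
	(void)args;
	printf("fexit: ret=%ld\n", ret);
}

/* stands in for the JITed trampoline: save args, run fentry progs,
 * run fmod_ret progs, optionally call the original, run fexit progs.
 */
static long trampoline(long a, long b)
{
	long args[2] = { a, b };
	long ret;

	fentry_prog(args);
	ret = fmod_ret_prog(args);
	if (!ret)
		ret = traced_func(args[0], args[1]);
	fexit_prog(args, ret);
	return ret;
}

int main(void)
{
	printf("%ld\n", trampoline(2, 3));
	return 0;
}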
Tested on Raspberry Pi 4B and qemu:
#18 /1     bpf_tcp_ca/dctcp:OK
#18 /2     bpf_tcp_ca/cubic:OK
#18 /3     bpf_tcp_ca/invalid_license:OK
#18 /4     bpf_tcp_ca/dctcp_fallback:OK
#18 /5     bpf_tcp_ca/rel_setsockopt:OK
#18        bpf_tcp_ca:OK
#51 /1     dummy_st_ops/dummy_st_ops_attach:OK
#51 /2     dummy_st_ops/dummy_init_ret_value:OK
#51 /3     dummy_st_ops/dummy_init_ptr_arg:OK
#51 /4     dummy_st_ops/dummy_multiple_args:OK
#51        dummy_st_ops:OK
#57 /1     fexit_bpf2bpf/target_no_callees:OK
#57 /2     fexit_bpf2bpf/target_yes_callees:OK
#57 /3     fexit_bpf2bpf/func_replace:OK
#57 /4     fexit_bpf2bpf/func_replace_verify:OK
#57 /5     fexit_bpf2bpf/func_sockmap_update:OK
#57 /6     fexit_bpf2bpf/func_replace_return_code:OK
#57 /7     fexit_bpf2bpf/func_map_prog_compatibility:OK
#57 /8     fexit_bpf2bpf/func_replace_multi:OK
#57 /9     fexit_bpf2bpf/fmod_ret_freplace:OK
#57        fexit_bpf2bpf:OK
#237       xdp_bpf2bpf:OK
Signed-off-by: Xu Kuohai xukuohai@huawei.com
Signed-off-by: Daniel Borkmann daniel@iogearbox.net
Reviewed-by: Jean-Philippe Brucker jean-philippe@linaro.org
Acked-by: Song Liu songliubraving@fb.com
Acked-by: KP Singh kpsingh@kernel.org
Link: https://lore.kernel.org/bpf/20220711150823.2128542-5-xukuohai@huawei.com
Conflicts:
	arch/arm64/net/bpf_jit_comp.c
Signed-off-by: Pu Lehui pulehui@huawei.com
---
 arch/arm64/net/bpf_jit_comp.c | 306 +++++++++++++++++++++++++++++++++-
 1 file changed, 303 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 288620308ae7..6e4205c2fced 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -162,6 +162,14 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val, } }
+static inline void emit_call(u64 target, struct jit_ctx *ctx) +{ + u8 tmp = bpf2a64[TMP_REG_1]; + + emit_addr_mov_i64(tmp, target, ctx); + emit(A64_BLR(tmp), ctx); +} + static inline int bpf2a64_offset(int bpf_insn, int off, const struct jit_ctx *ctx) { @@ -900,8 +908,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, &func_addr, &func_addr_fixed); if (ret < 0) return ret; - emit_addr_mov_i64(tmp, func_addr, ctx); - emit(A64_BLR(tmp), ctx); + emit_call(func_addr, ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; } @@ -1249,6 +1256,13 @@ static int validate_code(struct jit_ctx *ctx) if (a64_insn == AARCH64_BREAK_FAULT) return -1; } + return 0; +} + +static int validate_ctx(struct jit_ctx *ctx) +{ + if (validate_code(ctx)) + return -1;
if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) return -1; @@ -1378,7 +1392,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) build_plt(&ctx);
/* 3. Extra pass to validate JITed code. */ - if (validate_code(&ctx)) { + if (validate_ctx(&ctx)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; @@ -1447,6 +1461,292 @@ void bpf_jit_free_exec(void *addr) return vfree(addr); }
+static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_prog *p, + int args_off, int retval_off, bool save_ret) +{ + if (p->aux->sleepable) { + emit_call((const u64)__bpf_prog_enter_sleepable, ctx); + } else { + emit_call((const u64)__bpf_prog_enter, ctx); + /* save return value to callee saved register x19 */ + emit(A64_MOV(1, A64_R(19), A64_R(0)), ctx); + } + + emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); + if (!p->jited) + emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); + + emit_call((const u64)p->bpf_func, ctx); + + if (save_ret) + emit(A64_STR64I(bpf2a64[BPF_REG_0], A64_SP, retval_off), ctx); + + if (p->aux->sleepable) { + emit_call((const u64)__bpf_prog_exit_sleepable, ctx); + } else { + /* arg1: prog */ + emit_addr_mov_i64(A64_R(0), (const u64)p, ctx); + /* arg2: start time */ + emit(A64_MOV(1, A64_R(1), A64_R(19)), ctx); + + emit_call((const u64)__bpf_prog_exit, ctx); + } +} + +static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_progs *tp, + int args_off, int retval_off, u32 **branches) +{ + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); + for (i = 0; i < tp->nr_progs; i++) { + invoke_bpf_prog(ctx, tp->progs[i], args_off, retval_off, true); + /* if (*(u64 *)(sp + retval_off) != 0) + * goto do_fexit; + */ + emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); + /* Save the location of branch, and generate a nop. + * This nop will be replaced with a cbnz later. + */ + branches[i] = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } +} + +static void save_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_STR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +static void restore_args(struct jit_ctx *ctx, int args_off, int nargs) +{ + int i; + + for (i = 0; i < nargs; i++) { + emit(A64_LDR64I(i, A64_SP, args_off), ctx); + args_off += 8; + } +} + +/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). + * + * bpf prog and function entry before bpf trampoline hooked: + * mov x9, lr + * nop + * + * bpf prog and function entry after bpf trampoline hooked: + * mov x9, lr + * bl <bpf_trampoline or plt> + * + */ +static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, + struct bpf_tramp_progs *tprogs, void *orig_call, + int nargs, u32 flags) +{ + int i; + int stack_size; + int retaddr_off; + int regs_off; + int retval_off; + int args_off; + struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; + struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + bool save_ret; + u32 **branches = NULL; + + /* trampoline stack layout: + * [ parent ip ] + * [ FP ] + * SP + retaddr_off [ self ip ] + * [ FP ] + * + * [ padding ] align SP to multiples of 16 + * + * SP + regs_off [ x19 ] callee saved reg x19 + * + * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or + * BPF_TRAMP_F_RET_FENTRY_RET + * + * [ argN ] + * [ ... 
] + * SP + args_off [ arg1 ] + */ + + stack_size = 0; + + args_off = stack_size; + /* room for args */ + stack_size += nargs * 8; + + /* room for return value */ + retval_off = stack_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + stack_size += 8; + + /* room for callee saved registers, currently only x19 is used */ + regs_off = stack_size; + stack_size += 8; + + /* round up to multiples of 16 to avoid SPAlignmentFault */ + stack_size = round_up(stack_size, 16); + + /* return address locates above FP */ + retaddr_off = stack_size + 8; + + /* bpf trampoline may be invoked by 3 instruction types: + * 1. bl, attached to bpf prog or kernel function via short jump + * 2. br, attached to bpf prog or kernel function via long jump + * 3. blr, working as a function pointer, used by struct_ops. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx); + + /* frame for parent function */ + emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* frame for patched function */ + emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_MOV(1, A64_FP, A64_SP), ctx); + + /* allocate stack space */ + emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); + + /* save args */ + save_args(ctx, args_off, nargs); + + /* save callee saved registers */ + emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_enter, ctx); + } + + for (i = 0; i < fentry->nr_progs; i++) + invoke_bpf_prog(ctx, fentry->progs[i], args_off, + retval_off, flags & BPF_TRAMP_F_RET_FENTRY_RET); + + if (fmod_ret->nr_progs) { + branches = kcalloc(fmod_ret->nr_progs, sizeof(u32 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, branches); + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + restore_args(ctx, args_off, nargs); + /* call original func */ + emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); + emit(A64_BLR(A64_R(10)), ctx); + /* store return value */ + emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); + /* reserve a nop for bpf_tramp_image_put */ + im->ip_after_call = ctx->image + ctx->idx; + emit(A64_NOP, ctx); + } + + /* update the branches saved in invoke_bpf_mod_ret with cbnz */ + for (i = 0; i < fmod_ret->nr_progs && ctx->image != NULL; i++) { + int offset = &ctx->image[ctx->idx] - branches[i]; + *branches[i] = A64_CBNZ(1, A64_R(10), offset); + } + + for (i = 0; i < fexit->nr_progs; i++) + invoke_bpf_prog(ctx, fexit->progs[i], args_off, retval_off, false); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = ctx->image + ctx->idx; + emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); + emit_call((const u64)__bpf_tramp_exit, ctx); + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + restore_args(ctx, args_off, nargs); + + /* restore callee saved register x19 */ + emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); + + if (save_ret) + emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); + + /* reset SP */ + emit(A64_MOV(1, A64_SP, A64_FP), ctx); + + /* pop frames */ + emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); + emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); + + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* skip patched function, return to parent */ + emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(9)), ctx); + } else { + /* return to patched function */ + emit(A64_MOV(1, A64_R(10), A64_LR), ctx); + 
emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); + emit(A64_RET(A64_R(10)), ctx); + } + + if (ctx->image) + bpf_flush_icache(ctx->image, ctx->image + ctx->idx); + + kfree(branches); + + return ctx->idx; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, + void *image_end, const struct btf_func_model *m, + u32 flags, struct bpf_tramp_progs *tprogs, + void *orig_call) +{ + int ret; + int nargs = m->nr_args; + int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; + struct jit_ctx ctx = { + .image = NULL, + .idx = 0, + }; + + /* the first 8 arguments are passed by registers */ + if (nargs > 8) + return -ENOTSUPP; + + ret = prepare_trampoline(&ctx, im, tprogs, orig_call, nargs, flags); + if (ret < 0) + return ret; + + if (ret > max_insns) + return -EFBIG; + + ctx.image = image; + ctx.idx = 0; + + jit_fill_hole(image, (unsigned int)(image_end - image)); + ret = prepare_trampoline(&ctx, im, tprogs, orig_call, nargs, flags); + + if (ret > 0 && validate_code(&ctx) < 0) + ret = -EINVAL; + + if (ret > 0) + ret *= AARCH64_INSN_SIZE; + + return ret; +} + static bool is_long_jump(void *ip, void *target) { long offset;
From: Nathan Chancellor nathan@kernel.org
mainline inclusion from mainline-v6.0-rc1 commit 33f32e5072b6cc84d1b130a3ad485849bcec907a category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/33f32e5072b6
--------------------------------
When building with clang + CONFIG_CFI_CLANG=y, the following error occurs at link time:
ld.lld: error: undefined symbol: dummy_tramp
dummy_tramp is declared globally in C but its definition in inline assembly does not use .global, which prevents clang from properly resolving the references to it when creating the CFI jump tables.
Mark dummy_tramp as global so that the reference can be properly resolved.
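A minimal sketch of the pattern with a hypothetical symbol name, assuming a GNU-style toolchain: a symbol defined only in inline assembly needs a .global directive for external references, such as CFI jump-table entries, to resolve at link time.

        void my_tramp(void);    /* C declaration; the definition lives in inline asm */

        asm (
        "       .pushsection .text, \"ax\", @progbits\n"
        "       .global my_tramp\n"     /* without this the symbol stays file-local */
        "       .type my_tramp, %function\n"
        "my_tramp:\n"
        "       ret\n"
        "       .size my_tramp, .-my_tramp\n"
        "       .popsection\n"
        );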
Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") Suggested-by: Sami Tolvanen samitolvanen@google.com Signed-off-by: Nathan Chancellor nathan@kernel.org Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: Stanislav Fomichev sdf@google.com Link: https://github.com/ClangBuiltLinux/linux/issues/1661 Link: https://lore.kernel.org/bpf/20220713173503.3889486-1-nathan@kernel.org Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/net/bpf_jit_comp.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 6e4205c2fced..43c5a657f521 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -420,6 +420,7 @@ void dummy_tramp(void);
asm ( " .pushsection .text, "ax", @progbits\n" +" .global dummy_tramp\n" " .type dummy_tramp, %function\n" "dummy_tramp:" #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion from mainline-v6.0-rc1 commit 339ed900b3079b6ea21df55f368b1ccba17cd5e6 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/339ed900b307
--------------------------------
dummy_tramp() uses "lr" to refer to the x30 register, but some assemblers do not recognize "lr" and report a build failure:
/tmp/cc52xO0c.s: Assembler messages:
/tmp/cc52xO0c.s:8: Error: operand 1 should be an integer register -- `mov lr,x9'
/tmp/cc52xO0c.s:7: Error: undefined symbol lr used as an immediate value
make[2]: *** [scripts/Makefile.build:250: arch/arm64/net/bpf_jit_comp.o] Error 1
make[1]: *** [scripts/Makefile.build:525: arch/arm64/net] Error 2
So replace "lr" with "x30" to fix it.
Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") Reported-by: Jon Hunter jonathanh@nvidia.com Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Tested-by: Jon Hunter jonathanh@nvidia.com Reviewed-by: Jean-Philippe Brucker jean-philippe@linaro.org Link: https://lore.kernel.org/bpf/20220721121319.2999259-1-xukuohai@huaweicloud.co... Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 43c5a657f521..a5241345ef42 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -426,8 +426,8 @@ asm ( #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) " bti j\n" /* dummy_tramp is called via "br x10" */ #endif -" mov x10, lr\n" -" mov lr, x9\n" +" mov x10, x30\n" +" mov x30, x9\n" " ret x10\n" " .size dummy_tramp, .-dummy_tramp\n" " .popsection\n"
From: Xu Kuohai xukuohai@huawei.com
mainline inclusion from mainline-v6.0-rc1 commit aada476655461a9ab491d8298a415430cdd10278 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/aada47665546
--------------------------------
The sparse tool complains as follows:
arch/arm64/net/bpf_jit_comp.c:1684:16: warning: incorrect type in assignment (different base types)
arch/arm64/net/bpf_jit_comp.c:1684:16:    expected unsigned int [usertype] *branch
arch/arm64/net/bpf_jit_comp.c:1684:16:    got restricted __le32 [usertype] *
arch/arm64/net/bpf_jit_comp.c:1700:52: error: subtraction of different types can't work (different base types)
arch/arm64/net/bpf_jit_comp.c:1734:29: warning: incorrect type in assignment (different base types)
arch/arm64/net/bpf_jit_comp.c:1734:29:    expected unsigned int [usertype] *
arch/arm64/net/bpf_jit_comp.c:1734:29:    got restricted __le32 [usertype] *
arch/arm64/net/bpf_jit_comp.c:1918:52: error: subtraction of different types can't work (different base types)
This is because the variable branch in function invoke_bpf_prog and the variable branches in function prepare_trampoline are defined as type u32 *, which conflicts with ctx->image's type __le32 *, so sparse complains when assignments or arithmetic operations are performed on these two variables and ctx->image.
Since arm64 instructions are always little-endian, change the type of these two variables to __le32 * and call cpu_to_le32() to convert each instruction to little-endian before writing it to memory. This is in line with emit(), which does cpu_to_le32() internally.
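A minimal sketch of the resulting pattern (hypothetical helper names, kernel-style types): pointers into the JIT image carry the __le32 type, stores go through cpu_to_le32(), and pointer arithmetic between two __le32 pointers then type-checks cleanly under sparse.

        #include <linux/types.h>
        #include <asm/byteorder.h>

        static void write_insn(__le32 *slot, u32 insn)
        {
                *slot = cpu_to_le32(insn);      /* instructions are always little-endian */
        }

        static long insn_distance(__le32 *a, __le32 *b)
        {
                return a - b;                   /* same base type: no sparse warning */
        }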
Fixes: efc9909fdce0 ("bpf, arm64: Add bpf trampoline for arm64") Reported-by: kernel test robot lkp@intel.com Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: Jean-Philippe Brucker jean-philippe@linaro.org Link: https://lore.kernel.org/bpf/20220808040735.1232002-1-xukuohai@huawei.com Conflicts: arch/arm64/net/bpf_jit_comp.c Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/net/bpf_jit_comp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a5241345ef42..04f1cbd63f5b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1562,7 +1562,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; bool save_ret; - u32 **branches = NULL; + __le32 **branches = NULL;
/* trampoline stack layout: * [ parent ip ] @@ -1639,7 +1639,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, retval_off, flags & BPF_TRAMP_F_RET_FENTRY_RET);
if (fmod_ret->nr_progs) { - branches = kcalloc(fmod_ret->nr_progs, sizeof(u32 *), + branches = kcalloc(fmod_ret->nr_progs, sizeof(__le32 *), GFP_KERNEL); if (!branches) return -ENOMEM; @@ -1662,7 +1662,7 @@ static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, /* update the branches saved in invoke_bpf_mod_ret with cbnz */ for (i = 0; i < fmod_ret->nr_progs && ctx->image != NULL; i++) { int offset = &ctx->image[ctx->idx] - branches[i]; - *branches[i] = A64_CBNZ(1, A64_R(10), offset); + *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); }
for (i = 0; i < fexit->nr_progs; i++)
From: Alexander Duyck alexanderduyck@fb.com
mainline inclusion from mainline-v6.5-rc3 commit a3f25d614bc73b45e8f02adc6769876dfd16ca84 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
Reference: https://github.com/torvalds/linux/commit/a3f25d614bc7
--------------------------------
When running an freplace-attached bpf program on an arm64 system, we were seeing the following issue:

Unhandled 64-bit el1h sync exception on CPU47, ESR 0x0000000036000003 -- BTI
After a bit of work to track it down, I determined that the 'bti c' at the start of the program was somehow being reached after a 'br' instruction. Further digging pointed to the fact that the function was attached via freplace, which in turn led me to build_plt(), whose long jump I believe is what triggers this error.
To resolve it, we can replace the 'bti c' with 'bti jc' and add a comment explaining why this modification is needed.
Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64") Signed-off-by: Alexander Duyck alexanderduyck@fb.com Acked-by: Xu Kuohai xukuohai@huawei.com Link: https://lore.kernel.org/r/168926677665.316237.9953845318337455525.stgit@ahdu... Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/net/bpf_jit_comp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 04f1cbd63f5b..ef3587722748 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -308,7 +308,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */
- emit_bti(A64_BTI_C, ctx); + /* bpf function may be invoked by 3 instruction types: + * 1. bl, attached via freplace to bpf prog via short jump + * 2. br, attached via freplace to bpf prog via long jump + * 3. blr, working as a function pointer, used by emit_call. + * So BTI_JC should used here to support both br and blr. + */ + emit_bti(A64_BTI_JC, ctx);
emit(A64_MOV(1, A64_R(9), A64_LR), ctx); emit(A64_NOP, ctx);
From: Xu Kuohai xukuohai@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
--------------------------------
To support ftrace direct calls on arm64, multiple NOP instructions need to be added to the ftrace fentry, which makes the kernel image larger. Users who don't need direct calls should not have to pay this unnecessary price, so allow them to disable this option.
Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/trace/Kconfig | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index 9682ceb1f3df..40512793e8e9 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -201,9 +201,14 @@ config DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_REGS
config DYNAMIC_FTRACE_WITH_DIRECT_CALLS - def_bool y + bool "Support for calling custom trampoline from fentry directly" + default y depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + help + This option enables calling custom trampoline from ftrace fentry + directly, instead of using ftrace regs caller. This may reserve more + space in the fentry, making the kernel image larger.
config FUNCTION_PROFILER bool "Kernel function profiler"
From: Xu Kuohai xukuohai@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
--------------------------------
Add long jump support to fentry, so dynamically allocated trampolines like bpf trampoline can be called from fentry directly, as these trampoline addresses may be out of the range that a single bl instruction can jump to.
The scheme used here is basically the same as commit b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64").
1. At compile time, we use -fpatchable-function-entry=7,5 to insert 5 NOPs before function entry and 2 NOPs after function entry:
        NOP
        NOP
        NOP
        NOP
        NOP
func:
        BTI C           // if BTI
        NOP
        NOP
The reason for inserting 5 NOPs before the function entry is that 2 NOPs are patched to LDR and BR instructions, 2 NOPs are used to store the destination jump address, and 1 NOP is used to adjust alignment to ensure the destination jump address is stored in 8-byte aligned memory, which is required by atomic store and load.
2. When there is no trampoline attached, the callsite is patched to:
        NOP                             // extra NOP if func is 8-byte aligned
literal:
        .quad ftrace_dummy_tramp
        NOP                             // extra NOP if func is NOT 8-byte aligned
literal_call:
        LDR X16, literal
        BR X16
func:
        BTI C                           // if BTI
        MOV X9, LR
        NOP
3. When long jump trampoline is attached, the callsite is patched to:
        NOP                             // extra NOP if func is 8-byte aligned
literal:
        .quad <long-jump-trampoline>
        NOP                             // extra NOP if func is NOT 8-byte aligned
literal_call:
        LDR X16, literal
        BR X16
func:
        BTI C                           // if BTI
        MOV X9, LR
        BL literal_call
4. When short jump trampoline is attached, the callsite is patched to:
        NOP                             // extra NOP if func is 8-byte aligned
literal:
        .quad ftrace_dummy_tramp
        NOP                             // extra NOP if func is NOT 8-byte aligned
literal_call:
        LDR X16, literal
        BR X16
func:
        BTI C                           // if BTI
        MOV X9, LR
        BL <short-jump-trampoline>
Note that there is always a valid jump address in the literal, either the custom trampoline address or the dummy trampoline address, so we never jump from the callsite to an unknown place.
Also note that only the patching of the callsite itself is guaranteed to be atomic and safe. Whether the custom trampoline can be freed must be checked by the trampoline user; for example, bpf uses a refcount and task-based RCU to ensure its trampolines can be freed safely. The address arithmetic implied by the layouts above is sketched below.
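A minimal sketch of that address arithmetic, assuming func_entry is the real function entry after the pre-function NOPs (in the patch this logic lives in ftrace_literal_call_addr() and ftrace_literal_addr()):

        #define INSN_SIZE 4     /* AARCH64_INSN_SIZE */

        /* literal_call: the LDR X16 / BR X16 pair sits right before the entry */
        static unsigned long literal_call_addr(unsigned long func_entry)
        {
                return func_entry - 2 * INSN_SIZE;
        }

        /* literal: the 64-bit target, kept 8-byte aligned by the padding NOP */
        static unsigned long literal_addr(unsigned long func_entry)
        {
                unsigned long addr = literal_call_addr(func_entry);

                if (addr % sizeof(long))                /* func not 8-byte aligned */
                        return addr - 3 * INSN_SIZE;    /* skip the padding NOP too */
                return addr - 2 * INSN_SIZE;
        }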
In my environment, before this patch, 2 NOPs are inserted at each function entry and the generated vmlinux size is 463,649,280 bytes; after this patch, the vmlinux size is 465,069,368 bytes, an increase of 1,420,088 bytes, about 0.3%. In vmlinux there are 14,376 8-byte-aligned functions and 41,847 unaligned functions. For each aligned function, one of the five NOPs before the function entry is unnecessary, wasting 57,504 bytes.
Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/Makefile | 4 + arch/arm64/include/asm/ftrace.h | 27 ++--- arch/arm64/include/asm/insn.h | 2 + arch/arm64/kernel/entry-ftrace.S | 21 +++- arch/arm64/kernel/ftrace.c | 198 +++++++++++++++++++++++++++++-- arch/arm64/kernel/insn.c | 14 +++ arch/arm64/net/bpf_jit_comp.c | 4 + include/linux/ftrace.h | 1 + kernel/trace/ftrace.c | 9 +- 9 files changed, 252 insertions(+), 28 deletions(-)
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4a42de35a898..e6b71f720419 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -128,7 +128,11 @@ CHECKFLAGS += -D__aarch64__
ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS),y) + CC_FLAGS_FTRACE := -fpatchable-function-entry=7,5 + else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 + endif endif
# Default value diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 0b4d5a6990aa..70e08e049998 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -45,27 +45,16 @@ extern void _mcount(unsigned long); extern void *return_address(unsigned int);
struct dyn_arch_ftrace { - /* No extra data needed for arm64 */ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + unsigned long func; /* start address of function */ +#endif };
extern unsigned long ftrace_graph_call;
extern void return_to_handler(void);
-static inline unsigned long ftrace_call_adjust(unsigned long addr) -{ - /* - * Adjust addr to point at the BL in the callsite. - * See ftrace_init_nop() for the callsite sequence. - */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - return addr + AARCH64_INSN_SIZE; - /* - * addr is the address of the mcount call instruction. - * recordmcount does the necessary offset calculation. - */ - return addr; -} +unsigned long ftrace_call_adjust(unsigned long addr);
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS struct dyn_ftrace; @@ -103,6 +92,14 @@ static inline bool arch_syscall_match_sym_name(const char *sym, */ return !strcmp(sym + 8, name); } + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +#define ftrace_dummy_tramp ftrace_dummy_tramp +extern void ftrace_dummy_tramp(void); + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #endif /* ifndef __ASSEMBLY__ */
#endif /* __ASM_FTRACE_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 1dec70e8fa4c..f837a55e7abd 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -503,6 +503,8 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset); int aarch64_insn_patch_text_nosync(void *addr, u32 insn); int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
+void aarch64_literal64_write(void *addr, u64 data); + s32 aarch64_insn_adrp_get_offset(u32 insn); u32 aarch64_insn_adrp_set_offset(u32 insn, s32 offset);
diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 67f68c9ef94c..0aca838614a2 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -14,14 +14,16 @@
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /* - * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before - * the regular function prologue. For an enabled callsite, ftrace_init_nop() and - * ftrace_make_call() have patched those NOPs to: + * Due to -fpatchable-function-entry=2 or -fpatchable-function-entry=7,5, the + * compiler has placed two NOPs before the regular function prologue. For an + * enabled callsite, ftrace_init_nop() and ftrace_make_call() have patched those + * NOPs to: * * MOV X9, LR * BL <entry> * - * ... where <entry> is either ftrace_caller or ftrace_regs_caller. + * ... where <entry> is ftrace_caller or ftrace_regs_caller or custom + * trampoline. * * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to @@ -349,3 +351,14 @@ SYM_CODE_START(return_to_handler) ret SYM_CODE_END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +SYM_FUNC_START(ftrace_dummy_tramp) +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) + bti j /* ftrace_dummy_tramp is called via "br x10" */ +#endif + mov x10, x30 + mov x30, x9 + ret x10 +SYM_FUNC_END(ftrace_dummy_tramp) +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 402a24f845b9..bccb938d52f7 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -76,6 +76,123 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) return NULL; }
+enum ftrace_callsite_action { + FC_INIT, + FC_REMOVE_CALL, + FC_ADD_CALL, + FC_REPLACE_CALL, +}; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + +/* + * When func is 8-byte aligned, literal_call is located at func - 8 and literal + * is located at func - 16: + * + * NOP + * literal: + * .quad ftrace_dummy_tramp + * literal_call: + * LDR X16, literal + * BR X16 + * func: + * BTI C // if BTI + * MOV X9, LR + * NOP + * + * When func is not 8-byte aligned, literal_call is located at func - 8 and + * literal is located at func - 20: + * + * literal: + * .quad ftrace_dummy_tramp + * NOP + * literal_call: + * LDR X16, literal + * BR X16 + * func: + * BTI C // if BTI + * MOV X9, LR + * NOP + */ + +static unsigned long ftrace_literal_call_addr(struct dyn_ftrace *rec) +{ + return rec->arch.func - 2 * AARCH64_INSN_SIZE; +} + +static unsigned long ftrace_literal_addr(struct dyn_ftrace *rec) +{ + unsigned long addr = 0; + + addr = ftrace_literal_call_addr(rec); + if (addr % sizeof(long)) + addr -= 3 * AARCH64_INSN_SIZE; + else + addr -= 2 * AARCH64_INSN_SIZE; + + return addr; +} + +static void ftrace_update_literal(unsigned long literal_addr, unsigned long call_target, + int action) +{ + unsigned long dummy_tramp = (unsigned long)&ftrace_dummy_tramp; + + if (action == FC_INIT || action == FC_REMOVE_CALL) + aarch64_literal64_write((void *)literal_addr, dummy_tramp); + else if (action == FC_ADD_CALL) + aarch64_literal64_write((void *)literal_addr, call_target); +} + +static int ftrace_init_literal(struct module *mod, struct dyn_ftrace *rec) +{ + int ret; + u32 old, new; + unsigned long addr; + unsigned long pc = rec->ip - AARCH64_INSN_SIZE; + + old = aarch64_insn_gen_nop(); + + addr = ftrace_literal_addr(rec); + ftrace_update_literal(addr, 0, FC_INIT); + + pc = ftrace_literal_call_addr(rec); + new = aarch64_insn_gen_load_literal(pc, addr, AARCH64_INSN_REG_16, + true); + ret = ftrace_modify_code(pc, old, new, true); + if (ret) + return ret; + + pc += AARCH64_INSN_SIZE; + new = aarch64_insn_gen_branch_reg(AARCH64_INSN_REG_16, + AARCH64_INSN_BRANCH_NOLINK); + return ftrace_modify_code(pc, old, new, true); +} + +#else + +static inline unsigned long ftrace_literal_addr(struct dyn_ftrace *rec) +{ + return 0; +} + +static inline unsigned long ftrace_literal_call_addr(struct dyn_ftrace *rec) +{ + return 0; +} + +static inline void ftrace_update_literal(unsigned long literal_addr, unsigned long call_target, + int action) +{ +} + +static inline int ftrace_init_literal(struct module *mod, struct dyn_ftrace *rec) +{ + return 0; +} + +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + /* * Find the address the callsite must branch to in order to reach '*addr'. * @@ -87,7 +204,8 @@ static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) */ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, struct module *mod, - unsigned long *addr) + unsigned long *addr, + int action) { unsigned long pc = rec->ip; long offset = (long)*addr - (long)pc; @@ -100,6 +218,15 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, if (offset >= -SZ_128M && offset < SZ_128M) return true;
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + unsigned long literal_addr; + + literal_addr = ftrace_literal_addr(rec); + ftrace_update_literal(literal_addr, *addr, action); + *addr = ftrace_literal_call_addr(rec); + return true; + } + /* * When the target is outside of the range of a 'BL' instruction, we * must use a PLT to reach it. We can only place PLTs for modules, and @@ -144,7 +271,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) unsigned long pc = rec->ip; u32 old, new;
- if (!ftrace_find_callable_addr(rec, NULL, &addr)) + if (!ftrace_find_callable_addr(rec, NULL, &addr, FC_ADD_CALL)) return -EINVAL;
old = aarch64_insn_gen_nop(); @@ -160,9 +287,9 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long pc = rec->ip; u32 old, new;
- if (!ftrace_find_callable_addr(rec, NULL, &old_addr)) + if (!ftrace_find_callable_addr(rec, NULL, &old_addr, FC_REPLACE_CALL)) return -EINVAL; - if (!ftrace_find_callable_addr(rec, NULL, &addr)) + if (!ftrace_find_callable_addr(rec, NULL, &addr, FC_ADD_CALL)) return -EINVAL;
old = aarch64_insn_gen_branch_imm(pc, old_addr, @@ -187,9 +314,10 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, * | NOP | MOV X9, LR | MOV X9, LR | * | NOP | NOP | BL <entry> | * - * The LR value will be recovered by ftrace_regs_entry, and restored into LR - * before returning to the regular function prologue. When a function is not - * being traced, the MOV is not harmful given x9 is not live per the AAPCS. + * The LR value will be recovered by ftrace_regs_entry or custom trampoline, + * and restored into LR before returning to the regular function prologue. + * When a function is not being traced, the MOV is not harmful given x9 is + * not live per the AAPCS. * * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of * the BL. @@ -199,6 +327,14 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) unsigned long pc = rec->ip - AARCH64_INSN_SIZE; u32 old, new;
+ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + int ret; + + ret = ftrace_init_literal(mod, rec); + if (ret) + return ret; + } + old = aarch64_insn_gen_nop(); new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9, AARCH64_INSN_REG_LR, @@ -207,6 +343,45 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) } #endif
+unsigned long ftrace_call_adjust(unsigned long addr) +{ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + u32 insn; + u32 nop = aarch64_insn_gen_nop(); + + /* Skip the first 5 NOPS */ + addr += 5 * AARCH64_INSN_SIZE; + + if (aarch64_insn_read((void *)addr, &insn)) + return 0; + + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { + if (insn != nop) { + addr += AARCH64_INSN_SIZE; + if (aarch64_insn_read((void *)addr, &insn)) + return 0; + } + } + + if (WARN_ON_ONCE(insn != nop)) + return 0; + + return addr + AARCH64_INSN_SIZE; + } else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) { + /* + * Adjust addr to point at the BL in the callsite. + * See ftrace_init_nop() for the callsite sequence. + */ + return addr + AARCH64_INSN_SIZE; + } + + /* + * addr is the address of the mcount call instruction. + * recordmcount does the necessary offset calculation. + */ + return addr; +} + /* * Turn off the call to ftrace_caller() in instrumented function */ @@ -232,7 +407,7 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return aarch64_insn_patch_text_nosync((void *)pc, new); }
- if (!ftrace_find_callable_addr(rec, mod, &addr)) + if (!ftrace_find_callable_addr(rec, mod, &addr, FC_REMOVE_CALL)) return -EINVAL;
old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); @@ -246,6 +421,13 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); }
+void ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long func) +{ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + rec->arch.func = func + 5 * AARCH64_INSN_SIZE; +#endif +} + int __init ftrace_dyn_arch_init(void) { return 0; diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index e56f409c179b..fbd2b7eec1dc 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -153,6 +153,20 @@ static int __kprobes __aarch64_insn_write(void *addr, __le32 insn) return ret; }
+void __kprobes aarch64_literal64_write(void *addr, u64 data) +{ + u64 *waddr; + unsigned long flags = 0; + + raw_spin_lock_irqsave(&patch_lock, flags); + waddr = patch_map(addr, FIX_TEXT_POKE0); + + WRITE_ONCE(*waddr, data); + + patch_unmap(FIX_TEXT_POKE0); + raw_spin_unlock_irqrestore(&patch_lock, flags); +} + int __kprobes aarch64_insn_write(void *addr, u32 insn) { return __aarch64_insn_write(addr, cpu_to_le32(insn)); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ef3587722748..4b65f3dc2606 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -422,6 +422,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) #undef jmp_offset }
+#ifdef ftrace_dummy_tramp +#define dummy_tramp ftrace_dummy_tramp +#else void dummy_tramp(void);
asm ( @@ -438,6 +441,7 @@ asm ( " .size dummy_tramp, .-dummy_tramp\n" " .popsection\n" ); +#endif
/* build a plt initialized like this: * diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b9dd113599eb..d979242f44fe 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -461,6 +461,7 @@ struct dyn_ftrace { };
int ftrace_force_update(void); +void ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long addr); int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip, int remove, int reset); int ftrace_set_filter(struct ftrace_ops *ops, unsigned char *buf, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 837c24dcbafd..ba2cce21bb83 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -6225,6 +6225,10 @@ static int ftrace_cmp_ips(const void *a, const void *b) return 0; }
+void __weak ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long addr) +{ +} + static int ftrace_process_locs(struct module *mod, unsigned long *start, unsigned long *end) @@ -6280,7 +6284,9 @@ static int ftrace_process_locs(struct module *mod, pg = start_pg; while (p < end) { unsigned long end_offset; - addr = ftrace_call_adjust(*p++); + unsigned long nop_addr = *p++; + + addr = ftrace_call_adjust(nop_addr); /* * Some architecture linkers will pad between * the different mcount_loc sections of different @@ -6302,6 +6308,7 @@ static int ftrace_process_locs(struct module *mod,
rec = &pg->records[pg->index++]; rec->ip = addr; + ftrace_rec_arch_init(rec, nop_addr); }
if (pg->next) {
From: Xu Kuohai xukuohai@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
--------------------------------
Add ftrace direct support for arm64.
1. When there is only a custom trampoline, patch the fentry callsite to call the custom trampoline directly.
2. When the ftrace caller and a custom trampoline coexist, jump from fentry to the ftrace caller first, then jump to the custom trampoline when the ftrace caller exits. As pt_regs->orig_x0 is currently unused by ftrace, its slot is reused as an intermediary for jumping from the ftrace caller to the custom trampoline.
In short, this patch does the same thing as the x86 commit 562955fe6a55 ("ftrace/x86: Add register_ftrace_direct() for custom trampolines").
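A minimal usage sketch, modelled on samples/ftrace/ftrace-direct.c; my_tramp is an assumed assembly trampoline following the mov x9, x30 / ret convention described in the previous patch, and is not part of this patch:

        #include <linux/module.h>
        #include <linux/ftrace.h>
        #include <linux/sched.h>

        extern void my_tramp(void);     /* hypothetical trampoline written in assembly */

        static int __init direct_example_init(void)
        {
                /* attach the custom trampoline to wake_up_process()'s fentry site */
                return register_ftrace_direct((unsigned long)wake_up_process,
                                              (unsigned long)my_tramp);
        }

        static void __exit direct_example_exit(void)
        {
                unregister_ftrace_direct((unsigned long)wake_up_process,
                                         (unsigned long)my_tramp);
        }

        module_init(direct_example_init);
        module_exit(direct_example_exit);
        MODULE_LICENSE("GPL");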
Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/ftrace.h | 12 ++++++++++++ arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kernel/entry-ftrace.S | 18 +++++++++++++++--- 4 files changed, 30 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a1eab2b7d5b3..2ad130a4f107 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -170,6 +170,8 @@ config ARM64 select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS \ if $(cc-option,-fpatchable-function-entry=2) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS \ + if DYNAMIC_FTRACE_WITH_REGS select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index 70e08e049998..b77a2bb37b5e 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -56,6 +56,18 @@ extern void return_to_handler(void);
unsigned long ftrace_call_adjust(unsigned long addr);
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, + unsigned long addr) +{ + /* + * Place custom trampoline address in regs->custom_tramp to let ftrace + * trampoline jump to it. + */ + regs->orig_x0 = addr; +} +#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS struct dyn_ftrace; int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5f59e24c95d3..c247e11130db 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -73,6 +73,7 @@ int main(void) DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1)); DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save)); DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe)); + DEFINE(S_ORIG_X0, offsetof(struct pt_regs, orig_x0)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); #ifdef CONFIG_AARCH32_EL0 diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 0aca838614a2..766468570ee6 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -62,6 +62,9 @@ str x29, [sp, #S_FP] .endif
+ /* Set custom_tramp to zero */ + str xzr, [sp, #S_ORIG_X0] + /* Save the callsite's SP and LR */ add x10, sp, #(S_FRAME_SIZE + 16) stp x9, x10, [sp, #S_LR] @@ -125,12 +128,21 @@ ftrace_common_return: /* Restore the callsite's FP, LR, PC */ ldr x29, [sp, #S_FP] ldr x30, [sp, #S_LR] - ldr x9, [sp, #S_PC] - + ldr x10, [sp, #S_PC] + + ldr x11, [sp, #S_ORIG_X0] + cbz x11, 1f + /* Set x9 to parent ip before jump to custom trampoline */ + mov x9, x30 + /* Set lr to self ip */ + ldr x30, [sp, #S_PC] + /* Set x10 (used for return address) to custom trampoline */ + mov x10, x11 +1: /* Restore the callsite's SP */ add sp, sp, #S_FRAME_SIZE + 16
- ret x9 + ret x10 SYM_CODE_END(ftrace_common)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
From: Xu Kuohai xukuohai@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9FGRE
--------------------------------
For kernel modules compiled without DIRECT_CALL support, there is no literal space reserved at the function entry; this patch adds support for these functions.
Signed-off-by: Xu Kuohai xukuohai@huawei.com Signed-off-by: Pu Lehui pulehui@huawei.com --- arch/arm64/kernel/ftrace.c | 74 +++++++++++++++++++++++++++++++------- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 8 +++++ 3 files changed, 72 insertions(+), 12 deletions(-)
diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index bccb938d52f7..c7d4764a553f 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -117,6 +117,9 @@ enum ftrace_callsite_action {
static unsigned long ftrace_literal_call_addr(struct dyn_ftrace *rec) { + if (rec->arch.func == 0UL) + return 0UL; + return rec->arch.func - 2 * AARCH64_INSN_SIZE; }
@@ -125,6 +128,9 @@ static unsigned long ftrace_literal_addr(struct dyn_ftrace *rec) unsigned long addr = 0;
addr = ftrace_literal_call_addr(rec); + if (addr == 0UL) + return 0UL; + if (addr % sizeof(long)) addr -= 3 * AARCH64_INSN_SIZE; else @@ -154,6 +160,9 @@ static int ftrace_init_literal(struct module *mod, struct dyn_ftrace *rec) old = aarch64_insn_gen_nop();
addr = ftrace_literal_addr(rec); + if (addr == 0UL) + return 0UL; + ftrace_update_literal(addr, 0, FC_INIT);
pc = ftrace_literal_call_addr(rec); @@ -222,9 +231,11 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, unsigned long literal_addr;
literal_addr = ftrace_literal_addr(rec); - ftrace_update_literal(literal_addr, *addr, action); - *addr = ftrace_literal_call_addr(rec); - return true; + if (literal_addr != 0UL) { + ftrace_update_literal(literal_addr, *addr, action); + *addr = ftrace_literal_call_addr(rec); + return true; + } }
/* @@ -343,27 +354,51 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) } #endif
+static int ftrace_nop_count(unsigned long addr) +{ + u32 insn; + u32 nop = aarch64_insn_gen_nop(); + int count = 0; + + for (;;) { + if (aarch64_insn_read((void *)addr, &insn)) + return -1; + + if (insn != nop) + break; + + count++; + addr += AARCH64_INSN_SIZE; + } + + return count; +} + unsigned long ftrace_call_adjust(unsigned long addr) { if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { u32 insn; u32 nop = aarch64_insn_gen_nop(); + int count = ftrace_nop_count(addr);
- /* Skip the first 5 NOPS */ - addr += 5 * AARCH64_INSN_SIZE; - - if (aarch64_insn_read((void *)addr, &insn)) + if (count != 5 && count != 7 && count != 2) return 0;
- if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { - if (insn != nop) { - addr += AARCH64_INSN_SIZE; + if (count == 5 || count == 7) { + /* Skip the first 5 NOPS */ + addr += 5 * AARCH64_INSN_SIZE; + + /* Skip bti c */ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { if (aarch64_insn_read((void *)addr, &insn)) return 0; + + if (insn != nop) + addr += AARCH64_INSN_SIZE; } }
- if (WARN_ON_ONCE(insn != nop)) + if (ftrace_nop_count(addr) != 2) return 0;
return addr + AARCH64_INSN_SIZE; @@ -421,10 +456,25 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); }
+bool ftrace_directable(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + return rec->arch.func != 0UL; +#else + return false; +#endif +} + void ftrace_rec_arch_init(struct dyn_ftrace *rec, unsigned long func) { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - rec->arch.func = func + 5 * AARCH64_INSN_SIZE; + int count; + + count = ftrace_nop_count(func); + if (count == 5 || count == 7) + rec->arch.func = func + 5 * AARCH64_INSN_SIZE; + else + rec->arch.func = 0UL; #endif }
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d979242f44fe..9585a29ace3e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -280,6 +280,8 @@ struct ftrace_func_entry {
struct dyn_ftrace;
+bool ftrace_directable(struct dyn_ftrace *rec); + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index ba2cce21bb83..b2888890add0 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5092,6 +5092,11 @@ static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) return direct; }
+bool __weak ftrace_directable(struct dyn_ftrace *rec) +{ + return true; +} + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5133,6 +5138,9 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) if (!rec) goto out_unlock;
+ if (!ftrace_directable(rec)) + goto out_unlock; + /* * Check if the rec says it has a direct call but we didn't * find one earlier?
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/6061 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/C...