
From: Xu Kuohai <xukuohai@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICJ716

--------------------------------

Fix up the bpf_ext_memcpy extension helper for arm64: the verifier
records the constant copy size in insn->off, and the arm64 JIT expands
the helper call into inline load/store-pair sequences when the CPU
permits unaligned access, falling back to a call to bpf_ext_memcpy()
otherwise.

Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 arch/arm64/include/asm/insn.h |   4 +
 arch/arm64/kernel/insn.c      |   8 +
 arch/arm64/net/bpf_jit.h      |  15 ++
 arch/arm64/net/bpf_jit_comp.c | 266 ++++++++++++++++++++++++++++++++++
 include/linux/filter.h        |   3 +
 kernel/bpf/core.c             |   7 +
 kernel/bpf/verifier.c         |  25 ++++
 7 files changed, 328 insertions(+)

diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 9b9ad2f6d4aa..01faf12ddaac 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -208,6 +208,10 @@ enum aarch64_insn_ldst_type {
 	AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX,
 	AARCH64_INSN_LDST_LOAD_EX,
 	AARCH64_INSN_LDST_STORE_EX,
+#ifdef CONFIG_HISOCK
+	AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET,
+	AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET,
+#endif
 };
 
 enum aarch64_insn_adsb_type {
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index fbd2b7eec1dc..25390b069a05 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -745,6 +745,14 @@ u32 aarch64_insn_gen_load_store_pair(enum aarch64_insn_register reg1,
 	case AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX:
 		insn = aarch64_insn_get_stp_post_value();
 		break;
+#ifdef CONFIG_HISOCK
+	case AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET:
+		insn = aarch64_insn_get_ldp_value();
+		break;
+	case AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET:
+		insn = aarch64_insn_get_stp_value();
+		break;
+#endif
 	default:
 		pr_err("%s: unknown load/store encoding %d\n", __func__, type);
 		return AARCH64_BREAK_FAULT;
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 1835dffb8104..c670f35f302c 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -96,6 +96,21 @@
 /* Rt = Rn[0]; Rt2 = Rn[8]; Rn += 16; */
 #define A64_POP(Rt, Rt2, Rn)  A64_LS_PAIR(Rt, Rt2, Rn, 16, LOAD, POST_INDEX)
 
+#ifdef CONFIG_HISOCK
+#define A64_STP(Rt, Rt2, Rn, offset) \
+	A64_LS_PAIR(Rt, Rt2, Rn, offset, STORE, SIGNED_OFFSET)
+#define A64_LDP(Rt, Rt2, Rn, offset) \
+	A64_LS_PAIR(Rt, Rt2, Rn, offset, LOAD, SIGNED_OFFSET)
+#define A64_STP32(Wt, Wt2, Rn, offset) \
+	aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \
+					 AARCH64_INSN_VARIANT_32BIT, \
+					 AARCH64_INSN_LDST_STORE_PAIR_SIGNED_OFFSET)
+#define A64_LDP32(Wt, Wt2, Rn, offset) \
+	aarch64_insn_gen_load_store_pair(Wt, Wt2, Rn, offset, \
+					 AARCH64_INSN_VARIANT_32BIT, \
+					 AARCH64_INSN_LDST_LOAD_PAIR_SIGNED_OFFSET)
+#endif
+
 /* Load/store exclusive */
 #define A64_SIZE(sf) \
 	((sf) ? AARCH64_INSN_SIZE_64 : AARCH64_INSN_SIZE_32)
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index af5760c0d2e7..e2e065382984 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -21,11 +21,26 @@
 
 #include "bpf_jit.h"
 
+#ifdef CONFIG_HISOCK
+#define TCALL_CNT (MAX_BPF_JIT_REG + 0)
+#define FP_BOTTOM (MAX_BPF_JIT_REG + 1)
+#define TMP_REG_1 (MAX_BPF_JIT_REG + 2)
+#define TMP_REG_2 (MAX_BPF_JIT_REG + 3)
+#define TMP_REG_3 (MAX_BPF_JIT_REG + 4)
+#define TMP_REG_4 (MAX_BPF_JIT_REG + 5)
+#define TMP_REG_5 (MAX_BPF_JIT_REG + 6)
+#define TMP_REG_6 (MAX_BPF_JIT_REG + 7)
+#define TMP_REG_7 (MAX_BPF_JIT_REG + 8)
+#define TMP_REG_8 (MAX_BPF_JIT_REG + 9)
+#define TMP_REG_9 (MAX_BPF_JIT_REG + 10)
+#define TMP_REG_10 (MAX_BPF_JIT_REG + 11)
+#else
 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
 #define TCALL_CNT (MAX_BPF_JIT_REG + 2)
 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
 #define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
+#endif
 
 /* Map BPF registers to A64 registers */
 static const int bpf2a64[] = {
@@ -48,6 +63,15 @@ static const int bpf2a64[] = {
 	[TMP_REG_1] = A64_R(10),
 	[TMP_REG_2] = A64_R(11),
 	[TMP_REG_3] = A64_R(12),
+#ifdef CONFIG_HISOCK
+	[TMP_REG_4] = A64_R(13),
+	[TMP_REG_5] = A64_R(14),
+	[TMP_REG_6] = A64_R(15),
+	[TMP_REG_7] = A64_R(5),
+	[TMP_REG_8] = A64_R(6),
+	[TMP_REG_9] = A64_R(7),
+	[TMP_REG_10] = A64_R(28),
+#endif
 	/* tail_call_cnt */
 	[TCALL_CNT] = A64_R(26),
 	/* temporary register for blinding constants */
@@ -565,6 +589,234 @@ static int add_exception_handler(const struct bpf_insn *insn,
 	return 0;
 }
 
+#ifdef CONFIG_HISOCK
+static bool support_unaligned_access(void)
+{
+	unsigned long sctlr = SCTLR_ELx_A;
+
+	switch (read_sysreg(CurrentEL)) {
+	case CurrentEL_EL1:
+		sctlr = read_sysreg(sctlr_el1);
+		break;
+	case CurrentEL_EL2:
+		sctlr = read_sysreg(sctlr_el2);
+		break;
+	default:
+		/* neither EL1 nor EL2? */
+		break;
+	}
+
+	return !(sctlr & SCTLR_ELx_A);
+}
+
+extern u64 bpf_ext_memcpy(void *dst, size_t dst_sz,
+			  const void *src, size_t src_sz);
+
+static void emit_memcpy(struct jit_ctx *ctx, int size)
+{
+	u8 dst = bpf2a64[BPF_REG_1];
+	u8 src = bpf2a64[BPF_REG_3];
+	u8 tmp1 = bpf2a64[TMP_REG_1];
+	u8 tmp2 = bpf2a64[TMP_REG_2];
+	u8 tmp3 = bpf2a64[TMP_REG_3];
+	u8 tmp4 = bpf2a64[TMP_REG_4];
+	u8 tmp5 = bpf2a64[TMP_REG_5];
+	u8 tmp6 = bpf2a64[TMP_REG_6];
+	u8 tmp7 = bpf2a64[TMP_REG_7];
+	u8 tmp8 = bpf2a64[TMP_REG_8];
+	u8 tmp9 = bpf2a64[TMP_REG_9];
+	u8 tmp10 = bpf2a64[TMP_REG_10];
+
+	if (!support_unaligned_access()) {
+		emit_call((u64)bpf_ext_memcpy, ctx);
+		return;
+	}
+
+	switch (size) {
+	case 0:
+		break;
+	case 1:
+		emit(A64_LDRBI(tmp1, src, 0), ctx);
+		emit(A64_STRBI(tmp1, dst, 0), ctx);
+		break;
+	case 2:
+		emit(A64_LDRHI(tmp1, src, 0), ctx);
+		emit(A64_STRHI(tmp1, dst, 0), ctx);
+		break;
+	case 3:
+		emit(A64_LDRHI(tmp1, src, 0), ctx);
+		emit(A64_LDRBI(tmp2, src, 2), ctx);
+		emit(A64_STRHI(tmp1, dst, 0), ctx);
+		emit(A64_STRBI(tmp2, dst, 2), ctx);
+		break;
+	case 4:
+		emit(A64_LDR32I(tmp1, src, 0), ctx);
+		emit(A64_STR32I(tmp1, dst, 0), ctx);
+		break;
+	case 5:
+		emit(A64_LDR32I(tmp1, src, 0), ctx);
+		emit(A64_LDRBI(tmp2, src, 4), ctx);
+		emit(A64_STR32I(tmp1, dst, 0), ctx);
+		emit(A64_STRBI(tmp2, dst, 4), ctx);
+		break;
+	case 6:
+		emit(A64_LDR32I(tmp1, src, 0), ctx);
+		emit(A64_LDRHI(tmp2, src, 4), ctx);
+		emit(A64_STR32I(tmp1, dst, 0), ctx);
+		emit(A64_STRHI(tmp2, dst, 4), ctx);
+		break;
+	case 7:
+		emit(A64_LDR32I(tmp1, src, 0), ctx);
+		emit(A64_LDRHI(tmp2, src, 4), ctx);
+		emit(A64_LDRBI(tmp3, src, 6), ctx);
+		emit(A64_STR32I(tmp1, dst, 0), ctx);
+		emit(A64_STRHI(tmp2, dst, 4), ctx);
+		emit(A64_STRBI(tmp3, dst, 6), ctx);
+		break;
+	case 8:
+		emit(A64_LDR64I(tmp1, src, 0), ctx);
+		emit(A64_STR64I(tmp1, dst, 0), ctx);
+		break;
+	case 9 ... 15:
+		emit(A64_ADD_I(1, tmp1, src, size), ctx);
+		emit(A64_ADD_I(1, tmp2, dst, size), ctx);
+		emit(A64_LDR64I(tmp3, src, 0), ctx);
+		emit(A64_LDP32(tmp4, tmp5, tmp1, -8), ctx);
+		emit(A64_STR64I(tmp3, dst, 0), ctx);
+		emit(A64_STP32(tmp4, tmp5, tmp2, -8), ctx);
+		break;
+	case 16:
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		break;
+	case 17 ... 31:
+		emit(A64_ADD_I(1, tmp1, src, size), ctx);
+		emit(A64_ADD_I(1, tmp2, dst, size), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 0), ctx);
+		emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 0), ctx);
+		emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx);
+		break;
+	case 32:
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 16), ctx);
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 16), ctx);
+		break;
+	case 33 ... 63:
+		emit(A64_ADD_I(1, tmp1, src, size), ctx);
+		emit(A64_ADD_I(1, tmp2, dst, size), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 0), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 16), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 0), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 16), ctx);
+		emit(A64_LDP(tmp3, tmp4, tmp1, -32), ctx);
+		emit(A64_LDP(tmp5, tmp6, tmp1, -16), ctx);
+		emit(A64_STP(tmp3, tmp4, tmp2, -32), ctx);
+		emit(A64_STP(tmp5, tmp6, tmp2, -16), ctx);
+		break;
+	case 64:
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 16), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 32), ctx);
+		emit(A64_LDP(tmp7, tmp8, src, 48), ctx);
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 16), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 32), ctx);
+		emit(A64_STP(tmp7, tmp8, dst, 48), ctx);
+		break;
+	case 65 ... 95:
+		/* copy first 48 bytes */
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 16), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 32), ctx);
+
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 16), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 32), ctx);
+
+		/* copy last 48 bytes */
+		emit(A64_ADD_I(1, tmp7, src, size), ctx);
+		emit(A64_ADD_I(1, tmp8, dst, size), ctx);
+
+		emit(A64_LDP(tmp1, tmp2, tmp7, -48), ctx);
+		emit(A64_LDP(tmp3, tmp4, tmp7, -32), ctx);
+		emit(A64_LDP(tmp5, tmp6, tmp7, -16), ctx);
+
+		emit(A64_STP(tmp1, tmp2, tmp8, -48), ctx);
+		emit(A64_STP(tmp3, tmp4, tmp8, -32), ctx);
+		emit(A64_STP(tmp5, tmp6, tmp8, -16), ctx);
+		break;
+	case 96:
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 16), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 32), ctx);
+		emit(A64_LDP(tmp7, tmp8, src, 48), ctx);
+
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 16), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 32), ctx);
+		emit(A64_STP(tmp7, tmp8, dst, 48), ctx);
+
+		emit(A64_LDP(tmp1, tmp2, src, 64), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 80), ctx);
+		emit(A64_STP(tmp1, tmp2, dst, 64), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 80), ctx);
+		break;
+	case 97 ... 127:
+		emit(A64_ADD_I(1, tmp9, src, size), ctx);
+		emit(A64_ADD_I(1, tmp10, dst, size), ctx);
+
+		/* copy first 64 bytes */
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 16), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 32), ctx);
+		emit(A64_LDP(tmp7, tmp8, src, 48), ctx);
+
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 16), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 32), ctx);
+		emit(A64_STP(tmp7, tmp8, dst, 48), ctx);
+
+		/* copy last 64 bytes */
+		emit(A64_LDP(tmp1, tmp2, tmp9, -64), ctx);
+		emit(A64_LDP(tmp3, tmp4, tmp9, -48), ctx);
+		emit(A64_LDP(tmp5, tmp6, tmp9, -32), ctx);
+		emit(A64_LDP(tmp7, tmp8, tmp9, -16), ctx);
+
+		emit(A64_STP(tmp1, tmp2, tmp10, -64), ctx);
+		emit(A64_STP(tmp3, tmp4, tmp10, -48), ctx);
+		emit(A64_STP(tmp5, tmp6, tmp10, -32), ctx);
+		emit(A64_STP(tmp7, tmp8, tmp10, -16), ctx);
+		break;
+	case 128:
+		emit(A64_LDP(tmp1, tmp2, src, 0), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 16), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 32), ctx);
+		emit(A64_LDP(tmp7, tmp8, src, 48), ctx);
+
+		emit(A64_STP(tmp1, tmp2, dst, 0), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 16), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 32), ctx);
+		emit(A64_STP(tmp7, tmp8, dst, 48), ctx);
+
+		emit(A64_LDP(tmp1, tmp2, src, 64), ctx);
+		emit(A64_LDP(tmp3, tmp4, src, 80), ctx);
+		emit(A64_LDP(tmp5, tmp6, src, 96), ctx);
+		emit(A64_LDP(tmp7, tmp8, src, 112), ctx);
+
+		emit(A64_STP(tmp1, tmp2, dst, 64), ctx);
+		emit(A64_STP(tmp3, tmp4, dst, 80), ctx);
+		emit(A64_STP(tmp5, tmp6, dst, 96), ctx);
+		emit(A64_STP(tmp7, tmp8, dst, 112), ctx);
+		break;
+	default:
+		emit_call((u64)bpf_ext_memcpy, ctx);
+		break;
+	}
+}
+#endif
+
 /* JITs an eBPF instruction.
  * Returns:
  * 0 - successfully JITed an 8-byte eBPF instruction.
@@ -915,6 +1167,13 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		bool func_addr_fixed;
 		u64 func_addr;
 
+#ifdef CONFIG_HISOCK
+		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_ext_memcpy) {
+			emit_memcpy(ctx, insn->off);
+			break;
+		}
+#endif
+
 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
 					    &func_addr, &func_addr_fixed);
 		if (ret < 0)
@@ -1461,6 +1720,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	return prog;
 }
 
+#ifdef CONFIG_HISOCK
+bool bpf_jit_supports_ext_helper(void)
+{
+	return true;
+}
+#endif
+
 u64 bpf_jit_alloc_exec_limit(void)
 {
 	return BPF_JIT_REGION_SIZE;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 758adb32d352..602d2f358eb0 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -922,6 +922,9 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
+#ifdef CONFIG_HISOCK
+bool bpf_jit_supports_ext_helper(void);
+#endif
 u64 bpf_arch_uaddress_limit(void);
 bool bpf_helper_changes_pkt_data(void *func);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index bc9bc96c0c4b..987079bdf2b4 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2358,6 +2358,13 @@ bool __weak bpf_jit_needs_zext(void)
 	return false;
 }
 
+#ifdef CONFIG_HISOCK
+bool __weak bpf_jit_supports_ext_helper(void)
+{
+	return false;
+}
+#endif
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6341764e0ab3..5d9f2f671653 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5899,6 +5899,21 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_HISOCK
+	if (func_id == BPF_FUNC_ext_memcpy) {
+		/* XXX: cleanup & check if allowed to access dst mem */
+		u32 regno = BPF_REG_1 + 3;
+		struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+		struct bpf_insn *insn = &env->prog->insnsi[env->insn_idx];
+
+		if (!bpf_jit_supports_ext_helper() ||
+		    reg->umax_value == 0 || reg->umax_value > 4096)
+			return -ENOTSUPP;
+
+		insn->off = reg->umax_value;
+	}
+#endif
+
 	/* reset caller saved regs */
 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
 		mark_reg_not_init(env, regs, caller_saved[i]);
@@ -10500,7 +10515,11 @@ static int do_check(struct bpf_verifier_env *env)
 			env->jmps_processed++;
 			if (opcode == BPF_CALL) {
 				if (BPF_SRC(insn->code) != BPF_K ||
+#ifdef CONFIG_HISOCK
+				    (insn->off != 0 && insn->imm != BPF_FUNC_ext_memcpy) ||
+#else
 				    insn->off != 0 ||
+#endif
 				    (insn->src_reg != BPF_REG_0 &&
 				     insn->src_reg != BPF_PSEUDO_CALL) ||
 				    insn->dst_reg != BPF_REG_0 ||
@@ -12226,6 +12245,12 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			continue;
 		}
 
+#ifdef CONFIG_HISOCK
+		/* the JIT will fix up the bpf extension helper call */
+		if (insn->imm == BPF_FUNC_ext_memcpy)
+			continue;
+#endif
+
 patch_call_imm:
 		fn = env->ops->get_func_proto(insn->imm, env->prog);
 		/* all functions that have prototype and verifier allowed
-- 
2.34.1
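For context on the emit_memcpy() size classes above: every
non-power-of-two class is handled with two possibly overlapping copies,
one anchored at the start of the buffer and one ending exactly at its
last byte, so no residual byte loop is ever emitted. Below is a minimal
userspace C sketch of the same technique (illustration only, not part
of the patch; the JIT does this with LDP/STP on registers, which is why
SCTLR_ELx.A must be clear before this path is taken):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Copy n bytes (9 <= n <= 15) with two overlapping 8-byte accesses,
 * mirroring the JIT's "case 9 ... 15": one 8-byte copy from the start
 * of the region, one 8-byte copy ending exactly at byte n.
 */
static void copy_9_to_15(void *dst, const void *src, size_t n)
{
	uint64_t head, tail;

	memcpy(&head, src, 8);                        /* bytes [0, 8)   */
	memcpy(&tail, (const char *)src + n - 8, 8);  /* bytes [n-8, n) */
	memcpy(dst, &head, 8);
	memcpy((char *)dst + n - 8, &tail, 8);
}

int main(void)
{
	char src[15] = "abcdefghijklmn";
	char dst[15] = { 0 };

	copy_9_to_15(dst, src, sizeof(src));
	printf("%.15s\n", dst);	/* abcdefghijklmn */
	return 0;
}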
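And a sketch of how a program would exercise the new path, assuming the
out-of-tree HISOCK headers expose the helper roughly as below. The
declaration, section name, and program shape are illustrative; only the
constraint comes from this patch: R4 (the source size) must have a
constant upper bound in (0, 4096], which check_helper_call() copies
into insn->off for the JIT to expand.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

/* hypothetical declaration in the legacy helper style; the real one
 * would ship with the HISOCK headers */
static long (*bpf_ext_memcpy)(void *dst, __u64 dst_sz,
			      const void *src, __u64 src_sz) =
	(void *)BPF_FUNC_ext_memcpy;

char out[64];

SEC("socket")
int copy_example(struct __sk_buff *skb)
{
	char tmp[64];

	__builtin_memset(tmp, 0, sizeof(tmp));
	/* src_sz is the constant 64, so the arm64 JIT inlines the call
	 * as four LDP/STP pairs ("case 64" in emit_memcpy()) */
	bpf_ext_memcpy(out, sizeof(out), tmp, sizeof(tmp));
	return 0;
}

char _license[] SEC("license") = "GPL";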