From: Luke Nelson lukenels@cs.washington.edu
commit 4178417cc5359c329790a4a8f4a6604612338cca upstream.
This patch fixes an incorrect check in how immediate memory offsets are computed for BPF_DW on arm.
For BPF_LDX/ST/STX + BPF_DW, the 32-bit arm JIT breaks down an 8-byte access into two separate 4-byte accesses using off+0 and off+4. If off fits in imm12, the JIT emits a ldr/str instruction with the immediate and avoids the use of a temporary register. While the current check off <= 0xfff ensures that the first immediate off+0 doesn't overflow imm12, it's not sufficient for the second immediate off+4, which may cause the second access of BPF_DW to read/write the wrong address.
This patch fixes the problem by changing the check to off <= 0xfff - 4 for BPF_DW, ensuring off+4 will never overflow.
A side effect of simplifying the check is that it now allows using negative immediate offsets in ldr/str. This means that small negative offsets can also avoid the use of a temporary register.
This patch introduces no new failures in test_verifier or test_bpf.c.
Fixes: c5eae692571d6 ("ARM: net: bpf: improve 64-bit store implementation") Fixes: ec19e02b343db ("ARM: net: bpf: fix LDX instructions") Co-developed-by: Xi Wang xi.wang@gmail.com Signed-off-by: Xi Wang xi.wang@gmail.com Signed-off-by: Luke Nelson luke.r.nels@gmail.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Link: https://lore.kernel.org/bpf/20200409221752.28448-1-luke.r.nels@gmail.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org --- arch/arm/net/bpf_jit_32.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 25b3ee8..d06293a 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -993,21 +993,35 @@ static inline void emit_a32_mul_r64(const s8 dst[], const s8 src[], arm_bpf_put_reg32(dst_hi, rd[0], ctx); }
+static bool is_ldst_imm(s16 off, const u8 size) +{ + s16 off_max = 0; + + switch (size) { + case BPF_B: + case BPF_W: + off_max = 0xfff; + break; + case BPF_H: + off_max = 0xff; + break; + case BPF_DW: + /* Need to make sure off+4 does not overflow. */ + off_max = 0xfff - 4; + break; + } + return -off_max <= off && off <= off_max; +} + /* *(size *)(dst + off) = src */ static inline void emit_str_r(const s8 dst, const s8 src[], - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; - s32 off_max; s8 rd;
rd = arm_bpf_get_reg32(dst, tmp[1], ctx);
- if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff; - - if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx); rd = tmp[0]; @@ -1036,18 +1050,12 @@ static inline void emit_str_r(const s8 dst, const s8 src[],
/* dst = *(size*)(src + off) */ static inline void emit_ldx_r(const s8 dst[], const s8 src, - s32 off, struct jit_ctx *ctx, const u8 sz){ + s16 off, struct jit_ctx *ctx, const u8 sz){ const s8 *tmp = bpf2a32[TMP_REG_1]; const s8 *rd = is_stacked(dst_lo) ? tmp : dst; s8 rm = src; - s32 off_max; - - if (sz == BPF_H) - off_max = 0xff; - else - off_max = 0xfff;
- if (off < 0 || off > off_max) { + if (!is_ldst_imm(off, sz)) { emit_a32_mov_i(tmp[0], off, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0];