From: Daniel Borkmann daniel@iogearbox.net
mainline inclusion from mainline-v5.12-rc8 commit 6f55b2f2a1178856c19bbce2f71449926e731914 category: bugfix bugzilla: NA CVE: CVE-2021-29155
--------------------------------
Small refactor to drag off_reg into sanitize_ptr_alu(), so we later on can use off_reg for generalizing some of the checks for all pointer types.
Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: John Fastabend john.fastabend@gmail.com Acked-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/bpf/verifier.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f9d8fc953b70d..4cb746168e4e6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2719,11 +2719,12 @@ static int sanitize_val_alu(struct bpf_verifier_env *env, static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, - struct bpf_reg_state *dst_reg, - bool off_is_neg) + const struct bpf_reg_state *off_reg, + struct bpf_reg_state *dst_reg) { struct bpf_verifier_state *vstate = env->cur_state; struct bpf_insn_aux_data *aux = cur_aux(env); + bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); u32 alu_state, alu_limit; @@ -2847,7 +2848,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
switch (opcode) { case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); if (ret < 0) { verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; @@ -2902,7 +2903,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); if (ret < 0) { verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret;
From: Daniel Borkmann daniel@iogearbox.net
mainline inclusion from mainline-v5.12-rc8 commit b658bbb844e28f1862867f37e8ca11a8e2aa94a3 category: bugfix bugzilla: NA CVE: CVE-2021-29155
--------------------------------
Small refactor with no semantic changes in order to consolidate the max ptr_limit boundary check.
Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: John Fastabend john.fastabend@gmail.com Acked-by: Alexei Starovoitov ast@kernel.org Conflicts: kernel/bpf/verifier.c Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/bpf/verifier.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 846245fffef8b..1c711df1a23c4 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2651,12 +2651,12 @@ static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - u32 *ptr_limit, u8 opcode) + u32 *alu_limit, u8 opcode) { bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max; + u32 off, max = 0, ptr_limit = 0;
if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -2670,22 +2670,27 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, max = MAX_BPF_STACK + mask_to_left; off = ptr_reg->off + ptr_reg->var_off.value; if (mask_to_left) - *ptr_limit = MAX_BPF_STACK + off; + ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off - 1; - return *ptr_limit >= max ? -ERANGE : 0; + ptr_limit = -off - 1; + break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; if (mask_to_left) { - *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; + ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return *ptr_limit >= max ? -ERANGE : 0; + break; default: return -EINVAL; } + + if (ptr_limit >= max) + return -ERANGE; + *alu_limit = ptr_limit; + return 0; }
static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
From: Daniel Borkmann daniel@iogearbox.net
mainline inclusion from mainline-v5.12-rc8 commit 073815b756c51ba9d8384d924c5d1c03ca3d1ae4 category: bugfix bugzilla: NA CVE: CVE-2021-29155
--------------------------------
Move the bounds check in adjust_ptr_min_max_vals() into a small helper named sanitize_check_bounds() in order to simplify the former a bit.
Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: John Fastabend john.fastabend@gmail.com Acked-by: Alexei Starovoitov ast@kernel.org Conflicts: kernel/bpf/verifier.c [yyl: bypass_spec_v1 is not introduced in kernel-4.19, use allow_ptr_leaks instead] Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/bpf/verifier.c | 85 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 17 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2097a44c88ff..3f3e4a4fac268 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2831,6 +2831,72 @@ static int sanitize_err(struct bpf_verifier_env *env, return -EACCES; }
+/* check that stack access falls within stack limits and that 'reg' doesn't + * have a variable offset. + * + * Variable offset is prohibited for unprivileged mode for simplicity since it + * requires corresponding support in Spectre masking for stack ALU. See also + * retrieve_ptr_limit(). + * + * + * 'off' includes 'reg->off'. + */ +static int check_stack_access_for_ptr_arithmetic( + struct bpf_verifier_env *env, + int regno, + const struct bpf_reg_state *reg, + int off) +{ + if (!tnum_is_const(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", + regno, tn_buf, off); + return -EACCES; + } + + if (off >= 0 || off < -MAX_BPF_STACK) { + verbose(env, "R%d stack pointer arithmetic goes out of range, " + "prohibited for !root; off=%d\n", regno, off); + return -EACCES; + } + + return 0; +} + +static int sanitize_check_bounds(struct bpf_verifier_env *env, + const struct bpf_insn *insn, + const struct bpf_reg_state *dst_reg) +{ + u32 dst = insn->dst_reg; + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (env->allow_ptr_leaks) + return 0; + + switch (dst_reg->type) { + case PTR_TO_STACK: + if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, + dst_reg->off + dst_reg->var_off.value)) + return -EACCES; + break; + case PTR_TO_MAP_VALUE: + if (check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } + break; + default: + break; + } + + return 0; +} + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. * If we return -EACCES, caller may want to try again treating pointer as a @@ -3038,24 +3104,9 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg);
- /* For unprivileged we require that resulting offset must be in bounds - * in order to be able to sanitize access later on. - */ - if (!env->allow_ptr_leaks) { - if (dst_reg->type == PTR_TO_MAP_VALUE && - check_map_access(env, dst, dst_reg->off, 1, false)) { - verbose(env, "R%d pointer arithmetic of map value goes out of range, " - "prohibited for !root\n", dst); - return -EACCES; - } else if (dst_reg->type == PTR_TO_STACK && - check_stack_access(env, dst_reg, dst_reg->off + - dst_reg->var_off.value, 1)) { - verbose(env, "R%d stack pointer arithmetic goes out of range, " - "prohibited for !root\n", dst); - return -EACCES; - } - }
+ if (sanitize_check_bounds(env, insn, dst_reg) < 0) + return -EACCES; return 0; }
From: Daniel Borkmann daniel@iogearbox.net
mainline inclusion from mainline-v5.12-rc8 commit 7fedb63a8307dda0ec3b8969a3b233a1dd7ea8e0 category: bugfix bugzilla: NA CVE: CVE-2021-29155
--------------------------------
This work tightens the offset mask we use for unprivileged pointer arithmetic in order to mitigate a corner case reported by Piotr and Benedict where in the speculative domain it is possible to advance, for example, the map value pointer by up to value_size-1 out-of-bounds in order to leak kernel memory via side-channel to user space.
Before this change, the computed ptr_limit for retrieve_ptr_limit() helper represents largest valid distance when moving pointer to the right or left which is then fed as aux->alu_limit to generate masking instructions against the offset register. After the change, the derived aux->alu_limit represents the largest potential value of the offset register which we mask against which is just a narrower subset of the former limit.
For minimal complexity, we call sanitize_ptr_alu() from 2 observation points in adjust_ptr_min_max_vals(), that is, before and after the simulated alu operation. In the first step, we retieve the alu_state and alu_limit before the operation as well as we branch-off a verifier path and push it to the verification stack as we did before which checks the dst_reg under truncation, in other words, when the speculative domain would attempt to move the pointer out-of-bounds.
In the second step, we retrieve the new alu_limit and calculate the absolute distance between both. Moreover, we commit the alu_state and final alu_limit via update_alu_sanitation_state() to the env's instruction aux data, and bail out from there if there is a mismatch due to coming from different verification paths with different states.
Reported-by: Piotr Krysiuk piotras@gmail.com Reported-by: Benedict Schlueter benedict.schlueter@rub.de Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: John Fastabend john.fastabend@gmail.com Acked-by: Alexei Starovoitov ast@kernel.org Tested-by: Benedict Schlueter benedict.schlueter@rub.de Conflicts: kernel/bpf/verifier.c Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Kuohai Xu xukuohai@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/bpf/verifier.c | 71 +++++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 26 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 26c28e0536073..e0999cb2be600 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2664,7 +2664,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, bool off_is_neg = off_reg->smin_value < 0; bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off, max = 0, ptr_limit = 0; + u32 max = 0, ptr_limit = 0;
if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) @@ -2673,23 +2673,18 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, switch (ptr_reg->type) { case PTR_TO_STACK: /* Offset 0 is out-of-bounds, but acceptable start for the - * left direction, see BPF_REG_FP. + * left direction, see BPF_REG_FP. Also, unknown scalar + * offset where we would need to deal with min/max bounds is + * currently prohibited for unprivileged. */ max = MAX_BPF_STACK + mask_to_left; - off = ptr_reg->off + ptr_reg->var_off.value; - if (mask_to_left) - ptr_limit = MAX_BPF_STACK + off; - else - ptr_limit = -off - 1; + ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off); break; case PTR_TO_MAP_VALUE: max = ptr_reg->map_ptr->value_size; - if (mask_to_left) { - ptr_limit = ptr_reg->umax_value + ptr_reg->off; - } else { - off = ptr_reg->smin_value + ptr_reg->off; - ptr_limit = ptr_reg->map_ptr->value_size - off - 1; - } + ptr_limit = (mask_to_left ? + ptr_reg->smin_value : + ptr_reg->umax_value) + ptr_reg->off; break; default: return REASON_TYPE; @@ -2744,10 +2739,12 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg, - struct bpf_reg_state *dst_reg) + struct bpf_reg_state *dst_reg, + struct bpf_insn_aux_data *tmp_aux, + const bool commit_window) { + struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; struct bpf_verifier_state *vstate = env->cur_state; - struct bpf_insn_aux_data *aux = cur_aux(env); bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); @@ -2766,18 +2763,33 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (vstate->speculative) goto do_sim;
- alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; - alu_state |= ptr_is_dst_reg ? - BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - err = retrieve_ptr_limit(ptr_reg, off_reg, &alu_limit, opcode); if (err < 0) return err;
+ if (commit_window) { + /* In commit phase we narrow the masking window based on + * the observed pointer move after the simulated operation. + */ + alu_state = tmp_aux->alu_state; + alu_limit = abs(tmp_aux->alu_limit - alu_limit); + } else { + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + } + err = update_alu_sanitation_state(aux, alu_state, alu_limit); if (err < 0) return err; do_sim: + /* If we're in commit phase, we're done here given we already + * pushed the truncated dst_reg into the speculative verification + * stack. + */ + if (commit_window) + return 0; + /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of * masking when off was not within expected range. If off @@ -2920,6 +2932,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + struct bpf_insn_aux_data tmp_aux = {}; u8 opcode = BPF_OP(insn->code); u32 dst = insn->dst_reg; int ret; @@ -2969,12 +2982,15 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) return -EINVAL;
- switch (opcode) { - case BPF_ADD: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, + &tmp_aux, false); if (ret < 0) return sanitize_err(env, insn, ret, off_reg, dst_reg); + }
+ switch (opcode) { + case BPF_ADD: /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -3025,10 +3041,6 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: - ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg); - if (ret < 0) - return sanitize_err(env, insn, ret, off_reg, dst_reg); - if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ verbose(env, "R%d tried to subtract pointer from scalar\n", @@ -3112,6 +3124,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
if (sanitize_check_bounds(env, insn, dst_reg) < 0) return -EACCES; + if (sanitize_needed(opcode)) { + ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, + &tmp_aux, true); + if (ret < 0) + return sanitize_err(env, insn, ret, off_reg, dst_reg); + } + return 0; }