From: Daniel Borkmann <daniel(a)iogearbox.net>
mainline inclusion
from mainline-v5.13-rc1
commit 801c6058d14a82179a7ee17a4b532cac6fad067f
category: bugfix
bugzilla: NA
CVE: CVE-2021-31829
--------------------------------
The current implemented mechanisms to mitigate data disclosure under
speculation mainly address stack and map value oob access from the
speculative domain. However, Piotr discovered that uninitialized BPF
stack is not protected yet, and thus old data from the kernel stack,
potentially including addresses of kernel structures, could still be
extracted from that 512 bytes large window. The BPF stack is special
compared to map values since it's not zero initialized for every
program invocation, whereas map values /are/ zero initialized upon
their initial allocation and thus cannot leak any prior data in either
domain. In the non-speculative domain, the verifier ensures that every
stack slot read must have a prior stack slot write by the BPF program
to avoid such data leaking issue.
However, this is not enough: for example, when the pointer arithmetic
operation moves the stack pointer from the last valid stack offset to
the first valid offset, the sanitation logic allows for any intermediate
offsets during speculative execution, which could then be used to
extract any restricted stack content via side-channel.
Given for unprivileged stack pointer arithmetic the use of unknown
but bounded scalars is generally forbidden, we can simply turn the
register-based arithmetic operation into an immediate-based arithmetic
operation without the need for masking. This also gives the benefit
of reducing the needed instructions for the operation. Given after
the work in 7fedb63a8307 ("bpf: Tighten speculative pointer arithmetic
mask"), the aux->alu_limit already holds the final immediate value for
the offset register with the known scalar. Thus, a simple mov of the
immediate to AX register with using AX as the source for the original
instruction is sufficient and possible now in this case.
Reported-by: Piotr Krysiuk <piotras(a)gmail.com>
Signed-off-by: Daniel Borkmann <daniel(a)iogearbox.net>
Tested-by: Piotr Krysiuk <piotras(a)gmail.com>
Reviewed-by: Piotr Krysiuk <piotras(a)gmail.com>
Reviewed-by: John Fastabend <john.fastabend(a)gmail.com>
Acked-by: Alexei Starovoitov <ast(a)kernel.org>
Conflicts:
kernel/bpf/verifier.c
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Reviewed-by: Kuohai Xu <xukuohai(a)huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang(a)huawei.com>
---
include/linux/bpf_verifier.h | 5 +++--
kernel/bpf/verifier.c | 27 +++++++++++++++++----------
2 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 42dc8c2146f4d..daab0960c0544 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -142,10 +142,11 @@ struct bpf_verifier_state_list {
};
/* Possible states for alu_state member. */
-#define BPF_ALU_SANITIZE_SRC 1U
-#define BPF_ALU_SANITIZE_DST 2U
+#define BPF_ALU_SANITIZE_SRC (1U << 0)
+#define BPF_ALU_SANITIZE_DST (1U << 1)
#define BPF_ALU_NEG_VALUE (1U << 2)
#define BPF_ALU_NON_POINTER (1U << 3)
+#define BPF_ALU_IMMEDIATE (1U << 4)
#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
BPF_ALU_SANITIZE_DST)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 46e21bdb79c4c..c49aa17839875 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2745,6 +2745,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
{
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
struct bpf_verifier_state *vstate = env->cur_state;
+ bool off_is_imm = tnum_is_const(off_reg->var_off);
bool off_is_neg = off_reg->smin_value < 0;
bool ptr_is_dst_reg = ptr_reg == dst_reg;
u8 opcode = BPF_OP(insn->code);
@@ -2775,6 +2776,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
alu_limit = abs(tmp_aux->alu_limit - alu_limit);
} else {
alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
alu_state |= ptr_is_dst_reg ?
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
}
@@ -6301,7 +6303,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
struct bpf_insn insn_buf[16];
struct bpf_insn *patch = &insn_buf[0];
- bool issrc, isneg;
+ bool issrc, isneg, isimm;
u32 off_reg;
aux = &env->insn_aux_data[i + delta];
@@ -6312,16 +6314,21 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
BPF_ALU_SANITIZE_SRC;
+ isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
off_reg = issrc ? insn->src_reg : insn->dst_reg;
- if (isneg)
- *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
- *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
- *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
- *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
- *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
+ if (isimm) {
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ } else {
+ if (isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
+ }
if (!issrc)
*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
insn->src_reg = BPF_REG_AX;
@@ -6329,7 +6336,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->code = insn->code == code_add ?
code_sub : code_add;
*patch++ = *insn;
- if (issrc && isneg)
+ if (issrc && isneg && !isimm)
*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
cnt = patch - insn_buf;
--
2.25.1