Andrei Matei (2): bpf: Fix verification of indirect var-off stack access bpf: Fix accesses to uninit stack slots
Eduard Zingerman (1): bpf: Allow reads from uninit stack
Kees Cook (1): bpf, verifier: Fix memory leak in array reallocation for stack state
Lorenz Bauer (2): bpf: verifier: Improve function state reallocation bpf: verifier: Use copy_array for jmp_history
Stanislav Fomichev (1): bpf: expose bpf_strtol and bpf_strtoul to all program types
kernel/bpf/cgroup.c | 4 - kernel/bpf/helpers.c | 7 +- kernel/bpf/verifier.c | 302 +++++++++--------- .../selftests/bpf/verifier/basic_stack.c | 11 +- tools/testing/selftests/bpf/verifier/calls.c | 17 +- .../bpf/verifier/helper_access_var_len.c | 104 ++++-- .../testing/selftests/bpf/verifier/int_ptr.c | 16 +- .../selftests/bpf/verifier/raw_stack.c | 7 +- .../selftests/bpf/verifier/search_pruning.c | 13 +- tools/testing/selftests/bpf/verifier/sock.c | 27 -- .../testing/selftests/bpf/verifier/var_off.c | 113 +++---- 11 files changed, 313 insertions(+), 308 deletions(-)
From: Lorenz Bauer lmb@cloudflare.com
mainline inclusion from mainline-v5.14-rc1 commit c69431aab67a912836e5831f03d99a819c14c9c3 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Resizing and copying stack and reference tracking state currently does a lot of kfree / kmalloc when the size of the tracked set changes. The logic in copy_*_state and realloc_*_state is also hard to follow.
Refactor this into two core functions. copy_array copies from a source into a destination. It avoids reallocation by taking the allocated size of the destination into account via ksize(). The function is essentially krealloc_array, with the difference that the contents of dst are not preserved. realloc_array changes the size of an array and zeroes newly allocated items. Contrary to krealloc both functions don't free the destination if the size is zero. Instead we rely on free_func_state to clean up.
realloc_stack_state is renamed to grow_stack_state to better convey that it never shrinks the stack state.
Signed-off-by: Lorenz Bauer lmb@cloudflare.com Signed-off-by: Alexei Starovoitov ast@kernel.org Link: https://lore.kernel.org/bpf/20210429134656.122225-2-lmb@cloudflare.com Conflicts: kernel/bpf/verifier.c Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/verifier.c | 202 ++++++++++++++++++++++-------------------- 1 file changed, 106 insertions(+), 96 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index eb236a48bde2..7512412611b9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -703,81 +703,109 @@ static void print_verifier_state(struct bpf_verifier_env *env, verbose(env, "\n"); }
-#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int copy_##NAME##_state(struct bpf_func_state *dst, \ - const struct bpf_func_state *src) \ -{ \ - if (!src->FIELD) \ - return 0; \ - if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \ - /* internal bug, make state invalid to reject the program */ \ - memset(dst, 0, sizeof(*dst)); \ - return -EFAULT; \ - } \ - memcpy(dst->FIELD, src->FIELD, \ - sizeof(*src->FIELD) * (src->COUNT / SIZE)); \ - return 0; \ -} -/* copy_reference_state() */ -COPY_STATE_FN(reference, acquired_refs, refs, 1) -/* copy_stack_state() */ -COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef COPY_STATE_FN - -#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \ -static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \ - bool copy_old) \ -{ \ - u32 old_size = state->COUNT; \ - struct bpf_##NAME##_state *new_##FIELD; \ - int slot = size / SIZE; \ - \ - if (size <= old_size || !size) { \ - if (copy_old) \ - return 0; \ - state->COUNT = slot * SIZE; \ - if (!size && old_size) { \ - kfree(state->FIELD); \ - state->FIELD = NULL; \ - } \ - return 0; \ - } \ - new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \ - GFP_KERNEL); \ - if (!new_##FIELD) \ - return -ENOMEM; \ - if (copy_old) { \ - if (state->FIELD) \ - memcpy(new_##FIELD, state->FIELD, \ - sizeof(*new_##FIELD) * (old_size / SIZE)); \ - memset(new_##FIELD + old_size / SIZE, 0, \ - sizeof(*new_##FIELD) * (size - old_size) / SIZE); \ - } \ - state->COUNT = slot * SIZE; \ - kfree(state->FIELD); \ - state->FIELD = new_##FIELD; \ - return 0; \ -} -/* realloc_reference_state() */ -REALLOC_STATE_FN(reference, acquired_refs, refs, 1) -/* realloc_stack_state() */ -REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE) -#undef REALLOC_STATE_FN - -/* do_check() starts with zero-sized stack in struct bpf_verifier_state to - * make it consume minimal amount of memory. check_stack_write() access from - * the program calls into realloc_func_state() to grow the stack size. - * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which realloc_stack_state() copies over. It points to previous - * bpf_verifier_state which is never reallocated. +/* copy array src of length n * size bytes to dst. dst is reallocated if it's too + * small to hold src. This is different from krealloc since we don't want to preserve + * the contents of dst. + * + * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could + * not be allocated. */ -static int realloc_func_state(struct bpf_func_state *state, int stack_size, - int refs_size, bool copy_old) +static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags) { - int err = realloc_reference_state(state, refs_size, copy_old); - if (err) - return err; - return realloc_stack_state(state, stack_size, copy_old); + size_t bytes; + + if (ZERO_OR_NULL_PTR(src)) + goto out; + + if (unlikely(check_mul_overflow(n, size, &bytes))) + return NULL; + + if (ksize(dst) < bytes) { + kfree(dst); + dst = kmalloc_track_caller(bytes, flags); + if (!dst) + return NULL; + } + + memcpy(dst, src, bytes); +out: + return dst ? dst : ZERO_SIZE_PTR; +} + +/* resize an array from old_n items to new_n items. the array is reallocated if it's too + * small to hold new_n items. new items are zeroed out if the array grows. + * + * Contrary to krealloc_array, does not free arr if new_n is zero. + */ +static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) +{ + size_t bytes; + + if (!new_n || old_n == new_n) + goto out; + + if (unlikely(check_mul_overflow(new_n, size, &bytes))) + return NULL; + + arr = krealloc(arr, bytes, GFP_KERNEL); + if (!arr) + return NULL; + + if (new_n > old_n) + memset(arr + old_n * size, 0, (new_n - old_n) * size); + +out: + return arr ? arr : ZERO_SIZE_PTR; +} + +static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs, + sizeof(struct bpf_reference_state), GFP_KERNEL); + if (!dst->refs) + return -ENOMEM; + + dst->acquired_refs = src->acquired_refs; + return 0; +} + +static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src) +{ + size_t n = src->allocated_stack / BPF_REG_SIZE; + + dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state), + GFP_KERNEL); + if (!dst->stack) + return -ENOMEM; + + dst->allocated_stack = src->allocated_stack; + return 0; +} + +static int resize_reference_state(struct bpf_func_state *state, size_t n) +{ + state->refs = realloc_array(state->refs, state->acquired_refs, n, + sizeof(struct bpf_reference_state)); + if (!state->refs) + return -ENOMEM; + + state->acquired_refs = n; + return 0; +} + +static int grow_stack_state(struct bpf_func_state *state, int size) +{ + size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE; + + if (old_n >= n) + return 0; + + state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state)); + if (!state->stack) + return -ENOMEM; + + state->allocated_stack = size; + return 0; }
/* Acquire a pointer id from the env and update the state->refs to include @@ -791,7 +819,7 @@ static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx) int new_ofs = state->acquired_refs; int id, err;
- err = realloc_reference_state(state, state->acquired_refs + 1, true); + err = resize_reference_state(state, state->acquired_refs + 1); if (err) return err; id = ++env->id_gen; @@ -820,18 +848,6 @@ static int release_reference_state(struct bpf_func_state *state, int ptr_id) return -EINVAL; }
-static int transfer_reference_state(struct bpf_func_state *dst, - struct bpf_func_state *src) -{ - int err = realloc_reference_state(dst, src->acquired_refs, false); - if (err) - return err; - err = copy_reference_state(dst, src); - if (err) - return err; - return 0; -} - static void free_func_state(struct bpf_func_state *state) { if (!state) @@ -870,10 +886,6 @@ static int copy_func_state(struct bpf_func_state *dst, { int err;
- err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, - false); - if (err) - return err; memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs)); err = copy_reference_state(dst, src); if (err) @@ -2473,8 +2485,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg = NULL; u32 dst_reg = insn->dst_reg;
- err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); if (err) return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -2625,8 +2636,7 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, if (value_reg && register_is_null(value_reg)) writing_zero = true;
- err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), - state->acquired_refs, true); + err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); if (err) return err;
@@ -5416,7 +5426,7 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, subprog /* subprog number within this prog */);
/* Transfer references to the callee */ - err = transfer_reference_state(callee, caller); + err = copy_reference_state(callee, caller); if (err) return err;
@@ -5469,7 +5479,7 @@ static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx) caller->regs[BPF_REG_0] = *r0;
/* Transfer references to the caller */ - err = transfer_reference_state(caller, callee); + err = copy_reference_state(caller, callee); if (err) return err;
From: Lorenz Bauer lmb@cloudflare.com
mainline inclusion from mainline-v5.14-rc1 commit 06ab6a505583f9adbf5e1f05d86e7bdd7b02248e category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Eliminate a couple needless kfree / kmalloc cycles by using copy_array for jmp_history.
Signed-off-by: Lorenz Bauer lmb@cloudflare.com Signed-off-by: Alexei Starovoitov ast@kernel.org Link: https://lore.kernel.org/bpf/20210429134656.122225-3-lmb@cloudflare.com Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/verifier.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7512412611b9..712c88a318d8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -897,16 +897,13 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, const struct bpf_verifier_state *src) { struct bpf_func_state *dst; - u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt; int i, err;
- if (dst_state->jmp_history_cnt < src->jmp_history_cnt) { - kfree(dst_state->jmp_history); - dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER); - if (!dst_state->jmp_history) - return -ENOMEM; - } - memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz); + dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history, + src->jmp_history_cnt, sizeof(struct bpf_idx_pair), + GFP_USER); + if (!dst_state->jmp_history) + return -ENOMEM; dst_state->jmp_history_cnt = src->jmp_history_cnt;
/* if dst has more stack frames then src frame, free them */
From: Kees Cook keescook@chromium.org
mainline inclusion from mainline-v6.1-rc5 commit 42378a9ca55347102bbf86708776061d8fe3ece2 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
If an error (NULL) is returned by krealloc(), callers of realloc_array() were setting their allocation pointers to NULL, but on error krealloc() does not touch the original allocation. This would result in a memory resource leak. Instead, free the old allocation on the error handling path.
The memory leak information is as follows as also reported by Zhengchao:
unreferenced object 0xffff888019801800 (size 256): comm "bpf_repo", pid 6490, jiffies 4294959200 (age 17.170s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<00000000b211474b>] __kmalloc_node_track_caller+0x45/0xc0 [<0000000086712a0b>] krealloc+0x83/0xd0 [<00000000139aab02>] realloc_array+0x82/0xe2 [<00000000b1ca41d1>] grow_stack_state+0xfb/0x186 [<00000000cd6f36d2>] check_mem_access.cold+0x141/0x1341 [<0000000081780455>] do_check_common+0x5358/0xb350 [<0000000015f6b091>] bpf_check.cold+0xc3/0x29d [<000000002973c690>] bpf_prog_load+0x13db/0x2240 [<00000000028d1644>] __sys_bpf+0x1605/0x4ce0 [<00000000053f29bd>] __x64_sys_bpf+0x75/0xb0 [<0000000056fedaf5>] do_syscall_64+0x35/0x80 [<000000002bd58261>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
Fixes: c69431aab67a ("bpf: verifier: Improve function state reallocation") Reported-by: Zhengchao Shao shaozhengchao@huawei.com Reported-by: Kees Cook keescook@chromium.org Signed-off-by: Kees Cook keescook@chromium.org Signed-off-by: Daniel Borkmann daniel@iogearbox.net Reviewed-by: Bill Wendling morbo@google.com Cc: Lorenz Bauer oss@lmb.io Link: https://lore.kernel.org/bpf/20221029025433.2533810-1-keescook@chromium.org Conflicts: kernel/bpf/verifier.c Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/verifier.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 712c88a318d8..ffcc63f14b20 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -740,16 +740,22 @@ static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size) { size_t bytes; + void *new_arr;
if (!new_n || old_n == new_n) goto out;
- if (unlikely(check_mul_overflow(new_n, size, &bytes))) + if (unlikely(check_mul_overflow(new_n, size, &bytes))) { + kfree(arr); return NULL; + }
- arr = krealloc(arr, bytes, GFP_KERNEL); - if (!arr) + new_arr = krealloc(arr, bytes, GFP_KERNEL); + if (!new_arr) { + kfree(arr); return NULL; + } + arr = new_arr;
if (new_n > old_n) memset(arr + old_n * size, 0, (new_n - old_n) * size);
From: Stanislav Fomichev sdf@google.com
mainline inclusion from mainline-v6.1-rc1 commit 8a67f2de9b1dc3cf8b75b4bf589efb1f08e3e9b8 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
bpf_strncmp is already exposed everywhere. The motivation is to keep those helpers in kernel/bpf/helpers.c. Otherwise it's tempting to move them under kernel/bpf/cgroup.c because they are currently only used by sysctl prog types.
Suggested-by: Martin KaFai Lau kafai@fb.com Acked-by: Martin KaFai Lau kafai@fb.com Signed-off-by: Stanislav Fomichev sdf@google.com Link: https://lore.kernel.org/r/20220823222555.523590-4-sdf@google.com Signed-off-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/cgroup.c | 4 ---- kernel/bpf/helpers.c | 7 ++++++- 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index fb72f5c4c5fe..41ddba63082d 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1766,10 +1766,6 @@ static const struct bpf_func_proto * sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { - case BPF_FUNC_strtol: - return &bpf_strtol_proto; - case BPF_FUNC_strtoul: - return &bpf_strtoul_proto; case BPF_FUNC_sysctl_get_name: return &bpf_sysctl_get_name_proto; case BPF_FUNC_sysctl_get_current_value: diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 5fccf33196b5..6cbf82ca33f2 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -405,6 +405,8 @@ const struct bpf_func_proto bpf_get_local_storage_proto = { }; #endif
+#endif /* CONFIG_CGROUPS */ + #define BPF_STRTOX_BASE_MASK 0x1F
static int __bpf_strtoull(const char *buf, size_t buf_len, u64 flags, @@ -532,7 +534,6 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg3_type = ARG_ANYTHING, .arg4_type = ARG_PTR_TO_LONG, }; -#endif
BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, struct bpf_pidns_info *, nsdata, u32, size) @@ -701,6 +702,10 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; + case BPF_FUNC_strtol: + return &bpf_strtol_proto; + case BPF_FUNC_strtoul: + return &bpf_strtoul_proto; case BPF_FUNC_sched_tg_tag_of: return &bpf_sched_tg_tag_of_proto; case BPF_FUNC_sched_task_tag_of:
From: Eduard Zingerman eddyz87@gmail.com
mainline inclusion from mainline-v6.3-rc4 commit 6715df8d5d24655b9fd368e904028112b54c7de1 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
This commits updates the following functions to allow reads from uninitialized stack locations when env->allow_uninit_stack option is enabled: - check_stack_read_fixed_off() - check_stack_range_initialized(), called from: - check_stack_read_var_off() - check_helper_mem_access()
Such change allows to relax logic in stacksafe() to treat STACK_MISC and STACK_INVALID in a same way and make the following stack slot configurations equivalent:
| Cached state | Current state | | stack slot | stack slot | |------------------+------------------| | STACK_INVALID or | STACK_INVALID or | | STACK_MISC | STACK_SPILL or | | | STACK_MISC or | | | STACK_ZERO or | | | STACK_DYNPTR |
This leads to significant verification speed gains (see below).
The idea was suggested by Andrii Nakryiko [1] and initial patch was created by Alexei Starovoitov [2].
Currently the env->allow_uninit_stack is allowed for programs loaded by users with CAP_PERFMON or CAP_SYS_ADMIN capabilities.
A number of test cases from verifier/*.c were expecting uninitialized stack access to be an error. These test cases were updated to execute in unprivileged mode (thus preserving the tests).
The test progs/test_global_func10.c expected "invalid indirect read from stack" error message because of the access to uninitialized memory region. This error is no longer possible in privileged mode. The test is updated to provoke an error "invalid indirect access to stack" because of access to invalid stack address (such error is not verified by progs/test_global_func*.c series of tests).
The following tests had to be removed because these can't be made unprivileged: - verifier/sock.c: - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value" BPF_PROG_TYPE_SCHED_CLS programs are not executed in unprivileged mode. - verifier/var_off.c: - "indirect variable-offset stack access, max_off+size > max_initialized" - "indirect variable-offset stack access, uninitialized" These tests verify that access to uninitialized stack values is detected when stack offset is not a constant. However, variable stack access is prohibited in unprivileged mode, thus these tests are no longer valid.
* * *
Here is veristat log comparing this patch with current master on a set of selftest binaries listed in tools/testing/selftests/bpf/veristat.cfg and cilium BPF binaries (see [3]):
$ ./veristat -e file,prog,states -C -f 'states_pct<-30' master.log current.log File Program States (A) States (B) States (DIFF) -------------------------- -------------------------- ---------- ---------- ---------------- bpf_host.o tail_handle_ipv6_from_host 349 244 -105 (-30.09%) bpf_host.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_lxc.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_sock.o cil_sock4_connect 70 48 -22 (-31.43%) bpf_sock.o cil_sock4_sendmsg 68 46 -22 (-32.35%) bpf_xdp.o tail_handle_nat_fwd_ipv4 1554 803 -751 (-48.33%) bpf_xdp.o tail_lb_ipv4 6457 2473 -3984 (-61.70%) bpf_xdp.o tail_lb_ipv6 7249 3908 -3341 (-46.09%) pyperf600_bpf_loop.bpf.o on_event 287 145 -142 (-49.48%) strobemeta.bpf.o on_event 15915 4772 -11143 (-70.02%) strobemeta_nounroll2.bpf.o on_event 17087 3820 -13267 (-77.64%) xdp_synproxy_kern.bpf.o syncookie_tc 21271 6635 -14636 (-68.81%) xdp_synproxy_kern.bpf.o syncookie_xdp 23122 6024 -17098 (-73.95%) -------------------------- -------------------------- ---------- ---------- ----------------
Note: I limited selection by states_pct<-30%.
Inspection of differences in pyperf600_bpf_loop behavior shows that the following patch for the test removes almost all differences:
- a/tools/testing/selftests/bpf/progs/pyperf.h + b/tools/testing/selftests/bpf/progs/pyperf.h @ -266,8 +266,8 @ int __on_event(struct bpf_raw_tracepoint_args *ctx) }
if (event->pthread_match || !pidData->use_tls) { - void* frame_ptr; - FrameData frame; + void* frame_ptr = 0; + FrameData frame = {}; Symbol sym = {}; int cur_cpu = bpf_get_smp_processor_id();
W/o this patch the difference comes from the following pattern (for different variables):
static bool get_frame_data(... FrameData *frame ...) { ... bpf_probe_read_user(&frame->f_code, ...); if (!frame->f_code) return false; ... bpf_probe_read_user(&frame->co_name, ...); if (frame->co_name) ...; }
int __on_event(struct bpf_raw_tracepoint_args *ctx) { FrameData frame; ... get_frame_data(... &frame ...) // indirectly via a bpf_loop & callback ... }
SEC("raw_tracepoint/kfree_skb") int on_event(struct bpf_raw_tracepoint_args* ctx) { ... ret |= __on_event(ctx); ret |= __on_event(ctx); ... }
With regards to value `frame->co_name` the following is important: - Because of the conditional `if (!frame->f_code)` each call to __on_event() produces two states, one with `frame->co_name` marked as STACK_MISC, another with it as is (and marked STACK_INVALID on a first call). - The call to bpf_probe_read_user() does not mark stack slots corresponding to `&frame->co_name` as REG_LIVE_WRITTEN but it marks these slots as BPF_MISC, this happens because of the following loop in the check_helper_call():
for (i = 0; i < meta.access_size; i++) { err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false); if (err) return err; }
Note the size of the write, it is a one byte write for each byte touched by a helper. The BPF_B write does not lead to write marks for the target stack slot. - Which means that w/o this patch when second __on_event() call is verified `if (frame->co_name)` will propagate read marks first to a stack slot with STACK_MISC marks and second to a stack slot with STACK_INVALID marks and these states would be considered different.
[1] https://lore.kernel.org/bpf/CAEf4BzY3e+ZuC6HUa8dCiUovQRg2SzEk7M-dSkqNZyn=xEm... [2] https://lore.kernel.org/bpf/CAADnVQKs2i1iuZ5SUGuJtxWVfGYR9kDgYKhq3rNV+kBLQCu... [3] git@github.com:anakryiko/cilium.git
Suggested-by: Andrii Nakryiko andrii@kernel.org Co-developed-by: Alexei Starovoitov ast@kernel.org Signed-off-by: Eduard Zingerman eddyz87@gmail.com Acked-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/r/20230219200427.606541-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov ast@kernel.org Conflicts: tools/testing/selftests/bpf/verifier/* Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/verifier.c | 11 +- tools/testing/selftests/bpf/verifier/calls.c | 13 ++- .../bpf/verifier/helper_access_var_len.c | 104 ++++++++++++------ .../testing/selftests/bpf/verifier/int_ptr.c | 9 +- .../selftests/bpf/verifier/search_pruning.c | 13 ++- tools/testing/selftests/bpf/verifier/sock.c | 27 ----- .../testing/selftests/bpf/verifier/var_off.c | 52 --------- 7 files changed, 100 insertions(+), 129 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ffcc63f14b20..956d47c2cecb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2808,6 +2808,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_MISC) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -2845,6 +2847,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_ZERO) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -4329,7 +4333,8 @@ static int check_stack_range_initialized( stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; - if (*stype == STACK_ZERO) { + if ((*stype == STACK_ZERO) || + (*stype == STACK_INVALID && env->allow_uninit_stack)) { if (clobber) { /* helper can write anything into the stack */ *stype = STACK_MISC; @@ -9543,6 +9548,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue;
+ if (env->allow_uninit_stack && + old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) + continue; + /* explored stack has more populated slots than current stack * and these slots were used */ diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index eb888c8479c3..4b0628cd2d03 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1948,19 +1948,22 @@ * that fp-8 stack slot was unused in the fall-through * branch and will accept the program incorrectly */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack R2 off -8+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_48b = { 7 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "calls: ctx read at start of subprog", diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index 0ab7f1dfc97a..0e24aa11c457 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -29,19 +29,30 @@ { "helper access to variable memory: stack, bitwise AND, zero included", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use bitwise AND to limit r3 range to [0, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, bitwise AND + JMP, wrong max", @@ -183,20 +194,31 @@ { "helper access to variable memory: stack, JMP, no min check", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use JMP to limit r3 range to [0, 64] */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, JMP (signed), no min check", @@ -564,29 +586,41 @@ { "helper access to variable memory: 8 bytes leak", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + /* Note: fp[-32] left uninitialized */ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + /* Limit r3 range to [1, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory region [1, 64] + * at &fp[-64] is not fully initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+32 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 3 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: 8 bytes no leak (init memory)", diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index 070893fb2900..02d9e004260b 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -54,12 +54,13 @@ /* bpf_strtoul() */ BPF_EMIT_CALL(BPF_FUNC_strtoul),
- BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack R4 off -16+4 size 8", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "ARG_PTR_TO_LONG misaligned", diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index 7e36078f8f48..949cbe460248 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -128,9 +128,10 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "invalid read from stack off -16+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr_unpriv = "invalid read from stack off -16+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "allocated_stack", @@ -187,6 +188,8 @@ BPF_EXIT_INSN(), }, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "invalid read from stack off -8+1 size 8", - .result = REJECT, + .errstr_unpriv = "invalid read from stack off -8+1 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 8c224eac93df..59d976d22867 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -530,33 +530,6 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, -{ - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_sk_storage_map = { 14 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "invalid indirect read from stack", -}, { "bpf_map_lookup_elem(smap, &key)", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index eab1f7f56e2f..dc92a29f0d74 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -212,31 +212,6 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, -{ - "indirect variable-offset stack access, max_off+size > max_initialized", - .insns = { - /* Fill only the second from top 8 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, but we don't know - * which. fp-12 size 8 is partially uninitialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. */ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack R2 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, { "indirect variable-offset stack access, min_off < min_initialized", .insns = { @@ -289,33 +264,6 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, -{ - "indirect variable-offset stack access, uninitialized", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_3, 28), - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), - /* Add it to fp. We now have either fp-12 or fp-16, we don't know - * which, but either way it points to initialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_5, 8), - /* Dereference it indirectly. */ - BPF_EMIT_CALL(BPF_FUNC_getsockopt), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack R4 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, { "indirect variable-offset stack access, ok", .insns = {
From: Andrei Matei andreimatei1@gmail.com
stable inclusion from stable-v5.10.209 commit afea95d319ccb4ad2060dece9ac5e2e364dec543 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit a833a17aeac73b33f79433d7cee68d5cafd71e4f ]
This patch fixes a bug around the verification of possibly-zero-sized stack accesses. When the access was done through a var-offset stack pointer, check_stack_access_within_bounds was incorrectly computing the maximum-offset of a zero-sized read to be the same as the register's min offset. Instead, we have to take in account the register's maximum possible value. The patch also simplifies how the max offset is checked; the check is now simpler than for min offset.
The bug was allowing accesses to erroneously pass the check_stack_access_within_bounds() checks, only to later crash in check_stack_range_initialized() when all the possibly-affected stack slots are iterated (this time with a correct max offset). check_stack_range_initialized() is relying on check_stack_access_within_bounds() for its accesses to the stack-tracking vector to be within bounds; in the case of zero-sized accesses, we were essentially only verifying that the lowest possible slot was within bounds. We would crash when the max-offset of the stack pointer was >= 0 (which shouldn't pass verification, and hopefully is not something anyone's code attempts to do in practice).
Thanks Hao for reporting!
Fixes: 01f810ace9ed3 ("bpf: Allow variable-offset stack access") Reported-by: Hao Sun sunhao.th@gmail.com Signed-off-by: Andrei Matei andreimatei1@gmail.com Signed-off-by: Andrii Nakryiko andrii@kernel.org Acked-by: Eduard Zingerman eddyz87@gmail.com Acked-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/bpf/20231207041150.229139-2-andreimatei1@gmail.com
Closes: https://lore.kernel.org/bpf/CACkBjsZGEUaRCHsmaX=h-efVogsRfK1FPxmkgb0Os_frnHi... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Wang Hai wanghai38@huawei.com Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/verifier.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 956d47c2cecb..8caee6b90aee 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3939,10 +3939,7 @@ static int check_stack_access_within_bounds(
if (tnum_is_const(reg->var_off)) { min_off = reg->var_off.value + off; - if (access_size > 0) - max_off = min_off + access_size - 1; - else - max_off = min_off; + max_off = min_off + access_size; } else { if (reg->smax_value >= BPF_MAX_VAR_OFF || reg->smin_value <= -BPF_MAX_VAR_OFF) { @@ -3951,15 +3948,12 @@ static int check_stack_access_within_bounds( return -EACCES; } min_off = reg->smin_value + off; - if (access_size > 0) - max_off = reg->smax_value + off + access_size - 1; - else - max_off = min_off; + max_off = reg->smax_value + off + access_size; }
err = check_stack_slot_within_bounds(min_off, state, type); - if (!err) - err = check_stack_slot_within_bounds(max_off, state, type); + if (!err && max_off > 0) + err = -EINVAL; /* out of stack access into non-negative offsets */
if (err) { if (tnum_is_const(reg->var_off)) {
From: Andrei Matei andreimatei1@gmail.com
mainline inclusion from mainline-v6.8-rc1 commit 6b4a64bafd107e521c01eec3453ce94a3fb38529 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I932VT CVE: CVE-2023-52452
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Privileged programs are supposed to be able to read uninitialized stack memory (ever since 6715df8d5) but, before this patch, these accesses were permitted inconsistently. In particular, accesses were permitted above state->allocated_stack, but not below it. In other words, if the stack was already "large enough", the access was permitted, but otherwise the access was rejected instead of being allowed to "grow the stack". This undesired rejection was happening in two places: - in check_stack_slot_within_bounds() - in check_stack_range_initialized() This patch arranges for these accesses to be permitted. A bunch of tests that were relying on the old rejection had to change; all of them were changed to add also run unprivileged, in which case the old behavior persists. One tests couldn't be updated - global_func16 - because it can't run unprivileged for other reasons.
This patch also fixes the tracking of the stack size for variable-offset reads. This second fix is bundled in the same commit as the first one because they're inter-related. Before this patch, writes to the stack using registers containing a variable offset (as opposed to registers with fixed, known values) were not properly contributing to the function's needed stack size. As a result, it was possible for a program to verify, but then to attempt to read out-of-bounds data at runtime because a too small stack had been allocated for it.
Each function tracks the size of the stack it needs in bpf_subprog_info.stack_depth, which is maintained by update_stack_depth(). For regular memory accesses, check_mem_access() was calling update_state_depth() but it was passing in only the fixed part of the offset register, ignoring the variable offset. This was incorrect; the minimum possible value of that register should be used instead.
This tracking is now fixed by centralizing the tracking of stack size in grow_stack_state(), and by lifting the calls to grow_stack_state() to check_stack_access_within_bounds() as suggested by Andrii. The code is now simpler and more convincingly tracks the correct maximum stack size. check_stack_range_initialized() can now rely on enough stack having been allocated for the access; this helps with the fix for the first issue.
A few tests were changed to also check the stack depth computation. The one that fails without this patch is verifier_var_off:stack_write_priv_vs_unpriv.
Fixes: 01f810ace9ed3 ("bpf: Allow variable-offset stack access") Reported-by: Hao Sun sunhao.th@gmail.com Signed-off-by: Andrei Matei andreimatei1@gmail.com Signed-off-by: Andrii Nakryiko andrii@kernel.org Acked-by: Andrii Nakryiko andrii@kernel.org Link: https://lore.kernel.org/bpf/20231208032519.260451-3-andreimatei1@gmail.com Closes: https://lore.kernel.org/bpf/CABWLsev9g8UP_c3a=1qbuZUi20tGoUXoU07FPf-5FLvhOKO... Conflicts: tools/testing/selftests/bpf/verifier/* Signed-off-by: Pu Lehui pulehui@huawei.com --- kernel/bpf/verifier.c | 62 +++++++------------ .../selftests/bpf/verifier/basic_stack.c | 11 ++-- tools/testing/selftests/bpf/verifier/calls.c | 4 +- .../testing/selftests/bpf/verifier/int_ptr.c | 7 ++- .../selftests/bpf/verifier/raw_stack.c | 7 ++- .../testing/selftests/bpf/verifier/var_off.c | 61 +++++++++++++----- 6 files changed, 89 insertions(+), 63 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8caee6b90aee..91e83cfcbfb8 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -799,7 +799,10 @@ static int resize_reference_state(struct bpf_func_state *state, size_t n) return 0; }
-static int grow_stack_state(struct bpf_func_state *state, int size) +/* Possibly update state->allocated_stack to be at least size bytes. Also + * possibly update the function's high-water mark in its bpf_subprog_info. + */ +static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size) { size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
@@ -811,6 +814,11 @@ static int grow_stack_state(struct bpf_func_state *state, int size) return -ENOMEM;
state->allocated_stack = size; + + /* update known max for given subprogram */ + if (env->subprog_info[state->subprogno].stack_depth < size) + env->subprog_info[state->subprogno].stack_depth = size; + return 0; }
@@ -2488,9 +2496,6 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env, struct bpf_reg_state *reg = NULL; u32 dst_reg = insn->dst_reg;
- err = grow_stack_state(state, round_up(slot + 1, BPF_REG_SIZE)); - if (err) - return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ @@ -2639,11 +2644,6 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env, if (value_reg && register_is_null(value_reg)) writing_zero = true;
- err = grow_stack_state(state, round_up(-min_off, BPF_REG_SIZE)); - if (err) - return err; - - /* Variable offset writes destroy any spilled pointers in range. */ for (i = min_off; i < max_off; i++) { u8 new_type, *stype; @@ -3471,20 +3471,6 @@ static int check_ptr_alignment(struct bpf_verifier_env *env, strict); }
-static int update_stack_depth(struct bpf_verifier_env *env, - const struct bpf_func_state *func, - int off) -{ - u16 stack = env->subprog_info[func->subprogno].stack_depth; - - if (stack >= -off) - return 0; - - /* update known max for given subprogram */ - env->subprog_info[func->subprogno].stack_depth = -off; - return 0; -} - /* starting from main bpf function walk all instructions of the function * and recursively walk all callees that given function can call. * Ignore jump and exit insns. @@ -3896,13 +3882,14 @@ static int check_ptr_to_map_access(struct bpf_verifier_env *env, * The minimum valid offset is -MAX_BPF_STACK for writes, and * -state->allocated_stack for reads. */ -static int check_stack_slot_within_bounds(int off, +static int check_stack_slot_within_bounds(struct bpf_verifier_env *env, + s64 off, struct bpf_func_state *state, enum bpf_access_type t) { int min_valid_off;
- if (t == BPF_WRITE) + if (t == BPF_WRITE || env->allow_uninit_stack) min_valid_off = -MAX_BPF_STACK; else min_valid_off = -state->allocated_stack; @@ -3951,7 +3938,7 @@ static int check_stack_access_within_bounds( max_off = reg->smax_value + off + access_size; }
- err = check_stack_slot_within_bounds(min_off, state, type); + err = check_stack_slot_within_bounds(env, min_off, state, type); if (!err && max_off > 0) err = -EINVAL; /* out of stack access into non-negative offsets */
@@ -3966,8 +3953,10 @@ static int check_stack_access_within_bounds( verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", err_extra, regno, tn_buf, access_size); } + return err; } - return err; + + return grow_stack_state(env, state, round_up(-min_off, BPF_REG_SIZE)); }
/* check whether memory at (regno + off) is accessible for t = (read | write) @@ -3982,7 +3971,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn { struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = regs + regno; - struct bpf_func_state *state; int size, err = 0;
size = bpf_size_to_bytes(bpf_size); @@ -4100,11 +4088,6 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err) return err;
- state = func(env, reg); - err = update_stack_depth(env, state, off); - if (err) - return err; - if (t == BPF_READ) err = check_stack_read(env, regno, off, size, value_regno); @@ -4239,7 +4222,8 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins
/* When register 'regno' is used to read the stack (either directly or through * a helper function) make sure that it's within stack boundary and, depending - * on the access type, that all elements of the stack are initialized. + * on the access type and privileges, that all elements of the stack are + * initialized. * * 'off' includes 'regno->off', but not its dynamic part (if any). * @@ -4322,8 +4306,11 @@ static int check_stack_range_initialized(
slot = -i - 1; spi = slot / BPF_REG_SIZE; - if (state->allocated_stack <= slot) - goto err; + if (state->allocated_stack <= slot) { + verbose(env, "verifier bug: allocated_stack too small"); + return -EFAULT; + } + stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; @@ -4351,7 +4338,6 @@ static int check_stack_range_initialized( goto mark; }
-err: if (tnum_is_const(reg->var_off)) { verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", err_extra, regno, min_off, i - min_off, access_size); @@ -4371,7 +4357,7 @@ static int check_stack_range_initialized( state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64); } - return update_stack_depth(env, state, min_off); + return 0; }
static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c index f995777dddb3..1e481fca2d72 100644 --- a/tools/testing/selftests/bpf/verifier/basic_stack.c +++ b/tools/testing/selftests/bpf/verifier/basic_stack.c @@ -17,8 +17,9 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 2 }, - .errstr = "invalid indirect read from stack", - .result = REJECT, + .result = ACCEPT, + .errstr_unpriv = "invalid indirect read from stack", + .result_unpriv = REJECT, }, { "uninitialized stack2", @@ -27,8 +28,10 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8), BPF_EXIT_INSN(), }, - .errstr = "invalid read from stack", - .result = REJECT, + .result = ACCEPT, + .retval = POINTER_VALUE, + .errstr_unpriv = "invalid read from stack", + .result_unpriv = REJECT, }, { "invalid fp arithmetic", diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 4b0628cd2d03..0ac55b818867 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1228,7 +1228,9 @@ .prog_type = BPF_PROG_TYPE_XDP, .fixup_map_hash_8b = { 23 }, .result = REJECT, - .errstr = "invalid read from stack R7 off=-16 size=8", + .errstr = "R0 invalid mem access 'inv'", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack R7 off=-16 size=8", }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1", diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index 02d9e004260b..c28cd2b8f1da 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -25,9 +25,10 @@ BPF_MOV64_IMM(BPF_REG_0, 1), BPF_EXIT_INSN(), }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack R4 off -16+0 size 8", + .result = ACCEPT, + .retval = POINTER_VALUE, + .errstr_unpriv = "invalid indirect read from stack R4 off -16+0 size 8", + .result_unpriv = REJECT, }, { "ARG_PTR_TO_LONG half-uninitialized", diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c index cc8e8c3cdc03..f9364f454009 100644 --- a/tools/testing/selftests/bpf/verifier/raw_stack.c +++ b/tools/testing/selftests/bpf/verifier/raw_stack.c @@ -10,9 +10,10 @@ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "invalid read from stack R6 off=-8 size=8", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .retval = POINTER_VALUE, + .errstr_unpriv = "invalid read from stack R6 off=-8 size=8", + .result_unpriv = REJECT, }, { "raw_stack: skb_load_bytes, negative len", diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index dc92a29f0d74..743b990904a9 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -58,9 +58,10 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "invalid variable-offset read from stack R2", - .prog_type = BPF_PROG_TYPE_LWT_IN, + .result = ACCEPT, + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { "variable-offset stack write, priv vs unpriv", @@ -70,27 +71,58 @@ /* Make it small and 8-byte aligned */ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), + /* Add it to fp. We now have either fp-8 or + * fp-16, but we don't know which + */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), + /* Dereference it for a stack write */ + BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + /* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. + */ + .result = ACCEPT, + /* Variable stack access is rejected for unprivileged. + */ + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, +}, +{ + /* Similar to the previous test, but this time also perform a read from the + * address written to with a variable offset. The read is allowed, showing that, + * after a variable-offset write, a priviledged program can read the slots that + * were in the range of that write (even if the verifier doesn't actually know if + * the slot being read was really written to or not. + * + * Despite this test being mostly a superset, the previous test is also kept for + * the sake of it checking the stack depth in the case where there is no read. + */ + "variable-offset stack write followed by read", + .insns = { + /* Get an unknown value */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), + /* Make it small and 8-byte aligned */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), /* Add it to fp. We now have either fp-8 or fp-16, but * we don't know which */ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), /* Dereference it for a stack write */ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0), - /* Now read from the address we just wrote. This shows - * that, after a variable-offset write, a priviledged - * program can read the slots that were in the range of - * that write (even if the verifier doesn't actually know - * if the slot being read was really written to or not. - */ + /* Now read from the address we just wrote. */ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_2, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - /* Variable stack access is rejected for unprivileged. + /* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. */ + .result = ACCEPT, .errstr_unpriv = "R2 variable stack access prohibited for !root", .result_unpriv = REJECT, - .result = ACCEPT, }, { "variable-offset stack write clobbers spilled regs", @@ -233,9 +265,10 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack R2 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, + .result = ACCEPT, + .errstr_unpriv = "R2 variable stack access prohibited for !root", + .result_unpriv = REJECT, + .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, { "indirect variable-offset stack access, priv vs unpriv",
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/5364 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Y...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/5364 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Y...