Patches 1-2 are preparatory patches; patches 3-5 are the CVE fix patches.
Hou Tao (1):
  bpf: Optimize the free of inner map

Pu Lehui (1):
  bpf: Fix kabi breakage in struct bpf_map

Tengda Wu (1):
  Fix kabi breakage in struct bpf_map

Xu Kuohai (2):
  bpf: Prevent tail call between progs attached to different hooks
  selftests/bpf: Add test for lsm tail call
 include/linux/bpf.h                              |  5 +-
 kernel/bpf/core.c                                | 25 ++++++++--
 kernel/bpf/map_in_map.c                          | 14 ++++--
 kernel/bpf/syscall.c                             |  8 ++++
 kernel/bpf/verifier.c                            |  4 +-
 .../selftests/bpf/prog_tests/test_lsm.c          | 46 ++++++++++++++++++-
 .../selftests/bpf/progs/lsm_tailcall.c           | 34 ++++++++++++++
 7 files changed, 124 insertions(+), 12 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/lsm_tailcall.c
From: Hou Tao <houtao1@huawei.com>
stable inclusion
from stable-v6.6.35
commit 2ad2f2edb944baf2735b23c7008b3dbe5b8da56c
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAD6H2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit af66bfd3c8538ed21cf72af18426fc4a408665cf ]
When removing the inner map from the outer map, the inner map will be freed after one RCU grace period and one RCU tasks trace grace period, so it is certain that any bpf program which may access the inner map has exited before the inner map is freed.
However, there is no need to wait for an RCU tasks trace grace period if the outer map is only accessed by non-sleepable programs. So add sleepable_refcnt to bpf_map and increase sleepable_refcnt when adding the outer map into env->used_maps for a sleepable program. Although the max number of bpf programs is INT_MAX - 1, the number of bpf programs which are being loaded may be greater than INT_MAX, so use atomic64_t instead of atomic_t for sleepable_refcnt. When removing the inner map from the outer map, use sleepable_refcnt to decide whether or not an RCU tasks trace grace period is needed before freeing the inner map.
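In short, the sleepable_refcnt lifecycle implemented by the hunks below is (a condensed summary of the diff, not part of the original commit message):

	resolve_pseudo_ldimm64() (verifier.c): atomic64_inc() when a sleepable
	    program takes a reference on the map in env->used_maps
	bpf_prog_bind_map() (syscall.c):       atomic64_inc() when a map is
	    bound to a sleepable program
	__bpf_free_used_maps() (core.c):       atomic64_dec() when a sleepable
	    program releases its maps
	bpf_map_fd_put_ptr() (map_in_map.c):   atomic64_read() chooses between
	    the "RCU GP + RCU tasks trace GP" and the plain "RCU GP" free path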
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20231204140425.1480317-6-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Stable-dep-of: 2884dc7d08d9 ("bpf: Fix a potential use-after-free in bpf_link_free()")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Conflicts:
	include/linux/bpf.h
[Bpf related structures have changed in commit 6b6e3a2eac5f ("kabi:
reserve space for bpf related structures"), causing conflicts in this
patch merge]
Signed-off-by: Tengda Wu <wutengda2@huawei.com>
---
 include/linux/bpf.h     |  2 ++
 kernel/bpf/core.c       |  4 ++++
 kernel/bpf/map_in_map.c | 14 +++++++++-----
 kernel/bpf/syscall.c    |  8 ++++++++
 kernel/bpf/verifier.c   |  4 +++-
 5 files changed, 26 insertions(+), 6 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8c4c2c39a6c1..dd164b2267ec 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -302,6 +302,8 @@ struct bpf_map {
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
 	bool free_after_mult_rcu_gp;
+	bool free_after_rcu_gp;
+	atomic64_t sleepable_refcnt;
 	s64 __percpu *elem_count;

 	KABI_RESERVE(1)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index caea5f14ac99..c68931013533 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2677,12 +2677,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 			  struct bpf_map **used_maps, u32 len)
 {
 	struct bpf_map *map;
+	bool sleepable;
 	u32 i;

+	sleepable = aux->sleepable;
 	for (i = 0; i < len; i++) {
 		map = used_maps[i];
 		if (map->ops->map_poke_untrack)
 			map->ops->map_poke_untrack(map, aux);
+		if (sleepable)
+			atomic64_dec(&map->sleepable_refcnt);
 		bpf_map_put(map);
 	}
 }
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 3248ff5d8161..8ef269e66ba5 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -131,12 +131,16 @@ void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer)
 {
 	struct bpf_map *inner_map = ptr;

-	/* The inner map may still be used by both non-sleepable and sleepable
-	 * bpf program, so free it after one RCU grace period and one tasks
-	 * trace RCU grace period.
+	/* Defer the freeing of inner map according to the sleepable attribute
+	 * of bpf program which owns the outer map, so unnecessary waiting for
+	 * RCU tasks trace grace period can be avoided.
 	 */
-	if (need_defer)
-		WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
+	if (need_defer) {
+		if (atomic64_read(&map->sleepable_refcnt))
+			WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true);
+		else
+			WRITE_ONCE(inner_map->free_after_rcu_gp, true);
+	}
 	bpf_map_put(inner_map);
 }

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bd5b5a9adfd4..086dab9c427c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -754,8 +754,11 @@ void bpf_map_put(struct bpf_map *map)
 		/* bpf_map_free_id() must be called first */
 		bpf_map_free_id(map);

+		WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt));
 		if (READ_ONCE(map->free_after_mult_rcu_gp))
 			call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp);
+		else if (READ_ONCE(map->free_after_rcu_gp))
+			call_rcu(&map->rcu, bpf_map_free_rcu_gp);
 		else
 			bpf_map_free_in_work(map);
 	}
 }
@@ -5400,6 +5403,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 		goto out_unlock;
 	}

+	/* The bpf program will not access the bpf map, but for the sake of
+	 * simplicity, increase sleepable_refcnt for sleepable program as well.
+	 */
+	if (prog->aux->sleepable)
+		atomic64_inc(&map->sleepable_refcnt);
 	memcpy(used_maps_new, used_maps_old,
 	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
 	used_maps_new[prog->aux->used_map_cnt] = map;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d1041517a984..47e22d18730c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17741,10 +17741,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 			return -E2BIG;
 		}

+		if (env->prog->aux->sleepable)
+			atomic64_inc(&map->sleepable_refcnt);
 		/* hold the map. If the program is rejected by verifier,
 		 * the map will be released by release_maps() or it
 		 * will be used by the valid program until it's unloaded
-		 * and all maps are released in free_used_maps()
+		 * and all maps are released in bpf_free_used_maps()
 		 */
 		bpf_map_inc(map);
From: Tengda Wu <wutengda2@huawei.com>
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAD6H2
--------------------------------
After backporting LTS commit 4b359df7b2ad ("[Backport] bpf: Optimize the free of inner map"), two new fields, `free_after_rcu_gp` and `sleepable_refcnt`, were introduced into struct bpf_map, which results in a kabi breakage.
Use KABI_FILL_HOLE and KABI_USE to fix kabi breakage in struct bpf_map.
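For context, the openEuler KABI helpers work as follows: KABI_FILL_HOLE(type member) places the new member into an existing padding hole, and KABI_USE(n, type member) overlays the new member onto the kabi_reservedN slot through an anonymous union, so neither changes the size or offsets visible to out-of-tree modules. Roughly, KABI_USE(1, atomic64_t sleepable_refcnt) expands to something like the sketch below (inferred from the pahole output that follows, not the literal macro definition):

	union {
		atomic64_t sleepable_refcnt;
		struct {
			u64 kabi_reserved1;	/* the original reserved slot */
		} kabi_hidden_308;		/* holder named after the declaring line */
		union { };
	};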
Before:
------
struct bpf_map {
	<SNIP>
	/* --- cacheline 3 boundary (192 bytes) --- */
	struct mutex               freeze_mutex;            /*   192    32 */
	atomic64_t                 writecnt;                /*   224     8 */
	struct {
		spinlock_t         lock;                    /*   232     4 */
		enum bpf_prog_type type;                    /*   236     4 */
		bool               jited;                   /*   240     1 */
		bool               xdp_has_frags;           /*   241     1 */
	} owner;                                            /*   232    12 */

	/* XXX last struct has 2 bytes of padding */

	bool                       bypass_spec_v1;          /*   244     1 */
	bool                       frozen;                  /*   245     1 */
	bool                       free_after_mult_rcu_gp;  /*   246     1 */

	/* XXX 1 byte hole, try to pack */

	s64 *                      elem_count;              /*   248     8 */
	/* --- cacheline 4 boundary (256 bytes) --- */
	u64                        kabi_reserved1;          /*   256     8 */
	u64                        kabi_reserved2;          /*   264     8 */
	u64                        kabi_reserved3;          /*   272     8 */
	u64                        kabi_reserved4;          /*   280     8 */

	/* size: 320, cachelines: 5, members: 32 */
	/* sum members: 271, holes: 2, sum holes: 17 */
	/* padding: 32 */
	/* paddings: 1, sum paddings: 2 */
	/* forced alignments: 3, forced holes: 1, sum forced holes: 16 */
} __attribute__((__aligned__(64)));

After:
------
struct bpf_map {
	<SNIP>
	/* --- cacheline 3 boundary (192 bytes) --- */
	struct mutex               freeze_mutex;            /*   192    32 */
	atomic64_t                 writecnt;                /*   224     8 */
	struct {
		spinlock_t         lock;                    /*   232     4 */
		enum bpf_prog_type type;                    /*   236     4 */
		bool               jited;                   /*   240     1 */
		bool               xdp_has_frags;           /*   241     1 */
	} owner;                                            /*   232    12 */

	/* XXX last struct has 2 bytes of padding */

	bool                       bypass_spec_v1;          /*   244     1 */
	bool                       frozen;                  /*   245     1 */
	bool                       free_after_mult_rcu_gp;  /*   246     1 */
	bool                       free_after_rcu_gp;       /*   247     1 */
	s64 *                      elem_count;              /*   248     8 */
	/* --- cacheline 4 boundary (256 bytes) --- */
	union {
		atomic64_t         sleepable_refcnt;        /*   256     8 */
		struct {
			u64        kabi_reserved1;          /*   256     8 */
		} kabi_hidden_308;                          /*   256     8 */
		union { };                                  /*   256     0 */
	};                                                  /*   256     8 */
	u64                        kabi_reserved2;          /*   264     8 */
	u64                        kabi_reserved3;          /*   272     8 */
	u64                        kabi_reserved4;          /*   280     8 */

	/* size: 320, cachelines: 5, members: 33 */
	/* sum members: 272, holes: 1, sum holes: 16 */
	/* padding: 32 */
	/* paddings: 1, sum paddings: 2 */
	/* forced alignments: 3, forced holes: 1, sum forced holes: 16 */
} __attribute__((__aligned__(64)));
Fixes: 1dce7525ede3 ("bpf: Optimize the free of inner map")
Signed-off-by: Tengda Wu <wutengda2@huawei.com>
---
 include/linux/bpf.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dd164b2267ec..abc920234c28 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -302,11 +302,10 @@ struct bpf_map {
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
 	bool free_after_mult_rcu_gp;
-	bool free_after_rcu_gp;
-	atomic64_t sleepable_refcnt;
+	KABI_FILL_HOLE(bool free_after_rcu_gp)
 	s64 __percpu *elem_count;

-	KABI_RESERVE(1)
+	KABI_USE(1, atomic64_t sleepable_refcnt)
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
From: Xu Kuohai <xukuohai@huawei.com>
stable inclusion
from stable-v6.6.57
commit 5d5e3b4cbe8ee16b7bf96fd73a421c92a9da3ca1
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAYRIC
CVE: CVE-2024-50063
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 28ead3eaabc16ecc907cfb71876da028080f6356 ]
bpf progs can be attached to kernel functions, and the attached functions can take different parameters or have different return values. If a prog attached to one kernel function tail calls a prog attached to another kernel function, the ctx access or return value verification could be bypassed.
For example, suppose prog1 is attached to func1, which takes only one parameter, and prog2 is attached to func2, which takes two parameters. Since the verifier assumes the bpf ctx passed to prog2 is constructed based on func2's prototype, the verifier allows prog2 to access the second parameter from the bpf ctx passed to it. The problem is that the verifier does not prevent prog1 from passing its bpf ctx to prog2 via tail call. In this case, the bpf ctx passed to prog2 is constructed from func1 instead of func2; that is, the assumption underlying the ctx access verification is bypassed.
As another example, suppose BPF LSM prog1 is attached to hook file_alloc_security, and BPF LSM prog2 is attached to hook bpf_lsm_audit_rule_known. The verifier knows the return value rules for these two hooks: e.g. it is legal for bpf_lsm_audit_rule_known to return the positive number 1, while it is illegal for file_alloc_security to return a positive number. So the verifier allows prog2 to return 1, but does not allow prog1 to return a positive number. The problem is that the verifier does not prevent prog1 from calling prog2 via tail call. In this case, prog2's return value 1 will be used as the return value for prog1's hook file_alloc_security; that is, the return value rule is bypassed.
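To make the first bypass concrete, here is a hypothetical pair of tracing programs; the kernel function names (one_arg_func, two_arg_func), the jmp_table map, and the program names are invented for illustration and do not come from the patch:

	#include "vmlinux.h"
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	struct {
		__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
		__uint(max_entries, 1);
		__uint(key_size, sizeof(__u32));
		__uint(value_size, sizeof(__u32));
	} jmp_table SEC(".maps");

	/* Verified against a one-argument function: the saved ctx holds a
	 * single argument. Using jmp_table here makes prog1 the map owner.
	 */
	SEC("fentry/one_arg_func")
	int BPF_PROG(prog1, unsigned long a)
	{
		bpf_tail_call_static(ctx, &jmp_table, 0);
		return 0;
	}

	/* Verified against a two-argument function, so the verifier lets it
	 * read a second argument from its ctx. When entered via prog1's
	 * tail call, that slot was never written and the read sees stale
	 * data.
	 */
	SEC("fentry/two_arg_func")
	int BPF_PROG(prog2, unsigned long a, unsigned long b)
	{
		bpf_printk("second arg: %lu", b);
		return 0;
	}

Before this patch, inserting prog2 into jmp_table succeeded because bpf_prog_map_compatible() compared only the prog type, JITed flag, and xdp_has_frags flag; with the patch, the bpf_map_update_elem() inserting prog2 fails because its attach_func_proto differs from the map owner's.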
This patch adds a restriction on tail calls to prevent such bypasses: for the affected program types, progs with different attach_func_proto values may no longer share a prog array.
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Link: https://lore.kernel.org/r/20240719110059.797546-4-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 include/linux/bpf.h |  1 +
 kernel/bpf/core.c   | 21 ++++++++++++++++++---
 2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index abc920234c28..e295c1cec087 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -294,6 +294,7 @@ struct bpf_map {
 	 * same prog type, JITed flag and xdp_has_frags flag.
 	 */
 	struct {
+		const struct btf_type *attach_func_proto;
 		spinlock_t lock;
 		enum bpf_prog_type type;
 		bool jited;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c68931013533..109110cabc4e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2259,6 +2259,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 {
 	enum bpf_prog_type prog_type = resolve_prog_type(fp);
 	bool ret;
+	struct bpf_prog_aux *aux = fp->aux;

 	if (fp->kprobe_override)
 		return false;
@@ -2268,7 +2269,7 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 	 * in the case of devmap and cpumap). Until device checks
 	 * are implemented, prohibit adding dev-bound programs to program maps.
 	 */
-	if (bpf_prog_is_dev_bound(fp->aux))
+	if (bpf_prog_is_dev_bound(aux))
 		return false;

 	spin_lock(&map->owner.lock);
@@ -2278,12 +2279,26 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 		 */
 		map->owner.type = prog_type;
 		map->owner.jited = fp->jited;
-		map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
+		map->owner.xdp_has_frags = aux->xdp_has_frags;
+		map->owner.attach_func_proto = aux->attach_func_proto;
 		ret = true;
 	} else {
 		ret = map->owner.type == prog_type &&
 		      map->owner.jited == fp->jited &&
-		      map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
+		      map->owner.xdp_has_frags == aux->xdp_has_frags;
+		if (ret &&
+		    map->owner.attach_func_proto != aux->attach_func_proto) {
+			switch (prog_type) {
+			case BPF_PROG_TYPE_TRACING:
+			case BPF_PROG_TYPE_LSM:
+			case BPF_PROG_TYPE_EXT:
+			case BPF_PROG_TYPE_STRUCT_OPS:
+				ret = false;
+				break;
+			default:
+				break;
+			}
+		}
 	}
 	spin_unlock(&map->owner.lock);
Offering: HULK
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAYRIC
CVE: CVE-2024-50063
--------------------------------
Fix kabi breakage in struct bpf_map by using KABI_USE.
Fixes: c65e6c0dfb36 ("[Backport] bpf: Prevent tail call between progs attached to different hooks")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 include/linux/bpf.h | 3 +--
 kernel/bpf/core.c   | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e295c1cec087..337134e1c98b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -294,7 +294,6 @@ struct bpf_map {
 	 * same prog type, JITed flag and xdp_has_frags flag.
 	 */
 	struct {
-		const struct btf_type *attach_func_proto;
 		spinlock_t lock;
 		enum bpf_prog_type type;
 		bool jited;
@@ -307,7 +306,7 @@ struct bpf_map {
 	s64 __percpu *elem_count;

 	KABI_USE(1, atomic64_t sleepable_refcnt)
-	KABI_RESERVE(2)
+	KABI_USE(2, const struct btf_type *attach_func_proto)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 };
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 109110cabc4e..7c00bb2ad004 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2280,14 +2280,14 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 		map->owner.type = prog_type;
 		map->owner.jited = fp->jited;
 		map->owner.xdp_has_frags = aux->xdp_has_frags;
-		map->owner.attach_func_proto = aux->attach_func_proto;
+		map->attach_func_proto = aux->attach_func_proto;
 		ret = true;
 	} else {
 		ret = map->owner.type == prog_type &&
 		      map->owner.jited == fp->jited &&
 		      map->owner.xdp_has_frags == aux->xdp_has_frags;
 		if (ret &&
-		    map->owner.attach_func_proto != aux->attach_func_proto) {
+		    map->attach_func_proto != aux->attach_func_proto) {
 			switch (prog_type) {
 			case BPF_PROG_TYPE_TRACING:
 			case BPF_PROG_TYPE_LSM:
From: Xu Kuohai <xukuohai@huawei.com>
mainline inclusion
from mainline-v6.12-rc1
commit d463dd9c9aa24b17ccb8ed76bdd7768baf857b48
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAYRIC
CVE: CVE-2024-50063
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Add a test for lsm tail calls to ensure that a tail call can only be used between bpf lsm progs attached to the same hook.
Signed-off-by: Xu Kuohai <xukuohai@huawei.com>
Link: https://lore.kernel.org/r/20240719110059.797546-9-xukuohai@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Pu Lehui <pulehui@huawei.com>
---
 .../selftests/bpf/prog_tests/test_lsm.c | 46 ++++++++++++++++++-
 .../selftests/bpf/progs/lsm_tailcall.c  | 34 ++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/progs/lsm_tailcall.c
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 16175d579bc7..2a27f3714f5c 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -12,6 +12,7 @@
 #include <stdlib.h>

 #include "lsm.skel.h"
+#include "lsm_tailcall.skel.h"

 char *CMD_ARGS[] = {"true", NULL};

@@ -95,7 +96,7 @@ static int test_lsm(struct lsm *skel)
 	return 0;
 }

-void test_test_lsm(void)
+static void test_lsm_basic(void)
 {
 	struct lsm *skel = NULL;
 	int err;
@@ -114,3 +115,46 @@ void test_test_lsm(void)
 close_prog:
 	lsm__destroy(skel);
 }
+
+static void test_lsm_tailcall(void)
+{
+	struct lsm_tailcall *skel = NULL;
+	int map_fd, prog_fd;
+	int err, key;
+
+	skel = lsm_tailcall__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "lsm_tailcall__skel_load"))
+		goto close_prog;
+
+	map_fd = bpf_map__fd(skel->maps.jmp_table);
+	if (CHECK_FAIL(map_fd < 0))
+		goto close_prog;
+
+	prog_fd = bpf_program__fd(skel->progs.lsm_file_permission_prog);
+	if (CHECK_FAIL(prog_fd < 0))
+		goto close_prog;
+
+	key = 0;
+	err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+	if (CHECK_FAIL(!err))
+		goto close_prog;
+
+	prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
+	if (CHECK_FAIL(prog_fd < 0))
+		goto close_prog;
+
+	err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY);
+	if (CHECK_FAIL(err))
+		goto close_prog;
+
+close_prog:
+	lsm_tailcall__destroy(skel);
+}
+
+void test_test_lsm(void)
+{
+	if (test__start_subtest("lsm_basic"))
+		test_lsm_basic();
+	if (test__start_subtest("lsm_tailcall"))
+		test_lsm_tailcall();
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
new file mode 100644
index 000000000000..49c075ce2d4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Huawei Technologies Co., Ltd */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("lsm/file_permission")
+int lsm_file_permission_prog(void *ctx)
+{
+	return 0;
+}
+
+SEC("lsm/file_alloc_security")
+int lsm_file_alloc_security_prog(void *ctx)
+{
+	return 0;
+}
+
+SEC("lsm/file_alloc_security")
+int lsm_file_alloc_security_entry(void *ctx)
+{
+	bpf_tail_call_static(ctx, &jmp_table, 0);
+	return 0;
+}
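With the series applied, the new subtest can be exercised through the standard selftests runner (the usual test_progs invocation, assuming a built tools/testing/selftests/bpf tree):

	$ cd tools/testing/selftests/bpf
	$ make
	$ ./test_progs -t test_lsm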
Feedback: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/12756 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/3...