From: Zijian Zhang zijianzhang@bytedance.com
stable inclusion from stable-v6.6.64 commit 98c7ea7d11f2588e8197db042e0291e4ac8f8346 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEGFC CVE: CVE-2024-56720
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
-------------------------------------------------
[ Upstream commit 5d609ba262475db450ba69b8e8a557bd768ac07a ]
Several fixes to bpf_msg_pop_data, 1. In sk_msg_shift_left, we should put_page 2. if (len == 0), return early is better 3. pop the entire sk_msg (last == msg->sg.size) should be supported 4. Fix for the value of variable "a" 5. In sk_msg_shift_left, after shifting, i has already pointed to the next element. Addtional sk_msg_iter_var_next may result in BUG.
Fixes: 7246d8ed4dcc ("bpf: helper to pop data from messages") Signed-off-by: Zijian Zhang zijianzhang@bytedance.com Reviewed-by: John Fastabend john.fastabend@gmail.com Link: https://lore.kernel.org/r/20241106222520.527076-8-zijianzhang@bytedance.com Signed-off-by: Martin KaFai Lau martin.lau@kernel.org Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Liu Jian liujian56@huawei.com --- net/core/filter.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/net/core/filter.c b/net/core/filter.c index 48dd2896ee1d9..131a21b5463cc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2889,8 +2889,10 @@ static const struct bpf_func_proto bpf_msg_push_data_proto = {
static void sk_msg_shift_left(struct sk_msg *msg, int i) { + struct scatterlist *sge = sk_msg_elem(msg, i); int prev;
+ put_page(sg_page(sge)); do { prev = i; sk_msg_iter_var_next(i); @@ -2927,6 +2929,9 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(flags)) return -EINVAL;
+ if (unlikely(len == 0)) + return 0; + /* First find the starting scatterlist element */ i = msg->sg.start; do { @@ -2939,7 +2944,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, } while (i != msg->sg.end);
/* Bounds checks: start and pop must be inside message */ - if (start >= offset + l || last >= msg->sg.size) + if (start >= offset + l || last > msg->sg.size) return -EINVAL;
space = MAX_MSG_FRAGS - sk_msg_elem_used(msg); @@ -2968,12 +2973,12 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, */ if (start != offset) { struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); - int a = start; + int a = start - offset; int b = sge->length - pop - a;
sk_msg_iter_var_next(i);
- if (pop < sge->length - a) { + if (b > 0) { if (space) { sge->length = a; sk_msg_shift_right(msg, i); @@ -2992,7 +2997,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, if (unlikely(!page)) return -ENOMEM;
- sge->length = a; orig = sg_page(sge); from = sg_virt(sge); to = page_address(page); @@ -3002,7 +3006,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, put_page(orig); } pop = 0; - } else if (pop >= sge->length - a) { + } else { pop -= (sge->length - a); sge->length = a; } @@ -3036,7 +3040,6 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, pop -= sge->length; sk_msg_shift_left(msg, i); } - sk_msg_iter_var_next(i); }
sk_mem_uncharge(msg->sk, len - pop);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/14770 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/14770 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...
From: Ye Bin yebin10@huawei.com
stable inclusion from stable-v6.6.64 commit 1c9a99c89e45b22eb556fd2f3f729f2683f247d5 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEAFL CVE: CVE-2024-53215
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
-------------------------------------------------
[ Upstream commit ce89e742a4c12b20f09a43fec1b21db33f2166cd ]
There's issue as follows: RPC: Registered rdma transport module. RPC: Registered rdma backchannel transport module. RPC: Unregistered rdma transport module. RPC: Unregistered rdma backchannel transport module. BUG: unable to handle page fault for address: fffffbfff80c609a PGD 123fee067 P4D 123fee067 PUD 123fea067 PMD 10c624067 PTE 0 Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI RIP: 0010:percpu_counter_destroy_many+0xf7/0x2a0 Call Trace: <TASK> __die+0x1f/0x70 page_fault_oops+0x2cd/0x860 spurious_kernel_fault+0x36/0x450 do_kern_addr_fault+0xca/0x100 exc_page_fault+0x128/0x150 asm_exc_page_fault+0x26/0x30 percpu_counter_destroy_many+0xf7/0x2a0 mmdrop+0x209/0x350 finish_task_switch.isra.0+0x481/0x840 schedule_tail+0xe/0xd0 ret_from_fork+0x23/0x80 ret_from_fork_asm+0x1a/0x30 </TASK>
If register_sysctl() return NULL, then svc_rdma_proc_cleanup() will not destroy the percpu counters which init in svc_rdma_proc_init(). If CONFIG_HOTPLUG_CPU is enabled, residual nodes may be in the 'percpu_counters' list. The above issue may occur once the module is removed. If the CONFIG_HOTPLUG_CPU configuration is not enabled, memory leakage occurs. To solve above issue just destroy all percpu counters when register_sysctl() return NULL.
Fixes: 1e7e55731628 ("svcrdma: Restore read and write stats") Fixes: 22df5a22462e ("svcrdma: Convert rdma_stat_sq_starve to a per-CPU counter") Fixes: df971cd853c0 ("svcrdma: Convert rdma_stat_recv to a per-CPU counter") Signed-off-by: Ye Bin yebin10@huawei.com Signed-off-by: Chuck Lever chuck.lever@oracle.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Liu Jian liujian56@huawei.com --- net/sunrpc/xprtrdma/svc_rdma.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-)
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index f0d5eeed4c886..e1d4e426b21fa 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -234,25 +234,34 @@ static int svc_rdma_proc_init(void)
rc = percpu_counter_init(&svcrdma_stat_read, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err; rc = percpu_counter_init(&svcrdma_stat_recv, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err_read; rc = percpu_counter_init(&svcrdma_stat_sq_starve, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err_recv; rc = percpu_counter_init(&svcrdma_stat_write, 0, GFP_KERNEL); if (rc) - goto out_err; + goto err_sq;
svcrdma_table_header = register_sysctl("sunrpc/svc_rdma", svcrdma_parm_table); + if (!svcrdma_table_header) + goto err_write; + return 0;
-out_err: +err_write: + rc = -ENOMEM; + percpu_counter_destroy(&svcrdma_stat_write); +err_sq: percpu_counter_destroy(&svcrdma_stat_sq_starve); +err_recv: percpu_counter_destroy(&svcrdma_stat_recv); +err_read: percpu_counter_destroy(&svcrdma_stat_read); +err: return rc; }
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/14769 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/14769 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
From: Zijian Zhang zijianzhang@bytedance.com
stable inclusion from stable-v6.6.66 commit 206d56f41a1509cadd06e2178c26cb830e45057d category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBEANQ CVE: CVE-2024-56633
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
-------------------------------------------------
[ Upstream commit ca70b8baf2bd125b2a4d96e76db79375c07d7ff2 ]
The current sk memory accounting logic in __SK_REDIRECT is pre-uncharging tosend bytes, which is either msg->sg.size or a smaller value apply_bytes.
Potential problems with this strategy are as follows:
- If the actual sent bytes are smaller than tosend, we need to charge some bytes back, as in line 487, which is okay but seems not clean.
- When tosend is set to apply_bytes, as in line 417, and (ret < 0), we may miss uncharging (msg->sg.size - apply_bytes) bytes.
[...] 415 tosend = msg->sg.size; 416 if (psock->apply_bytes && psock->apply_bytes < tosend) 417 tosend = psock->apply_bytes; [...] 443 sk_msg_return(sk, msg, tosend); 444 release_sock(sk); 446 origsize = msg->sg.size; 447 ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress, 448 msg, tosend, flags); 449 sent = origsize - msg->sg.size; [...] 454 lock_sock(sk); 455 if (unlikely(ret < 0)) { 456 int free = sk_msg_free_nocharge(sk, msg); 458 if (!cork) 459 *copied -= free; 460 } [...] 487 if (eval == __SK_REDIRECT) 488 sk_mem_charge(sk, tosend - sent); [...]
When running the selftest test_txmsg_redir_wait_sndmem with txmsg_apply, the following warning will be reported:
------------[ cut here ]------------ WARNING: CPU: 6 PID: 57 at net/ipv4/af_inet.c:156 inet_sock_destruct+0x190/0x1a0 Modules linked in: CPU: 6 UID: 0 PID: 57 Comm: kworker/6:0 Not tainted 6.12.0-rc1.bm.1-amd64+ #43 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 Workqueue: events sk_psock_destroy RIP: 0010:inet_sock_destruct+0x190/0x1a0 RSP: 0018:ffffad0a8021fe08 EFLAGS: 00010206 RAX: 0000000000000011 RBX: ffff9aab4475b900 RCX: ffff9aab481a0800 RDX: 0000000000000303 RSI: 0000000000000011 RDI: ffff9aab4475b900 RBP: ffff9aab4475b990 R08: 0000000000000000 R09: ffff9aab40050ec0 R10: 0000000000000000 R11: ffff9aae6fdb1d01 R12: ffff9aab49c60400 R13: ffff9aab49c60598 R14: ffff9aab49c60598 R15: dead000000000100 FS: 0000000000000000(0000) GS:ffff9aae6fd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffec7e47bd8 CR3: 00000001a1a1c004 CR4: 0000000000770ef0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: <TASK> ? __warn+0x89/0x130 ? inet_sock_destruct+0x190/0x1a0 ? report_bug+0xfc/0x1e0 ? handle_bug+0x5c/0xa0 ? exc_invalid_op+0x17/0x70 ? asm_exc_invalid_op+0x1a/0x20 ? inet_sock_destruct+0x190/0x1a0 __sk_destruct+0x25/0x220 sk_psock_destroy+0x2b2/0x310 process_scheduled_works+0xa3/0x3e0 worker_thread+0x117/0x240 ? __pfx_worker_thread+0x10/0x10 kthread+0xcf/0x100 ? __pfx_kthread+0x10/0x10 ret_from_fork+0x31/0x40 ? __pfx_kthread+0x10/0x10 ret_from_fork_asm+0x1a/0x30 </TASK> ---[ end trace 0000000000000000 ]---
In __SK_REDIRECT, a more concise way is delaying the uncharging after sent bytes are finalized, and uncharge this value. When (ret < 0), we shall invoke sk_msg_free.
Same thing happens in case __SK_DROP, when tosend is set to apply_bytes, we may miss uncharging (msg->sg.size - apply_bytes) bytes. The same warning will be reported in selftest.
[...] 468 case __SK_DROP: 469 default: 470 sk_msg_free_partial(sk, msg, tosend); 471 sk_msg_apply_bytes(psock, tosend); 472 *copied -= (tosend + delta); 473 return -EACCES; [...]
So instead of sk_msg_free_partial we can do sk_msg_free here.
Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Fixes: 8ec95b94716a ("bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues") Signed-off-by: Zijian Zhang zijianzhang@bytedance.com Signed-off-by: Daniel Borkmann daniel@iogearbox.net Acked-by: John Fastabend john.fastabend@gmail.com Link: https://lore.kernel.org/bpf/20241016234838.3167769-3-zijianzhang@bytedance.c... Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Liu Jian liujian56@huawei.com --- net/ipv4/tcp_bpf.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index c0802042e67f6..a7fd39d9c44ed 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -451,7 +451,6 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, cork = true; psock->cork = NULL; } - sk_msg_return(sk, msg, tosend); release_sock(sk);
origsize = msg->sg.size; @@ -463,8 +462,9 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, sock_put(sk_redir);
lock_sock(sk); + sk_mem_uncharge(sk, sent); if (unlikely(ret < 0)) { - int free = sk_msg_free_nocharge(sk, msg); + int free = sk_msg_free(sk, msg);
if (!cork) *copied -= free; @@ -478,7 +478,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, break; case __SK_DROP: default: - sk_msg_free_partial(sk, msg, tosend); + sk_msg_free(sk, msg); sk_msg_apply_bytes(psock, tosend); *copied -= (tosend + delta); return -EACCES; @@ -494,11 +494,8 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, } if (msg && msg->sg.data[msg->sg.start].page_link && - msg->sg.data[msg->sg.start].length) { - if (eval == __SK_REDIRECT) - sk_mem_charge(sk, tosend - sent); + msg->sg.data[msg->sg.start].length) goto more_data; - } } return ret; }
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/14772 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Q...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/14772 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Q...