[PATCH OLK-6.6 v2 0/4] bpf: sockmap fixes
patch of sockmap fixes Dong Chenchen (1): Revert "bpf, sockmap: Add sk_rmem_alloc check for sockmap" Levi Zim (2): skmsg: Return copied bytes in sk_msg_memcopy_from_iter tcp_bpf: Fix copied value in tcp_bpf_sendmsg Zijian Zhang (1): tcp_bpf: Add sk_rmem_alloc related logic for tcp_bpf ingress redirection include/linux/skmsg.h | 11 ++++++++--- net/core/skmsg.c | 11 ++++------- net/ipv4/tcp_bpf.c | 23 +++++++---------------- 3 files changed, 19 insertions(+), 26 deletions(-) -- 2.25.1
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8376 -------------------------------- This reverts commit af4375846a8030a5df31ddf49c5653623aa01d38. Use the mainline solution for the fix. Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/core/skmsg.c | 10 +--------- net/ipv4/tcp_bpf.c | 11 ----------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index fa2ccc6de0a4..4cf6e080788b 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -445,12 +445,8 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (likely(!peek)) { sge->offset += copy; sge->length -= copy; - if (!msg_rx->skb) { -#if IS_ENABLED(CONFIG_NETACC_TERRACE) - atomic_sub(copy, &sk->sk_rmem_alloc); -#endif + if (!msg_rx->skb) sk_mem_uncharge(sk, copy); - } msg_rx->sg.size -= copy; if (!sge->length) { @@ -797,10 +793,6 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) { list_del(&msg->list); -#if IS_ENABLED(CONFIG_NETACC_TERRACE) - if (!msg->skb) - atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc); -#endif sk_msg_free(psock->sk, msg); kfree(msg); } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 72974e4940a9..de22464314fb 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -43,13 +43,6 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, return -ENOMEM; lock_sock(sk); -#if IS_ENABLED(CONFIG_NETACC_TERRACE) - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) { - kfree(tmp); - release_sock(sk); - return -EAGAIN; - } -#endif tmp->sg.start = msg->sg.start; i = msg->sg.start; do { @@ -82,10 +75,6 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, if (!ret) { msg->sg.start = i; sk_psock_queue_msg(psock, tmp); -#if IS_ENABLED(CONFIG_NETACC_TERRACE) - if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) - atomic_add(tmp->sg.size, 
&sk->sk_rmem_alloc); -#endif sk_psock_data_ready(sk, psock); } else { sk_msg_free(sk, tmp); -- 2.25.1
From: Zijian Zhang <zijianzhang@bytedance.com> stable inclusion from stable-v6.6.68 commit 343a932a05f41d321b6643fc28d6315b2457d9f8 category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8376 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit d888b7af7c149c115dd6ac772cc11c375da3e17c ] When we do sk_psock_verdict_apply->sk_psock_skb_ingress, an sk_msg will be created out of the skb, and the rmem accounting of the sk_msg will be handled by the skb. For sk_msgs in the __SK_REDIRECT case of tcp_bpf_send_verdict, when redirecting to the ingress of a socket, although we call sk_rmem_schedule and add the sk_msg to the ingress_msg of sk_redir, we do not update sk_rmem_alloc. As a result, except for the global memory limit, the rmem of sk_redir is nearly unlimited. Thus, add sk_rmem_alloc related logic to limit the recv buffer. Since the functions sk_msg_recvmsg and __sk_psock_purge_ingress_msg are used in both of these paths, we use "msg->skb" to test whether the sk_msg is backed by an skb. If it is not, we must do the memory accounting explicitly. Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface") Signed-off-by: Zijian Zhang <zijianzhang@bytedance.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Reviewed-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/20241210012039.1669389-3-zijianzhang@bytedance.c... 
Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- include/linux/skmsg.h | 11 ++++++++--- net/core/skmsg.c | 6 +++++- net/ipv4/tcp_bpf.c | 4 +++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h index fdedb7a29c0e..7f89245aee9b 100644 --- a/include/linux/skmsg.h +++ b/include/linux/skmsg.h @@ -319,17 +319,22 @@ static inline void sock_drop(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); } -static inline void sk_psock_queue_msg(struct sk_psock *psock, +static inline bool sk_psock_queue_msg(struct sk_psock *psock, struct sk_msg *msg) { + bool ret; + spin_lock_bh(&psock->ingress_lock); - if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) + if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) { list_add_tail(&msg->list, &psock->ingress_msg); - else { + ret = true; + } else { sk_msg_free(psock->sk, msg); kfree(msg); + ret = false; } spin_unlock_bh(&psock->ingress_lock); + return ret; } static inline struct sk_msg *sk_psock_dequeue_msg(struct sk_psock *psock) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 4cf6e080788b..8c72db242056 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -445,8 +445,10 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (likely(!peek)) { sge->offset += copy; sge->length -= copy; - if (!msg_rx->skb) + if (!msg_rx->skb) { sk_mem_uncharge(sk, copy); + atomic_sub(copy, &sk->sk_rmem_alloc); + } msg_rx->sg.size -= copy; if (!sge->length) { @@ -793,6 +795,8 @@ static void __sk_psock_purge_ingress_msg(struct sk_psock *psock) list_for_each_entry_safe(msg, tmp, &psock->ingress_msg, list) { list_del(&msg->list); + if (!msg->skb) + atomic_sub(msg->sg.size, &psock->sk->sk_rmem_alloc); sk_msg_free(psock->sk, msg); kfree(msg); } diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index de22464314fb..dc8cbee30cfd 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -56,6 +56,7 @@ 
static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, } sk_mem_charge(sk, size); + atomic_add(size, &sk->sk_rmem_alloc); sk_msg_xfer(tmp, msg, i, size); copied += size; if (sge->length) @@ -74,7 +75,8 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, if (!ret) { msg->sg.start = i; - sk_psock_queue_msg(psock, tmp); + if (!sk_psock_queue_msg(psock, tmp)) + atomic_sub(copied, &sk->sk_rmem_alloc); sk_psock_data_ready(sk, psock); } else { sk_msg_free(sk, tmp); -- 2.25.1
From: Levi Zim <rsworktech@outlook.com> mainline inclusion from mainline-v6.13-rc3 commit fdf478d236dcf0f1f68534df5d456ced625195bd category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8376 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Previously sk_msg_memcopy_from_iter returns the copied bytes from the last copy_from_iter{,_nocache} call upon success. This commit changes it to return the total number of copied bytes on success. Signed-off-by: Levi Zim <rsworktech@outlook.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Tested-by: Björn Töpel <bjorn@kernel.org> Reviewed-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/20241130-tcp-bpf-sendmsg-v1-1-bae583d014f3@outlo... Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/core/skmsg.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 8c72db242056..3429c6e2ce21 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -369,8 +369,8 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, struct sk_msg *msg, u32 bytes) { int ret = -ENOSPC, i = msg->sg.curr; + u32 copy, buf_size, copied = 0; struct scatterlist *sge; - u32 copy, buf_size; void *to; do { @@ -397,6 +397,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, goto out; } bytes -= copy; + copied += copy; if (!bytes) break; msg->sg.copybreak = 0; @@ -404,7 +405,7 @@ int sk_msg_memcopy_from_iter(struct sock *sk, struct iov_iter *from, } while (i != msg->sg.end); out: msg->sg.curr = i; - return ret; + return (ret < 0) ? ret : copied; } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); -- 2.25.1
From: Levi Zim <rsworktech@outlook.com> mainline inclusion from mainline-v6.13-rc3 commit 5153a75ef34b3f7478ca918044d0f05eed8fb3f9 category: bugfix bugzilla: https://atomgit.com/openeuler/kernel/issues/8376 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- bpf kselftest sockhash::test_txmsg_cork_hangs in test_sockmap.c triggers a kernel NULL pointer dereference: BUG: kernel NULL pointer dereference, address: 0000000000000008 ? __die_body+0x6e/0xb0 ? __die+0x8b/0xa0 ? page_fault_oops+0x358/0x3c0 ? local_clock+0x19/0x30 ? lock_release+0x11b/0x440 ? kernelmode_fixup_or_oops+0x54/0x60 ? __bad_area_nosemaphore+0x4f/0x210 ? mmap_read_unlock+0x13/0x30 ? bad_area_nosemaphore+0x16/0x20 ? do_user_addr_fault+0x6fd/0x740 ? prb_read_valid+0x1d/0x30 ? exc_page_fault+0x55/0xd0 ? asm_exc_page_fault+0x2b/0x30 ? splice_to_socket+0x52e/0x630 ? shmem_file_splice_read+0x2b1/0x310 direct_splice_actor+0x47/0x70 splice_direct_to_actor+0x133/0x300 ? do_splice_direct+0x90/0x90 do_splice_direct+0x64/0x90 ? __ia32_sys_tee+0x30/0x30 do_sendfile+0x214/0x300 __se_sys_sendfile64+0x8e/0xb0 __x64_sys_sendfile64+0x25/0x30 x64_sys_call+0xb82/0x2840 do_syscall_64+0x75/0x110 entry_SYSCALL_64_after_hwframe+0x4b/0x53 This is caused by tcp_bpf_sendmsg() returning a larger value (12289) than size (8192), which causes the while loop in splice_to_socket() to release an uninitialized pipe buf. The underlying cause is that this code assumes sk_msg_memcopy_from_iter() will copy all bytes upon success, but it might actually copy only part of them. This commit changes it to use the real copied bytes. Signed-off-by: Levi Zim <rsworktech@outlook.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Tested-by: Björn Töpel <bjorn@kernel.org> Reviewed-by: John Fastabend <john.fastabend@gmail.com> Link: https://lore.kernel.org/bpf/20241130-tcp-bpf-sendmsg-v1-2-bae583d014f3@outlo... 
Signed-off-by: Dong Chenchen <dongchenchen2@huawei.com> --- net/ipv4/tcp_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index dc8cbee30cfd..0e56f3baf98a 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -498,7 +498,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock, static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_msg tmp, *msg_tx = NULL; - int copied = 0, err = 0; + int copied = 0, err = 0, ret = 0; struct sk_psock *psock; long timeo; int flags; @@ -541,14 +541,14 @@ static int tcp_bpf_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) copy = msg_tx->sg.size - osize; } - err = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx, + ret = sk_msg_memcopy_from_iter(sk, &msg->msg_iter, msg_tx, copy); - if (err < 0) { + if (ret < 0) { sk_msg_trim(sk, msg_tx, osize); goto out_err; } - copied += copy; + copied += ret; if (psock->cork_bytes) { if (size > psock->cork_bytes) psock->cork_bytes = 0; -- 2.25.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/20727 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/KWY... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/20727 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/KWY...
participants (2)
-
Dong Chenchen -
patchwork bot