From: Eric Dumazet <edumazet@google.com>
mainline inclusion
from mainline-5.4-rc4
commit 8265792bf887
category: bugfix
bugzilla: 24071
CVE: NA
-------------------------------------------------
sk_add_backlog() callers usually read sk->sk_rcvbuf without owning the socket lock. This means the sk_rcvbuf value can be changed by other CPUs, and KCSAN complains.
Add READ_ONCE() annotations to document the lockless nature of these reads.
Note that writes over sk_rcvbuf should also use WRITE_ONCE(), but this will be done in separate patches to ease stable backports (if we decide this is relevant for stable trees).
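For illustration, the pattern these annotations address can be reduced to the following self-contained userspace sketch (not kernel code: the READ_ONCE()/WRITE_ONCE() macros below are simplified stand-ins for the kernel's, and struct sock_sketch, backlog_limit() and the buffer values are hypothetical):

	#include <stdio.h>

	/* Simplified stand-ins for the kernel's READ_ONCE()/WRITE_ONCE():
	 * a volatile access forces the compiler to emit exactly one
	 * untorn load/store instead of caching or refetching the value.
	 */
	#define READ_ONCE(x)     (*(volatile __typeof__(x) *)&(x))
	#define WRITE_ONCE(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

	/* Made-up stand-in for struct sock, reduced to the one field. */
	struct sock_sketch {
		int sk_rcvbuf;	/* written under the socket lock, read locklessly */
	};

	/* Lockless reader, like the sk_add_backlog() callers patched
	 * below: another CPU may rewrite sk_rcvbuf at any time, so the
	 * read is annotated instead of taking the socket lock.
	 */
	static int backlog_limit(struct sock_sketch *sk)
	{
		return READ_ONCE(sk->sk_rcvbuf);
	}

	int main(void)
	{
		struct sock_sketch sk = { .sk_rcvbuf = 212992 };	/* arbitrary */

		/* Writer side: the matching WRITE_ONCE() annotation that
		 * this changelog defers to separate patches.
		 */
		WRITE_ONCE(sk.sk_rcvbuf, 2 * 212992);
		printf("backlog limit = %d\n", backlog_limit(&sk));
		return 0;
	}

An unannotated plain read on the reader side is exactly what KCSAN flags in the report below.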
BUG: KCSAN: data-race in tcp_add_backlog / tcp_recvmsg
write to 0xffff88812ab369f8 of 8 bytes by interrupt on cpu 1:
 __sk_add_backlog include/net/sock.h:902 [inline]
 sk_add_backlog include/net/sock.h:933 [inline]
 tcp_add_backlog+0x45a/0xcc0 net/ipv4/tcp_ipv4.c:1737
 tcp_v4_rcv+0x1aba/0x1bf0 net/ipv4/tcp_ipv4.c:1925
 ip_protocol_deliver_rcu+0x51/0x470 net/ipv4/ip_input.c:204
 ip_local_deliver_finish+0x110/0x140 net/ipv4/ip_input.c:231
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ip_local_deliver+0x133/0x210 net/ipv4/ip_input.c:252
 dst_input include/net/dst.h:442 [inline]
 ip_rcv_finish+0x121/0x160 net/ipv4/ip_input.c:413
 NF_HOOK include/linux/netfilter.h:305 [inline]
 NF_HOOK include/linux/netfilter.h:299 [inline]
 ip_rcv+0x18f/0x1a0 net/ipv4/ip_input.c:523
 __netif_receive_skb_one_core+0xa7/0xe0 net/core/dev.c:5004
 __netif_receive_skb+0x37/0xf0 net/core/dev.c:5118
 netif_receive_skb_internal+0x59/0x190 net/core/dev.c:5208
 napi_skb_finish net/core/dev.c:5671 [inline]
 napi_gro_receive+0x28f/0x330 net/core/dev.c:5704
 receive_buf+0x284/0x30b0 drivers/net/virtio_net.c:1061
 virtnet_receive drivers/net/virtio_net.c:1323 [inline]
 virtnet_poll+0x436/0x7d0 drivers/net/virtio_net.c:1428
 napi_poll net/core/dev.c:6352 [inline]
 net_rx_action+0x3ae/0xa50 net/core/dev.c:6418
read to 0xffff88812ab369f8 of 8 bytes by task 7271 on cpu 0:
 tcp_recvmsg+0x470/0x1a30 net/ipv4/tcp.c:2047
 inet_recvmsg+0xbb/0x250 net/ipv4/af_inet.c:838
 sock_recvmsg_nosec net/socket.c:871 [inline]
 sock_recvmsg net/socket.c:889 [inline]
 sock_recvmsg+0x92/0xb0 net/socket.c:885
 sock_read_iter+0x15f/0x1e0 net/socket.c:967
 call_read_iter include/linux/fs.h:1864 [inline]
 new_sync_read+0x389/0x4f0 fs/read_write.c:414
 __vfs_read+0xb1/0xc0 fs/read_write.c:427
 vfs_read fs/read_write.c:461 [inline]
 vfs_read+0x143/0x2c0 fs/read_write.c:446
 ksys_read+0xd5/0x1b0 fs/read_write.c:587
 __do_sys_read fs/read_write.c:597 [inline]
 __se_sys_read fs/read_write.c:595 [inline]
 __x64_sys_read+0x4c/0x60 fs/read_write.c:595
 do_syscall_64+0xcf/0x2f0 arch/x86/entry/common.c:296
 entry_SYSCALL_64_after_hwframe+0x44/0xa9
Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 7271 Comm: syz-fuzzer Not tainted 5.3.0+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Huang Guobin <huangguobin4@huawei.com>
Reviewed-by: Wenan Mao <maowenan@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 net/core/sock.c     | 2 +-
 net/ipv4/tcp_ipv4.c | 2 +-
 net/llc/llc_conn.c  | 2 +-
 net/sctp/input.c    | 4 ++--
 net/tipc/socket.c   | 6 +++---
 net/x25/x25_dev.c   | 2 +-
 6 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/net/core/sock.c b/net/core/sock.c
index 935d9a8f12c2..f0b465789041 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -473,7 +473,7 @@ int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
 		rc = sk_backlog_rcv(sk, skb);
 
 		mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_);
-	} else if (sk_add_backlog(sk, skb, sk->sk_rcvbuf)) {
+	} else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
 		bh_unlock_sock(sk);
 		atomic_inc(&sk->sk_drops);
 		goto discard_and_relse;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c5d38eec7296..0830a3f75f9b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1618,7 +1618,7 @@ int tcp_v4_early_demux(struct sk_buff *skb)
 
 bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf;
+	u32 limit = READ_ONCE(sk->sk_rcvbuf) + READ_ONCE(sk->sk_sndbuf);
 	struct skb_shared_info *shinfo;
 	const struct tcphdr *th;
 	struct tcphdr *thtail;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index ed2aca12460c..f114e12635ba 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -832,7 +832,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
 	else {
 		dprintk("%s: adding to backlog...\n", __func__);
 		llc_set_backlog_type(skb, LLC_PACKET);
-		if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 			goto drop_unlock;
 	}
 out:
diff --git a/net/sctp/input.c b/net/sctp/input.c
index bfe29158afcc..81bb91118cee 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -334,7 +334,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	bh_lock_sock(sk);
 
 	if (sock_owned_by_user(sk)) {
-		if (sk_add_backlog(sk, skb, sk->sk_rcvbuf))
+		if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf)))
 			sctp_chunk_free(chunk);
 		else
 			backloged = 1;
@@ -370,7 +370,7 @@ static int sctp_add_backlog(struct sock *sk, struct sk_buff *skb)
 	struct sctp_ep_common *rcvr = chunk->rcvr;
 	int ret;
 
-	ret = sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+	ret = sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
 	if (!ret) {
 		/* Hold the assoc/ep while hanging on the backlog queue.
 		 * This way, we know structures we need will not disappear
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 5841d62ff580..b206a5dfd8dc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2097,13 +2097,13 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb)
 	struct tipc_msg *hdr = buf_msg(skb);
 
 	if (unlikely(msg_in_group(hdr)))
-		return sk->sk_rcvbuf;
+		return READ_ONCE(sk->sk_rcvbuf);
 
 	if (unlikely(!msg_connected(hdr)))
-		return sk->sk_rcvbuf << msg_importance(hdr);
+		return READ_ONCE(sk->sk_rcvbuf) << msg_importance(hdr);
 
 	if (likely(tsk->peer_caps & TIPC_BLOCK_FLOWCTL))
-		return sk->sk_rcvbuf;
+		return READ_ONCE(sk->sk_rcvbuf);
 
 	return FLOWCTL_MSG_LIM;
 }
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index 39231237e1c3..1763e19817fc 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -60,7 +60,7 @@ static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 	if (!sock_owned_by_user(sk)) {
 		queued = x25_process_rx_frame(sk, skb);
 	} else {
-		queued = !sk_add_backlog(sk, skb, sk->sk_rcvbuf);
+		queued = !sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf));
 	}
 	bh_unlock_sock(sk);
 	sock_put(sk);