
From: Enzo Matsumiya <ematsumiya@suse.de> mainline inclusion from mainline-v6.10-rc2 commit e9f2517a3e18a54a3943c098d2226b245d488801 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IBI5BZ CVE: CVE-2024-54680 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Commit ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.") fixed a netns UAF by manually enabled socket refcounting (sk->sk_net_refcnt=1 and sock_inuse_add(net, 1)). The reason the patch worked for that bug was because we now hold references to the netns (get_net_track() gets a ref internally) and they're properly released (internally, on __sk_destruct()), but only because sk->sk_net_refcnt was set. Problem: (this happens regardless of CONFIG_NET_NS_REFCNT_TRACKER and regardless if init_net or other) Setting sk->sk_net_refcnt=1 *manually* and *after* socket creation is not only out of cifs scope, but also technically wrong -- it's set conditionally based on user (=1) vs kernel (=0) sockets. And net/ implementations seem to base their user vs kernel space operations on it. e.g. upon TCP socket close, the TCP timers are not cleared because sk->sk_net_refcnt=1: (cf. commit 151c9c724d05 ("tcp: properly terminate timers for kernel sockets")) net/ipv4/tcp.c: void tcp_close(struct sock *sk, long timeout) { lock_sock(sk); __tcp_close(sk, timeout); release_sock(sk); if (!sk->sk_net_refcnt) inet_csk_clear_xmit_timers_sync(sk); sock_put(sk); } Which will throw a lockdep warning and then, as expected, deadlock on tcp_write_timer(). A way to reproduce this is by running the reproducer from ef7134c7fc48 and then 'rmmod cifs'. A few seconds later, the deadlock/lockdep warning shows up. Fix: We shouldn't mess with socket internals ourselves, so do not set sk_net_refcnt manually. Also change __sock_create() to sock_create_kern() for explicitness. As for non-init_net network namespaces, we deal with it the best way we can -- hold an extra netns reference for server->ssocket and drop it when it's released. This ensures that the netns still exists whenever we need to create/destroy server->ssocket, but is not directly tied to it. Fixes: ef7134c7fc48 ("smb: client: Fix use-after-free of network namespace.") Cc: stable@vger.kernel.org Signed-off-by: Enzo Matsumiya <ematsumiya@suse.de> Signed-off-by: Steve French <stfrench@microsoft.com> Conflicts: fs/cifs/connect.c fs/smb/client/connect.c [Conflicts due to fs/cifs move to fs/smb/client] Signed-off-by: Wang Zhaolong <wangzhaolong1@huawei.com> --- fs/cifs/connect.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index bd9365d375f9..ee9bdc4ca569 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -811,6 +811,9 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) if (server->ssocket) { sock_release(server->ssocket); server->ssocket = NULL; + + /* Release netns reference for the socket. */ + put_net(cifs_net_ns(server)); } if (!list_empty(&server->pending_mid_q)) { @@ -859,6 +862,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) */ } + /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); kfree(server->hostname); kfree(server); @@ -2526,6 +2530,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->ops = volume_info->ops; tcp_ses->vals = volume_info->vals; + + /* Grab netns reference for this server. */ cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns)); tcp_ses->hostname = extract_hostname(volume_info->UNC); if (IS_ERR(tcp_ses->hostname)) { @@ -2627,14 +2633,17 @@ cifs_get_tcp_session(struct smb_vol *volume_info) out_err_crypto_release: cifs_crypto_secmech_release(tcp_ses); + /* Release netns reference for this server. */ put_net(cifs_net_ns(tcp_ses)); out_err: if (tcp_ses) { if (!IS_ERR(tcp_ses->hostname)) kfree(tcp_ses->hostname); - if (tcp_ses->ssocket) + if (tcp_ses->ssocket) { sock_release(tcp_ses->ssocket); + put_net(cifs_net_ns(tcp_ses)); + } kfree(tcp_ses); } return ERR_PTR(rc); @@ -3629,22 +3638,22 @@ generic_ip_connect(struct TCP_Server_Info *server) if (socket == NULL) { struct net *net = cifs_net_ns(server); - struct sock *sk; - rc = __sock_create(net, sfamily, SOCK_STREAM, - IPPROTO_TCP, &socket, 1); + rc = sock_create_kern(net, sfamily, SOCK_STREAM, + IPPROTO_TCP, &socket); if (rc < 0) { cifs_dbg(VFS, "Error %d creating socket\n", rc); server->ssocket = NULL; return rc; } - sk = socket->sk; - sk->sk_net_refcnt = 1; + /* + * Grab netns reference for the socket. + * + * It'll be released here, on error, or in clean_demultiplex_info() upon server + * teardown. + */ get_net(net); -#ifdef CONFIG_PROC_FS - this_cpu_add(*net->core.sock_inuse, 1); -#endif /* BB other socket options to set KEEPALIVE, NODELAY? */ cifs_dbg(FYI, "Socket created\n"); @@ -3657,8 +3666,10 @@ generic_ip_connect(struct TCP_Server_Info *server) } rc = bind_socket(server); - if (rc < 0) + if (rc < 0) { + put_net(cifs_net_ns(server)); return rc; + } /* * Eventually check for other socket options to change from @@ -3692,6 +3703,7 @@ generic_ip_connect(struct TCP_Server_Info *server) rc = socket->ops->connect(socket, saddr, slen, 0); if (rc < 0) { cifs_dbg(FYI, "Error %d connecting to server\n", rc); + put_net(cifs_net_ns(server)); sock_release(socket); server->ssocket = NULL; return rc; @@ -3700,6 +3712,9 @@ generic_ip_connect(struct TCP_Server_Info *server) if (sport == htons(RFC1001_PORT)) rc = ip_rfc1001_connect(server); + if (rc < 0) + put_net(cifs_net_ns(server)); + return rc; } -- 2.39.2