From: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com
mainline inclusion from mainline-v5.15 commit ba316be1b6a00db7126ed9a39f9bee434a508043 category: bugfix bugzilla: NA CVE: CVE-2021-3640
---------------------------
struct sock.sk_timer should be used as a sock cleanup timer. However, SCO uses it to implement sock timeouts.
This causes issues because struct sock.sk_timer's callback is run in an IRQ context, and the timer callback function sco_sock_timeout takes a spin lock on the socket. However, other functions such as sco_conn_del and sco_conn_ready take the spin lock with interrupts enabled.
This inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} lock usage could lead to deadlocks as reported by Syzbot [1]: CPU0 ---- lock(slock-AF_BLUETOOTH-BTPROTO_SCO); <Interrupt> lock(slock-AF_BLUETOOTH-BTPROTO_SCO);
To fix this, we use delayed work to implement SCO sock timouts instead. This allows us to avoid taking the spin lock on the socket in an IRQ context, and corrects the misuse of struct sock.sk_timer.
As a note, cancel_delayed_work is used instead of cancel_delayed_work_sync in sco_sock_set_timer and sco_sock_clear_timer to avoid a deadlock. In the future, the call to bh_lock_sock inside sco_sock_timeout should be changed to lock_sock to synchronize with other functions using lock_sock. However, since sco_sock_set_timer and sco_sock_clear_timer are sometimes called under the locked socket (in sco_connect and __sco_sock_close), cancel_delayed_work_sync might cause them to sleep until an sco_sock_timeout that has started finishes running. But sco_sock_timeout would also sleep until it can grab the lock_sock.
Using cancel_delayed_work is fine because sco_sock_timeout does not change from run to run, hence there is no functional difference between: 1. waiting for a timeout to finish running before scheduling another timeout 2. scheduling another timeout while a timeout is running.
Link: https://syzkaller.appspot.com/bug?id=9089d89de0502e120f234ca0fc8a703f7368b31... [1] Reported-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com Tested-by: syzbot+2f6d7c28bb4bf7e82060@syzkaller.appspotmail.com Signed-off-by: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com Signed-off-by: Luiz Augusto von Dentz luiz.von.dentz@intel.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/bluetooth/sco.c | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index a4ca55df73908..e30151e81566f 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -48,6 +48,8 @@ struct sco_conn { spinlock_t lock; struct sock *sk;
+ struct delayed_work timeout_work; + unsigned int mtu; };
@@ -73,9 +75,20 @@ struct sco_pinfo { #define SCO_CONN_TIMEOUT (HZ * 40) #define SCO_DISCONN_TIMEOUT (HZ * 2)
-static void sco_sock_timeout(struct timer_list *t) +static void sco_sock_timeout(struct work_struct *work) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sco_conn *conn = container_of(work, struct sco_conn, + timeout_work.work); + struct sock *sk; + + sco_conn_lock(conn); + sk = conn->sk; + if (sk) + sock_hold(sk); + sco_conn_unlock(conn); + + if (!sk) + return;
BT_DBG("sock %p state %d", sk, sk->sk_state);
@@ -90,14 +103,21 @@ static void sco_sock_timeout(struct timer_list *t)
static void sco_sock_set_timer(struct sock *sk, long timeout) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d timeout %ld", sk, sk->sk_state, timeout); - sk_reset_timer(sk, &sk->sk_timer, jiffies + timeout); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); + schedule_delayed_work(&sco_pi(sk)->conn->timeout_work, timeout); }
static void sco_sock_clear_timer(struct sock *sk) { + if (!sco_pi(sk)->conn) + return; + BT_DBG("sock %p state %d", sk, sk->sk_state); - sk_stop_timer(sk, &sk->sk_timer); + cancel_delayed_work(&sco_pi(sk)->conn->timeout_work); }
/* ---- SCO connections ---- */ @@ -178,6 +198,9 @@ static void sco_conn_del(struct hci_conn *hcon, int err) bh_unlock_sock(sk); sco_sock_kill(sk); sock_put(sk); + + /* Ensure no more work items will run before freeing conn. */ + cancel_delayed_work_sync(&conn->timeout_work); }
hcon->sco_data = NULL; @@ -192,6 +215,8 @@ static void __sco_chan_add(struct sco_conn *conn, struct sock *sk, sco_pi(sk)->conn = conn; conn->sk = sk;
+ INIT_DELAYED_WORK(&conn->timeout_work, sco_sock_timeout); + if (parent) bt_accept_enqueue(parent, sk, true); } @@ -488,8 +513,6 @@ static struct sock *sco_sock_alloc(struct net *net, struct socket *sock,
sco_pi(sk)->setting = BT_VOICE_CVSD_16BIT;
- timer_setup(&sk->sk_timer, sco_sock_timeout, 0); - bt_sock_link(&sco_sk_list, sk); return sk; }
From: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com
mainline inclusion from mainline-v5.15 commit 734bc5ff783115aa3164f4e9dd5967ae78e0a8ab category: bugfix bugzilla: NA CVE: CVE-2021-3640
---------------------------
In a future patch, calls to bh_lock_sock in sco.c should be replaced by lock_sock now that none of the functions are run in IRQ context.
However, doing so results in a circular locking dependency: Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com
====================================================== WARNING: possible circular locking dependency detected 5.14.0-rc4-syzkaller #0 Not tainted ------------------------------------------------------ syz-executor.2/14867 is trying to acquire lock: ffff88803e3c1120 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}, at: lock_sock include/net/sock.h:1613 [inline] ffff88803e3c1120 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}, at: sco_conn_del+0x12a/0x2a0 net/bluetooth/sco.c:191
but task is already holding lock: ffffffff8d2dc7c8 (hci_cb_list_lock){+.+.}-{3:3}, at: hci_disconn_cfm include/net/bluetooth/hci_core.h:1497 [inline] ffffffff8d2dc7c8 (hci_cb_list_lock){+.+.}-{3:3}, at: hci_conn_hash_flush+0xda/0x260 net/bluetooth/hci_conn.c:1608
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (hci_cb_list_lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:959 [inline] __mutex_lock+0x12a/0x10a0 kernel/locking/mutex.c:1104 hci_connect_cfm include/net/bluetooth/hci_core.h:1482 [inline] hci_remote_features_evt net/bluetooth/hci_event.c:3263 [inline] hci_event_packet+0x2f4d/0x7c50 net/bluetooth/hci_event.c:6240 hci_rx_work+0x4f8/0xd30 net/bluetooth/hci_core.c:5122 process_one_work+0x98d/0x1630 kernel/workqueue.c:2276 worker_thread+0x658/0x11f0 kernel/workqueue.c:2422 kthread+0x3e5/0x4d0 kernel/kthread.c:319 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295
-> #1 (&hdev->lock){+.+.}-{3:3}: __mutex_lock_common kernel/locking/mutex.c:959 [inline] __mutex_lock+0x12a/0x10a0 kernel/locking/mutex.c:1104 sco_connect net/bluetooth/sco.c:245 [inline] sco_sock_connect+0x227/0xa10 net/bluetooth/sco.c:601 __sys_connect_file+0x155/0x1a0 net/socket.c:1879 __sys_connect+0x161/0x190 net/socket.c:1896 __do_sys_connect net/socket.c:1906 [inline] __se_sys_connect net/socket.c:1903 [inline] __x64_sys_connect+0x6f/0xb0 net/socket.c:1903 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae
-> #0 (sk_lock-AF_BLUETOOTH-BTPROTO_SCO){+.+.}-{0:0}: check_prev_add kernel/locking/lockdep.c:3051 [inline] check_prevs_add kernel/locking/lockdep.c:3174 [inline] validate_chain kernel/locking/lockdep.c:3789 [inline] __lock_acquire+0x2a07/0x54a0 kernel/locking/lockdep.c:5015 lock_acquire kernel/locking/lockdep.c:5625 [inline] lock_acquire+0x1ab/0x510 kernel/locking/lockdep.c:5590 lock_sock_nested+0xca/0x120 net/core/sock.c:3170 lock_sock include/net/sock.h:1613 [inline] sco_conn_del+0x12a/0x2a0 net/bluetooth/sco.c:191 sco_disconn_cfm+0x71/0xb0 net/bluetooth/sco.c:1202 hci_disconn_cfm include/net/bluetooth/hci_core.h:1500 [inline] hci_conn_hash_flush+0x127/0x260 net/bluetooth/hci_conn.c:1608 hci_dev_do_close+0x528/0x1130 net/bluetooth/hci_core.c:1778 hci_unregister_dev+0x1c0/0x5a0 net/bluetooth/hci_core.c:4015 vhci_release+0x70/0xe0 drivers/bluetooth/hci_vhci.c:340 __fput+0x288/0x920 fs/file_table.c:280 task_work_run+0xdd/0x1a0 kernel/task_work.c:164 exit_task_work include/linux/task_work.h:32 [inline] do_exit+0xbd4/0x2a60 kernel/exit.c:825 do_group_exit+0x125/0x310 kernel/exit.c:922 get_signal+0x47f/0x2160 kernel/signal.c:2808 arch_do_signal_or_restart+0x2a9/0x1c40 arch/x86/kernel/signal.c:865 handle_signal_work kernel/entry/common.c:148 [inline] exit_to_user_mode_loop kernel/entry/common.c:172 [inline] exit_to_user_mode_prepare+0x17d/0x290 kernel/entry/common.c:209 __syscall_exit_to_user_mode_work kernel/entry/common.c:291 [inline] syscall_exit_to_user_mode+0x19/0x60 kernel/entry/common.c:302 ret_from_fork+0x15/0x30 arch/x86/entry/entry_64.S:288
other info that might help us debug this:
Chain exists of: sk_lock-AF_BLUETOOTH-BTPROTO_SCO --> &hdev->lock --> hci_cb_list_lock
Possible unsafe locking scenario:
CPU0 CPU1 ---- ---- lock(hci_cb_list_lock); lock(&hdev->lock); lock(hci_cb_list_lock); lock(sk_lock-AF_BLUETOOTH-BTPROTO_SCO);
*** DEADLOCK ***
The issue is that the lock hierarchy should go from &hdev->lock --> hci_cb_list_lock --> sk_lock-AF_BLUETOOTH-BTPROTO_SCO. For example, one such call trace is:
hci_dev_do_close(): hci_dev_lock(); hci_conn_hash_flush(): hci_disconn_cfm(): mutex_lock(&hci_cb_list_lock); sco_disconn_cfm(): sco_conn_del(): lock_sock(sk);
However, in sco_sock_connect, we call lock_sock before calling hci_dev_lock inside sco_connect, thus inverting the lock hierarchy.
We fix this by pulling the call to hci_dev_lock out from sco_connect.
Signed-off-by: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com Signed-off-by: Luiz Augusto von Dentz luiz.von.dentz@intel.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/bluetooth/sco.c | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e30151e81566f..8345d17e98be4 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -236,44 +236,32 @@ static int sco_chan_add(struct sco_conn *conn, struct sock *sk, return err; }
-static int sco_connect(struct sock *sk) +static int sco_connect(struct hci_dev *hdev, struct sock *sk) { struct sco_conn *conn; struct hci_conn *hcon; - struct hci_dev *hdev; int err, type;
BT_DBG("%pMR -> %pMR", &sco_pi(sk)->src, &sco_pi(sk)->dst);
- hdev = hci_get_route(&sco_pi(sk)->dst, &sco_pi(sk)->src, BDADDR_BREDR); - if (!hdev) - return -EHOSTUNREACH; - - hci_dev_lock(hdev); - if (lmp_esco_capable(hdev) && !disable_esco) type = ESCO_LINK; else type = SCO_LINK;
if (sco_pi(sk)->setting == BT_VOICE_TRANSPARENT && - (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) { - err = -EOPNOTSUPP; - goto done; - } + (!lmp_transp_capable(hdev) || !lmp_esco_capable(hdev))) + return -EOPNOTSUPP;
hcon = hci_connect_sco(hdev, type, &sco_pi(sk)->dst, sco_pi(sk)->setting); - if (IS_ERR(hcon)) { - err = PTR_ERR(hcon); - goto done; - } + if (IS_ERR(hcon)) + return PTR_ERR(hcon);
conn = sco_conn_add(hcon); if (!conn) { hci_conn_drop(hcon); - err = -ENOMEM; - goto done; + return -ENOMEM; }
/* Update source addr of the socket */ @@ -281,7 +269,7 @@ static int sco_connect(struct sock *sk)
err = sco_chan_add(conn, sk, NULL); if (err) - goto done; + return err;
if (hcon->state == BT_CONNECTED) { sco_sock_clear_timer(sk); @@ -291,9 +279,6 @@ static int sco_connect(struct sock *sk) sco_sock_set_timer(sk, sk->sk_sndtimeo); }
-done: - hci_dev_unlock(hdev); - hci_dev_put(hdev); return err; }
@@ -577,6 +562,7 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen { struct sockaddr_sco *sa = (struct sockaddr_sco *) addr; struct sock *sk = sock->sk; + struct hci_dev *hdev; int err;
BT_DBG("sk %p", sk); @@ -591,12 +577,19 @@ static int sco_sock_connect(struct socket *sock, struct sockaddr *addr, int alen if (sk->sk_type != SOCK_SEQPACKET) return -EINVAL;
+ hdev = hci_get_route(&sa->sco_bdaddr, &sco_pi(sk)->src, BDADDR_BREDR); + if (!hdev) + return -EHOSTUNREACH; + hci_dev_lock(hdev); + lock_sock(sk);
/* Set destination address and psm */ bacpy(&sco_pi(sk)->dst, &sa->sco_bdaddr);
- err = sco_connect(sk); + err = sco_connect(hdev, sk); + hci_dev_unlock(hdev); + hci_dev_put(hdev); if (err) goto done;
From: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com
mainline inclusion from mainline-v5.15 commit 27c24fda62b601d6f9ca5e992502578c4310876f category: bugfix bugzilla: NA CVE: CVE-2021-3640
---------------------------
Since sco_sock_timeout is now scheduled using delayed work, it is no longer run in SOFTIRQ context. Hence bh_lock_sock is no longer necessary in SCO to synchronise between user contexts and SOFTIRQ processing.
As such, calls to bh_lock_sock should be replaced with lock_sock to synchronize with other concurrent processes that use lock_sock.
Signed-off-by: Desmond Cheong Zhi Xi desmondcheongzx@gmail.com Signed-off-by: Luiz Augusto von Dentz luiz.von.dentz@intel.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Xiu Jianfeng xiujianfeng@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- net/bluetooth/sco.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 8345d17e98be4..1d740bbcdb014 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -92,10 +92,10 @@ static void sco_sock_timeout(struct work_struct *work)
BT_DBG("sock %p state %d", sk, sk->sk_state);
- bh_lock_sock(sk); + lock_sock(sk); sk->sk_err = ETIMEDOUT; sk->sk_state_change(sk); - bh_unlock_sock(sk); + release_sock(sk);
sco_sock_kill(sk); sock_put(sk); @@ -192,10 +192,10 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
if (sk) { sock_hold(sk); - bh_lock_sock(sk); + lock_sock(sk); sco_sock_clear_timer(sk); sco_chan_del(sk, err); - bh_unlock_sock(sk); + release_sock(sk); sco_sock_kill(sk); sock_put(sk);
@@ -1044,10 +1044,10 @@ static void sco_conn_ready(struct sco_conn *conn)
if (sk) { sco_sock_clear_timer(sk); - bh_lock_sock(sk); + lock_sock(sk); sk->sk_state = BT_CONNECTED; sk->sk_state_change(sk); - bh_unlock_sock(sk); + release_sock(sk); } else { sco_conn_lock(conn);
@@ -1062,12 +1062,12 @@ static void sco_conn_ready(struct sco_conn *conn) return; }
- bh_lock_sock(parent); + lock_sock(parent);
sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC, 0); if (!sk) { - bh_unlock_sock(parent); + release_sock(parent); sco_conn_unlock(conn); return; } @@ -1088,7 +1088,7 @@ static void sco_conn_ready(struct sco_conn *conn) /* Wake up parent */ parent->sk_data_ready(parent);
- bh_unlock_sock(parent); + release_sock(parent);
sco_conn_unlock(conn); }