From: Pavel Begunkov <asml.silence(a)gmail.com>
mainline inclusion
from mainline-v6.1-rc1
commit 0091bfc81741b8d3aeb3b7ab8636f911b2de6e80
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5WFKI
CVE: CVE-2022-2602
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit?h…
--------------------------------
Instead of putting io_uring's registered files in unix_gc(), we want it
to be done by io_uring itself. The trick here is to consider io_uring
registered files for cycle detection but not actually put them down.
Because io_uring can't register other ring instances, this will remove
all refs to the ring file, triggering the ->release path and cleanup
with io_ring_ctx_free().
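For background, here is a minimal userspace sketch of the registration
path whose cleanup this patch moves out of unix_gc(). It is illustrative
only: it assumes liburing is available, and the opened files are arbitrary
examples, not something this patch prescribes.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <liburing.h>

int main(void)
{
	struct io_uring ring;
	int fds[2], ret;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	fds[0] = open("/dev/null", O_RDONLY);
	fds[1] = open("/dev/zero", O_RDONLY);
	if (fds[0] < 0 || fds[1] < 0)
		return 1;

	/* These become "registered files"; the kernel pins them via an
	 * SCM_RIGHTS-style skb on the ring's internal unix socket, which
	 * is how unix_gc() comes to see them for cycle detection.
	 */
	ret = io_uring_register_files(&ring, fds, 2);
	if (ret < 0)
		fprintf(stderr, "register_files: %s\n", strerror(-ret));

	/* Dropping the last reference to the ring file goes through
	 * ->release and io_ring_ctx_free(), which is where the registered
	 * files are now put, instead of in unix_gc().
	 */
	io_uring_queue_exit(&ring);
	return 0;
}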
Cc: stable(a)vger.kernel.org
Fixes: 6b06314c47e1 ("io_uring: add file set registration")
Reported-and-tested-by: David Bouman <dbouman03(a)gmail.com>
Signed-off-by: Pavel Begunkov <asml.silence(a)gmail.com>
Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo(a)canonical.com>
[axboe: add kerneldoc comment to skb, fold in skb leak fix]
Signed-off-by: Jens Axboe <axboe(a)kernel.dk>
Conflicts:
fs/io_uring.c
include/linux/skbuff.h
Signed-off-by: Zhihao Cheng <chengzhihao1(a)huawei.com>
Reviewed-by: Yue Haibing <yuehaibing(a)huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
fs/io_uring.c | 1 +
include/linux/skbuff.h | 3 +++
net/unix/garbage.c | 20 ++++++++++++++++++++
3 files changed, 24 insertions(+)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d4e430b51098..7d7af6a0ef96 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -6835,6 +6835,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
}
skb->sk = sk;
+ skb->scm_io_uring = 1;
nr_files = 0;
fpl->user = get_uid(ctx->user);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index dbdb03ac557f..4524bef053b8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -654,6 +654,7 @@ typedef unsigned char *sk_buff_data_t;
* @transport_header: Transport layer header
* @network_header: Network layer header
* @mac_header: Link layer header
+ * @scm_io_uring: SKB holds io_uring registered files
* @tail: Tail pointer
* @end: End pointer
* @head: Head of buffer
@@ -800,6 +801,8 @@ struct sk_buff {
__u8 decrypted:1;
#endif
+ __u8 scm_io_uring:1;
+
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
#endif
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 4d283e26d816..5c9ff8df9136 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -209,6 +209,7 @@ void wait_for_unix_gc(void)
/* The external entry point: unix_gc() */
void unix_gc(void)
{
+ struct sk_buff *next_skb, *skb;
struct unix_sock *u;
struct unix_sock *next;
struct sk_buff_head hitlist;
@@ -302,11 +303,30 @@ void unix_gc(void)
spin_unlock(&unix_gc_lock);
+ /* We need io_uring to clean its registered files, ignore all io_uring
+ * originated skbs. It's fine as io_uring doesn't keep references to
+ * other io_uring instances and so killing all other files in the cycle
+ * will put all io_uring references forcing it to go through normal
+ * release path eventually putting registered files.
+ */
+ skb_queue_walk_safe(&hitlist, skb, next_skb) {
+ if (skb->scm_io_uring) {
+ __skb_unlink(skb, &hitlist);
+ skb_queue_tail(&skb->sk->sk_receive_queue, skb);
+ }
+ }
+
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
spin_lock(&unix_gc_lock);
+ /* There could be io_uring registered files, just push them back to
+ * the inflight list
+ */
+ list_for_each_entry_safe(u, next, &gc_candidates, link)
+ list_move_tail(&u->link, &gc_inflight_list);
+
/* All candidates should have been detached by now. */
BUG_ON(!list_empty(&gc_candidates));
--
2.25.1
From: Luo Meng <luomeng12(a)huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I5WBID
CVE: NA
--------------------------------
When dm_resume() and dm_destroy() run concurrently, a use-after-free
(UAF) can occur.
One such racing sequence is shown below:
              use                       free
do_resume                             |
  __find_device_hash_cell             |
    dm_get                            |
      atomic_inc(&md->holders)        |
                                      | dm_destroy
                                      |   __dm_destroy
                                      |     if (!dm_suspended_md(md))
                                      |     atomic_read(&md->holders)
                                      |     msleep(1)
  dm_resume                           |
    __dm_resume                       |
      dm_table_resume_targets         |
        pool_resume                   |
          do_waker  # add delay work  |
                                      | dm_table_destroy
                                      |   pool_dtr
                                      |     __pool_dec
                                      |       __pool_destroy
                                      |         destroy_workqueue
                                      |         kfree(pool)  # free pool
time out                              |
__do_softirq                          |
  run_timer_softirq  # pool has already been freed
This can be easily reproduced using:
1. create thin-pool
2. dmsetup suspend pool
3. dmsetup resume pool
4. dmsetup remove_all # Concurrent with 3
The root cause of the UAF is that dm_resume() re-arms the pool's timer
(via do_waker()) after dm_destroy() has already skipped cancelling it
because the device was still in the suspended state. When the timer later
fires, run_timer_softirq() runs the callback against a pool that has
already been freed, triggering the use-after-free.
Therefore, wait for md->holders to drop to zero before the suspend
handling, so that the timer is cancelled only after any concurrent
dm_resume() has completed and can no longer re-arm it.
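As a side note, the snippet below is a self-contained sketch of the
delayed-work pattern this race abuses; the toy_* names are made up for
illustration and are not dm code. A work item that re-arms itself, as
do_waker() does, must be synchronously cancelled (e.g. with
cancel_delayed_work_sync()) before its backing object is freed:

#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct toy_pool {
	struct delayed_work waker;
};

static struct toy_pool *pool;

/* Periodic work that re-arms itself, like do_waker() in dm-thin. */
static void toy_waker(struct work_struct *ws)
{
	struct toy_pool *p = container_of(to_delayed_work(ws),
					  struct toy_pool, waker);

	schedule_delayed_work(&p->waker, HZ);
}

/* "resume" side: arm the periodic work. */
static int __init toy_init(void)
{
	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
	if (!pool)
		return -ENOMEM;

	INIT_DELAYED_WORK(&pool->waker, toy_waker);
	schedule_delayed_work(&pool->waker, HZ);
	return 0;
}

/* "destroy" side: the work must be cancelled synchronously before
 * kfree(), and nothing may be able to re-arm it concurrently; otherwise
 * the timer fires later and touches freed memory, which is the UAF
 * described above.
 */
static void __exit toy_exit(void)
{
	cancel_delayed_work_sync(&pool->waker);
	kfree(pool);
}

module_init(toy_init);
module_exit(toy_exit);
MODULE_LICENSE("GPL");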
Signed-off-by: Luo Meng <luomeng12(a)huawei.com>
Reviewed-by: Zhang Xiaoxu <zhangxiaoxu5(a)huawei.com>
Reviewed-by: Zhang Yi <yi.zhang(a)huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13(a)huawei.com>
---
drivers/md/dm.c | 26 +++++++++++++-------------
1 file changed, 13 insertions(+), 13 deletions(-)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4c46f030eed2..288dab0ab226 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -2411,6 +2411,19 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
if (dm_request_based(md) && md->kworker_task)
kthread_flush_worker(&md->kworker);
+ /*
+ * Rare, but there may be I/O requests still going to complete,
+ * for example. Wait for all references to disappear.
+ * No one should increment the reference count of the mapped_device,
+ * after the mapped_device state becomes DMF_FREEING.
+ */
+ if (wait)
+ while (atomic_read(&md->holders))
+ msleep(1);
+ else if (atomic_read(&md->holders))
+ DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
+ dm_device_name(md), atomic_read(&md->holders));
+
/*
* Take suspend_lock so that presuspend and postsuspend methods
* do not race with internal suspend.
@@ -2427,19 +2440,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
dm_put_live_table(md, srcu_idx);
mutex_unlock(&md->suspend_lock);
- /*
- * Rare, but there may be I/O requests still going to complete,
- * for example. Wait for all references to disappear.
- * No one should increment the reference count of the mapped_device,
- * after the mapped_device state becomes DMF_FREEING.
- */
- if (wait)
- while (atomic_read(&md->holders))
- msleep(1);
- else if (atomic_read(&md->holders))
- DMWARN("%s: Forcibly removing mapped_device still in use! (%d users)",
- dm_device_name(md), atomic_read(&md->holders));
-
dm_sysfs_exit(md);
dm_table_destroy(__unbind(md));
free_dev(md);
--
2.25.1