From: Breno Leitao leitao@debian.org
mainline inclusion from mainline-v6.10-rc1 commit 8a565304927fbd28c9f028c492b5c1714002cbab category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACR2Y CVE: CVE-2024-39508
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Utilize set_bit() and test_bit() on worker->flags within io_uring/io-wq to address potential data races.
The structure io_worker->flags may be accessed through various data paths, leading to concurrency issues. When KCSAN is enabled, it reveals data races occurring in io_worker_handle_work and io_wq_activate_free_worker functions.
BUG: KCSAN: data-race in io_worker_handle_work / io_wq_activate_free_worker write to 0xffff8885c4246404 of 4 bytes by task 49071 on cpu 28: io_worker_handle_work (io_uring/io-wq.c:434 io_uring/io-wq.c:569) io_wq_worker (io_uring/io-wq.c:?) <snip>
read to 0xffff8885c4246404 of 4 bytes by task 49024 on cpu 5: io_wq_activate_free_worker (io_uring/io-wq.c:? io_uring/io-wq.c:285) io_wq_enqueue (io_uring/io-wq.c:947) io_queue_iowq (io_uring/io_uring.c:524) io_req_task_submit (io_uring/io_uring.c:1511) io_handle_tw_list (io_uring/io_uring.c:1198) <snip>
Line numbers against commit 18daea77cca6 ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm").
These races involve writes and reads to the same memory location by different tasks running on different CPUs. To mitigate this, refactor the code to use atomic operations such as set_bit(), test_bit(), and clear_bit() instead of basic "and" and "or" operations. This ensures thread-safe manipulation of worker flags.
Also, move `create_index` to avoid holes in the structure.
Signed-off-by: Breno Leitao leitao@debian.org Link: https://lore.kernel.org/r/20240507170002.2269003-1-leitao@debian.org Signed-off-by: Jens Axboe axboe@kernel.dk Conflicts: fs/io-wq.c io_uring/io-wq.c [The function names are different than cve commit. But their basic functions are same and they are using the io_worker in the same way.] Signed-off-by: Yifan Qiao qiaoyifan4@huawei.com --- fs/io-wq.c | 59 +++++++++++++++++++++++++++--------------------------- 1 file changed, 30 insertions(+), 29 deletions(-)
diff --git a/fs/io-wq.c b/fs/io-wq.c index 67d157bcf2fb..b7aaf060547b 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -25,12 +25,12 @@ #define WORKER_IDLE_TIMEOUT (5 * HZ)
enum { - IO_WORKER_F_UP = 1, /* up and active */ - IO_WORKER_F_RUNNING = 2, /* account as running */ - IO_WORKER_F_FREE = 4, /* worker on free list */ - IO_WORKER_F_EXITING = 8, /* worker exiting */ - IO_WORKER_F_FIXED = 16, /* static idle worker */ - IO_WORKER_F_BOUND = 32, /* is doing bounded work */ + IO_WORKER_F_UP = 0, /* up and active */ + IO_WORKER_F_RUNNING = 1, /* account as running */ + IO_WORKER_F_FREE = 2, /* worker on free list */ + IO_WORKER_F_EXITING = 3, /* worker exiting */ + IO_WORKER_F_FIXED = 4, /* static idle worker */ + IO_WORKER_F_BOUND = 5, /* is doing bounded work */ };
enum { @@ -48,7 +48,7 @@ enum { */ struct io_worker { refcount_t ref; - unsigned flags; + unsigned long flags; struct hlist_nulls_node nulls_node; struct list_head all_list; struct task_struct *task; @@ -193,7 +193,7 @@ static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe, struct io_worker *worker) { - if (worker->flags & IO_WORKER_F_BOUND) + if (test_bit(IO_WORKER_F_BOUND, &worker->flags)) return &wqe->acct[IO_WQ_ACCT_BOUND];
return &wqe->acct[IO_WQ_ACCT_UNBOUND]; @@ -215,9 +215,9 @@ static void io_worker_exit(struct io_worker *worker)
preempt_disable(); current->flags &= ~PF_IO_WORKER; - if (worker->flags & IO_WORKER_F_RUNNING) + if (test_bit(IO_WORKER_F_RUNNING, &worker->flags)) atomic_dec(&acct->nr_running); - if (!(worker->flags & IO_WORKER_F_BOUND)) + if (!test_bit(IO_WORKER_F_BOUND, &worker->flags)) atomic_dec(&wqe->wq->user->processes); worker->flags = 0; preempt_enable(); @@ -315,7 +315,8 @@ static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
current->flags |= PF_IO_WORKER;
- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + set_mask_bits(&worker->flags, 0, + BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING)); worker->restore_files = current->files; worker->restore_fs = current->fs; io_wqe_inc_running(wqe, worker); @@ -331,8 +332,8 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, { bool worker_bound, work_bound;
- if (worker->flags & IO_WORKER_F_FREE) { - worker->flags &= ~IO_WORKER_F_FREE; + if (test_bit(IO_WORKER_F_FREE, &worker->flags)) { + clear_bit(IO_WORKER_F_FREE, &worker->flags); hlist_nulls_del_init_rcu(&worker->nulls_node); }
@@ -340,17 +341,17 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, * If worker is moving from bound to unbound (or vice versa), then * ensure we update the running accounting. */ - worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0; + worker_bound = test_bit(IO_WORKER_F_BOUND, &worker->flags); work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0; if (worker_bound != work_bound) { io_wqe_dec_running(wqe, worker); if (work_bound) { - worker->flags |= IO_WORKER_F_BOUND; + set_bit(IO_WORKER_F_BOUND, &worker->flags); wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--; wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++; atomic_dec(&wqe->wq->user->processes); } else { - worker->flags &= ~IO_WORKER_F_BOUND; + clear_bit(IO_WORKER_F_BOUND, &worker->flags); wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++; wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--; atomic_inc(&wqe->wq->user->processes); @@ -369,8 +370,8 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) __must_hold(wqe->lock) { - if (!(worker->flags & IO_WORKER_F_FREE)) { - worker->flags |= IO_WORKER_F_FREE; + if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) { + set_bit(IO_WORKER_F_FREE, &worker->flags); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); }
@@ -585,7 +586,7 @@ static int io_wqe_worker(void *data) continue; /* timed out, exit unless we're the fixed worker */ if (test_bit(IO_WQ_BIT_EXIT, &wq->state) || - !(worker->flags & IO_WORKER_F_FIXED)) + !test_bit(IO_WORKER_F_FIXED, &worker->flags)) break; }
@@ -609,11 +610,11 @@ void io_wq_worker_running(struct task_struct *tsk) struct io_worker *worker = kthread_data(tsk); struct io_wqe *wqe = worker->wqe;
- if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; - if (worker->flags & IO_WORKER_F_RUNNING) + if (test_bit(IO_WORKER_F_RUNNING, &worker->flags)) return; - worker->flags |= IO_WORKER_F_RUNNING; + set_bit(IO_WORKER_F_RUNNING, &worker->flags); io_wqe_inc_running(wqe, worker); }
@@ -627,12 +628,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk) struct io_worker *worker = kthread_data(tsk); struct io_wqe *wqe = worker->wqe;
- if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; - if (!(worker->flags & IO_WORKER_F_RUNNING)) + if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags)) return;
- worker->flags &= ~IO_WORKER_F_RUNNING; + clear_bit(IO_WORKER_F_RUNNING, &worker->flags);
spin_lock_irq(&wqe->lock); io_wqe_dec_running(wqe, worker); @@ -663,11 +664,11 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) spin_lock_irq(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; + set_bit(IO_WORKER_F_FREE, &worker->flags); if (index == IO_WQ_ACCT_BOUND) - worker->flags |= IO_WORKER_F_BOUND; - if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND)) - worker->flags |= IO_WORKER_F_FIXED; + set_bit(IO_WORKER_F_BOUND, &worker->flags); + if (!acct->nr_workers && test_bit(IO_WORKER_F_BOUND, &worker->flags)) + set_bit(IO_WORKER_F_FIXED, &worker->flags); acct->nr_workers++; spin_unlock_irq(&wqe->lock);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/14261 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/L...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/14261 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/L...