From: Breno Leitao leitao@debian.org
stable inclusion from stable-v6.6.35 commit ab702c3483db9046bab9f40306f1a28b22dbbdc0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IACR2Y CVE: CVE-2024-39508
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
[ Upstream commit 8a565304927fbd28c9f028c492b5c1714002cbab ]
Utilize set_bit() and test_bit() on worker->flags within io_uring/io-wq to address potential data races.
The structure io_worker->flags may be accessed through various data paths, leading to concurrency issues. When KCSAN is enabled, it reveals data races occurring in io_worker_handle_work and io_wq_activate_free_worker functions.
BUG: KCSAN: data-race in io_worker_handle_work / io_wq_activate_free_worker write to 0xffff8885c4246404 of 4 bytes by task 49071 on cpu 28: io_worker_handle_work (io_uring/io-wq.c:434 io_uring/io-wq.c:569) io_wq_worker (io_uring/io-wq.c:?) <snip>
read to 0xffff8885c4246404 of 4 bytes by task 49024 on cpu 5: io_wq_activate_free_worker (io_uring/io-wq.c:? io_uring/io-wq.c:285) io_wq_enqueue (io_uring/io-wq.c:947) io_queue_iowq (io_uring/io_uring.c:524) io_req_task_submit (io_uring/io_uring.c:1511) io_handle_tw_list (io_uring/io_uring.c:1198) <snip>
Line numbers against commit 18daea77cca6 ("Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm").
These races involve writes and reads to the same memory location by different tasks running on different CPUs. To mitigate this, refactor the code to use atomic operations such as set_bit(), test_bit(), and clear_bit() instead of basic "and" and "or" operations. This ensures thread-safe manipulation of worker flags.
Also, move `create_index` to avoid holes in the structure.
Signed-off-by: Breno Leitao leitao@debian.org Link: https://lore.kernel.org/r/20240507170002.2269003-1-leitao@debian.org Signed-off-by: Jens Axboe axboe@kernel.dk Stable-dep-of: 91215f70ea85 ("io_uring/io-wq: avoid garbage value of 'match' in io_wq_enqueue()") Signed-off-by: Sasha Levin sashal@kernel.org Conflicts: io_uring/io-wq.c [The function names are different than cve commit. But their basic functions are same and they are using the io_worker in the same way.] Signed-off-by: Yifan Qiao qiaoyifan4@huawei.com --- io_uring/io-wq.c | 47 ++++++++++++++++++++++++----------------------- 1 file changed, 24 insertions(+), 23 deletions(-)
diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 6031fb319d87..cc31ebf3a1d5 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -21,10 +21,10 @@ #define WORKER_IDLE_TIMEOUT (5 * HZ)
enum { - IO_WORKER_F_UP = 1, /* up and active */ - IO_WORKER_F_RUNNING = 2, /* account as running */ - IO_WORKER_F_FREE = 4, /* worker on free list */ - IO_WORKER_F_BOUND = 8, /* is doing bounded work */ + IO_WORKER_F_UP = 0, /* up and active */ + IO_WORKER_F_RUNNING = 1, /* account as running */ + IO_WORKER_F_FREE = 2, /* worker on free list */ + IO_WORKER_F_BOUND = 3, /* is doing bounded work */ };
enum { @@ -40,7 +40,8 @@ enum { */ struct io_worker { refcount_t ref; - unsigned flags; + int create_index; + unsigned long flags; struct hlist_nulls_node nulls_node; struct list_head all_list; struct task_struct *task; @@ -53,7 +54,6 @@ struct io_worker {
unsigned long create_state; struct callback_head create_work; - int create_index;
union { struct rcu_head rcu; @@ -167,7 +167,7 @@ static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) { - return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); + return io_get_acct(worker->wqe, test_bit(IO_WORKER_F_BOUND, &worker->flags)); }
static void io_worker_ref_put(struct io_wq *wq) @@ -220,7 +220,7 @@ static void io_worker_exit(struct io_worker *worker) wait_for_completion(&worker->ref_done);
raw_spin_lock(&wqe->lock); - if (worker->flags & IO_WORKER_F_FREE) + if (test_bit(IO_WORKER_F_FREE, &worker->flags)) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); preempt_disable(); @@ -389,7 +389,7 @@ static void io_wqe_dec_running(struct io_worker *worker) struct io_wqe_acct *acct = io_wqe_get_acct(worker); struct io_wqe *wqe = worker->wqe;
- if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return;
if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { @@ -409,8 +409,8 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, struct io_wq_work *work) __must_hold(wqe->lock) { - if (worker->flags & IO_WORKER_F_FREE) { - worker->flags &= ~IO_WORKER_F_FREE; + if (test_bit(IO_WORKER_F_FREE, &worker->flags)) { + clear_bit(IO_WORKER_F_FREE, &worker->flags); hlist_nulls_del_init_rcu(&worker->nulls_node); } } @@ -425,8 +425,8 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker, static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) __must_hold(wqe->lock) { - if (!(worker->flags & IO_WORKER_F_FREE)) { - worker->flags |= IO_WORKER_F_FREE; + if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) { + set_bit(IO_WORKER_F_FREE, &worker->flags); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); } } @@ -615,7 +615,8 @@ static int io_wqe_worker(void *data) bool last_timeout = false; char buf[TASK_COMM_LEN];
- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + set_mask_bits(&worker->flags, 0, + BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING));
snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); set_task_comm(current, buf); @@ -671,11 +672,11 @@ void io_wq_worker_running(struct task_struct *tsk)
if (!worker) return; - if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; - if (worker->flags & IO_WORKER_F_RUNNING) + if (test_bit(IO_WORKER_F_RUNNING, &worker->flags)) return; - worker->flags |= IO_WORKER_F_RUNNING; + set_bit(IO_WORKER_F_RUNNING, &worker->flags); io_wqe_inc_running(worker); }
@@ -689,12 +690,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
if (!worker) return; - if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; - if (!(worker->flags & IO_WORKER_F_RUNNING)) + if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags)) return;
- worker->flags &= ~IO_WORKER_F_RUNNING; + clear_bit(IO_WORKER_F_RUNNING, &worker->flags);
raw_spin_lock(&worker->wqe->lock); io_wqe_dec_running(worker); @@ -712,7 +713,7 @@ static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, raw_spin_lock(&wqe->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; + set_bit(IO_WORKER_F_FREE, &worker->flags); raw_spin_unlock(&wqe->lock); wake_up_new_task(tsk); } @@ -816,7 +817,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) init_completion(&worker->ref_done);
if (index == IO_WQ_ACCT_BOUND) - worker->flags |= IO_WORKER_F_BOUND; + set_bit(IO_WORKER_F_BOUND, &worker->flags);
tsk = create_io_thread(io_wqe_worker, worker, wqe->node); if (!IS_ERR(tsk)) { @@ -904,7 +905,7 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) { struct io_wqe_acct *acct = io_work_get_acct(wqe, work); - unsigned work_flags = work->flags; + unsigned long work_flags = work->flags; bool do_create;
/*