From: Jan Kara jack@suse.cz
stable inclusion from stable-5.10.51 commit 8cc58a6e2c394aa48aa05f600be7d279efbafcd7 bugzilla: 175263
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 11c7aa0ddea8611007768d3e6b58d45dc60a19e1 upstream.
Commit 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle") tried to fix a problem that a process could be sleeping in rq_qos_wait() without anyone to wake it up. However the fix is not complete and the following can still happen:
CPU1 (waiter1) CPU2 (waiter2) CPU3 (waker) rq_qos_wait() rq_qos_wait() acquire_inflight_cb() -> fails acquire_inflight_cb() -> fails
completes IOs, inflight decreased prepare_to_wait_exclusive() prepare_to_wait_exclusive() has_sleeper = !wq_has_single_sleeper() -> true as there are two sleepers has_sleeper = !wq_has_single_sleeper() -> true io_schedule() io_schedule()
Deadlock as now there's nobody to wakeup the two waiters. The logic automatically blocking when there are already sleepers is really subtle and the only way to make it work reliably is that we check whether there are some waiters in the queue when adding ourselves there. That way, we are guaranteed that at least the first process to enter the wait queue will recheck the waiting condition before going to sleep and thus guarantee forward progress.
Fixes: 545fbd0775ba ("rq-qos: fix missed wake-ups in rq_qos_throttle") CC: stable@vger.kernel.org Signed-off-by: Jan Kara jack@suse.cz Link: https://lore.kernel.org/r/20210607112613.25344-1-jack@suse.cz Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Lihong Kou koulihong@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- block/blk-wbt.c | 4 ++-- include/linux/wait.h | 1 + kernel/sched/wait.c | 19 ++++++++++++++++--- 3 files changed, 19 insertions(+), 5 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c index ffd5c17f5a101..366d294a11ef1 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -549,8 +549,8 @@ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, if (!has_sleeper && rq_wait_inc_below(rqw, get_limit(rwb, rw))) return;
- prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); - has_sleeper = !wq_has_single_sleeper(&rqw->wait); + has_sleeper = !__prepare_to_wait_exclusive(&rqw->wait, &data.wq, + TASK_UNINTERRUPTIBLE); do { /* The memory barrier in set_task_state saves us here. */ if (data.got_token) diff --git a/include/linux/wait.h b/include/linux/wait.h index 29d28c2084ce8..1234e6cbacd2a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -1121,6 +1121,7 @@ do { \ * Waitqueues which are removed from the waitqueue_head at wakeup time */ void prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); +bool __prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); long prepare_to_wait_event(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state); void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 5dd47f1103d18..c95b392735672 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -235,17 +235,30 @@ prepare_to_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_ent } EXPORT_SYMBOL(prepare_to_wait);
-void -prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +/* Returns true if we are the first waiter in the queue, false otherwise. */ +bool +__prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) { unsigned long flags; + bool was_empty = false;
wq_entry->flags |= WQ_FLAG_EXCLUSIVE; spin_lock_irqsave(&wq_head->lock, flags); - if (list_empty(&wq_entry->entry)) + if (list_empty(&wq_entry->entry)) { + was_empty = list_empty(&wq_head->head); __add_wait_queue_entry_tail(wq_head, wq_entry); + } set_current_state(state); spin_unlock_irqrestore(&wq_head->lock, flags); + return was_empty; + +} +EXPORT_SYMBOL(__prepare_to_wait_exclusive); + +void +prepare_to_wait_exclusive(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry, int state) +{ + __prepare_to_wait_exclusive(wq_head, wq_entry, state); } EXPORT_SYMBOL(prepare_to_wait_exclusive);