From: Jens Axboe <axboe@kernel.dk>
mainline inclusion from mainline-5.8-rc4 commit b7db41c9e03b5189bc94993bd50e4506ac9e34c1 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=27 CVE: NA ---------------------------
When switching to TWA_SIGNAL for task_work notifications, we also made any signal based condition in io_cqring_wait() return -ERESTARTSYS. This breaks applications that rely on using signals to abort someone waiting for events.
Check if we have a signal pending because of queued task_work, and repeat the signal check once we've run the task_work. This provides a reliable way of telling the two apart.
Additionally, only use TWA_SIGNAL if we are using an eventfd. If not, we don't have the dependency situation described in the original commit, and we can get by with just using TWA_RESUME like we previously did.
Fixes: ce593a6c480a ("io_uring: use signal based task_work running") Cc: stable@vger.kernel.org # v5.7 Reported-by: Andres Freund <andres@anarazel.de> Tested-by: Andres Freund <andres@anarazel.de> Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: yangerkun <yangerkun@huawei.com> Reviewed-by: zhangyi (F) <yi.zhang@huawei.com> Signed-off-by: Cheng Jian <cj.chengjian@huawei.com> --- fs/io_uring.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index 8024e7bcb4fc..f38c24f80537 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -4032,14 +4032,22 @@ struct io_poll_table { int error; };
-static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb, - int notify) +static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb) { struct task_struct *tsk = req->task; - int ret; + struct io_ring_ctx *ctx = req->ctx; + int ret, notify = TWA_RESUME;
- if (req->ctx->flags & IORING_SETUP_SQPOLL) + /* + * SQPOLL kernel thread doesn't need notification, just a wakeup. + * If we're not using an eventfd, then TWA_RESUME is always fine, + * as we won't have dependencies between request completions for + * other kernel wait conditions. + */ + if (ctx->flags & IORING_SETUP_SQPOLL) notify = 0; + else if (ctx->cq_ev_fd) + notify = TWA_SIGNAL;
ret = task_work_add(tsk, cb, notify); if (!ret) @@ -4070,7 +4078,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, * of executing it. We can't safely execute it anyway, as we may not * have the needed state needed for it anyway. */ - ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL); + ret = io_req_task_work_add(req, &req->task_work); if (unlikely(ret)) { WRITE_ONCE(poll->canceled, true); tsk = io_wq_get_task(req->ctx->io_wq); @@ -6160,7 +6168,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, if (current->task_works) task_work_run(); if (signal_pending(current)) { - ret = -ERESTARTSYS; + if (current->jobctl & JOBCTL_TASK_WORK) { + spin_lock_irq(&current->sighand->siglock); + current->jobctl &= ~JOBCTL_TASK_WORK; + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); + continue; + } + ret = -EINTR; break; } if (io_should_wake(&iowq, false)) @@ -6169,7 +6184,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } while (1); finish_wait(&ctx->wait, &iowq.wq);
- restore_saved_sigmask_unless(ret == -ERESTARTSYS); + restore_saved_sigmask_unless(ret == -EINTR);
return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; }