bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D
Colin Ian King (1): io_uring: remove redundant initialization of variable ret
Jens Axboe (3): io_uring: re-issue block requests that failed because of resources io_uring: don't double complete failed reissue request io_uring: don't re-setup vecs/iter in io_resumit_prep() is already there
Pavel Begunkov (1): block: don't ignore REQ_NOWAIT for direct IO
fs/block_dev.c | 5 +++ fs/io_uring.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 104 insertions(+), 1 deletion(-)
From: Jens Axboe axboe@kernel.dk
mainline inclusion from mainline-v5.9-rc1 commit b63534c41e20b474483b4ddf47efc858c17352e0 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA
-------------------------------------------------
Mark the plug with nowait == true, which will cause requests to avoid blocking on request allocation. If they do, we catch them and reissue them from a task_work based handler.
Normally we can catch -EAGAIN directly, but the hard case is for split requests. As an example, the application issues a 512KB request. The block core will split this into 128KB if that's the max size for the device. The first request issues just fine, but we run into -EAGAIN for some later splits for the same request. As the bio is split, we don't get to see the -EAGAIN until one of the actual reads completes, and hence we cannot handle it inline as part of submission.
This does potentially cause re-reads of parts of the range, as the whole request is reissued. There's currently no better way to handle this.
Signed-off-by: Jens Axboe axboe@kernel.dk conflict: fs/io_uring.c Adding nowait to plug list is reverted in (62c774ed4831 ("io_uring: don't unconditionally set plug->nowait = true")) Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com --- fs/io_uring.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index f933d4f0edb4..aeedf191b813 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -917,6 +917,13 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct io_comp_state *cs);
+static ssize_t io_import_iovec(int rw, struct io_kiocb *req, + struct iovec **iovec, struct iov_iter *iter, + bool needs_lock); +static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, + struct iovec *iovec, struct iovec *fast_iov, + struct iov_iter *iter); + static struct kmem_cache *req_cachep;
static const struct file_operations io_uring_fops; @@ -2363,10 +2370,90 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res, __io_req_complete(req, res, cflags, cs); }
+#ifdef CONFIG_BLOCK +static bool io_resubmit_prep(struct io_kiocb *req, int error) +{ + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; + ssize_t ret = -ECANCELED; + struct iov_iter iter; + int rw; + + if (error) { + ret = error; + goto end_req; + } + + switch (req->opcode) { + case IORING_OP_READV: + case IORING_OP_READ_FIXED: + case IORING_OP_READ: + rw = READ; + break; + case IORING_OP_WRITEV: + case IORING_OP_WRITE_FIXED: + case IORING_OP_WRITE: + rw = WRITE; + break; + default: + printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n", + req->opcode); + goto end_req; + } + + ret = io_import_iovec(rw, req, &iovec, &iter, false); + if (ret < 0) + goto end_req; + ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); + if (!ret) + return true; + kfree(iovec); +end_req: + io_cqring_add_event(req, ret, 0); + req_set_fail_links(req); + io_put_req(req); + return false; +} + +static void io_rw_resubmit(struct callback_head *cb) +{ + struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct io_ring_ctx *ctx = req->ctx; + int err; + + __set_current_state(TASK_RUNNING); + + err = io_sq_thread_acquire_mm_files(ctx, req); + + if (io_resubmit_prep(req, err)) { + refcount_inc(&req->refs); + io_queue_async_work(req); + } +} +#endif + +static bool io_rw_reissue(struct io_kiocb *req, long res) +{ +#ifdef CONFIG_BLOCK + struct task_struct *tsk; + int ret; + + if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) + return false; + + tsk = req->task; + init_task_work(&req->task_work, io_rw_resubmit); + ret = task_work_add(tsk, &req->task_work, true); + if (!ret) + return true; +#endif + return false; +} + static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs) { - io_complete_rw_common(&req->rw.kiocb, res, cs); + if (!io_rw_reissue(req, res)) + io_complete_rw_common(&req->rw.kiocb, res, cs); }
static void io_complete_rw(struct kiocb *kiocb, long res, long res2) @@ -2536,6 +2623,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (kiocb->ki_flags & IOCB_NOWAIT) req->flags |= REQ_F_NOWAIT;
+ if (kiocb->ki_flags & IOCB_DIRECT) + io_get_req_task(req); + if (force_nonblock) kiocb->ki_flags |= IOCB_NOWAIT;
@@ -3037,6 +3127,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count); if (!ret) { + unsigned long nr_segs = iter.nr_segs; ssize_t ret2;
if (req->file->f_op->read_iter) @@ -3054,6 +3145,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, goto copy_iov; kiocb_done(kiocb, ret2, cs); } else { + iter.count = iov_count; + iter.nr_segs = nr_segs; copy_iov: ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, &iter); @@ -3120,6 +3213,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count); if (!ret) { + unsigned long nr_segs = iter.nr_segs; ssize_t ret2;
/* @@ -3157,6 +3251,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, goto copy_iov; kiocb_done(kiocb, ret2, cs); } else { + iter.count = iov_count; + iter.nr_segs = nr_segs; copy_iov: ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, &iter);
From: Pavel Begunkov asml.silence@gmail.com
mainline inclusion from mainline-v5.12-rc6 commit f8b78caf21d5bc3fcfc40c18898f9d52ed1451a5 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA
-------------------------------------------------
If IOCB_NOWAIT is set on submission, then that needs to get propagated to REQ_NOWAIT on the block side. Otherwise we completely lose this information, and any issuer of IOCB_NOWAIT IO will potentially end up blocking on e.g. request allocation on the storage side.
Signed-off-by: Pavel Begunkov asml.silence@gmail.com Signed-off-by: Jens Axboe axboe@kernel.dk conflict: fs/block_dev.c Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com --- fs/block_dev.c | 5 +++++ 1 file changed, 5 insertions(+)
diff --git a/fs/block_dev.c b/fs/block_dev.c index 8b299347f2aa..9868b21b8ef9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -253,6 +253,9 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, task_io_account_write(ret); }
+ if (iocb->ki_flags & IOCB_NOWAIT) + bio.bi_opf |= REQ_NOWAIT; + qc = submit_bio(&bio); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -407,6 +410,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio->bi_opf = dio_bio_write_op(iocb); task_io_account_write(bio->bi_iter.bi_size); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT;
dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size;
From: Colin Ian King colin.king@canonical.com
mainline inclusion from mainline-v5.12-rc1 commit 4a245479c2312e6b51862c21af134d4191ab9cf7 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA
-------------------------------------------------
The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed.
Addresses-Coverity: ("Unused value") Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Signed-off-by: Colin Ian King colin.king@canonical.com Reviewed-by: Chaitanya Kulkarni chaitanya.kulkarni@wdc.com Signed-off-by: Jens Axboe axboe@kernel.dk conflict: fs/io_uring.c Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index aeedf191b813..4e552dfe1c64 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2374,7 +2374,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res, static bool io_resubmit_prep(struct io_kiocb *req, int error) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - ssize_t ret = -ECANCELED; + ssize_t ret; struct iov_iter iter; int rw;
From: Jens Axboe axboe@kernel.dk
mainline inclusion from mainline-v5.10-rc5 commit c993df5a688975bf9ce899706ca13d2bc8d6be25 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA
-------------------------------------------------
Zorro reports that an xfstest test case is failing, and it turns out that for the reissue path we can potentially issue a double completion on the request for the failure path. There's an issue around the retry as well, but for now, at least just make sure that we handle the error path correctly.
Cc: stable@vger.kernel.org Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Reported-by: Zorro Lang zlang@redhat.com Signed-off-by: Jens Axboe axboe@kernel.dk conflict: fs/io_uring.c Change based on e1e16097e265 ("io_uring: provide generic io_req_complete() helper") Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index 4e552dfe1c64..bdcac452b174 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2408,9 +2408,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) return true; kfree(iovec); end_req: - io_cqring_add_event(req, ret, 0); req_set_fail_links(req); - io_put_req(req); return false; }
From: Jens Axboe axboe@kernel.dk
mainline inclusion from mainline-v5.9-rc7 commit 8f3d749685e48c44dbe877ac9781079d85f914c8 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA
-------------------------------------------------
If we already have mapped the necessary data for retry, then don't set it up again. It's a pointless operation, and we leak the iovec if it's a large (non-stack) vec.
Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Signed-off-by: Jens Axboe axboe@kernel.dk conflict: fs/io_uring.c Signed-off-by: Laibin Qiu qiulaibin@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com --- fs/io_uring.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index bdcac452b174..7240b5423170 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2400,13 +2400,17 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) goto end_req; }
- ret = io_import_iovec(rw, req, &iovec, &iter, false); - if (ret < 0) - goto end_req; - ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); - if (!ret) + if (!req->io) { + ret = io_import_iovec(rw, req, &iovec, &iter, false); + if (ret < 0) + goto end_req; + ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); + if (!ret) + return true; + kfree(iovec); + } else { return true; - kfree(iovec); + } end_req: req_set_fail_links(req); return false;