From: Pavel Begunkov <asml.silence@gmail.com>
mainline inclusion from mainline-5.6-rc1 commit 24369c2e3bb06d8c4e71fd6ceaf4f8a01ae79b7c category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=27 CVE: NA ---------------------------
If IORING_SETUP_ATTACH_WQ is set, wq_fd in io_uring_params is expected to be a valid io_uring fd, whose io-wq will be shared with the newly created io_uring instance. If the flag is set but the io-wq cannot be shared, io_uring setup fails.
This allows creation of "sibling" io_urings, where we prefer to keep the SQ/CQ private, but want to share the async backend to minimize the amount of overhead associated with having multiple rings that belong to the same backend.
Reported-by: Jens Axboe <axboe@kernel.dk> Reported-by: Daurnimator <quae@daurnimator.com> Signed-off-by: Pavel Begunkov <asml.silence@gmail.com> Signed-off-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: yangerkun <yangerkun@huawei.com> Reviewed-by: zhangyi (F) <yi.zhang@huawei.com> Signed-off-by: Cheng Jian <cj.chengjian@huawei.com> --- fs/io_uring.c | 64 +++++++++++++++++++++++++++-------- include/uapi/linux/io_uring.h | 4 ++- 2 files changed, 53 insertions(+), 15 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index 25932635a228..cc4a5e92153b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5646,11 +5646,56 @@ static void io_get_work(struct io_wq_work *work) refcount_inc(&req->refs); }
+static int io_init_wq_offload(struct io_ring_ctx *ctx, + struct io_uring_params *p) +{ + struct io_wq_data data; + struct fd f; + struct io_ring_ctx *ctx_attach; + unsigned int concurrency; + int ret = 0; + + data.user = ctx->user; + data.get_work = io_get_work; + data.put_work = io_put_work; + + if (!(p->flags & IORING_SETUP_ATTACH_WQ)) { + /* Do QD, or 4 * CPUS, whatever is smallest */ + concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); + + ctx->io_wq = io_wq_create(concurrency, &data); + if (IS_ERR(ctx->io_wq)) { + ret = PTR_ERR(ctx->io_wq); + ctx->io_wq = NULL; + } + return ret; + } + + f = fdget(p->wq_fd); + if (!f.file) + return -EBADF; + + if (f.file->f_op != &io_uring_fops) { + ret = -EINVAL; + goto out_fput; + } + + ctx_attach = f.file->private_data; + /* @io_wq is protected by holding the fd */ + if (!io_wq_get(ctx_attach->io_wq, &data)) { + ret = -EINVAL; + goto out_fput; + } + + ctx->io_wq = ctx_attach->io_wq; +out_fput: + fdput(f); + return ret; +} + static int io_sq_offload_start(struct io_ring_ctx *ctx, struct io_uring_params *p) { - struct io_wq_data data; - unsigned concurrency; int ret;
init_waitqueue_head(&ctx->sqo_wait); @@ -5694,18 +5739,9 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, goto err; }
- data.user = ctx->user; - data.get_work = io_get_work; - data.put_work = io_put_work; - - /* Do QD, or 4 * CPUS, whatever is smallest */ - concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); - ctx->io_wq = io_wq_create(concurrency, &data); - if (IS_ERR(ctx->io_wq)) { - ret = PTR_ERR(ctx->io_wq); - ctx->io_wq = NULL; + ret = io_init_wq_offload(ctx, p); + if (ret) goto err; - }
return 0; err: @@ -6522,7 +6558,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | - IORING_SETUP_CLAMP)) + IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) return -EINVAL;
ret = io_uring_create(entries, &p); diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index ffba7b1bf171..4b5a3376d959 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -75,6 +75,7 @@ enum { #define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */ #define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */ #define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */ +#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
enum { IORING_OP_NOP, @@ -182,7 +183,8 @@ struct io_uring_params { __u32 sq_thread_cpu; __u32 sq_thread_idle; __u32 features; - __u32 resv[4]; + __u32 wq_fd; + __u32 resv[3]; struct io_sqring_offsets sq_off; struct io_cqring_offsets cq_off; };