From: Jens Axboe axboe@kernel.dk
mainline inclusion from mainline-5.6-rc1 commit f2842ab5b72d7ee5f7f8385c2d4f32c133f5837b category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=27 CVE: NA ---------------------------
If an application is using eventfd notifications with poll to know when new SQEs can be issued, it expects the subsequent reads/writes to complete inline. Because of that, it already knows that there are events available, and it doesn't want spurious wakeups on the eventfd for those requests.
This adds IORING_REGISTER_EVENTFD_ASYNC, which works just like IORING_REGISTER_EVENTFD, except it only triggers notifications for events that happen from async completions (IRQ, or io-wq worker completions). Any completions inline from the submission itself will not trigger notifications.
Suggested-by: Mark Papadakis markuspapadakis@icloud.com Signed-off-by: Jens Axboe axboe@kernel.dk Signed-off-by: yangerkun yangerkun@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- fs/io_uring.c | 17 ++++++++++++++++- include/uapi/linux/io_uring.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index 346f2298837d..b3ca3f380b37 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -206,6 +206,7 @@ struct io_ring_ctx { int account_mem: 1; int cq_overflow_flushed: 1; int drain_next: 1; + int eventfd_async: 1;
/* * Ring buffer of indices into array of io_uring_sqe, which is @@ -962,13 +963,20 @@ static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) return &rings->cqes[tail & ctx->cq_mask]; }
+static inline bool io_should_trigger_evfd(struct io_ring_ctx *ctx) +{ + if (!ctx->eventfd_async) + return true; + return io_wq_current_is_worker() || in_interrupt(); +} + static void io_cqring_ev_posted(struct io_ring_ctx *ctx) { if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); if (waitqueue_active(&ctx->sqo_wait)) wake_up(&ctx->sqo_wait); - if (ctx->cq_ev_fd) + if (ctx->cq_ev_fd && io_should_trigger_evfd(ctx)) eventfd_signal(ctx->cq_ev_fd, 1); }
@@ -6544,10 +6552,17 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, ret = io_sqe_files_update(ctx, arg, nr_args); break; case IORING_REGISTER_EVENTFD: + case IORING_REGISTER_EVENTFD_ASYNC: ret = -EINVAL; if (nr_args != 1) break; ret = io_eventfd_register(ctx, arg); + if (ret) + break; + if (opcode == IORING_REGISTER_EVENTFD_ASYNC) + ctx->eventfd_async = 1; + else + ctx->eventfd_async = 0; break; case IORING_UNREGISTER_EVENTFD: ret = -EINVAL; diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 0fe270ab191c..66772a90a7f2 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -192,6 +192,7 @@ struct io_uring_params { #define IORING_REGISTER_EVENTFD 4 #define IORING_UNREGISTER_EVENTFD 5 #define IORING_REGISTER_FILES_UPDATE 6 +#define IORING_REGISTER_EVENTFD_ASYNC 7
struct io_uring_files_update { __u32 offset;