From: Jens Axboe <axboe@kernel.dk>
Subject: io_uring/kbuf: defer release of mapped buffer rings
stable inclusion
from stable-v6.6.5
commit 7138ebbe65caf65f52b923d4ef819c77d04ea671
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8N21P
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit c392cbecd8eca4c53f2bf508731257d9d0a21c2d upstream.
If a provided buffer ring is set up with IOU_PBUF_RING_MMAP, then the kernel allocates the memory for it and the application is expected to mmap(2) this memory. However, io_uring uses remap_pfn_range() for this operation, so we cannot rely on the normal munmap/release path to free that memory for us.
Stash an io_buf_free entry away for each of these, if any, and provide a helper to free them post ->release().
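For reference, the userspace flow in question looks roughly like the sketch below. This is not part of the patch; error handling is omitted and the helper name map_pbuf_ring() is invented for illustration, but the register opcode, the flag, and the mmap offset encoding are the real uapi ones:

#include <linux/io_uring.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hypothetical helper: register a kernel-allocated ring, then map it */
static void *map_pbuf_ring(int ring_fd, unsigned short bgid,
			   unsigned int entries)
{
	struct io_uring_buf_reg reg;
	size_t ring_size = entries * sizeof(struct io_uring_buf_ring);

	memset(&reg, 0, sizeof(reg));
	reg.ring_entries = entries;
	reg.bgid = bgid;
	reg.flags = IOU_PBUF_RING_MMAP;	/* kernel allocates the memory */

	if (syscall(__NR_io_uring_register, ring_fd,
		    IORING_REGISTER_PBUF_RING, &reg, 1) < 0)
		return MAP_FAILED;

	/*
	 * The pages are inserted with remap_pfn_range(), so a later
	 * munmap() will not free them -- the kernel must do that itself,
	 * which is what this patch defers to ->release() time.
	 */
	return mmap(NULL, ring_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    ring_fd, IORING_OFF_PBUF_RING |
			     ((__u64)bgid << IORING_OFF_PBUF_SHIFT));
}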
Cc: stable@vger.kernel.org
Fixes: c56e022c0a27 ("io_uring: add support for user mapped provided buffer ring")
Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 include/linux/io_uring_types.h |  3 +++
 io_uring/io_uring.c            |  2 ++
 io_uring/kbuf.c                | 44 ++++++++++++++++++++++++++++++----
 io_uring/kbuf.h                |  2 ++
 4 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 13d19b9be9f4..5fd664fb71c8 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -327,6 +327,9 @@ struct io_ring_ctx {
 
 	struct list_head	io_buffers_cache;
 
+	/* deferred free list, protected by ->uring_lock */
+	struct hlist_head	io_buf_list;
+
 	/* Keep this last, we don't need it for the fast path */
 	struct wait_queue_head		poll_wq;
 	struct io_restriction		restrictions;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 678fc84bf2f1..dbf6855b3b8c 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -323,6 +323,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	INIT_LIST_HEAD(&ctx->sqd_list);
 	INIT_LIST_HEAD(&ctx->cq_overflow_list);
 	INIT_LIST_HEAD(&ctx->io_buffers_cache);
+	INIT_HLIST_HEAD(&ctx->io_buf_list);
 	io_alloc_cache_init(&ctx->rsrc_node_cache, IO_NODE_ALLOC_CACHE_MAX,
 			    sizeof(struct io_rsrc_node));
 	io_alloc_cache_init(&ctx->apoll_cache, IO_ALLOC_CACHE_MAX,
@@ -2945,6 +2946,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
 		ctx->mm_account = NULL;
 	}
 	io_rings_free(ctx);
+	io_kbuf_mmap_list_free(ctx);
 
 	percpu_ref_exit(&ctx->refs);
 	free_uid(ctx->user);
diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c
index cd8c688c1a97..8da5c10ec352 100644
--- a/io_uring/kbuf.c
+++ b/io_uring/kbuf.c
@@ -41,6 +41,11 @@ static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
 	return xa_load(&ctx->io_bl_xa, bgid);
 }
 
+struct io_buf_free {
+	struct hlist_node		list;
+	void				*mem;
+};
+
 static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 							unsigned int bgid)
 {
@@ -238,7 +243,10 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 	if (bl->is_mapped) {
 		i = bl->buf_ring->tail - bl->head;
 		if (bl->is_mmap) {
-			folio_put(virt_to_folio(bl->buf_ring));
+			/*
+			 * io_kbuf_mmap_list_free() will free the page(s) at
+			 * ->release() time.
+			 */
 			bl->buf_ring = NULL;
 			bl->is_mmap = 0;
 		} else if (bl->buf_nr_pages) {
@@ -552,18 +560,28 @@ static int io_pin_pbuf_ring(struct io_uring_buf_reg *reg,
 	return -EINVAL;
 }
 
-static int io_alloc_pbuf_ring(struct io_uring_buf_reg *reg,
+static int io_alloc_pbuf_ring(struct io_ring_ctx *ctx,
+			      struct io_uring_buf_reg *reg,
 			      struct io_buffer_list *bl)
 {
-	gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP;
+	struct io_buf_free *ibf;
 	size_t ring_size;
 	void *ptr;
 
 	ring_size = reg->ring_entries * sizeof(struct io_uring_buf_ring);
-	ptr = (void *) __get_free_pages(gfp, get_order(ring_size));
+	ptr = io_mem_alloc(ring_size);
 	if (!ptr)
 		return -ENOMEM;
 
+	/* Allocate and store deferred free entry */
+	ibf = kmalloc(sizeof(*ibf), GFP_KERNEL_ACCOUNT);
+	if (!ibf) {
+		io_mem_free(ptr);
+		return -ENOMEM;
+	}
+	ibf->mem = ptr;
+	hlist_add_head(&ibf->list, &ctx->io_buf_list);
+
 	bl->buf_ring = ptr;
 	bl->is_mapped = 1;
 	bl->is_mmap = 1;
@@ -622,7 +640,7 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 		if (!(reg.flags & IOU_PBUF_RING_MMAP))
 			ret = io_pin_pbuf_ring(&reg, bl);
 		else
-			ret = io_alloc_pbuf_ring(&reg, bl);
+			ret = io_alloc_pbuf_ring(ctx, &reg, bl);
 
 	if (!ret) {
 		bl->nr_entries = reg.ring_entries;
@@ -682,3 +700,19 @@ void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
 
 	return bl->buf_ring;
 }
+
+/*
+ * Called at or after ->release(), free the mmap'ed buffers that we used
+ * for memory mapped provided buffer rings.
+ */
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx)
+{
+	struct io_buf_free *ibf;
+	struct hlist_node *tmp;
+
+	hlist_for_each_entry_safe(ibf, tmp, &ctx->io_buf_list, list) {
+		hlist_del(&ibf->list);
+		io_mem_free(ibf->mem);
+		kfree(ibf);
+	}
+}
diff --git a/io_uring/kbuf.h b/io_uring/kbuf.h
index 274f8b7fb8ab..3d0cb6b8c1ed 100644
--- a/io_uring/kbuf.h
+++ b/io_uring/kbuf.h
@@ -54,6 +54,8 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags);
 int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
 
+void io_kbuf_mmap_list_free(struct io_ring_ctx *ctx);
+
 unsigned int __io_put_kbuf(struct io_kiocb *req, unsigned issue_flags);
 
 void io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
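
For completeness, a hedged end-to-end sketch of the lifetime this patch addresses, reusing the hypothetical map_pbuf_ring() helper (and its includes) from the sketch in the commit message above. The point is that the ring pages now outlive both the unregister and the munmap(), and are only freed once the ring file itself is released:

int main(void)
{
	struct io_uring_params p;
	struct io_uring_buf_reg unreg;
	void *br;
	int fd;

	memset(&p, 0, sizeof(p));
	fd = syscall(__NR_io_uring_setup, 8, &p);

	/* kernel allocates the ring; we map it via remap_pfn_range() */
	br = map_pbuf_ring(fd, 0, 8);

	/*
	 * Unregistering no longer frees the pages directly: with this
	 * patch they stay on ctx->io_buf_list until the ring goes away.
	 */
	memset(&unreg, 0, sizeof(unreg));
	unreg.bgid = 0;
	syscall(__NR_io_uring_register, fd, IORING_UNREGISTER_PBUF_RING,
		&unreg, 1);

	munmap(br, 8 * sizeof(struct io_uring_buf_ring));

	/* final ref drops: ->release(), then io_kbuf_mmap_list_free() */
	close(fd);
	return 0;
}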