From: Jens Axboe axboe@kernel.dk
mainline inclusion from mainline-5.7-rc1 commit 067524e914cb23e20d59480b318fe2625eaee7c8 category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=27 CVE: NA ---------------------------
We have IORING_OP_PROVIDE_BUFFERS, but the only way to remove buffers is to trigger IO on them. The usual case of shrinking a buffer pool would be to just not replenish the buffers when IO completes, and instead just free it. But it may be nice to have a way to manually remove a number of buffers from a given group, and IORING_OP_REMOVE_BUFFERS provides that functionality.
Signed-off-by: Jens Axboe axboe@kernel.dk Conflicts: fs/io_uring.c [commit cebdb98617ae ("io_uring: add support for IORING_OP_OPENAT2") is not merged] Signed-off-by: yangerkun yangerkun@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- fs/io_uring.c | 102 +++++++++++++++++++++++++++------- include/uapi/linux/io_uring.h | 1 + 2 files changed, 84 insertions(+), 19 deletions(-)
diff --git a/fs/io_uring.c b/fs/io_uring.c index 97ddc9fbf625..f91e154a9a61 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -824,6 +824,7 @@ static const struct io_op_def io_op_defs[] = { .unbound_nonreg_file = 1, }, [IORING_OP_PROVIDE_BUFFERS] = {}, + [IORING_OP_REMOVE_BUFFERS] = {}, };
static void io_wq_submit_work(struct io_wq_work **workptr); @@ -2946,6 +2947,75 @@ static int io_openat(struct io_kiocb *req, bool force_nonblock) return 0; }
+static int io_remove_buffers_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_provide_buf *p = &req->pbuf; + u64 tmp; + + if (sqe->ioprio || sqe->rw_flags || sqe->addr || sqe->len || sqe->off) + return -EINVAL; + + tmp = READ_ONCE(sqe->fd); + if (!tmp || tmp > USHRT_MAX) + return -EINVAL; + + memset(p, 0, sizeof(*p)); + p->nbufs = tmp; + p->bgid = READ_ONCE(sqe->buf_group); + return 0; +} + +static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf, + int bgid, unsigned nbufs) +{ + unsigned i = 0; + + /* shouldn't happen */ + if (!nbufs) + return 0; + + /* the head kbuf is the list itself */ + while (!list_empty(&buf->list)) { + struct io_buffer *nxt; + + nxt = list_first_entry(&buf->list, struct io_buffer, list); + list_del(&nxt->list); + kfree(nxt); + if (++i == nbufs) + return i; + } + i++; + kfree(buf); + idr_remove(&ctx->io_buffer_idr, bgid); + + return i; +} + +static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock) +{ + struct io_provide_buf *p = &req->pbuf; + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer *head; + int ret = 0; + + io_ring_submit_lock(ctx, !force_nonblock); + + lockdep_assert_held(&ctx->uring_lock); + + ret = -ENOENT; + head = idr_find(&ctx->io_buffer_idr, p->bgid); + if (head) + ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs); + + io_ring_submit_lock(ctx, !force_nonblock); + if (ret < 0) + req_set_fail_links(req); + io_cqring_add_event(req, ret); + io_put_req(req); + return 0; +} + static int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { @@ -3021,15 +3091,7 @@ static int io_provide_buffers(struct io_kiocb *req, bool force_nonblock) ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1, GFP_KERNEL); if (ret < 0) { - while (!list_empty(&head->list)) { - struct io_buffer *buf; - - buf = list_first_entry(&head->list, - struct io_buffer, list); - list_del(&buf->list); - kfree(buf); - } - kfree(head); + __io_remove_buffers(ctx, head, p->bgid, -1U); goto out; } } @@ -4778,6 +4840,9 @@ static int io_req_defer_prep(struct io_kiocb *req, case IORING_OP_PROVIDE_BUFFERS: ret = io_provide_buffers_prep(req, sqe); break; + case IORING_OP_REMOVE_BUFFERS: + ret = io_remove_buffers_prep(req, sqe); + break; default: printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", req->opcode); @@ -5064,6 +5129,14 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, } ret = io_provide_buffers(req, force_nonblock); break; + case IORING_OP_REMOVE_BUFFERS: + if (sqe) { + ret = io_remove_buffers_prep(req, sqe); + if (ret) + break; + } + ret = io_remove_buffers(req, force_nonblock); + break; default: ret = -EINVAL; break; @@ -6965,16 +7038,7 @@ static int __io_destroy_buffers(int id, void *p, void *data) struct io_ring_ctx *ctx = data; struct io_buffer *buf = p;
- /* the head kbuf is the list itself */ - while (!list_empty(&buf->list)) { - struct io_buffer *nxt; - - nxt = list_first_entry(&buf->list, struct io_buffer, list); - list_del(&nxt->list); - kfree(nxt); - } - kfree(buf); - idr_remove(&ctx->io_buffer_idr, id); + __io_remove_buffers(ctx, buf, id, -1U); return 0; }
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 28a85bdff505..b8c6c1a9cbb4 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -127,6 +127,7 @@ enum { IORING_OP_EPOLL_CTL, IORING_OP_SPLICE, IORING_OP_PROVIDE_BUFFERS, + IORING_OP_REMOVE_BUFFERS,
/* this goes last, obviously */ IORING_OP_LAST,