From: Jingbo Xu <jefflexu@linux.alibaba.com>
anolis inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IB5UKT
Reference: https://gitee.com/anolis/cloud-kernel/commit/d6843be666bf
--------------------------------
ANBZ: #2056
Fscache/CacheFiles offer fscache_read_or_alloc_pages() to implement the readahead routine of filesystems using fscache. The implementation of fscache_read_or_alloc_pages() will call .readpage() on each backpage, in which case each backpage will generate an IO request. The performance bottleneck is not an issue when fscache is used as the local cache for network filesystems. However it is not the case for filesystems using fscache in on-demand mode.
This patch introduces a new helper, fscache_prepare_read(), for this use case. It first checks whether there is any hole inside the requested range, and triggers an on-demand read for each hole found. This step ensures that all the data for the requested range is present in the backing file.
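In sketch form, the hole check walks the requested range with vfs_llseek(). The sketch below is illustrative only (hole_check_sketch is a made-up name, and it simply fails on a hole); the real logic, including the on-demand read and the retry, is cachefiles_ondemand_check() in the diff:

  static int hole_check_sketch(struct file *file, loff_t pos, size_t len)
  {
  	loff_t end = pos + len;

  	while (pos < end) {
  		/* find the first offset >= pos that contains data */
  		loff_t data = vfs_llseek(file, pos, SEEK_DATA);

  		/* error/EOF (-ENXIO), or data starts later: pos is in a hole */
  		if (data < 0 || data > pos)
  			return -ENODATA; /* the real code issues an on-demand read here */

  		/* skip past the data extent we are currently sitting in */
  		pos = vfs_llseek(file, pos, SEEK_HOLE);
  		if (pos < 0)
  			return -ENODATA;
  	}
  	return 0;
  }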
Then it triggers asynchronous readahead on the backing file. Since the backing file has FMODE_RANDOM set, the following page_cache_sync_readahead() falls back to force_page_cache_readahead(), which submits reads for exactly the requested pages instead of applying the usual readahead heuristics.
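For reference, this is the branch in page_cache_sync_readahead() (mm/readahead.c, quoted roughly as it looks in kernels of this vintage) that the fallback relies on:

  void page_cache_sync_readahead(struct address_space *mapping,
  			       struct file_ra_state *ra, struct file *filp,
  			       pgoff_t offset, unsigned long req_size)
  {
  	/* no read-ahead */
  	if (!ra->ra_pages)
  		return;

  	/* be dumb: read the requested pages without heuristics */
  	if (filp && (filp->f_mode & FMODE_RANDOM)) {
  		force_page_cache_readahead(mapping, filp, offset, req_size);
  		return;
  	}

  	/* do read-ahead */
  	ondemand_readahead(mapping, ra, filp, false, offset, req_size);
  }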
Finally it starts a synchronous buffered read on the backing file. Thanks to the asynchronous readahead, this buffered read will usually find the page cache already up to date. The buffered read is handled in a worker context, so the readahead routine itself is never blocked on it.
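A hypothetical caller, for illustration only (my_fs_read_done and my_fs_readahead are made-up names, not part of this patch): a filesystem's readahead path hands the whole locked page range to fscache in one call, and the completion callback runs per page from the worker:

  /* illustrative fscache_rw_complete_t callback */
  static void my_fs_read_done(struct page *page, void *context, int error)
  {
  	if (!error)
  		SetPageUptodate(page);
  	unlock_page(page);
  }

  static int my_fs_readahead(struct fscache_cookie *cookie,
  			   struct address_space *mapping,
  			   pgoff_t first_index, unsigned int nr_pages)
  {
  	/* assumes the nr_pages pages are already locked in mapping;
  	 * -ENOBUFS means no cache: fall back to the normal read path */
  	return fscache_prepare_read(cookie, mapping, first_index, nr_pages,
  				    (loff_t)first_index << PAGE_SHIFT,
  				    my_fs_read_done, NULL);
  }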
Signed-off-by: Jingbo Xu <jefflexu@linux.alibaba.com>
Link: https://gitee.com/anolis/cloud-kernel/pulls/692
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Signed-off-by: Baokun Li <libaokun1@huawei.com>
---
 fs/cachefiles/interface.c     |   1 +
 fs/cachefiles/internal.h      |   1 +
 fs/cachefiles/rdwr.c          | 162 ++++++++++++++++++++++++++++++++++
 fs/fscache/page.c             |  66 ++++++++++++++
 include/linux/fscache-cache.h |   5 ++
 include/linux/fscache.h       |  20 +++++
 6 files changed, 255 insertions(+)
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 60b6ca443e8e..634e7041c0f3 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -573,6 +573,7 @@ const struct fscache_cache_ops cachefiles_cache_ops = {
 	.attr_changed		= cachefiles_attr_changed,
 	.read_or_alloc_page	= cachefiles_read_or_alloc_page,
 	.read_or_alloc_pages	= cachefiles_read_or_alloc_pages,
+	.prepare_read		= cachefiles_prepare_read,
 	.allocate_page		= cachefiles_allocate_page,
 	.allocate_pages		= cachefiles_allocate_pages,
 	.write_page		= cachefiles_write_page,
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index ab0ca3b1cd08..b8ef5be59005 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -241,6 +241,7 @@ extern int cachefiles_read_or_alloc_page(struct fscache_retrieval *,
 extern int cachefiles_read_or_alloc_pages(struct fscache_retrieval *,
					   struct list_head *,
					   unsigned *, gfp_t);
+extern int cachefiles_prepare_read(struct fscache_retrieval *op, pgoff_t index);
 extern int cachefiles_allocate_page(struct fscache_retrieval *, struct page *,
				    gfp_t);
 extern int cachefiles_allocate_pages(struct fscache_retrieval *,
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 7cfbbeee9e87..0e1992bedf71 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -9,6 +9,8 @@
 #include <linux/slab.h>
 #include <linux/file.h>
 #include <linux/swap.h>
+#include <linux/backing-dev.h>
+#include <linux/uio.h>
 #include "internal.h"
 
 /*
@@ -793,6 +795,166 @@ int cachefiles_read_or_alloc_pages(struct fscache_retrieval *op,
 	return -ENOBUFS;
 }
 
+static int cachefiles_ondemand_check(struct cachefiles_object *object,
+				     loff_t start_pos, size_t len)
+{
+	struct file *file = object->file;
+	size_t remained;
+	loff_t pos;
+	int ret;
+
+	/* make sure there's no hole in the requested range */
+	pos = start_pos;
+	remained = len;
+
+	while (remained) {
+		bool again = true;
+		size_t count = remained;
+		loff_t off, off2, new_pos;
+retry:
+		off = vfs_llseek(file, pos, SEEK_DATA);
+		if (off < 0) {
+			if (off == (loff_t)-ENXIO)
+				goto ondemand_read;
+			return -ENODATA;
+		}
+
+		if (off >= pos + remained)
+			goto ondemand_read;
+
+		if (off > pos) {
+			count = off - pos;
+			goto ondemand_read;
+		}
+
+		off2 = vfs_llseek(file, pos, SEEK_HOLE);
+		if (off2 < 0)
+			return -ENODATA;
+
+		new_pos = min_t(loff_t, off2, pos + remained);
+		remained -= new_pos - pos;
+		pos = new_pos;
+		continue;
+ondemand_read:
+		if (again) {
+			ret = cachefiles_ondemand_read(object, pos, count);
+			if (!ret) {
+				/* recheck if the hole has been filled or not */
+				again = false;
+				goto retry;
+			}
+		}
+		return -ENODATA;
+	}
+	return 0;
+}
+
+struct cachefiles_kiocb {
+	struct kiocb		iocb;
+	struct fscache_retrieval *op;
+	struct iov_iter		iter;
+	struct work_struct	work;
+	struct bio_vec		bvs[];
+};
+
+static void cachefiles_readpages_work_func(struct work_struct *work)
+{
+	struct cachefiles_kiocb *ki = container_of(work, struct cachefiles_kiocb, work);
+	int ret;
+
+	ret = vfs_iocb_iter_read(ki->iocb.ki_filp, &ki->iocb, &ki->iter);
+	/* complete the request if there was any progress or an error occurred */
+	if (ret != -EIOCBQUEUED) {
+		struct fscache_retrieval *op = ki->op;
+		unsigned int nr_pages = atomic_read(&op->n_pages);
+		unsigned int done_pages = 0;
+		int i, error;
+
+		if (ret > 0)
+			done_pages = ret / PAGE_SIZE;
+
+		for (i = 0; i < nr_pages; i++) {
+			error = i < done_pages ? 0 : -EIO;
+			fscache_end_io(op, ki->bvs[i].bv_page, error);
+		}
+
+		fscache_retrieval_complete(op, nr_pages);
+		fscache_put_retrieval(op);
+		kfree(ki);
+	}
+}
+
+int cachefiles_prepare_read(struct fscache_retrieval *op, pgoff_t index)
+{
+	struct cachefiles_object *object;
+	struct cachefiles_kiocb *ki;
+	loff_t start_pos = op->offset;
+	unsigned int n, nr_pages = atomic_read(&op->n_pages);
+	size_t len = nr_pages << PAGE_SHIFT;
+	struct page **pages;
+	size_t size;
+	int i, ret;
+
+	object = container_of(op->op.object, struct cachefiles_object, fscache);
+	if (!object->backer)
+		goto all_enobufs;
+
+	/*
+	 * 1. Check if there's any hole in the requested range, and trigger an
+	 * on-demand read request if there is.
+	 */
+	ASSERT(start_pos % PAGE_SIZE == 0);
+	ret = cachefiles_ondemand_check(object, start_pos, len);
+	if (ret)
+		goto all_enobufs;
+
+	/*
+	 * 2. Trigger readahead on the backing file in advance. Since
+	 * FMODE_RANDOM, the following page_cache_sync_readahead() will
+	 * fall back to force_page_cache_readahead().
+	 */
+	page_cache_sync_readahead(d_inode(object->backer)->i_mapping,
+				  &object->file->f_ra, object->file,
+				  start_pos / PAGE_SIZE, nr_pages);
+
+	size = sizeof(struct cachefiles_kiocb) + nr_pages * sizeof(struct bio_vec);
+	ki = kzalloc(size, GFP_KERNEL);
+	if (!ki)
+		goto all_enobufs;
+
+	/* reuse the trailing part of ki as pages[] */
+	pages = (void *)ki + size - nr_pages * sizeof(struct page *);
+	n = find_get_pages_contig(op->mapping, index, nr_pages, pages);
+	if (WARN_ON(n != nr_pages)) {
+		for (i = 0; i < n; i++)
+			put_page(pages[i]);
+		kfree(ki);
+		goto all_enobufs;
+	}
+
+	for (i = 0; i < n; i++) {
+		put_page(pages[i]);
+		ki->bvs[i].bv_page = pages[i];
+		ki->bvs[i].bv_offset = 0;
+		ki->bvs[i].bv_len = PAGE_SIZE;
+	}
+	iov_iter_bvec(&ki->iter, READ, ki->bvs, n, n * PAGE_SIZE);
+
+	ki->iocb.ki_filp = object->file;
+	ki->iocb.ki_pos = start_pos;
+	ki->iocb.ki_ioprio = get_current_ioprio();
+	ki->op = fscache_get_retrieval(op);
+
+	/* 3. Start a buffered read in worker context */
+	INIT_WORK(&ki->work, cachefiles_readpages_work_func);
+	queue_work(system_unbound_wq, &ki->work);
+	return 0;
+
+all_enobufs:
+	fscache_retrieval_complete(op, nr_pages);
+	return -ENOBUFS;
+}
+
 /*
  * allocate a block in the cache in which to store a page
  * - cache withdrawal is prevented by the caller
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 888ace2cc6e1..39a05a43284d 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -666,6 +666,72 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 }
 EXPORT_SYMBOL(__fscache_read_or_alloc_pages);
 
+int __fscache_prepare_read(struct fscache_cookie *cookie,
+			   struct address_space *mapping, pgoff_t index,
+			   unsigned int nr_pages, loff_t start_pos,
+			   fscache_rw_complete_t term_func, void *context)
+{
+	struct fscache_retrieval *op;
+	struct fscache_object *object;
+	bool wake_cookie = false;
+	int ret;
+
+	if (hlist_empty(&cookie->backing_objects))
+		return -ENOBUFS;
+
+	if (test_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) {
+		_leave(" = -ENOBUFS [invalidating]");
+		return -ENOBUFS;
+	}
+
+	ASSERTCMP(cookie->def->type, !=, FSCACHE_COOKIE_TYPE_INDEX);
+
+	if (fscache_wait_for_deferred_lookup(cookie) < 0)
+		return -ERESTARTSYS;
+
+	op = fscache_alloc_retrieval(cookie, mapping, term_func, context);
+	if (!op)
+		return -ENOMEM;
+	atomic_set(&op->n_pages, nr_pages);
+	op->offset = start_pos;
+
+	spin_lock(&cookie->lock);
+
+	if (!fscache_cookie_enabled(cookie) ||
+	    hlist_empty(&cookie->backing_objects))
+		goto nobufs_unlock;
+
+	object = hlist_entry(cookie->backing_objects.first,
+			     struct fscache_object, cookie_link);
+
+	__fscache_use_cookie(cookie);
+	if (fscache_submit_op(object, &op->op) < 0)
+		goto nobufs_unlock_dec;
+	spin_unlock(&cookie->lock);
+
+	ret = fscache_wait_for_operation_activation(
+		object, &op->op,
+		__fscache_stat(&fscache_n_retrieval_op_waits),
+		__fscache_stat(&fscache_n_retrievals_object_dead));
+	if (ret < 0)
+		goto out;
+
+	ret = object->cache->ops->prepare_read(op, index);
+out:
+	fscache_put_retrieval(op);
+	return ret;
+
+nobufs_unlock_dec:
+	wake_cookie = __fscache_unuse_cookie(cookie);
+nobufs_unlock:
+	spin_unlock(&cookie->lock);
+	fscache_put_retrieval(op);
+	if (wake_cookie)
+		__fscache_wake_unused_cookie(cookie);
+	return -ENOBUFS;
+}
+EXPORT_SYMBOL(__fscache_prepare_read);
+
 /*
  * allocate a block in the cache on which to store a page
  * - we return:
diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
index 71ee23f78f1d..31f2f13e2924 100644
--- a/include/linux/fscache-cache.h
+++ b/include/linux/fscache-cache.h
@@ -161,6 +161,9 @@ typedef int (*fscache_pages_retrieval_func_t)(struct fscache_retrieval *op,
					      unsigned *nr_pages,
					      gfp_t gfp);
 
+typedef int (*fscache_prepare_read_func_t)(struct fscache_retrieval *op,
+					   pgoff_t index);
+
 /**
  * fscache_get_retrieval - Get an extra reference on a retrieval operation
  * @op: The retrieval operation to get a reference on
@@ -285,6 +288,8 @@ struct fscache_cache_ops {
 	 * the cache */
 	fscache_pages_retrieval_func_t read_or_alloc_pages;
 
+	fscache_prepare_read_func_t prepare_read;
+
 	/* request a backing block for a page be allocated in the cache so that
 	 * it can be written directly */
 	fscache_page_retrieval_func_t allocate_page;
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ce51b915ad43..f262446f3a49 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -212,6 +212,13 @@ extern int __fscache_read_or_alloc_pages(struct fscache_cookie *,
					 fscache_rw_complete_t,
					 void *,
					 gfp_t);
+extern int __fscache_prepare_read(struct fscache_cookie *cookie,
+				  struct address_space *mapping,
+				  pgoff_t index,
+				  unsigned int nr_pages,
+				  loff_t start_pos,
+				  fscache_rw_complete_t term_func,
+				  void *context);
 extern int __fscache_alloc_page(struct fscache_cookie *, struct page *, gfp_t);
 extern int __fscache_write_page(struct fscache_cookie *, struct page *, loff_t, gfp_t);
 extern void __fscache_uncache_page(struct fscache_cookie *, struct page *);
@@ -616,6 +623,19 @@ int fscache_read_or_alloc_pages(struct fscache_cookie *cookie,
 	return -ENOBUFS;
 }
 
+static inline
+int fscache_prepare_read(struct fscache_cookie *cookie,
+			 struct address_space *mapping, pgoff_t index,
+			 unsigned int nr_pages, loff_t start_pos,
+			 fscache_rw_complete_t term_func, void *context)
+{
+	if (fscache_cookie_valid(cookie) && fscache_cookie_enabled(cookie))
+		return __fscache_prepare_read(cookie, mapping, index,
+				nr_pages, start_pos, term_func, context);
+	else
+		return -ENOBUFS;
+}
+
 /**
  * fscache_alloc_page - Allocate a block in which to store a page
  * @cookie: The cookie representing the cache object