From: Hongbo Li <lihongbo22@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5W1P

--------------------------------

All events posted during a data miss are linked into the xarray
structure. The @mfs_event structure is introduced to represent the
target miss event; for now, only read events are handled. The
message header of an event is encoded as @mfs_msg, and userspace
uses the fd carried in the message as a file handle to control the
cache in MFS. The core structure @mfs_cache_object is kept in the
inode's private data at inode allocation time.

Signed-off-by: Huang Xiaojia <huangxiaojia2@huawei.com>
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
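Note: the read()/poll() plumbing that actually delivers these messages
to userspace is not part of this patch. Purely as an illustration of
the intended uapi, a daemon consuming miss events might look like the
sketch below; the device path, the MFS_OP_READ opcode value, and the
one-message-per-read() semantics are assumptions, not something this
patch defines.

/* Sketch only: assumes a later patch exposes events through a
 * character device that returns one whole message per read() and
 * supports poll(). Device path and opcode value are hypothetical.
 */
#include <poll.h>
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <linux/mfs.h>

#define MFS_OP_READ 0	/* hypothetical opcode for read events */

int main(void)
{
	char buf[4096];
	int devfd = open("/dev/mfs_ctrl", O_RDWR);	/* hypothetical node */
	struct pollfd pfd = { .fd = devfd, .events = POLLIN };

	if (devfd < 0)
		return 1;
	while (poll(&pfd, 1, -1) > 0) {
		ssize_t n = read(devfd, buf, sizeof(buf));
		struct mfs_msg *msg = (struct mfs_msg *)buf;
		struct mfs_read *req = (struct mfs_read *)msg->data;

		if (n < (ssize_t)sizeof(*msg) || msg->opcode != MFS_OP_READ)
			continue;
		/* msg->fd identifies the cache object; fetch [off, off+len) */
		printf("miss: fd=%u id=%u off=%llu len=%llu pid=%d\n",
		       msg->fd, msg->id,
		       (unsigned long long)req->off,
		       (unsigned long long)req->len, req->pid);
		/* ...fill the range in the cache, then complete by msg->id... */
	}
	return 0;
}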
 fs/mfs/cache.c           | 124 +++++++++++++++++++++++++++++++++++++++
 fs/mfs/data.c            |   6 +-
 fs/mfs/internal.h        |  44 ++++++++++++++
 fs/mfs/super.c           |   3 +
 include/uapi/linux/mfs.h |  15 +++++
 5 files changed, 191 insertions(+), 1 deletion(-)

diff --git a/fs/mfs/cache.c b/fs/mfs/cache.c
index 30d5c0f986ee..ffc6191bbe7b 100644
--- a/fs/mfs/cache.c
+++ b/fs/mfs/cache.c
@@ -3,6 +3,8 @@
 
 #include "internal.h"
 
+#include <linux/mfs.h>
+
 /*
  * Used for cache object
  */
@@ -38,11 +40,133 @@ void mfs_post_event_read(struct mfs_cache_object *object,
 			 loff_t off, uint64_t len,
 			 struct mfs_syncer *syncer, int op)
 {
+	struct mfs_sb_info *sbi = MFS_SB(object->mfs_inode->i_sb);
+	struct mfs_caches *caches = &sbi->caches;
+	XA_STATE(xas, &caches->events, 0);
+	struct mfs_event *event;
+	struct mfs_read *msg;
+	int ret;
+
+	/* 1. initialize the event structure */
+	event = kzalloc(sizeof(*event) + sizeof(*msg), GFP_KERNEL);
+	if (!event) {
+		pr_warn("post read event failed, off:%lld, len:%llu\n", off, len);
+		return;
+	}
+
+	/* 2. hold the object's owner mfs_inode */
+	ihold(object->mfs_inode);
+	refcount_set(&event->ref, 1);
+	event->object = object;
+	event->msg.version = 0;
+	event->msg.opcode = op;
+	event->msg.len = sizeof(struct mfs_msg) + sizeof(struct mfs_read);
+	event->msg.fd = object->fd;
+	msg = (void *)event->msg.data;
+	msg->off = off;
+	msg->len = len;
+	msg->pid = current->pid;
+	INIT_LIST_HEAD(&event->link);
+	event->syncer = syncer;
+	if (event->syncer) {
+		atomic_inc(&syncer->notback);
+		spin_lock(&syncer->list_lock);
+		list_add_tail(&event->link, &syncer->head);
+		spin_unlock(&syncer->list_lock);
+	}
+
+	/* 3. insert the event into the events xarray */
+	do {
+		xas_lock(&xas);
+
+		if (!test_bit(MFS_CACHE_READY, &caches->flags)) {
+			xas_unlock(&xas);
+			goto out;
+		}
+
+		/* ensure the cache-ready check happens before posting the event */
+		smp_mb__after_atomic();
+
+		xas.xa_index = caches->next_msg;
+		xas_find_marked(&xas, UINT_MAX, XA_FREE_MARK);
+		if (xas.xa_node == XAS_RESTART) {
+			xas.xa_index = 0;
+			xas_find_marked(&xas, caches->next_msg - 1, XA_FREE_MARK);
+		}
+		if (xas.xa_node == XAS_RESTART)
+			xas_set_err(&xas, -EBUSY);
+		xas_store(&xas, event);
+		if (xas_valid(&xas)) {
+			caches->next_msg = xas.xa_index + 1;
+			event->msg.id = xas.xa_index;
+			xas_clear_mark(&xas, XA_FREE_MARK);
+			xas_set_mark(&xas, MFS_EVENT_NEW);
+		}
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	ret = xas_error(&xas);
+	if (ret) {
+		pr_warn("post read event failed to insert events, off:%lld, len:%llu, ret:%d\n",
+			off, len, ret);
+		goto out;
+	}
+
+	/* 4. wake up the polling wait list */
+	wake_up_all(&caches->pollwq);
+	return;
+out:
+	if (event->syncer) {
+		spin_lock(&syncer->list_lock);
+		list_del_init(&event->link);
+		spin_unlock(&syncer->list_lock);
+		atomic_dec(&syncer->notback);
+	}
+	kfree(event);
+	iput(object->mfs_inode);
+}
+
+void mfs_destroy_events(struct super_block *sb)
+{
+	struct mfs_sb_info *sbi = MFS_SB(sb);
+	struct mfs_caches *caches = &sbi->caches;
+	unsigned long index;
+	struct mfs_event *event;
+
+	xa_lock(&caches->events);
+	xa_for_each(&caches->events, index, event) {
+		/*
+		 * Inodes are evicted before the events are destroyed,
+		 * so no event should remain at this point.
+		 */
+		pr_warn("Event remains:%lu\n", index);
+		__xa_erase(&caches->events, index);
+		xa_unlock(&caches->events);
+		put_mfs_event(event);
+		xa_lock(&caches->events);
+	}
+	xa_unlock(&caches->events);
+	xa_destroy(&caches->events);
 }
 
 void mfs_cancel_syncer_events(struct mfs_cache_object *object,
 			      struct mfs_syncer *syncer)
 {
+	struct mfs_sb_info *sbi = MFS_SB(object->mfs_inode->i_sb);
+	struct mfs_caches *caches = &sbi->caches;
+	struct mfs_event *event, *next;
+	struct list_head tmp;
+
+	INIT_LIST_HEAD(&tmp);
+	spin_lock(&syncer->list_lock);
+	list_splice_init(&syncer->head, &tmp);
+	spin_unlock(&syncer->list_lock);
+
+	list_for_each_entry_safe(event, next, &tmp, link) {
+		xa_erase(&caches->events, event->msg.id);
+		iput(event->object->mfs_inode);
+		kfree(event);
+	}
 }
 
 struct mfs_cache_object *mfs_alloc_object(struct inode *inode,
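Note: the open-coded search in mfs_post_event_read() above implements
cyclic ID allocation over the events xarray. For illustration only,
roughly the same allocation could be written with the higher-level
XArray API, as in the sketch below. It is not a drop-in replacement:
the real code open-codes the search so that the MFS_CACHE_READY check
and the MFS_EVENT_NEW mark happen in the same locked section, and it
keeps next_msg as an unsigned long rather than the u32 cursor that
xa_alloc_cyclic() requires.

/* Sketch: equivalent ID allocation via xa_alloc_cyclic(). The helper
 * name and the u32 cursor are illustrative, not part of this patch.
 */
static int post_event_sketch(struct xarray *events, u32 *cursor,
			     struct mfs_event *event, u32 *out_id)
{
	/* finds a free index starting at *cursor, wrapping once */
	int err = xa_alloc_cyclic(events, out_id, event, xa_limit_32b,
				  cursor, GFP_KERNEL);

	if (err < 0)	/* -EBUSY: no free index; -ENOMEM: node alloc */
		return err;
	/* racy vs. readers here; the real code marks under one lock */
	xa_set_mark(events, *out_id, MFS_EVENT_NEW);
	return 0;
}

diff --git a/fs/mfs/data.c b/fs/mfs/data.c
index 2cd28e0b0222..9df523abf92e 100644
--- a/fs/mfs/data.c
+++ b/fs/mfs/data.c
@@ -5,7 +5,6 @@
 
 #include <linux/pagemap.h>
 #include <linux/uio.h>
-#include <linux/types.h>
 #include <linux/completion.h>
 
 static struct mfs_file_info *mfs_file_info_alloc(struct file *lower, struct file *cache)
@@ -243,6 +242,7 @@ static int range_check_mem(struct range_t *r)
 
 static int mfs_check_range(struct range_ctx *ctx)
 {
+	struct mfs_sb_info *sbi = MFS_SB(ctx->object->mfs_inode->i_sb);
 	loff_t start = ctx->off, end = ctx->off + ctx->len;
 	struct file *file = ctx->file;
 	struct range_t r = { .file = file };
@@ -250,6 +250,10 @@ static int mfs_check_range(struct range_ctx *ctx)
 	struct mfs_syncer syncer;
 	int err = 0, err2 = 0;
 
+	if (!support_event(sbi))
+		return 0;
+	if (!cache_is_ready(sbi))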
+		return ctx->sync ? -EIO : 0;
 	if (!ctx->len)
 		return 0;
 
diff --git a/fs/mfs/internal.h b/fs/mfs/internal.h
index 295adc0794a3..d2e068665f43 100644
--- a/fs/mfs/internal.h
+++ b/fs/mfs/internal.h
@@ -9,12 +9,19 @@
 #include <linux/mm.h>
 #include <linux/container_of.h>
 #include <linux/spinlock_types.h>
+#include <linux/xarray.h>
+#include <linux/wait.h>
 #include <linux/completion.h>
+#include <linux/types.h>
 #include <linux/mfs.h>
 
 #define MFS_NAME "mfs"
 #define MFS_OPEN_FLAGS (O_NOATIME)
+#define MFS_EVENT_NEW XA_MARK_1
+
+/* mfs_caches flags */
+#define MFS_CACHE_READY 0
 
 struct mfs_cache_object {
 	struct file *cache_file;
@@ -33,6 +40,22 @@ struct mfs_syncer {
 	atomic_t res;
 };
 
+struct mfs_event {
+	refcount_t ref;
+	struct mfs_cache_object *object;
+	struct mfs_syncer *syncer;
+	struct list_head link;
+	struct mfs_msg msg;
+};
+
+struct mfs_caches {
+	struct xarray events;
+	wait_queue_head_t pollwq;
+	unsigned long next_msg;
+	unsigned long next_ev;
+	unsigned long flags;
+};
+
 struct mfs_sb_info {
 	int mode;
 	char *mtree;
@@ -43,6 +66,8 @@ struct mfs_sb_info {
 	int minor;
 
 	struct super_block *sb;
+
+	struct mfs_caches caches;
 };
 
 struct mfs_inode {
@@ -155,6 +180,24 @@ static inline bool need_sync_event(struct super_block *sb)
 	return sbi->mode == MFS_MODE_REMOTE;
 }
 
+static inline bool cache_is_ready(struct mfs_sb_info *sbi)
+{
+	return test_bit(MFS_CACHE_READY, &sbi->caches.flags);
+}
+
+static inline void get_mfs_event(struct mfs_event *event)
+{
+	refcount_inc(&event->ref);
+}
+
+static inline void put_mfs_event(struct mfs_event *event)
+{
+	if (refcount_dec_and_test(&event->ref)) {
+		iput(event->object->mfs_inode);
+		kfree(event);
+	}
+}
+
 struct inode *mfs_iget(struct super_block *sb, struct inode *lower_inode,
 		       struct path *cache_path);
 int mfs_alloc_dentry_info(struct dentry *dentry);
@@ -163,6 +206,7 @@ void mfs_free_dentry_info(struct dentry *dentry);
 void mfs_post_event_read(struct mfs_cache_object *object,
 			 loff_t off, uint64_t len,
 			 struct mfs_syncer *syncer, int op);
+void mfs_destroy_events(struct super_block *sb);
 void mfs_cancel_syncer_events(struct mfs_cache_object *object,
 			      struct mfs_syncer *syncer);
 struct mfs_cache_object *mfs_alloc_object(struct inode *inode,
diff --git a/fs/mfs/super.c b/fs/mfs/super.c
index 1f715b93cb03..a8023a66cd8d 100644
--- a/fs/mfs/super.c
+++ b/fs/mfs/super.c
@@ -370,6 +370,8 @@ static int mfs_init_fs_context(struct fs_context *fc)
 	if (!sbi)
 		return -ENOMEM;
 
+	init_waitqueue_head(&sbi->caches.pollwq);
+	xa_init_flags(&sbi->caches.events, XA_FLAGS_ALLOC);
 	sbi->minor = -1;
 	fc->s_fs_info = sbi;
 	fc->ops = &mfs_context_ops;
@@ -381,6 +383,7 @@ static void mfs_kill_sb(struct super_block *sb)
 	struct mfs_sb_info *sbi = MFS_SB(sb);
 
 	kill_anon_super(sb);
+	mfs_destroy_events(sb);
 	if (sbi->mtree) {
 		path_put(&sbi->lower);
 		kfree(sbi->mtree);
diff --git a/include/uapi/linux/mfs.h b/include/uapi/linux/mfs.h
index 78c4b57a83f5..1bae08939b95 100644
--- a/include/uapi/linux/mfs.h
+++ b/include/uapi/linux/mfs.h
@@ -18,4 +18,19 @@ enum {
 	MFS_MODE_REMOTE,
 };
 
+struct mfs_msg {
+	__u8 version;
+	__u8 opcode;
+	__u16 len;
+	__u32 fd;
+	__u32 id;
+	__u8 data[];
+};
+
+struct mfs_read {
+	__u64 off;
+	__u64 len;
+	__s32 pid;
+};
+
 #endif /* _UAPI_LINUX_MFS_H */
-- 
2.25.1
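P.S. This patch only posts events; nothing consumes them yet. The
next_ev cursor and the MFS_EVENT_NEW mark hint at how a follow-up
reader could walk unread events while relying on the refcount added
here. The sketch below is purely an assumption about that follow-up
design (the helper name is invented, and cursor wrap-around is
omitted for brevity):

/* Sketch (assumption about a later patch): pop the next unread event
 * for delivery to userspace. MFS_EVENT_NEW tracks not-yet-read
 * entries; next_ev is the scan cursor. The reference taken here would
 * pair with put_mfs_event() after the copy-out.
 */
static struct mfs_event *mfs_pick_event(struct mfs_caches *caches)
{
	XA_STATE(xas, &caches->events, caches->next_ev);
	struct mfs_event *event;

	xas_lock(&xas);
	event = xas_find_marked(&xas, ULONG_MAX, MFS_EVENT_NEW);
	if (event) {
		xas_clear_mark(&xas, MFS_EVENT_NEW);	/* deliver once */
		caches->next_ev = xas.xa_index + 1;
		get_mfs_event(event);	/* keep alive while copying out */
	}
	xas_unlock(&xas);
	return event;
}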