From: Hongbo Li <lihongbo22@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/release-management/issues/ID5W1P -------------------------------- MFS will provide three modes: none, local and remote. - none: Acts as a stackable file system on top of the lower file system and simply passes operations through to the backend file system. - local: Used when both the lower layer and the cachedir layer are local file systems. A miss event (not hit in the page cache) posts an asynchronous event to userspace. - remote: Used when the target data lives on a remote server, such as OBS or another private DFS without a POSIX-like interface. A miss event (not hit in the local cache) posts a synchronous event to userspace and waits for the response. MFS also needs the mtree and cachedir mount options. mtree is the metadata source of MFS and cachedir is the data source of MFS. In remote mode, cachedir is the local cache of the backend remote storage system. MFS provides a read-only view of the underlying file system, so operations that modify metadata or data are not allowed. Other file system operations will be implemented step by step.
Signed-off-by: Huang Xiaojia <huangxiaojia2@huawei.com>
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
 fs/mfs/Makefile            |   2 +-
 fs/mfs/inode.c             |  75 ++++++++
 fs/mfs/internal.h          |  65 +++++++
 fs/mfs/super.c             | 387 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/magic.h |   1 +
 include/uapi/linux/mfs.h   |  15 ++
 6 files changed, 544 insertions(+), 1 deletion(-)
 create mode 100644 fs/mfs/inode.c
 create mode 100644 include/uapi/linux/mfs.h

diff --git a/fs/mfs/Makefile b/fs/mfs/Makefile
index ff2cd4a514cc..0c10e447c6b2 100644
--- a/fs/mfs/Makefile
+++ b/fs/mfs/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
 obj-$(CONFIG_MFS_FS) += mfs.o
-mfs-objs := super.o
+mfs-objs := super.o inode.o
diff --git a/fs/mfs/inode.c b/fs/mfs/inode.c
new file mode 100644
index 000000000000..32e2ce27761d
--- /dev/null
+++ b/fs/mfs/inode.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include "internal.h"
+
+#include <linux/err.h>
+#include <linux/fs_stack.h>
+#include <linux/namei.h>
+
+/* iget5_locked() test callback: match on the lower inode the MFS inode wraps */
+static int mfs_inode_eq(struct inode *inode, void *lower_target)
+{
+	return mfs_lower_inode(inode) == (struct inode *)lower_target;
+}
+
+/*
+ * iget5_locked() set callback.  The lookup key has to be stored here, while
+ * the new inode is being hashed, so that a concurrent mfs_iget() for the same
+ * lower inode matches it in mfs_inode_eq() instead of allocating a duplicate.
+ */
+static int mfs_inode_set(struct inode *inode, void *lower_target)
+{
+	struct inode *lower_inode = lower_target;
+
+	MFS_I(inode)->lower = lower_inode;
+	inode->i_ino = lower_inode->i_ino;
+	return 0;
+}
+
+/*
+ * Find or create the MFS inode stacked on @lower_inode.
+ *
+ * A reference is taken on @lower_inode and on the inode behind @cache_path;
+ * a newly created MFS inode keeps both until eviction, otherwise they are
+ * dropped again.  Returns the inode or an ERR_PTR.
+ */
+struct inode *mfs_iget(struct super_block *sb, struct inode *lower_inode,
+		       struct path *cache_path)
+{
+	struct inode *inode, *cache_inode = d_inode(cache_path->dentry);
+	int err;
+
+	if (!igrab(lower_inode))
+		return ERR_PTR(-ESTALE);
+	if (!igrab(cache_inode)) {
+		err = -ESTALE;
+		goto err_put_lower;
+	}
+	inode = iget5_locked(sb, lower_inode->i_ino,
+			     mfs_inode_eq,
+			     mfs_inode_set,
+			     lower_inode);
+	if (!inode) {
+		err = -ENOMEM;
+		goto err_put_cache;
+	}
+	/* found in cache */
+	if (!(inode->i_state & I_NEW)) {
+		iput(cache_inode);
+		iput(lower_inode);
+		return inode;
+	}
+	/* new inode: ->lower and ->i_ino were filled in by mfs_inode_set() */
+	MFS_I(inode)->cache = cache_inode;
+
+	fsstack_copy_attr_all(inode, lower_inode);
+	fsstack_copy_inode_size(inode, lower_inode);
+	unlock_new_inode(inode);
+	return inode;
+err_put_cache:
+	iput(cache_inode);
+err_put_lower:
+	iput(lower_inode);
+	return ERR_PTR(err);
+}
diff --git a/fs/mfs/internal.h b/fs/mfs/internal.h
index 70969cd1c124..67c3a7ad699b 100644
--- a/fs/mfs/internal.h
+++ b/fs/mfs/internal.h
@@ -5,10 +5,25 @@
 #define _MFS_INTERNAL_H
 
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/container_of.h>
 #include <linux/spinlock_types.h>
+#include <linux/mfs.h>
 
 #define MFS_NAME "mfs"
 
+struct mfs_sb_info {
+	int mode;
+	char *mtree;
+	char *cachedir;
+	struct path lower;
+	struct path cache;
+
+	int minor;
+
+	struct super_block *sb;
+};
+
 struct mfs_inode {
 	struct inode *lower;
 	struct inode *cache;
@@ -22,4 +37,54 @@ struct mfs_dentry_info {
 	struct path cache;
 };
 
+#define MFS_SB(sb) ((struct mfs_sb_info *)(sb)->s_fs_info)
+#define MFS_I(ptr) container_of(ptr, struct mfs_inode, vfs_inode)
+#define MFS_D(dent) ((struct mfs_dentry_info *)(dent)->d_fsdata)
+
+static inline struct inode *mfs_lower_inode(const struct inode *i)
+{
+	return MFS_I(i)->lower;
+}
+
+static inline void pathcpy(struct path *dst, const struct path *src)
+{
+	dst->dentry = src->dentry;
+	dst->mnt = src->mnt;
+}
+
+static inline void mfs_install_path(const struct dentry *dent,
+				    struct path *lpath,
+				    struct path *cpath)
+{
+	spin_lock(&MFS_D(dent)->lock);
+	pathcpy(&MFS_D(dent)->lower, lpath);
+	pathcpy(&MFS_D(dent)->cache, cpath);
+	spin_unlock(&MFS_D(dent)->lock);
+}
+
+/* Detach the lower and cache paths stored in @dent and path_put() them */
+static inline void mfs_release_path(const struct dentry *dent)
+{
+	struct path lpath, cpath;
+
+	if (!dent || !dent->d_fsdata)
+		return;
+	spin_lock(&MFS_D(dent)->lock);
+	pathcpy(&lpath, &MFS_D(dent)->lower);
+	pathcpy(&cpath, &MFS_D(dent)->cache);
+	MFS_D(dent)->lower.dentry = NULL;
+	MFS_D(dent)->lower.mnt = NULL;
+	MFS_D(dent)->cache.dentry = NULL;
+	MFS_D(dent)->cache.mnt = NULL;
+	spin_unlock(&MFS_D(dent)->lock);
+	/* path_put() may sleep, so drop the references outside the spinlock */
+	path_put(&lpath);
+	path_put(&cpath);
+}
+
+struct inode *mfs_iget(struct super_block *sb, struct inode *lower_inode,
+		       struct path *cache_path);
+int mfs_alloc_dentry_info(struct dentry *dentry);
+void mfs_free_dentry_info(struct dentry *dentry);
+
 #endif
diff --git a/fs/mfs/super.c b/fs/mfs/super.c
index 607b47cdc31b..f6bc2739f350 100644
--- a/fs/mfs/super.c
+++ b/fs/mfs/super.c
@@ -4,7 +4,12 @@
 #include "internal.h"
 
 #include <linux/module.h>
+#include <linux/magic.h>
 #include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
+#include <linux/statfs.h>
 
 /*
  * Used for alloc_inode
@@ -23,13 +28,395 @@ static void mfs_init_once(void *obj)
 	inode_init_once(&i->vfs_inode);
 }
 
+static struct inode *mfs_alloc_inode(struct super_block *sb)
+{
+	struct mfs_inode *vi = alloc_inode_sb(sb, mfs_inode_cachep, GFP_KERNEL);
+
+	if (!vi)
+		return NULL;
+	memset(vi, 0, offsetof(struct mfs_inode, vfs_inode));
+	mutex_init(&vi->lock);
+	return &vi->vfs_inode;
+}
+
+static void mfs_free_inode(struct inode *inode)
+{
+	struct mfs_inode *vi = MFS_I(inode);
+
+	kmem_cache_free(mfs_inode_cachep, vi);
+}
+
+static void mfs_evict_inode(struct inode *inode)
+{
+	struct mfs_inode *vi = MFS_I(inode);
+	struct inode *lower_inode = vi->lower;
+	struct inode *cache_inode = vi->cache;
+
+	truncate_inode_pages_final(&inode->i_data);
+	clear_inode(inode);
+	if (lower_inode) {
+		vi->lower = NULL;
+		iput(lower_inode);
+	}
+	if (cache_inode) {
+		vi->cache = NULL;
+		iput(cache_inode);
+	}
+}
+
+int mfs_alloc_dentry_info(struct dentry *dentry)
+{
+	struct mfs_dentry_info *info =
+		kmem_cache_zalloc(mfs_dentry_cachep, GFP_ATOMIC);
+
+	if (!info)
+		return -ENOMEM;
+	spin_lock_init(&info->lock);
+	dentry->d_fsdata = info;
+	return 0;
+}
+
+void mfs_free_dentry_info(struct dentry *dentry)
+{
+	if (!dentry || !dentry->d_fsdata)
+		return;
+
+	kmem_cache_free(mfs_dentry_cachep, dentry->d_fsdata);
+	dentry->d_fsdata = NULL;
+}
+
+static void mfs_d_release(struct dentry *dentry)
+{
+	/* for root, the path will release with super block */
+	if (!IS_ROOT(dentry))
+		mfs_release_path(dentry);
+
+	mfs_free_dentry_info(dentry);
+}
+
+const struct dentry_operations mfs_dops = {
+	.d_release = mfs_d_release,
+};
+
+static int mfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct mfs_sb_info *sbi = MFS_SB(dentry->d_sb);
+	int err = vfs_statfs(&sbi->cache, buf);
+
+	buf->f_type = MFS_SUPER_MAGIC;
+	/* Use the reserved slot to keep the device id */
+	buf->f_spare[0] = sbi->minor;
+	return err;
+}
+
+static int mfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+	struct mfs_sb_info *sbi = MFS_SB(root->d_sb);
+
+	if (sbi->mtree)
+		seq_show_option(seq, "mtree", sbi->mtree);
+	if (sbi->cachedir)
+		seq_show_option(seq, "cachedir", sbi->cachedir);
+	switch (sbi->mode) {
+	case MFS_MODE_NONE:
+		seq_puts(seq, ",mode=none");
+		break;
+	case MFS_MODE_LOCAL:
+		seq_puts(seq, ",mode=local");
+		break;
+	case MFS_MODE_REMOTE:
+		seq_puts(seq, ",mode=remote");
+		break;
+	}
+	return 0;
+}
+
+const struct super_operations mfs_sops = {
+	.alloc_inode = mfs_alloc_inode,
+	.free_inode = mfs_free_inode,
+	.drop_inode = generic_delete_inode,
+	.evict_inode = mfs_evict_inode,
+	.statfs = mfs_statfs,
+	.show_options = mfs_show_options,
+};
+
+enum {
+	Opt_mtree,
+	Opt_cachedir,
+	Opt_mode,
+};
+
+static const struct constant_table mfs_param_mode[] = {
+	{"none", MFS_MODE_NONE},
+	{"local", MFS_MODE_LOCAL},
+	{"remote", MFS_MODE_REMOTE},
+	{}
+};
+
+static const struct fs_parameter_spec mfs_fs_parameters[] = {
+	fsparam_string("mtree", Opt_mtree),
+	fsparam_string("cachedir", Opt_cachedir),
+	fsparam_enum("mode", Opt_mode, mfs_param_mode),
+	{}
+};
+
+/* Strip every trailing @c from @s in place and return @s */
+static char *remove_trailing(char *s, char c)
+{
+	size_t len = strlen(s);
+
+	/* index based so that no pointer before @s is ever formed */
+	while (len && s[len - 1] == c)
+		len--;
+	s[len] = '\0';
+	return s;
+}
+
+/*
+ * Resolve @inputpath (trailing slashes ignored) to an existing directory.
+ *
+ * On success a reference to the directory is stored in @target and its path
+ * is returned as a freshly allocated string; the caller owns both.  On
+ * failure an ERR_PTR is returned and @target holds no reference.
+ */
+static char *_acquire_set_path(char *inputpath, struct path *target)
+{
+	char *p, *realp, *path;
+	char *res;
+	int ret;
+
+	p = kstrdup(inputpath, GFP_KERNEL);
+	if (!p)
+		return ERR_PTR(-ENOMEM);
+	realp = remove_trailing(p, '/');
+	if (strlen(realp) == 0) {
+		kfree(p);
+		return ERR_PTR(-EINVAL);
+	}
+	ret = kern_path(realp, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, target);
+	kfree(p);
+	if (ret)
+		return ERR_PTR(ret);
+
+	path = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!path) {
+		path_put(target);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	realp = d_path(target, path, PATH_MAX);
+	if (IS_ERR(realp)) {
+		path_put(target);
+		res = realp;
+		goto free;
+	}
+
+	res = kstrdup(realp, GFP_KERNEL);
+	if (!res) {
+		path_put(target);
+		res = ERR_PTR(-ENOMEM);
+	}
+free:
+	kfree(path);
+	return res;
+}
+
+/*
+ * Parse one mount parameter into the mfs_sb_info attached to @fc.  When
+ * mtree= or cachedir= is given more than once, the previously resolved path
+ * and string are released so that only the last occurrence is kept.
+ */
+static int mfs_fc_parse_param(struct fs_context *fc,
+			      struct fs_parameter *param)
+{
+	struct mfs_sb_info *sbi = fc->s_fs_info;
+	struct fs_parse_result result;
+	struct path target;
+	char *p;
+	int opt;
+
+	opt = fs_parse(fc, mfs_fs_parameters, param, &result);
+	if (opt < 0)
+		return opt;
+
+	switch (opt) {
+	case Opt_mtree:
+		p = _acquire_set_path(param->string, &target);
+		if (IS_ERR(p))
+			return PTR_ERR(p);
+		if (sbi->mtree) {
+			path_put(&sbi->lower);
+			kfree(sbi->mtree);
+		}
+		sbi->mtree = p;
+		pathcpy(&sbi->lower, &target);
+		break;
+	case Opt_cachedir:
+		p = _acquire_set_path(param->string, &target);
+		if (IS_ERR(p))
+			return PTR_ERR(p);
+		if (sbi->cachedir) {
+			path_put(&sbi->cache);
+			kfree(sbi->cachedir);
+		}
+		sbi->cachedir = p;
+		pathcpy(&sbi->cache, &target);
+		break;
+	case Opt_mode:
+		sbi->mode = result.int_32;
+		break;
+	default:
+		return -ENOPARAM;
+	}
+	return 0;
+}
+
+/* Return true when @child names something strictly below directory @parent */
+static bool mfs_path_is_below(const char *parent, const char *child)
+{
+	size_t plen = strlen(parent);
+
+	if (strncmp(parent, child, plen))
+		return false;
+	/* "/" already ends in a separator, anything longer is below it */
+	if (plen && parent[plen - 1] == '/')
+		return child[plen] != '\0';
+	/* otherwise the match must stop on a component boundary */
+	return child[plen] == '/';
+}
+
+/*
+ * Validate the parsed options and set up the read-only super block with a
+ * root inode stacked on the mtree directory.
+ */
+static int mfs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
+{
+	struct mfs_sb_info *sbi = MFS_SB(sb);
+	struct inode *inode;
+	int err;
+
+	if (!sbi->cachedir || !sbi->mtree) {
+		pr_err("Lack of mtree or cachedir option.\n");
+		return -EINVAL;
+	}
+
+	if (sbi->mode != MFS_MODE_REMOTE) {
+		if (strcmp(sbi->cachedir, sbi->mtree)) {
+			pr_err("local/none mode require the same mtree and cachedir.\n");
+			return -EINVAL;
+		}
+	} else {
+		if (!strcmp(sbi->cachedir, sbi->mtree)) {
+			pr_err("remote mode require different mtree and cachedir.\n");
+			return -EINVAL;
+		}
+		if (mfs_path_is_below(sbi->mtree, sbi->cachedir)) {
+			pr_err("remote mode mtree should not be parent of cachedir.\n");
+			return -EINVAL;
+		}
+	}
+
+	sb->s_stack_depth = max(sbi->lower.mnt->mnt_sb->s_stack_depth,
+				sbi->cache.mnt->mnt_sb->s_stack_depth) + 1;
+	if (sb->s_stack_depth > 1) {
+		pr_err("cannot be stacked on other stackable file system.\n");
+		return -EINVAL;
+	}
+
+	sb->s_magic = MFS_SUPER_MAGIC;
+	sb->s_flags |= SB_RDONLY | SB_NOATIME;
+	sb->s_maxbytes = MAX_LFS_FILESIZE;
+	sb->s_op = &mfs_sops;
+	sb->s_d_op = &mfs_dops;
+	err = super_setup_bdi(sb);
+	if (err)
+		return err;
+
+	inode = mfs_iget(sb, d_inode(sbi->lower.dentry), &sbi->cache);
+	if (IS_ERR(inode))
+		return PTR_ERR(inode);
+
+	/* d_make_root() consumes the inode reference, even on failure */
+	sb->s_root = d_make_root(inode);
+	if (!sb->s_root)
+		return -ENOMEM;
+
+	err = mfs_alloc_dentry_info(sb->s_root);
+	if (err) {
+		/* dput() of the root dentry releases the root inode too */
+		dput(sb->s_root);
+		sb->s_root = NULL;
+		return err;
+	}
+	mfs_install_path(sb->s_root, &sbi->lower, &sbi->cache);
+	sbi->sb = sb;
+	return 0;
+}
+
+static int mfs_fc_get_tree(struct fs_context *fc)
+{
+	return get_tree_nodev(fc, mfs_fc_fill_super);
+}
+
+static int mfs_reconfigure(struct fs_context *fc)
+{
+	return -EOPNOTSUPP;
+}
+
+static void mfs_fc_free(struct fs_context *fc)
+{
+	struct mfs_sb_info *sbi = fc->s_fs_info;
+
+	if (!sbi)
+		return;
+
+	if (sbi->mtree) {
+		path_put(&sbi->lower);
+		kfree(sbi->mtree);
+	}
+	if (sbi->cachedir) {
+		path_put(&sbi->cache);
+		kfree(sbi->cachedir);
+	}
+	kfree(sbi);
+}
+
+static const struct fs_context_operations mfs_context_ops = {
+	.parse_param = mfs_fc_parse_param,
+	.get_tree = mfs_fc_get_tree,
+	.reconfigure = mfs_reconfigure,
+	.free = mfs_fc_free,
+};
+
 static int mfs_init_fs_context(struct fs_context *fc)
 {
+	struct mfs_sb_info *sbi;
+
+	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
+	if (!sbi)
+		return -ENOMEM;
+
+	sbi->minor = -1;
+	fc->s_fs_info = sbi;
+	fc->ops = &mfs_context_ops;
 	return 0;
 }
 
 static void mfs_kill_sb(struct super_block *sb)
 {
+	struct mfs_sb_info *sbi = MFS_SB(sb);
+
+	kill_anon_super(sb);
+	if (sbi->mtree) {
+		path_put(&sbi->lower);
+		kfree(sbi->mtree);
+	}
+	if (sbi->cachedir) {
+		path_put(&sbi->cache);
+		kfree(sbi->cachedir);
+	}
+	kfree(sbi);
+	sb->s_fs_info = NULL;
 }
 
 static struct file_system_type mfs_fs_type = {
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 6325d1d0e90f..4ca73708ed9e 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -37,6 +37,7 @@
 #define HOSTFS_SUPER_MAGIC	0x00c0ffee
 #define OVERLAYFS_SUPER_MAGIC	0x794c7630
 #define FUSE_SUPER_MAGIC	0x65735546
+#define MFS_SUPER_MAGIC		0x85428370
 
 #define MINIX_SUPER_MAGIC	0x137F		/* minix v1 fs, 14 char names */
 #define MINIX_SUPER_MAGIC2	0x138F		/* minix v1 fs, 30 char names */
diff --git a/include/uapi/linux/mfs.h b/include/uapi/linux/mfs.h
new file mode 100644
index 000000000000..81feff7b7fe0
--- /dev/null
+++ b/include/uapi/linux/mfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_MFS_H
+#define _UAPI_LINUX_MFS_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+enum {
+	MFS_MODE_NONE = 0,
+	MFS_MODE_LOCAL,
+	MFS_MODE_REMOTE,
+};
+
+#endif /* _UAPI_LINUX_MFS_H */
-- 
2.25.1