From: Hongbo Li <lihongbo22@huawei.com>

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/ID5W1P

--------------------------------

Each MFS instance has a unique communication device named
/dev/mfs${minor}. User space can obtain MISS events by polling and
reading the device.

Signed-off-by: Huang Xiaojia <huangxiaojia2@huawei.com>
Signed-off-by: Hongbo Li <lihongbo22@huawei.com>
---
 fs/mfs/Makefile          |   2 +-
 fs/mfs/cache.c           |  80 ++++++++++++
 fs/mfs/dev.c             | 264 +++++++++++++++++++++++++++++++++++++++
 fs/mfs/internal.h        |  16 +++
 fs/mfs/super.c           |  30 ++++-
 include/uapi/linux/mfs.h |   6 +
 6 files changed, 396 insertions(+), 2 deletions(-)
 create mode 100644 fs/mfs/dev.c

diff --git a/fs/mfs/Makefile b/fs/mfs/Makefile
index a3fe71ba61e8..68c090fb4bc3 100644
--- a/fs/mfs/Makefile
+++ b/fs/mfs/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_MFS_FS) += mfs.o
-mfs-objs := super.o inode.o data.o cache.o
+mfs-objs := super.o inode.o data.o dev.o cache.o
diff --git a/fs/mfs/cache.c b/fs/mfs/cache.c
index ffc6191bbe7b..da1ef8432e95 100644
--- a/fs/mfs/cache.c
+++ b/fs/mfs/cache.c
@@ -3,6 +3,7 @@
 
 #include "internal.h"
 
+#include <linux/anon_inodes.h>
 #include <linux/mfs.h>
 
 /*
@@ -10,6 +11,9 @@
  */
 static struct kmem_cache *mfs_cobject_cachep;
 
+static const struct file_operations mfs_fd_fops = {
+};
+
 static int mfs_setup_object(struct mfs_cache_object *object,
 			    struct inode *inode,
 			    struct path *cache_path)
@@ -36,6 +40,18 @@ static int mfs_setup_object(struct mfs_cache_object *object,
 	return 0;
 }
 
+struct mfs_event *mfs_pick_event(struct xa_state *xas,
+				 unsigned long xa_max)
+{
+	struct mfs_event *event;
+
+	xas_for_each_marked(xas, event, xa_max, MFS_EVENT_NEW) {
+		return event;
+	}
+
+	return NULL;
+}
+
 void mfs_post_event_read(struct mfs_cache_object *object,
 			 loff_t off, uint64_t len,
 			 struct mfs_syncer *syncer, int op)
@@ -169,6 +185,70 @@ void mfs_cancel_syncer_events(struct mfs_cache_object *object,
 	}
 }
 
+void mfs_cancel_all_events(struct mfs_sb_info *sbi)
+{
+	struct mfs_caches *caches = &sbi->caches;
+	struct xarray *xa = &caches->events;
+	struct mfs_syncer *syncer;
+	struct mfs_event *event;
+	unsigned long index;
+
+	xa_lock(xa);
+	xa_for_each(xa, index, event) {
+		__xa_erase(xa, index);
+		xa_unlock(xa);
+		if (event->syncer) {
+			syncer = event->syncer;
+			if (atomic_dec_return(&syncer->notback) == 0)
+				complete(&syncer->done);
+			spin_lock(&syncer->list_lock);
+			list_del_init(&event->link);
+			spin_unlock(&syncer->list_lock);
+		}
+		iput(event->object->mfs_inode);
+		kfree(event);
+		xa_lock(xa);
+	}
+	caches->next_ev = 0;
+	caches->next_msg = 0;
+	xa_unlock(xa);
+}
+
+int try_hook_fd(struct mfs_event *event)
+{
+	struct mfs_cache_object *object = event->object;
+	struct file *anon_file;
+	int fd;
+
+	down_read(&object->rwsem);
+	if (object->fd > 0) {
+		up_read(&object->rwsem);
+		return object->fd;
+	}
+	up_read(&object->rwsem);
+	down_write(&object->rwsem);
+	fd = get_unused_fd_flags(O_WRONLY);
+	if (fd < 0) {
+		up_write(&object->rwsem);
+		return fd;
+	}
+
+	anon_file = anon_inode_getfile("[mfs]", &mfs_fd_fops, object, O_WRONLY);
+	if (IS_ERR(anon_file)) {
+		put_unused_fd(fd);
+		up_write(&object->rwsem);
+		return PTR_ERR(anon_file);
+	}
+	anon_file->f_mode |= FMODE_PWRITE | FMODE_LSEEK;
+	object->fd = fd;
+	object->anon_file = anon_file;
+	/* the fd/anon_file may outlive the event, so pin mfs_inode */
+	ihold(object->mfs_inode);
+	fd_install(fd, anon_file);
+	up_write(&object->rwsem);
+	return fd;
+}
+
 struct mfs_cache_object
 *mfs_alloc_object(struct inode *inode, struct path *cache_path)
 {
diff --git a/fs/mfs/dev.c b/fs/mfs/dev.c
new file mode 100644
index 000000000000..b9e512a11ee4
--- /dev/null
+++ b/fs/mfs/dev.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include "internal.h"
+
+#include <linux/device.h>
+#include <linux/idr.h>
+#include <linux/poll.h>
+#include <linux/list.h>
+#include "../mount.h"
+
+static DEFINE_MUTEX(mfs_dev_lock);
+static DEFINE_IDR(mfs_dev_minor);
+
+static int mfs_dev_major;
+static const struct class mfs_dev_class = {
+	.name = "mfs",
+};
+static struct device *mfs_dev;
+
+static inline void mfs_finish_event(struct mfs_event *event, struct xa_state *xas)
+{
+	struct mfs_syncer *syncer;
+
+	if (unlikely(!event || !xas))
+		return;
+
+	syncer = event->syncer;
+	if (syncer) {
+		if (xa_cmpxchg(xas->xa, xas->xa_index, event, NULL, 0) != event)
+			return;
+
+		if (atomic_dec_and_test(&syncer->notback))
+			complete(&syncer->done);
+		put_mfs_event(event);
+	}
+}
+
+static int mfs_dev_open(struct inode *inode, struct file *file)
+{
+	struct mfs_caches *caches;
+	struct mfs_sb_info *sbi;
+	struct mount *mnt;
+	unsigned int minor = iminor(inode);
+
+	sbi = minor < U8_MAX ? idr_find(&mfs_dev_minor, minor) : NULL;
+	if (!sbi)
+		return -EOPNOTSUPP;
+	caches = &sbi->caches;
+	if (test_and_set_bit(MFS_CACHE_OPENED, &caches->flags))
+		return -EBUSY;
+	smp_mb__after_atomic();
+
+	/* not mounted yet, or already cleaned up by umount */
+	if (!test_bit(MFS_MOUNTED, &sbi->flags)) {
+		clear_bit(MFS_CACHE_OPENED, &caches->flags);
+		return -EBUSY;
+	}
+	/* still mounting, or already deleted from s_mounts by umount */
+	if (list_empty(&sbi->sb->s_mounts)) {
+		clear_bit(MFS_CACHE_OPENED, &caches->flags);
+		return -EBUSY;
+	}
+	mnt = list_first_entry(&sbi->sb->s_mounts, struct mount, mnt_instance);
+	sbi->mnt = mntget(&mnt->mnt);
+
+	file->private_data = sbi;
+	set_bit(MFS_CACHE_READY, &caches->flags);
+	return 0;
+}
+
+static int mfs_dev_release(struct inode *inode, struct file *file)
+{
+	struct mfs_sb_info *sbi = file->private_data;
+	struct mfs_caches *caches = &sbi->caches;
+
+	clear_bit(MFS_CACHE_READY, &caches->flags);
+	smp_mb__after_atomic();
+	mfs_cancel_all_events(sbi);
+	mntput(sbi->mnt);
+	smp_mb__before_atomic();
+	clear_bit(MFS_CACHE_OPENED, &caches->flags);
+	return 0;
+}
+
+static ssize_t mfs_dev_read(struct file *file, char __user *buf,
+			    size_t blen, loff_t *off)
+{
+	struct mfs_sb_info *sbi = file->private_data;
+	struct mfs_caches *caches = &sbi->caches;
+	XA_STATE(xas, &caches->events, caches->next_ev);
+	struct mfs_event *event;
+	struct mfs_msg *msg;
+	size_t n;
+	int ret = 0;
+
+	xas_lock(&xas);
+	event = mfs_pick_event(&xas, ULONG_MAX);
+	if (!event && caches->next_ev > 0) {
+		xas_set(&xas, 0);
+		event = mfs_pick_event(&xas, caches->next_ev - 1);
+	}
+	if (!event) {
+		xas_unlock(&xas);
+		return 0;
+	}
+	xas_unlock(&xas);
+
+	msg = &event->msg;
+	n = msg->len;
+	if (n > blen)
+		return -EMSGSIZE;
+
+	ret = try_hook_fd(event);
+	if (ret < 0)
+		return ret;
+
+	msg->fd = ret;
+	ret = 0;
+	xas_lock(&xas);
+	xas_clear_mark(&xas, MFS_EVENT_NEW);
+	caches->next_ev = xas.xa_index + 1;
+	if (event->syncer)
+		get_mfs_event(event);
+	else
+		xas_store(&xas, NULL);
+	xas_unlock(&xas);
+
+	if (copy_to_user(buf, msg, n))
+		ret = -EFAULT;
+	if (ret)
+		mfs_finish_event(event, &xas);
+	put_mfs_event(event);
+	return ret ? ret : n;
+}
+
+static __poll_t mfs_dev_poll(struct file *file,
+			     struct poll_table_struct *poll)
+{
+	struct mfs_sb_info *sbi = file->private_data;
+	struct mfs_caches *caches = &sbi->caches;
+	struct mfs_event *event;
+	XA_STATE(xas, &caches->events, 0);
+	__poll_t mask;
+
+	poll_wait(file, &caches->pollwq, poll);
+	mask = 0;
+
+	if (!xa_empty(&caches->events)) {
+		xas_lock(&xas);
+		xas_for_each_marked(&xas, event, ULONG_MAX, MFS_EVENT_NEW) {
+			mask |= EPOLLIN;
+			break;
+		}
+		xas_unlock(&xas);
+	}
+
+	return mask;
+}
+
+static long mfs_dev_ioctl(struct file *filp,
+			  unsigned int cmd, unsigned long arg)
+{
+	struct mfs_ioc_fsinfo fsinfo;
+	unsigned int minor = iminor(file_inode(filp));
+	struct mfs_sb_info *sbi = minor < U8_MAX ?
+				  idr_find(&mfs_dev_minor, minor) : NULL;
+
+	if (!sbi)
+		return -EOPNOTSUPP;
+
+	if (cmd != MFS_IOC_FSINFO)
+		return -EINVAL;
+	if (!test_bit(MFS_MOUNTED, &sbi->flags))
+		return -EBUSY;
+
+	fsinfo.mode = sbi->mode;
+	if (copy_to_user((void __user *)arg, &fsinfo,
+			 sizeof(struct mfs_ioc_fsinfo)))
+		return -EFAULT;
+	return 0;
+}
+
+static const struct file_operations mfs_dev_fops = {
+	.owner		= THIS_MODULE,
+	.open		= mfs_dev_open,
+	.release	= mfs_dev_release,
+	.read		= mfs_dev_read,
+	.poll		= mfs_dev_poll,
+	.unlocked_ioctl	= mfs_dev_ioctl,
+};
+
+int mfs_fs_dev_init(struct super_block *sb)
+{
+	struct mfs_sb_info *sbi = MFS_SB(sb);
+	struct device *dev;
+
+	mutex_lock(&mfs_dev_lock);
+	sbi->minor = idr_alloc(&mfs_dev_minor, sbi, 0, U8_MAX, GFP_KERNEL);
+	if (sbi->minor < 0) {
+		mutex_unlock(&mfs_dev_lock);
+		return sbi->minor;
+	}
+
+	dev = device_create(&mfs_dev_class, NULL,
+			    MKDEV(mfs_dev_major, sbi->minor), sbi,
+			    "mfs%u", sbi->minor);
+	if (IS_ERR(dev)) {
+		idr_remove(&mfs_dev_minor, sbi->minor);
+		sbi->minor = -1;
+		mutex_unlock(&mfs_dev_lock);
+		return PTR_ERR(dev);
+	}
+	mutex_unlock(&mfs_dev_lock);
+	return 0;
+}
+
+void mfs_fs_dev_exit(struct super_block *sb)
+{
+	struct mfs_sb_info *sbi = MFS_SB(sb);
+
+	if (sbi->minor < 0)
+		return;
+	mutex_lock(&mfs_dev_lock);
+	device_destroy(&mfs_dev_class, MKDEV(mfs_dev_major, sbi->minor));
+	idr_remove(&mfs_dev_minor, sbi->minor);
+	mutex_unlock(&mfs_dev_lock);
+	sbi->minor = -1;
+}
+
+int mfs_dev_init(void)
+{
+	int ret;
+
+	mfs_dev_major = register_chrdev(0, "mfs-ctl", &mfs_dev_fops);
+	if (mfs_dev_major < 0)
+		return mfs_dev_major;
+
+	ret = class_register(&mfs_dev_class);
+	if (ret)
+		goto major_out;
+
+	mfs_dev = device_create(&mfs_dev_class, NULL,
+				MKDEV(mfs_dev_major, U8_MAX),
+				NULL, "mfs-ctl");
+	if (IS_ERR(mfs_dev)) {
+		ret = PTR_ERR(mfs_dev);
+		goto class_out;
+	}
+	return 0;
+
+class_out:
+	class_unregister(&mfs_dev_class);
+major_out:
+	unregister_chrdev(mfs_dev_major, "mfs-ctl");
+	return ret;
+}
+
+void mfs_dev_exit(void)
+{
+	if (!IS_ERR_OR_NULL(mfs_dev))
+		device_destroy(&mfs_dev_class, MKDEV(mfs_dev_major, U8_MAX));
+	class_unregister(&mfs_dev_class);
+	if (mfs_dev_major > 0)
+		unregister_chrdev(mfs_dev_major, "mfs-ctl");
+}
diff --git a/fs/mfs/internal.h b/fs/mfs/internal.h
index d2e068665f43..1e6e1defd862 100644
--- a/fs/mfs/internal.h
+++ b/fs/mfs/internal.h
@@ -13,6 +13,7 @@
 #include <linux/wait.h>
 #include <linux/completion.h>
 #include <linux/types.h>
+#include <linux/mount.h>
 #include <linux/mfs.h>
 
 #define MFS_NAME "mfs"
@@ -20,8 +21,12 @@
 #define MFS_OPEN_FLAGS (O_NOATIME)
 #define MFS_EVENT_NEW XA_MARK_1
 
+/* mfs_sb_info flags */
+#define MFS_MOUNTED 0
+
 /* mfs_caches flags */
 #define MFS_CACHE_READY 0
+#define MFS_CACHE_OPENED 1
 
 struct mfs_cache_object {
 	struct file *cache_file;
@@ -65,6 +70,8 @@ struct mfs_sb_info {
 	int minor;
 
+	unsigned long flags;
+	struct vfsmount *mnt;
 	struct super_block *sb;
 	struct mfs_caches caches;
 
@@ -203,12 +210,21 @@ struct inode *mfs_iget(struct super_block *sb, struct inode *lower_inode,
 int mfs_alloc_dentry_info(struct dentry *dentry);
 void mfs_free_dentry_info(struct dentry *dentry);
 
+int mfs_fs_dev_init(struct super_block *sb);
+void mfs_fs_dev_exit(struct super_block *sb);
+int mfs_dev_init(void);
+void mfs_dev_exit(void);
+
+struct mfs_event *mfs_pick_event(struct xa_state *xas,
+				 unsigned long xa_max);
 void mfs_post_event_read(struct mfs_cache_object *object,
 			 loff_t off, uint64_t len,
 			 struct mfs_syncer *syncer, int op);
 void mfs_destroy_events(struct super_block *sb);
 void mfs_cancel_syncer_events(struct mfs_cache_object *object,
 			      struct mfs_syncer *syncer);
+void mfs_cancel_all_events(struct mfs_sb_info *sbi);
+int try_hook_fd(struct mfs_event *event);
 struct mfs_cache_object *mfs_alloc_object(struct inode *inode,
 					  struct path *cache_path);
 void mfs_free_object(void *data);
diff --git a/fs/mfs/super.c b/fs/mfs/super.c
index a8023a66cd8d..172d95b7d607 100644
--- a/fs/mfs/super.c
+++ b/fs/mfs/super.c
@@ -10,6 +10,8 @@
 #include <linux/namei.h>
 #include <linux/seq_file.h>
 #include <linux/statfs.h>
+#include <linux/delay.h>
+#include <linux/string.h>
 
 /*
  * Used for alloc_inode
@@ -301,6 +303,12 @@ static int mfs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 	if (err)
 		return err;
 
+	if (support_event(sbi)) {
+		err = mfs_fs_dev_init(sb);
+		if (err)
+			return err;
+	}
+
 	inode = mfs_iget(sb, d_inode(sbi->lower.dentry), &sbi->cache);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
@@ -318,12 +326,15 @@ static int mfs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
 		goto out_dput;
 	mfs_install_path(sb->s_root, &sbi->lower, &sbi->cache);
 	sbi->sb = sb;
+	set_bit(MFS_MOUNTED, &sbi->flags);
 	return 0;
 out_dput:
 	dput(sb->s_root);
 out_iput:
 	iput(inode);
 out_exit:
+	if (support_event(sbi))
+		mfs_fs_dev_exit(sb);
 	return err;
 }
@@ -381,7 +392,17 @@ static int mfs_init_fs_context(struct fs_context *fc)
 static void mfs_kill_sb(struct super_block *sb)
 {
 	struct mfs_sb_info *sbi = MFS_SB(sb);
-
+	struct mfs_caches *caches = &sbi->caches;
+
+	clear_bit(MFS_MOUNTED, &sbi->flags);
+	if (support_event(sbi)) {
+		while (test_bit(MFS_CACHE_OPENED, &caches->flags)) {
+			msleep(100);
+			printk_once(KERN_WARNING "Waiting for /dev/mfs%u to be closed...\n",
+				    sbi->minor);
+		}
+		mfs_fs_dev_exit(sb);
+	}
 	kill_anon_super(sb);
 	mfs_destroy_events(sb);
 	if (sbi->mtree) {
@@ -433,8 +454,14 @@ static int __init init_mfs_fs(void)
 	if (err)
 		goto err_register;
 
+	err = mfs_dev_init();
+	if (err)
+		goto err_dev;
+
 	pr_info("MFS module loaded\n");
 	return 0;
+err_dev:
+	unregister_filesystem(&mfs_fs_type);
 err_register:
 	mfs_cache_exit();
 err_cache:
@@ -446,6 +473,7 @@ static int __init init_mfs_fs(void)
 
 static void __exit exit_mfs_fs(void)
 {
+	mfs_dev_exit();
 	unregister_filesystem(&mfs_fs_type);
 	mfs_cache_exit();
 	kmem_cache_destroy(mfs_dentry_cachep);
diff --git a/include/uapi/linux/mfs.h b/include/uapi/linux/mfs.h
index 1bae08939b95..1e4e253dbdd4 100644
--- a/include/uapi/linux/mfs.h
+++ b/include/uapi/linux/mfs.h
@@ -18,6 +18,12 @@ enum {
 	MFS_MODE_REMOTE,
 };
 
+struct mfs_ioc_fsinfo {
+	__u8 mode;	/* 0: none, 1: local, 2: remote */
+};
+
+#define MFS_IOC_FSINFO	_IOR(0xbd, 1, struct mfs_ioc_fsinfo)
+
 struct mfs_msg {
 	__u8 version;
 	__u8 opcode;
-- 
2.25.1
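
For reference, a minimal user-space sketch of the consumer loop described in the
commit message: it polls the per-instance device, reads one MISS message at a
time, and queries the filesystem mode via MFS_IOC_FSINFO. The device path
/dev/mfs0, the 4096-byte buffer, and the error handling are illustrative
assumptions only; the full struct mfs_msg layout lives in
include/uapi/linux/mfs.h and is not parsed here.

/* Illustrative only, not part of the patch above. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <poll.h>
#include <sys/ioctl.h>
#include <linux/mfs.h>

int main(void)
{
	struct mfs_ioc_fsinfo fsinfo;
	char buf[4096];		/* assumed large enough for one mfs_msg */
	struct pollfd pfd;
	ssize_t n;
	int fd;

	fd = open("/dev/mfs0", O_RDONLY);
	if (fd < 0) {
		perror("open /dev/mfs0");
		return 1;
	}

	/* MFS_IOC_FSINFO reports the mode: 0 none, 1 local, 2 remote. */
	if (ioctl(fd, MFS_IOC_FSINFO, &fsinfo) == 0)
		printf("mfs mode: %u\n", (unsigned int)fsinfo.mode);

	pfd.fd = fd;
	pfd.events = POLLIN;

	for (;;) {
		if (poll(&pfd, 1, -1) < 0)
			break;
		if (!(pfd.revents & POLLIN))
			continue;

		/* Each read() returns one struct mfs_msg; 0 means no event. */
		n = read(fd, buf, sizeof(buf));
		if (n <= 0)
			continue;

		/*
		 * buf now holds a struct mfs_msg (see include/uapi/linux/mfs.h);
		 * msg->fd refers to the anon fd hooked up by try_hook_fd(),
		 * which the daemon can pwrite() the missing data into.
		 */
		printf("got MISS event, %zd bytes\n", n);
	}

	close(fd);
	return 0;
}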