hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/release-management/issues/ID5W1P ------------------ Optimize mfs syncer process to solve UAF and reduce blocking on event destruction. Metadata initialization in mfs_lookup is reorganized so that the new dentry is not made visible before its metadata is fully installed. Signed-off-by: Huang Xiaojia <huangxiaojia2@huawei.com> --- fs/mfs/cache.c | 61 +++++++++++++++++++++++++---------------- fs/mfs/data.c | 8 +++---- fs/mfs/dev.c | 51 ++++++++++----------------------------- fs/mfs/inode.c | 35 ++++++++++++++------------- fs/mfs/internal.h | 2 -- fs/mfs/super.c | 16 +++++++++---- 6 files changed, 79 insertions(+), 94 deletions(-) diff --git a/fs/mfs/cache.c b/fs/mfs/cache.c index 2178c78f4f1a..2675fe58b781 100644 --- a/fs/mfs/cache.c +++ b/fs/mfs/cache.c @@ -78,8 +78,6 @@ static long _ioc_done(struct mfs_cache_object *object, return -EINVAL; } xas_store(&xas, NULL); - xas_unlock(&xas); - syncer = event->syncer; if (done->ret) atomic_cmpxchg(&syncer->res, 0, -EIO); @@ -88,6 +86,8 @@ static long _ioc_done(struct mfs_cache_object *object, spin_unlock(&syncer->list_lock); if (atomic_dec_return(&syncer->notback) == 0) complete(&syncer->done); + xas_unlock(&xas); + put_mfs_event(event); return 0; } @@ -366,9 +366,7 @@ void mfs_destroy_events(struct super_block *sb) */ pr_warn("Event remains:%lu\n", index); __xa_erase(&caches->events, index); - xa_unlock(&caches->events); put_mfs_event(event); - xa_lock(&caches->events); } xa_unlock(&caches->events); xa_destroy(&caches->events); @@ -379,19 +377,18 @@ void mfs_cancel_syncer_events(struct mfs_cache_object *object, { struct mfs_sb_info *sbi = MFS_SB(object->mfs_inode->i_sb); struct mfs_caches *caches = &sbi->caches; - struct mfs_event *event; - struct list_head tmp; + struct xarray *xa = &caches->events; + struct mfs_event *event, *nevent; - INIT_LIST_HEAD(&tmp); + xa_lock(xa); spin_lock(&syncer->list_lock); - list_splice_init(&syncer->head, &tmp); - spin_unlock(&syncer->list_lock); - - 
list_for_each_entry(event, &tmp, link) { - xa_erase(&caches->events, event->msg.id); - iput(event->object->mfs_inode); - kfree(event); + list_for_each_entry_safe(event, nevent, &syncer->head, link) { + __xa_erase(&caches->events, event->msg.id); + list_del(&event->link); + put_mfs_event(event); } + spin_unlock(&syncer->list_lock); + xa_unlock(xa); } void mfs_cancel_all_events(struct mfs_sb_info *sbi) @@ -402,25 +399,33 @@ void mfs_cancel_all_events(struct mfs_sb_info *sbi) struct mfs_event *event; unsigned long index; - xa_lock(xa); - xa_for_each(xa, index, event) { - __xa_erase(xa, index); - xa_unlock(xa); - if (event->syncer) { + while (!xa_empty(xa)) { + xa_lock(xa); + xa_for_each(xa, index, event) { + __xa_erase(xa, index); syncer = event->syncer; - if (atomic_dec_return(&syncer->notback) == 0) - complete(&syncer->done); - spin_lock(&syncer->list_lock); - list_del_init(&event->link); - spin_unlock(&syncer->list_lock); + /* + * The syncer may be a stack variable of the waiter, so we must + * wake up the syncer waiters while still holding the xa lock. 
+ */ + if (syncer) { + spin_lock(&syncer->list_lock); + list_del(&event->link); + spin_unlock(&syncer->list_lock); + if (atomic_dec_return(&syncer->notback) == 0) { + atomic_cmpxchg(&syncer->res, 0, -EIO); + complete(&syncer->done); + } + } + put_mfs_event(event); + if (need_resched()) + break; } - iput(event->object->mfs_inode); - kfree(event); - xa_lock(xa); + xa_unlock(xa); + cond_resched(); } caches->next_ev = 0; caches->next_msg = 0; - xa_unlock(xa); } int try_hook_fd(struct mfs_event *event) diff --git a/fs/mfs/data.c b/fs/mfs/data.c index 6b4f61096f87..32d0b6947aa8 100644 --- a/fs/mfs/data.c +++ b/fs/mfs/data.c @@ -327,7 +327,7 @@ static ssize_t mfs_read_iter(struct kiocb *iocb, struct iov_iter *to) ctx.object = file_inode(file)->i_private; ctx.off = iocb->ki_pos; ctx.op = MFS_OP_READ; - ctx.len = min(isize - ctx.off, iov_iter_count(to)); + ctx.len = min_t(size_t, isize - ctx.off, iov_iter_count(to)); ctx.sync = false; ctx.checker = range_check_mem; if (need_sync_event(file_inode(file)->i_sb)) { @@ -396,7 +396,7 @@ static vm_fault_t mfs_filemap_fault(struct vm_fault *vmf) ctx.file = cfile; ctx.object = file_inode(file)->i_private; ctx.off = vmf->pgoff << PAGE_SHIFT; - ctx.len = min(isize - ctx.off, PAGE_SIZE); + ctx.len = min_t(size_t, isize - ctx.off, PAGE_SIZE); ctx.op = MFS_OP_FAULT; ctx.sync = false; ctx.checker = range_check_mem; @@ -425,7 +425,7 @@ static vm_fault_t mfs_filemap_fault(struct vm_fault *vmf) return ret; } -vm_fault_t mfs_filemap_map_pages(struct vm_fault *vmf, +static vm_fault_t mfs_filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) { struct file *cfile, *file = vmf->vma->vm_file; @@ -452,7 +452,7 @@ vm_fault_t mfs_filemap_map_pages(struct vm_fault *vmf, ctx.file = cfile; ctx.object = file_inode(file)->i_private; ctx.off = start_pgoff << PAGE_SHIFT; - ctx.len = min(isize - ctx.off, (end_pgoff - start_pgoff) << PAGE_SHIFT); + ctx.len = min_t(size_t, isize - ctx.off, (end_pgoff - start_pgoff) << PAGE_SHIFT); 
ctx.op = MFS_OP_FAROUND; ctx.sync = false; ctx.checker = range_check_mem; diff --git a/fs/mfs/dev.c b/fs/mfs/dev.c index 15b55b257d2f..902f73b1c25c 100644 --- a/fs/mfs/dev.c +++ b/fs/mfs/dev.c @@ -7,7 +7,6 @@ #include <linux/idr.h> #include <linux/poll.h> #include <linux/list.h> -#include "../mount.h" #include <trace/events/mfs.h> @@ -20,28 +19,10 @@ static const struct class mfs_dev_class = { }; static struct device *mfs_dev; -static inline void mfs_finish_event(struct mfs_event *event, struct xa_state *xas) -{ - struct mfs_syncer *syncer = event->syncer; - - if (unlikely(!xas || !event)) - return; - - if (syncer) { - if (xa_cmpxchg(xas->xa, xas->xa_index, event, NULL, 0) != event) - return; - - if (atomic_dec_and_test(&syncer->notback)) - complete(&syncer->done); - put_mfs_event(event); - } -} - static int mfs_dev_open(struct inode *inode, struct file *file) { struct mfs_caches *caches; struct mfs_sb_info *sbi; - struct mount *mnt; unsigned minor = iminor(inode); sbi = minor < U8_MAX ? 
idr_find(&mfs_dev_minor, minor) : NULL; @@ -57,13 +38,6 @@ static int mfs_dev_open(struct inode *inode, struct file *file) clear_bit(MFS_CACHE_OPENED, &caches->flags); return -EBUSY; } - mnt = list_first_entry(&sbi->sb->s_mounts, struct mount, mnt_instance); - /* during mounting or delete from s_mounts in umounting */ - if (list_empty(&sbi->sb->s_mounts)) { - clear_bit(MFS_CACHE_OPENED, &caches->flags); - return -EBUSY; - } - sbi->mnt = mntget(&mnt->mnt); file->private_data = sbi; set_bit(MFS_CACHE_READY, &caches->flags); @@ -78,7 +52,6 @@ static int mfs_dev_release(struct inode *inode, struct file *file) clear_bit(MFS_CACHE_READY, &caches->flags); smp_mb__after_atomic(); mfs_cancel_all_events(sbi); - mntput(sbi->mnt); smp_mb__before_atomic(); clear_bit(MFS_CACHE_OPENED, &caches->flags); return 0; @@ -105,32 +78,34 @@ static ssize_t mfs_dev_read(struct file *file, char __user *buf, xas_unlock(&xas); return 0; } + if (event->syncer) + get_mfs_event(event); xas_unlock(&xas); msg = &event->msg; n = msg->len; - if (n > blen) - return -EMSGSIZE; + if (n > blen) { + ret = -EMSGSIZE; + goto out; + } ret = try_hook_fd(event); if (ret < 0) - return ret; + goto out; msg->fd = ret; ret = 0; + if (copy_to_user(buf, msg, n)) { + ret = -EFAULT; + goto out; + } xas_lock(&xas); xas_clear_mark(&xas, MFS_EVENT_NEW); caches->next_ev = xas.xa_index + 1; - if (event->syncer) - get_mfs_event(event); - else + if (!event->syncer) xas_store(&xas, NULL); xas_unlock(&xas); - - if (copy_to_user(buf, msg, n)) - ret = -EFAULT; - if (ret) - mfs_finish_event(event, &xas); +out: put_mfs_event(event); trace_mfs_dev_read(file, msg->opcode, msg->id, msg->fd); return ret ? 
ret : n; diff --git a/fs/mfs/inode.c b/fs/mfs/inode.c index 9894676a1da9..e45a5da7a67c 100644 --- a/fs/mfs/inode.c +++ b/fs/mfs/inode.c @@ -19,14 +19,12 @@ static int mfs_inode_set(struct inode *inode, void *lower_target) return 0; } -static struct dentry *_mfs_get_inode(struct dentry *dentry, - struct super_block *sb, +static struct inode *_mfs_get_inode(struct super_block *sb, struct path *lower_path, struct path *cache_path) { struct mfs_sb_info *sbi = MFS_SB(sb); - struct inode *inode, *lower_inode, *cache_inode; - struct dentry *ret; + struct inode *ret, *lower_inode, *cache_inode; lower_inode = d_inode(lower_path->dentry); cache_inode = d_inode(cache_path->dentry); @@ -49,12 +47,7 @@ static struct dentry *_mfs_get_inode(struct dentry *dentry, } /* allocate new inode for mfs */ - inode = mfs_iget(sb, lower_inode, cache_path); - if (IS_ERR(inode)) { - ret = ERR_PTR(PTR_ERR(inode)); - goto out; - } - ret = d_splice_alias(inode, dentry); + ret = mfs_iget(sb, lower_inode, cache_path); out: return ret; } @@ -120,11 +113,12 @@ static int _lookup_create(struct path *lpath, struct path *parent_cpath, return ret; } -struct dentry *mfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flag) +static struct dentry *mfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flag) { struct path parent_lpath, parent_cpath, lpath, cpath; struct dentry *ret, *parent; + struct inode *inode; const char *name; int err; @@ -163,14 +157,21 @@ struct dentry *mfs_lookup(struct inode *dir, struct dentry *dentry, goto cdentry_fail; } /* build the inode from lower layer */ - ret = _mfs_get_inode(dentry, dir->i_sb, &lpath, &cpath); - if (IS_ERR(ret)) { + inode = _mfs_get_inode(dir->i_sb, &lpath, &cpath); + if (IS_ERR(inode)) { path_put(&lpath); path_put(&cpath); mfs_free_dentry_info(dentry); + ret = ERR_PTR(PTR_ERR(inode)); goto out; } mfs_install_path(dentry, &lpath, &cpath); + ret = d_splice_alias(inode, dentry); + if (IS_ERR(ret)) { + path_put(&lpath); + 
path_put(&cpath); + mfs_free_dentry_info(dentry); + } out: mfs_put_path(&parent_lpath, &parent_cpath); dput(parent); @@ -217,17 +218,17 @@ static const char *mfs_get_link(struct dentry *dentry, return p; } -const struct inode_operations mfs_dir_iops = { +static const struct inode_operations mfs_dir_iops = { .lookup = mfs_lookup, .getattr = mfs_getattr, }; -const struct inode_operations mfs_symlink_iops = { +static const struct inode_operations mfs_symlink_iops = { .getattr = mfs_getattr, .get_link = mfs_get_link, }; -const struct inode_operations mfs_file_iops = { +static const struct inode_operations mfs_file_iops = { .getattr = mfs_getattr, }; diff --git a/fs/mfs/internal.h b/fs/mfs/internal.h index 1e6e1defd862..66c6c0a38c68 100644 --- a/fs/mfs/internal.h +++ b/fs/mfs/internal.h @@ -13,7 +13,6 @@ #include <linux/wait.h> #include <linux/completion.h> #include <linux/types.h> -#include <linux/mount.h> #include <linux/mfs.h> #define MFS_NAME "mfs" @@ -71,7 +70,6 @@ struct mfs_sb_info { int minor; unsigned long flags; - struct vfsmount *mnt; struct super_block *sb; struct mfs_caches caches; diff --git a/fs/mfs/super.c b/fs/mfs/super.c index fa096f17e4a7..91b1b13b0657 100644 --- a/fs/mfs/super.c +++ b/fs/mfs/super.c @@ -101,7 +101,7 @@ static void mfs_d_release(struct dentry *dentry) mfs_free_dentry_info(dentry); } -const struct dentry_operations mfs_dops = { +static const struct dentry_operations mfs_dops = { .d_release = mfs_d_release, }; @@ -138,7 +138,7 @@ static int mfs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -const struct super_operations mfs_sops = { +static const struct super_operations mfs_sops = { .alloc_inode = mfs_alloc_inode, .free_inode = mfs_free_inode, .drop_inode = generic_delete_inode, @@ -183,7 +183,7 @@ static char *remove_trailing(char *s, char c) return s; } -char *_acquire_set_path(char *inputpath, struct path *target) +static char *_acquire_set_path(char *inputpath, struct path *target) { char *p, *realp, *path; 
char *res; @@ -400,9 +400,12 @@ static void mfs_kill_sb(struct super_block *sb) clear_bit(MFS_MOUNTED, &sbi->flags); if (support_event(sbi)) { while (test_bit(MFS_CACHE_OPENED, &caches->flags)) { + static DEFINE_RATELIMIT_STATE(busy_open, 30 * HZ, 1); + msleep(100); - printk_once(KERN_WARNING "Pending until close the /dev/mfs%u...\n", - sbi->minor); + if (!__ratelimit(&busy_open)) + continue; + pr_warn("Pending until close the /dev/mfs%u...\n", sbi->minor); } mfs_fs_dev_exit(sb); } @@ -478,6 +481,9 @@ static void __exit exit_mfs_fs(void) { mfs_dev_exit(); unregister_filesystem(&mfs_fs_type); + + /* Make sure all delayed rcu free inodes are safe to be destroyed. */ + rcu_barrier(); mfs_cache_exit(); kmem_cache_destroy(mfs_dentry_cachep); kmem_cache_destroy(mfs_inode_cachep); -- 2.25.1