hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/release-management/issues/ID5W1P ------------------ Optimize mfs syncer process to solve UAF and reduce blocking on event destruction. Metadata initialization in mfs_lookup is reorganized so that the new dentry is not made visible before its metadata is fully installed. Signed-off-by: Huang Xiaojia <huangxiaojia2@huawei.com> --- fs/mfs/cache.c | 61 +++++++++++++++++++++++++---------------- fs/mfs/data.c | 8 +++---- fs/mfs/dev.c | 51 ++++++++++----------------------------- fs/mfs/inode.c | 35 ++++++++++++++------------- fs/mfs/internal.h | 2 -- fs/mfs/super.c | 16 +++++++++---- 6 files changed, 79 insertions(+), 94 deletions(-) diff --git a/fs/mfs/cache.c b/fs/mfs/cache.c index 2178c78f4f1a..2675fe58b781 100644 --- a/fs/mfs/cache.c +++ b/fs/mfs/cache.c @@ -78,8 +78,6 @@ static long _ioc_done(struct mfs_cache_object *object, return -EINVAL; } xas_store(&xas, NULL); - xas_unlock(&xas); - syncer = event->syncer; if (done->ret) atomic_cmpxchg(&syncer->res, 0, -EIO); @@ -88,6 +86,8 @@ static long _ioc_done(struct mfs_cache_object *object, spin_unlock(&syncer->list_lock); if (atomic_dec_return(&syncer->notback) == 0) complete(&syncer->done); + xas_unlock(&xas); + put_mfs_event(event); return 0; } @@ -366,9 +366,7 @@ void mfs_destroy_events(struct super_block *sb) */ pr_warn("Event remains:%lu\n", index); __xa_erase(&caches->events, index); - xa_unlock(&caches->events); put_mfs_event(event); - xa_lock(&caches->events); } xa_unlock(&caches->events); xa_destroy(&caches->events); @@ -379,19 +377,18 @@ void mfs_cancel_syncer_events(struct mfs_cache_object *object, { struct mfs_sb_info *sbi = MFS_SB(object->mfs_inode->i_sb); struct mfs_caches *caches = &sbi->caches; - struct mfs_event *event; - struct list_head tmp; + struct xarray *xa = &caches->events; + struct mfs_event *event, *nevent; - INIT_LIST_HEAD(&tmp); + xa_lock(xa); spin_lock(&syncer->list_lock); - list_splice_init(&syncer->head, &tmp); - spin_unlock(&syncer->list_lock); - - 
list_for_each_entry(event, &tmp, link) { - xa_erase(&caches->events, event->msg.id); - iput(event->object->mfs_inode); - kfree(event); + list_for_each_entry_safe(event, nevent, &syncer->head, link) { + __xa_erase(&caches->events, event->msg.id); + list_del(&event->link); + put_mfs_event(event); } + spin_unlock(&syncer->list_lock); + xa_unlock(xa); } void mfs_cancel_all_events(struct mfs_sb_info *sbi) @@ -402,25 +399,33 @@ void mfs_cancel_all_events(struct mfs_sb_info *sbi) struct mfs_event *event; unsigned long index; - xa_lock(xa); - xa_for_each(xa, index, event) { - __xa_erase(xa, index); - xa_unlock(xa); - if (event->syncer) { + while (!xa_empty(xa)) { + xa_lock(xa); + xa_for_each(xa, index, event) { + __xa_erase(xa, index); syncer = event->syncer; - if (atomic_dec_return(&syncer->notback) == 0) - complete(&syncer->done); - spin_lock(&syncer->list_lock); - list_del_init(&event->link); - spin_unlock(&syncer->list_lock); + /* + * The syncer may be a stack variable of the waiter, so we must + * wake up the syncer waiters while still holding the xa lock. 
+ */ + if (syncer) { + spin_lock(&syncer->list_lock); + list_del(&event->link); + spin_unlock(&syncer->list_lock); + if (atomic_dec_return(&syncer->notback) == 0) { + atomic_cmpxchg(&syncer->res, 0, -EIO); + complete(&syncer->done); + } + } + put_mfs_event(event); + if (need_resched()) + break; } - iput(event->object->mfs_inode); - kfree(event); - xa_lock(xa); + xa_unlock(xa); + cond_resched(); } caches->next_ev = 0; caches->next_msg = 0; - xa_unlock(xa); } int try_hook_fd(struct mfs_event *event) diff --git a/fs/mfs/data.c b/fs/mfs/data.c index 6b4f61096f87..32d0b6947aa8 100644 --- a/fs/mfs/data.c +++ b/fs/mfs/data.c @@ -327,7 +327,7 @@ static ssize_t mfs_read_iter(struct kiocb *iocb, struct iov_iter *to) ctx.object = file_inode(file)->i_private; ctx.off = iocb->ki_pos; ctx.op = MFS_OP_READ; - ctx.len = min(isize - ctx.off, iov_iter_count(to)); + ctx.len = min_t(size_t, isize - ctx.off, iov_iter_count(to)); ctx.sync = false; ctx.checker = range_check_mem; if (need_sync_event(file_inode(file)->i_sb)) { @@ -396,7 +396,7 @@ static vm_fault_t mfs_filemap_fault(struct vm_fault *vmf) ctx.file = cfile; ctx.object = file_inode(file)->i_private; ctx.off = vmf->pgoff << PAGE_SHIFT; - ctx.len = min(isize - ctx.off, PAGE_SIZE); + ctx.len = min_t(size_t, isize - ctx.off, PAGE_SIZE); ctx.op = MFS_OP_FAULT; ctx.sync = false; ctx.checker = range_check_mem; @@ -425,7 +425,7 @@ static vm_fault_t mfs_filemap_fault(struct vm_fault *vmf) return ret; } -vm_fault_t mfs_filemap_map_pages(struct vm_fault *vmf, +static vm_fault_t mfs_filemap_map_pages(struct vm_fault *vmf, pgoff_t start_pgoff, pgoff_t end_pgoff) { struct file *cfile, *file = vmf->vma->vm_file; @@ -452,7 +452,7 @@ vm_fault_t mfs_filemap_map_pages(struct vm_fault *vmf, ctx.file = cfile; ctx.object = file_inode(file)->i_private; ctx.off = start_pgoff << PAGE_SHIFT; - ctx.len = min(isize - ctx.off, (end_pgoff - start_pgoff) << PAGE_SHIFT); + ctx.len = min_t(size_t, isize - ctx.off, (end_pgoff - start_pgoff) << PAGE_SHIFT); 
ctx.op = MFS_OP_FAROUND; ctx.sync = false; ctx.checker = range_check_mem; diff --git a/fs/mfs/dev.c b/fs/mfs/dev.c index 15b55b257d2f..902f73b1c25c 100644 --- a/fs/mfs/dev.c +++ b/fs/mfs/dev.c @@ -7,7 +7,6 @@ #include <linux/idr.h> #include <linux/poll.h> #include <linux/list.h> -#include "../mount.h" #include <trace/events/mfs.h> @@ -20,28 +19,10 @@ static const struct class mfs_dev_class = { }; static struct device *mfs_dev; -static inline void mfs_finish_event(struct mfs_event *event, struct xa_state *xas) -{ - struct mfs_syncer *syncer = event->syncer; - - if (unlikely(!xas || !event)) - return; - - if (syncer) { - if (xa_cmpxchg(xas->xa, xas->xa_index, event, NULL, 0) != event) - return; - - if (atomic_dec_and_test(&syncer->notback)) - complete(&syncer->done); - put_mfs_event(event); - } -} - static int mfs_dev_open(struct inode *inode, struct file *file) { struct mfs_caches *caches; struct mfs_sb_info *sbi; - struct mount *mnt; unsigned minor = iminor(inode); sbi = minor < U8_MAX ? 
idr_find(&mfs_dev_minor, minor) : NULL; @@ -57,13 +38,6 @@ static int mfs_dev_open(struct inode *inode, struct file *file) clear_bit(MFS_CACHE_OPENED, &caches->flags); return -EBUSY; } - mnt = list_first_entry(&sbi->sb->s_mounts, struct mount, mnt_instance); - /* during mounting or delete from s_mounts in umounting */ - if (list_empty(&sbi->sb->s_mounts)) { - clear_bit(MFS_CACHE_OPENED, &caches->flags); - return -EBUSY; - } - sbi->mnt = mntget(&mnt->mnt); file->private_data = sbi; set_bit(MFS_CACHE_READY, &caches->flags); @@ -78,7 +52,6 @@ static int mfs_dev_release(struct inode *inode, struct file *file) clear_bit(MFS_CACHE_READY, &caches->flags); smp_mb__after_atomic(); mfs_cancel_all_events(sbi); - mntput(sbi->mnt); smp_mb__before_atomic(); clear_bit(MFS_CACHE_OPENED, &caches->flags); return 0; @@ -105,32 +78,34 @@ static ssize_t mfs_dev_read(struct file *file, char __user *buf, xas_unlock(&xas); return 0; } + if (event->syncer) + get_mfs_event(event); xas_unlock(&xas); msg = &event->msg; n = msg->len; - if (n > blen) - return -EMSGSIZE; + if (n > blen) { + ret = -EMSGSIZE; + goto out; + } ret = try_hook_fd(event); if (ret < 0) - return ret; + goto out; msg->fd = ret; ret = 0; + if (copy_to_user(buf, msg, n)) { + ret = -EFAULT; + goto out; + } xas_lock(&xas); xas_clear_mark(&xas, MFS_EVENT_NEW); caches->next_ev = xas.xa_index + 1; - if (event->syncer) - get_mfs_event(event); - else + if (!event->syncer) xas_store(&xas, NULL); xas_unlock(&xas); - - if (copy_to_user(buf, msg, n)) - ret = -EFAULT; - if (ret) - mfs_finish_event(event, &xas); +out: put_mfs_event(event); trace_mfs_dev_read(file, msg->opcode, msg->id, msg->fd); return ret ? 
ret : n; diff --git a/fs/mfs/inode.c b/fs/mfs/inode.c index 9894676a1da9..e45a5da7a67c 100644 --- a/fs/mfs/inode.c +++ b/fs/mfs/inode.c @@ -19,14 +19,12 @@ static int mfs_inode_set(struct inode *inode, void *lower_target) return 0; } -static struct dentry *_mfs_get_inode(struct dentry *dentry, - struct super_block *sb, +static struct inode *_mfs_get_inode(struct super_block *sb, struct path *lower_path, struct path *cache_path) { struct mfs_sb_info *sbi = MFS_SB(sb); - struct inode *inode, *lower_inode, *cache_inode; - struct dentry *ret; + struct inode *ret, *lower_inode, *cache_inode; lower_inode = d_inode(lower_path->dentry); cache_inode = d_inode(cache_path->dentry); @@ -49,12 +47,7 @@ static struct dentry *_mfs_get_inode(struct dentry *dentry, } /* allocate new inode for mfs */ - inode = mfs_iget(sb, lower_inode, cache_path); - if (IS_ERR(inode)) { - ret = ERR_PTR(PTR_ERR(inode)); - goto out; - } - ret = d_splice_alias(inode, dentry); + ret = mfs_iget(sb, lower_inode, cache_path); out: return ret; } @@ -120,11 +113,12 @@ static int _lookup_create(struct path *lpath, struct path *parent_cpath, return ret; } -struct dentry *mfs_lookup(struct inode *dir, struct dentry *dentry, - unsigned int flag) +static struct dentry *mfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flag) { struct path parent_lpath, parent_cpath, lpath, cpath; struct dentry *ret, *parent; + struct inode *inode; const char *name; int err; @@ -163,14 +157,21 @@ struct dentry *mfs_lookup(struct inode *dir, struct dentry *dentry, goto cdentry_fail; } /* build the inode from lower layer */ - ret = _mfs_get_inode(dentry, dir->i_sb, &lpath, &cpath); - if (IS_ERR(ret)) { + inode = _mfs_get_inode(dir->i_sb, &lpath, &cpath); + if (IS_ERR(inode)) { path_put(&lpath); path_put(&cpath); mfs_free_dentry_info(dentry); + ret = ERR_PTR(PTR_ERR(inode)); goto out; } mfs_install_path(dentry, &lpath, &cpath); + ret = d_splice_alias(inode, dentry); + if (IS_ERR(ret)) { + path_put(&lpath); + 
path_put(&cpath); + mfs_free_dentry_info(dentry); + } out: mfs_put_path(&parent_lpath, &parent_cpath); dput(parent); @@ -217,17 +218,17 @@ static const char *mfs_get_link(struct dentry *dentry, return p; } -const struct inode_operations mfs_dir_iops = { +static const struct inode_operations mfs_dir_iops = { .lookup = mfs_lookup, .getattr = mfs_getattr, }; -const struct inode_operations mfs_symlink_iops = { +static const struct inode_operations mfs_symlink_iops = { .getattr = mfs_getattr, .get_link = mfs_get_link, }; -const struct inode_operations mfs_file_iops = { +static const struct inode_operations mfs_file_iops = { .getattr = mfs_getattr, }; diff --git a/fs/mfs/internal.h b/fs/mfs/internal.h index 1e6e1defd862..66c6c0a38c68 100644 --- a/fs/mfs/internal.h +++ b/fs/mfs/internal.h @@ -13,7 +13,6 @@ #include <linux/wait.h> #include <linux/completion.h> #include <linux/types.h> -#include <linux/mount.h> #include <linux/mfs.h> #define MFS_NAME "mfs" @@ -71,7 +70,6 @@ struct mfs_sb_info { int minor; unsigned long flags; - struct vfsmount *mnt; struct super_block *sb; struct mfs_caches caches; diff --git a/fs/mfs/super.c b/fs/mfs/super.c index fa096f17e4a7..91b1b13b0657 100644 --- a/fs/mfs/super.c +++ b/fs/mfs/super.c @@ -101,7 +101,7 @@ static void mfs_d_release(struct dentry *dentry) mfs_free_dentry_info(dentry); } -const struct dentry_operations mfs_dops = { +static const struct dentry_operations mfs_dops = { .d_release = mfs_d_release, }; @@ -138,7 +138,7 @@ static int mfs_show_options(struct seq_file *seq, struct dentry *root) return 0; } -const struct super_operations mfs_sops = { +static const struct super_operations mfs_sops = { .alloc_inode = mfs_alloc_inode, .free_inode = mfs_free_inode, .drop_inode = generic_delete_inode, @@ -183,7 +183,7 @@ static char *remove_trailing(char *s, char c) return s; } -char *_acquire_set_path(char *inputpath, struct path *target) +static char *_acquire_set_path(char *inputpath, struct path *target) { char *p, *realp, *path; 
char *res; @@ -400,9 +400,12 @@ static void mfs_kill_sb(struct super_block *sb) clear_bit(MFS_MOUNTED, &sbi->flags); if (support_event(sbi)) { while (test_bit(MFS_CACHE_OPENED, &caches->flags)) { + static DEFINE_RATELIMIT_STATE(busy_open, 30 * HZ, 1); + msleep(100); - printk_once(KERN_WARNING "Pending until close the /dev/mfs%u...\n", - sbi->minor); + if (!__ratelimit(&busy_open)) + continue; + pr_warn("Pending until close the /dev/mfs%u...\n", sbi->minor); } mfs_fs_dev_exit(sb); } @@ -478,6 +481,9 @@ static void __exit exit_mfs_fs(void) { mfs_dev_exit(); unregister_filesystem(&mfs_fs_type); + + /* Make sure all delayed rcu free inodes are safe to be destroyed. */ + rcu_barrier(); mfs_cache_exit(); kmem_cache_destroy(mfs_dentry_cachep); kmem_cache_destroy(mfs_inode_cachep); -- 2.25.1