From: Naohiro Aota naohiro.aota@wdc.com
mainline inclusion from mainline-5.6 commit d795a90e2ba024dbf2f22107ae89c210b98b08b8 category: bugfix bugzilla: 50612 CVE: NA ---------------------------
claim_swapfile() currently keeps the inode locked when it is successful, or the file is already swapfile (with -EBUSY). And, on the other error cases, it does not lock the inode.
This inconsistency of the lock state and return value is quite confusing and actually causing a bad unlock balance as below in the "bad_swap" section of __do_sys_swapon().
This commit fixes this issue by moving the inode_lock() and IS_SWAPFILE check out of claim_swapfile(). The inode is unlocked in "bad_swap_unlock_inode" section, so that the inode is ensured to be unlocked at "bad_swap". Thus, error handling codes after the locking now jumps to "bad_swap_unlock_inode" instead of "bad_swap".
===================================== WARNING: bad unlock balance detected! 5.5.0-rc7+ #176 Not tainted ------------------------------------- swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: __do_sys_swapon+0x94b/0x3550 but there are no more locks to release!
other info that might help us debug this: no locks held by swapon/4294.
stack backtrace: CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-BTRFS-ZNS+ #176 Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014 Call Trace: dump_stack+0xa1/0xea print_unlock_imbalance_bug.cold+0x114/0x123 lock_release+0x562/0xed0 up_write+0x2d/0x490 __do_sys_swapon+0x94b/0x3550 __x64_sys_swapon+0x54/0x80 do_syscall_64+0xa4/0x4b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f15da0a0dc7
Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices") Signed-off-by: Naohiro Aota naohiro.aota@wdc.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Tested-by: Qais Youef qais.yousef@arm.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Reviewed-by: Darrick J. Wong darrick.wong@oracle.com Cc: Christoph Hellwig hch@infradead.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- mm/swapfile.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c index 074a724df169..c2a672301410 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3001,10 +3001,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->bdev = inode->i_sb->s_bdev; }
- inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - return 0; }
@@ -3259,36 +3255,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host;
- /* will take i_rwsem; */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap;
+ inode_lock(inode); + if (IS_SWAPFILE(inode)) { + error = -EBUSY; + goto bad_swap_unlock_inode; + } + /* * Read the swap header. */ if (!mapping->a_ops->readpage) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); - goto bad_swap; + goto bad_swap_unlock_inode; } swap_header = kmap(page);
maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; }
/* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; }
if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) @@ -3313,7 +3314,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; }
for (ci = 0; ci < nr_cluster; ci++) @@ -3322,7 +3323,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; @@ -3336,13 +3337,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = swap_cgroup_swapon(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode;
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; - goto bad_swap; + goto bad_swap_unlock_inode; } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) @@ -3382,7 +3383,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = init_swap_address_space(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode;
/* * Flush any pending IO and dirty mappings before we start using this @@ -3392,7 +3393,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap; + goto bad_swap_unlock_inode; }
mutex_lock(&swapon_mutex); @@ -3417,6 +3418,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0; goto out; +bad_swap_unlock_inode: + inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -3424,6 +3427,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) set_blocksize(p->bdev, p->old_block_size); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } + inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); @@ -3435,13 +3439,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) kvfree(frontswap_map); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); - if (swap_file) { - if (inode) { - inode_unlock(inode); - inode = NULL; - } + if (swap_file) filp_close(swap_file, NULL); - } out: if (page && !IS_ERR(page)) { kunmap(page);