From: "Darrick J. Wong" <darrick.wong@oracle.com>
mainline inclusion
from mainline-5.4-rc1
commit dc617f29dbe5ef0c8ced65ce62c464af1daaab3d
category: bugfix
bugzilla: 50612
CVE: NA

---------------------------
Don't let userspace write to an active swap file because the kernel effectively has a long term lease on the storage and things could get seriously corrupted if we let this happen.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Conflict:
	include/linux/fs.h
	mm/filemap.c

Signed-off-by: zhangyi (F) <yi.zhang@huawei.com>
Reviewed-by: Yang Erkun <yangerkun@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 fs/block_dev.c     |  3 +++
 include/linux/fs.h | 11 +++++++++++
 mm/filemap.c       |  3 +++
 mm/memory.c        |  4 ++++
 mm/mmap.c          |  8 ++++++--
 mm/swapfile.c      | 12 +++++++++++-
 6 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c index 2db79b6d5e6b8..5f58e1a604a05 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -2007,6 +2007,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM;
+ if (IS_SWAPFILE(bd_inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h index 46eb1d540606b..85ed9b11fcdf1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3548,4 +3548,15 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice);
+/* + * Flush file data before changing attributes. Caller must hold any locks + * required to prevent further writes to this file until we're done setting + * flags. + */ +static inline int inode_drain_writes(struct inode *inode) +{ + inode_dio_wait(inode); + return filemap_write_and_wait(inode->i_mapping); +} + #endif /* _LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 13f3276effd40..3a285b6a4531d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3050,6 +3050,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) unsigned long limit = rlimit(RLIMIT_FSIZE); loff_t pos;
+ if (IS_SWAPFILE(inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0;
diff --git a/mm/memory.c b/mm/memory.c index b69f8bd23ca6d..473b7c2974ce6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2312,6 +2312,10 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+ if (vmf->vma->vm_file && + IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) + return VM_FAULT_SIGBUS; + ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; diff --git a/mm/mmap.c b/mm/mmap.c index e0399b087430c..64f1b151f7a12 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1500,8 +1500,12 @@ unsigned long __do_mmap(struct mm_struct *mm, struct file *file, case MAP_SHARED_VALIDATE: if (flags & ~flags_mask) return -EOPNOTSUPP; - if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) - return -EACCES; + if (prot & PROT_WRITE) { + if (!(file->f_mode & FMODE_WRITE)) + return -EACCES; + if (IS_SWAPFILE(file->f_mapping->host)) + return -ETXTBSY; + }
/* * Make sure we don't allow writing to an append-only diff --git a/mm/swapfile.c b/mm/swapfile.c index 06df64e59d9ce..c03de4f1ee770 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3384,6 +3384,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (error) goto bad_swap;
+ /* + * Flush any pending IO and dirty mappings before we start using this + * swap device. + */ + inode->i_flags |= S_SWAPFILE; + error = inode_drain_writes(inode); + if (error) { + inode->i_flags &= ~S_SWAPFILE; + goto bad_swap; + } + mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) @@ -3404,7 +3415,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait);
- inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap: