From: Ye Bin yebin10@huawei.com
hulk inclusion category: bugfix bugzilla: 50614 CVE: NA
-----------------------------------------------
Fixes: 24d1ffda34be("ext4: don't remount read-only with errors=continue on reboot") Signed-off-by: Ye Bin yebin10@huawei.com Reviewed-by: zhangyi (F) yi.zhang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/super.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 15f8aeda9ee7f..18870ae874ab6 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -509,9 +509,12 @@ static void ext4_handle_error(struct super_block *sb) if (test_opt(sb, WARN_ON_ERROR)) WARN_ON_ONCE(1);
- if (sb_rdonly(sb) || test_opt(sb, ERRORS_CONT)) + if (sb_rdonly(sb)) return;
+ if (test_opt(sb, ERRORS_CONT)) + goto out; + EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; if (journal) jbd2_journal_abort(journal, -EIO); @@ -533,6 +536,7 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); }
+out: ext4_netlink_send_info(sb, 1); }
From: Wenchao Hao haowenchao@huawei.com
euleros/rtos inclusion category: bugfix bugzilla: NA
--------------------------------
We should be registering the ns_id attribute as default sysfs attribute groups, otherwise we have a race condition between the uevent and the attributes appearing in sysfs.
Signed-off-by: Wenchao Hao haowenchao@huawei.com Reviewed-by: Yufen Yu yuyufen@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/nvme/host/core.c | 20 +++---- drivers/nvme/host/lightnvm.c | 105 ++++++++++++++-------------------- drivers/nvme/host/multipath.c | 11 +--- drivers/nvme/host/nvme.h | 10 +--- 4 files changed, 58 insertions(+), 88 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 5f500ee424a3e..ba13d8336cf78 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2990,6 +2990,14 @@ const struct attribute_group nvme_ns_id_attr_group = { .is_visible = nvme_ns_id_attrs_are_visible, };
+const struct attribute_group *nvme_ns_id_attr_groups[] = { + &nvme_ns_id_attr_group, +#ifdef CONFIG_NVM + &nvme_nvm_attr_group, +#endif + NULL, +}; + #define nvme_show_str_function(field) \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -3359,14 +3367,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
nvme_get_ctrl(ctrl);
+ disk_to_dev(ns->disk)->groups = nvme_ns_id_attr_groups; device_add_disk(ctrl->device, ns->disk); - if (sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group)) - pr_warn("%s: failed to create sysfs group for identification\n", - ns->disk->disk_name); - if (ns->ndev && nvme_nvm_register_sysfs(ns)) - pr_warn("%s: failed to register lightnvm sysfs group for identification\n", - ns->disk->disk_name);
nvme_mpath_add_disk(ns, id); nvme_fault_inject_init(ns); @@ -3406,10 +3408,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvme_ns_id_attr_group); - if (ns->ndev) - nvme_nvm_unregister_sysfs(ns); del_gendisk(ns->disk); blk_cleanup_queue(ns->queue); if (blk_get_integrity(ns->disk)) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index a69553e75f38e..d10257b9c5236 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -1193,10 +1193,29 @@ static NVM_DEV_ATTR_12_RO(multiplane_modes); static NVM_DEV_ATTR_12_RO(media_capabilities); static NVM_DEV_ATTR_12_RO(max_phys_secs);
-static struct attribute *nvm_dev_attrs_12[] = { +/* 2.0 values */ +static NVM_DEV_ATTR_20_RO(groups); +static NVM_DEV_ATTR_20_RO(punits); +static NVM_DEV_ATTR_20_RO(chunks); +static NVM_DEV_ATTR_20_RO(clba); +static NVM_DEV_ATTR_20_RO(ws_min); +static NVM_DEV_ATTR_20_RO(ws_opt); +static NVM_DEV_ATTR_20_RO(maxoc); +static NVM_DEV_ATTR_20_RO(maxocpu); +static NVM_DEV_ATTR_20_RO(mw_cunits); +static NVM_DEV_ATTR_20_RO(write_typ); +static NVM_DEV_ATTR_20_RO(write_max); +static NVM_DEV_ATTR_20_RO(reset_typ); +static NVM_DEV_ATTR_20_RO(reset_max); + +static struct attribute *nvm_dev_attrs[] = { + /* version agnostic attrs */ &dev_attr_version.attr, &dev_attr_capabilities.attr, + &dev_attr_read_typ.attr, + &dev_attr_read_max.attr,
+ /* 1.2 attrs */ &dev_attr_vendor_opcode.attr, &dev_attr_device_mode.attr, &dev_attr_media_manager.attr, @@ -1211,8 +1230,6 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_page_size.attr, &dev_attr_hw_sector_size.attr, &dev_attr_oob_sector_size.attr, - &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_prog_typ.attr, &dev_attr_prog_max.attr, &dev_attr_erase_typ.attr, @@ -1221,33 +1238,7 @@ static struct attribute *nvm_dev_attrs_12[] = { &dev_attr_media_capabilities.attr, &dev_attr_max_phys_secs.attr,
- NULL, -}; - -static const struct attribute_group nvm_dev_attr_group_12 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_12, -}; - -/* 2.0 values */ -static NVM_DEV_ATTR_20_RO(groups); -static NVM_DEV_ATTR_20_RO(punits); -static NVM_DEV_ATTR_20_RO(chunks); -static NVM_DEV_ATTR_20_RO(clba); -static NVM_DEV_ATTR_20_RO(ws_min); -static NVM_DEV_ATTR_20_RO(ws_opt); -static NVM_DEV_ATTR_20_RO(maxoc); -static NVM_DEV_ATTR_20_RO(maxocpu); -static NVM_DEV_ATTR_20_RO(mw_cunits); -static NVM_DEV_ATTR_20_RO(write_typ); -static NVM_DEV_ATTR_20_RO(write_max); -static NVM_DEV_ATTR_20_RO(reset_typ); -static NVM_DEV_ATTR_20_RO(reset_max); - -static struct attribute *nvm_dev_attrs_20[] = { - &dev_attr_version.attr, - &dev_attr_capabilities.attr, - + /* 2.0 attrs */ &dev_attr_groups.attr, &dev_attr_punits.attr, &dev_attr_chunks.attr, @@ -1258,8 +1249,6 @@ static struct attribute *nvm_dev_attrs_20[] = { &dev_attr_maxocpu.attr, &dev_attr_mw_cunits.attr,
- &dev_attr_read_typ.attr, - &dev_attr_read_max.attr, &dev_attr_write_typ.attr, &dev_attr_write_max.attr, &dev_attr_reset_typ.attr, @@ -1268,44 +1257,38 @@ static struct attribute *nvm_dev_attrs_20[] = { NULL, };
-static const struct attribute_group nvm_dev_attr_group_20 = { - .name = "lightnvm", - .attrs = nvm_dev_attrs_20, -}; - -int nvme_nvm_register_sysfs(struct nvme_ns *ns) +static umode_t nvm_dev_attrs_visible(struct kobject *kobj, + struct attribute *attr, int index) { + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct nvme_ns *ns = disk->private_data; struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + struct device_attribute *dev_attr = + container_of(attr, typeof(*dev_attr), attr);
if (!ndev) - return -EINVAL; - - switch (geo->major_ver_id) { - case 1: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); - case 2: - return sysfs_create_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); - } - - return -EINVAL; -} + return 0;
-void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) -{ - struct nvm_dev *ndev = ns->ndev; - struct nvm_geo *geo = &ndev->geo; + if (dev_attr->show == nvm_dev_attr_show) + return attr->mode;
- switch (geo->major_ver_id) { + switch (ndev->geo.major_ver_id) { case 1: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_12); + if (dev_attr->show == nvm_dev_attr_show_12) + return attr->mode; break; case 2: - sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, - &nvm_dev_attr_group_20); + if (dev_attr->show == nvm_dev_attr_show_20) + return attr->mode; break; } + + return 0; } + +const struct attribute_group nvme_nvm_attr_group = { + .name = "lightnvm", + .attrs = nvm_dev_attrs, + .is_visible = nvm_dev_attrs_visible, +}; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 8df0bf2384555..8cead2c95e214 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -346,11 +346,9 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) return;
if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) { + WARN_ON(disk_to_dev(head->disk)->groups); + disk_to_dev(head->disk)->groups = nvme_ns_id_attr_groups; device_add_disk(&head->subsys->dev, head->disk); - if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group)) - dev_warn(&head->subsys->dev, - "failed to create id group.\n"); }
synchronize_srcu(&head->srcu); @@ -573,11 +571,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) return; - if (head->disk->flags & GENHD_FL_UP) { - sysfs_remove_group(&disk_to_dev(head->disk)->kobj, - &nvme_ns_id_attr_group); + if (head->disk->flags & GENHD_FL_UP) del_gendisk(head->disk); - } blk_set_queue_dying(head->disk->queue); /* make sure all pending bios are cleaned up */ kblockd_schedule_work(&head->requeue_work); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 04c2d9ffd0041..1f0fdd0664e44 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -471,7 +471,7 @@ int nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset);
-extern const struct attribute_group nvme_ns_id_attr_group; +extern const struct attribute_group *nvme_ns_id_attr_groups[]; extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH @@ -604,8 +604,7 @@ static inline void nvme_mpath_update_disk_size(struct gendisk *disk) void nvme_nvm_update_nvm_info(struct nvme_ns *ns); int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node); void nvme_nvm_unregister(struct nvme_ns *ns); -int nvme_nvm_register_sysfs(struct nvme_ns *ns); -void nvme_nvm_unregister_sysfs(struct nvme_ns *ns); +extern const struct attribute_group nvme_nvm_attr_group; int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg); #else static inline void nvme_nvm_update_nvm_info(struct nvme_ns *ns) {}; @@ -616,11 +615,6 @@ static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, }
static inline void nvme_nvm_unregister(struct nvme_ns *ns) {}; -static inline int nvme_nvm_register_sysfs(struct nvme_ns *ns) -{ - return 0; -} -static inline void nvme_nvm_unregister_sysfs(struct nvme_ns *ns) {}; static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg) {
From: Wenchao Hao haowenchao@huawei.com
euleros/rtos inclusion category: bugfix bugzilla: NA
--------------------------------
Register default sysfs groups during device_add_disk() to avoid a race condition with udev during startup.
Signed-off-by: Wenchao Hao haowenchao@huawei.com Reviewed-by: Yufen Yu yuyufen@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/block/virtio_blk.c | 67 ++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 28 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 075523777a4a9..52e3327581f95 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -423,8 +423,8 @@ static int minor_to_index(int minor) return minor >> PART_BITS; }
-static ssize_t virtblk_serial_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t serial_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); int err; @@ -443,7 +443,7 @@ static ssize_t virtblk_serial_show(struct device *dev, return err; }
-static DEVICE_ATTR(serial, 0444, virtblk_serial_show, NULL); +static DEVICE_ATTR_RO(serial);
/* The queue's logical block size must be set before calling this */ static void virtblk_update_capacity(struct virtio_blk *vblk, bool resize) @@ -619,8 +619,8 @@ static const char *const virtblk_cache_types[] = { };
static ssize_t -virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) +cache_type_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) { struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; @@ -638,8 +638,7 @@ virtblk_cache_type_store(struct device *dev, struct device_attribute *attr, }
static ssize_t -virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, - char *buf) +cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct gendisk *disk = dev_to_disk(dev); struct virtio_blk *vblk = disk->private_data; @@ -649,12 +648,38 @@ virtblk_cache_type_show(struct device *dev, struct device_attribute *attr, return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); }
-static const struct device_attribute dev_attr_cache_type_ro = - __ATTR(cache_type, 0444, - virtblk_cache_type_show, NULL); -static const struct device_attribute dev_attr_cache_type_rw = - __ATTR(cache_type, 0644, - virtblk_cache_type_show, virtblk_cache_type_store); +static DEVICE_ATTR_RW(cache_type); + +static struct attribute *virtblk_attrs[] = { + &dev_attr_serial.attr, + &dev_attr_cache_type.attr, + NULL, +}; + +static umode_t virtblk_attrs_are_visible(struct kobject *kobj, + struct attribute *a, int n) +{ + struct device *dev = container_of(kobj, struct device, kobj); + struct gendisk *disk = dev_to_disk(dev); + struct virtio_blk *vblk = disk->private_data; + struct virtio_device *vdev = vblk->vdev; + + if (a == &dev_attr_cache_type.attr && + !virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) + return S_IRUGO; + + return a->mode; +} + +static const struct attribute_group virtblk_attr_group = { + .attrs = virtblk_attrs, + .is_visible = virtblk_attrs_are_visible, +}; + +static const struct attribute_group *virtblk_attr_groups[] = { + &virtblk_attr_group, + NULL, +};
static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) @@ -858,24 +883,10 @@ static int virtblk_probe(struct virtio_device *vdev) virtblk_update_capacity(vblk, false); virtio_device_ready(vdev);
+ disk_to_dev(vblk->disk)->groups = virtblk_attr_groups; device_add_disk(&vdev->dev, vblk->disk); - err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial); - if (err) - goto out_del_disk; - - if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE)) - err = device_create_file(disk_to_dev(vblk->disk), - &dev_attr_cache_type_rw); - else - err = device_create_file(disk_to_dev(vblk->disk), - &dev_attr_cache_type_ro); - if (err) - goto out_del_disk; return 0;
-out_del_disk: - del_gendisk(vblk->disk); - blk_cleanup_queue(vblk->disk->queue); out_free_tags: blk_mq_free_tag_set(&vblk->tag_set); out_put_disk:
From: "zhangyi (F)" yi.zhang@huawei.com
hulk inclusion category: bugfix bugzilla: 48166 CVE: NA ---------------------------
block_dump is an old debugging interface, one of it's functions is used to dump who write which file on disk. If block_dump is enabled, we can turn on debug log level and gather information about write process name file name from kmsg. It is done by block_dump___mark_inode_dirty() to print kernel message directly when marking inode dirty, so it can trigger log storm easily.
After tracepoints has been introduced into the kernel, we got trace_writeback_mark_inode_dirty() in __mark_inode_dirty(), which is a better replacement of block_dump___mark_inode_dirty(). The only downside is that it only trace the inode number and not a file name, but it may not a big deal because the original dumped file name in block_dump is not accurate in some cases, and we can still find it through the inode number and device id. So this patch delete the block_dump feature.
Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Ye bin yebin10@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/fs-writeback.c | 25 ------------------------- 1 file changed, 25 deletions(-)
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c65796603ec06..617296cad83d7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2118,28 +2118,6 @@ int dirtytime_interval_handler(struct ctl_table *table, int write, return ret; }
-static noinline void block_dump___mark_inode_dirty(struct inode *inode) -{ - if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) { - struct dentry *dentry; - const char *name = "?"; - - dentry = d_find_alias(inode); - if (dentry) { - spin_lock(&dentry->d_lock); - name = (const char *) dentry->d_name.name; - } - printk(KERN_DEBUG - "%s(%d): dirtied inode %lu (%s) on %s\n", - current->comm, task_pid_nr(current), inode->i_ino, - name, inode->i_sb->s_id); - if (dentry) { - spin_unlock(&dentry->d_lock); - dput(dentry); - } - } -} - /** * __mark_inode_dirty - internal function * @@ -2199,9 +2177,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) (dirtytime && (inode->i_state & I_DIRTY_INODE))) return;
- if (unlikely(block_dump)) - block_dump___mark_inode_dirty(inode); - spin_lock(&inode->i_lock); if (dirtytime && (inode->i_state & I_DIRTY_INODE)) goto out_unlock_inode;
From: "Darrick J. Wong" darrick.wong@oracle.com
mainline inclusion from mainline-5.4-rc1 commit 1638045c36772b47a0765f7dca07cb90267e4942 category: bugfix bugzilla: 50612 CVE: NA ---------------------------
Set S_SWAPFILE on block device inodes so that they have the same protections as a swap flie.
Signed-off-by: Darrick J. Wong darrick.wong@oracle.com Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/swapfile.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c index 13171e764c566..06df64e59d9ce 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2474,9 +2474,8 @@ add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, * requirements, they are simply tossed out - we will never use those blocks * for swapping. * - * For S_ISREG swapfiles we set S_SWAPFILE across the life of the swapon. This - * prevents root from shooting her foot off by ftruncating an in-use swapfile, - * which will scribble on the fs. + * For all swap devices we set S_SWAPFILE across the life of the swapon. This + * prevents users from writing to the swap device, which will corrupt memory. * * The amount of disk space which a single swap extent represents varies. * Typically it is in the 1-4 megabyte range. So we can have hundreds of @@ -2767,13 +2766,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile) inode = mapping->host; if (S_ISBLK(inode->i_mode)) { struct block_device *bdev = I_BDEV(inode); + set_blocksize(bdev, old_block_size); blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - } else { - inode_lock(inode); - inode->i_flags &= ~S_SWAPFILE; - inode_unlock(inode); } + + inode_lock(inode); + inode->i_flags &= ~S_SWAPFILE; + inode_unlock(inode); filp_close(swap_file, NULL);
/* @@ -2999,11 +2999,11 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; - inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - } else - return -EINVAL; + } + + inode_lock(inode); + if (IS_SWAPFILE(inode)) + return -EBUSY;
return 0; } @@ -3404,8 +3404,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait);
- if (S_ISREG(inode->i_mode)) - inode->i_flags |= S_SWAPFILE; + inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap: @@ -3427,7 +3426,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); if (swap_file) { - if (inode && S_ISREG(inode->i_mode)) { + if (inode) { inode_unlock(inode); inode = NULL; } @@ -3440,7 +3439,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) } if (name) putname(name); - if (inode && S_ISREG(inode->i_mode)) + if (inode) inode_unlock(inode); if (!error) enable_swap_slots_cache();
From: "Darrick J. Wong" darrick.wong@oracle.com
mainline inclusion from mainline-5.4-rc1 commit dc617f29dbe5ef0c8ced65ce62c464af1daaab3d category: bugfix bugzilla: 50612 CVE: NA ---------------------------
Don't let userspace write to an active swap file because the kernel effectively has a long term lease on the storage and things could get seriously corrupted if we let this happen.
Signed-off-by: Darrick J. Wong darrick.wong@oracle.com Reviewed-by: Christoph Hellwig hch@lst.de
Conflict: include/linux/fs.h mm/filemap.c Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/block_dev.c | 3 +++ include/linux/fs.h | 11 +++++++++++ mm/filemap.c | 3 +++ mm/memory.c | 4 ++++ mm/mmap.c | 8 ++++++-- mm/swapfile.c | 12 +++++++++++- 6 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c index 2db79b6d5e6b8..5f58e1a604a05 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -2007,6 +2007,9 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM;
+ if (IS_SWAPFILE(bd_inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0;
diff --git a/include/linux/fs.h b/include/linux/fs.h index 46eb1d540606b..85ed9b11fcdf1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3548,4 +3548,15 @@ extern int vfs_fadvise(struct file *file, loff_t offset, loff_t len, extern int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice);
+/* + * Flush file data before changing attributes. Caller must hold any locks + * required to prevent further writes to this file until we're done setting + * flags. + */ +static inline int inode_drain_writes(struct inode *inode) +{ + inode_dio_wait(inode); + return filemap_write_and_wait(inode->i_mapping); +} + #endif /* _LINUX_FS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 13f3276effd40..3a285b6a4531d 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3050,6 +3050,9 @@ inline ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) unsigned long limit = rlimit(RLIMIT_FSIZE); loff_t pos;
+ if (IS_SWAPFILE(inode)) + return -ETXTBSY; + if (!iov_iter_count(from)) return 0;
diff --git a/mm/memory.c b/mm/memory.c index b69f8bd23ca6d..473b7c2974ce6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2312,6 +2312,10 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf)
vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
+ if (vmf->vma->vm_file && + IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) + return VM_FAULT_SIGBUS; + ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; diff --git a/mm/mmap.c b/mm/mmap.c index e0399b087430c..64f1b151f7a12 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1500,8 +1500,12 @@ unsigned long __do_mmap(struct mm_struct *mm, struct file *file, case MAP_SHARED_VALIDATE: if (flags & ~flags_mask) return -EOPNOTSUPP; - if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE)) - return -EACCES; + if (prot & PROT_WRITE) { + if (!(file->f_mode & FMODE_WRITE)) + return -EACCES; + if (IS_SWAPFILE(file->f_mapping->host)) + return -ETXTBSY; + }
/* * Make sure we don't allow writing to an append-only diff --git a/mm/swapfile.c b/mm/swapfile.c index 06df64e59d9ce..c03de4f1ee770 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3384,6 +3384,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) if (error) goto bad_swap;
+ /* + * Flush any pending IO and dirty mappings before we start using this + * swap device. + */ + inode->i_flags |= S_SWAPFILE; + error = inode_drain_writes(inode); + if (error) { + inode->i_flags &= ~S_SWAPFILE; + goto bad_swap; + } + mutex_lock(&swapon_mutex); prio = -1; if (swap_flags & SWAP_FLAG_PREFER) @@ -3404,7 +3415,6 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) atomic_inc(&proc_poll_event); wake_up_interruptible(&proc_poll_wait);
- inode->i_flags |= S_SWAPFILE; error = 0; goto out; bad_swap:
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-5.6-rc3 commit fed98ef4d8b665316479dd35cbd92d3e2ff470a3 category: bugfix bugzilla: 50612 CVE: NA ---------------------------
claim_swapfile now always takes i_rwsem.
Link: http://lkml.kernel.org/r/20200114161225.309792-2-hch@lst.de Signed-off-by: Christoph Hellwig hch@lst.de Reviewed-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/swapfile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c index c03de4f1ee770..074a724df169d 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3259,7 +3259,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host;
- /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */ + /* will take i_rwsem; */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap;
From: Naohiro Aota naohiro.aota@wdc.com
mainline inclusion from mainline-5.6 commit d795a90e2ba024dbf2f22107ae89c210b98b08b8 category: bugfix bugzilla: 50612 CVE: NA ---------------------------
claim_swapfile() currently keeps the inode locked when it is successful, or the file is already swapfile (with -EBUSY). And, on the other error cases, it does not lock the inode.
This inconsistency of the lock state and return value is quite confusing and actually causing a bad unlock balance as below in the "bad_swap" section of __do_sys_swapon().
This commit fixes this issue by moving the inode_lock() and IS_SWAPFILE check out of claim_swapfile(). The inode is unlocked in "bad_swap_unlock_inode" section, so that the inode is ensured to be unlocked at "bad_swap". Thus, error handling codes after the locking now jumps to "bad_swap_unlock_inode" instead of "bad_swap".
===================================== WARNING: bad unlock balance detected! 5.5.0-rc7+ #176 Not tainted ------------------------------------- swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: __do_sys_swapon+0x94b/0x3550 but there are no more locks to release!
other info that might help us debug this: no locks held by swapon/4294.
stack backtrace: CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-BTRFS-ZNS+ #176 Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014 Call Trace: dump_stack+0xa1/0xea print_unlock_imbalance_bug.cold+0x114/0x123 lock_release+0x562/0xed0 up_write+0x2d/0x490 __do_sys_swapon+0x94b/0x3550 __x64_sys_swapon+0x54/0x80 do_syscall_64+0xa4/0x4b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f15da0a0dc7
Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices") Signed-off-by: Naohiro Aota naohiro.aota@wdc.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Tested-by: Qais Youef qais.yousef@arm.com Reviewed-by: Andrew Morton akpm@linux-foundation.org Reviewed-by: Darrick J. Wong darrick.wong@oracle.com Cc: Christoph Hellwig hch@infradead.org Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20200206090132.154869-1-naohiro.aota@wdc.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/swapfile.c | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c index 074a724df169d..c2a6723014108 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3001,10 +3001,6 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->bdev = inode->i_sb->s_bdev; }
- inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; - return 0; }
@@ -3259,36 +3255,41 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host;
- /* will take i_rwsem; */ error = claim_swapfile(p, inode); if (unlikely(error)) goto bad_swap;
+ inode_lock(inode); + if (IS_SWAPFILE(inode)) { + error = -EBUSY; + goto bad_swap_unlock_inode; + } + /* * Read the swap header. */ if (!mapping->a_ops->readpage) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; } page = read_mapping_page(mapping, 0, swap_file); if (IS_ERR(page)) { error = PTR_ERR(page); - goto bad_swap; + goto bad_swap_unlock_inode; } swap_header = kmap(page);
maxpages = read_swap_header(p, swap_header, inode); if (unlikely(!maxpages)) { error = -EINVAL; - goto bad_swap; + goto bad_swap_unlock_inode; }
/* OK, set up the swap map and apply the bad block list */ swap_map = vzalloc(maxpages); if (!swap_map) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; }
if (bdi_cap_stable_pages_required(inode_to_bdi(inode))) @@ -3313,7 +3314,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) GFP_KERNEL); if (!cluster_info) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; }
for (ci = 0; ci < nr_cluster; ci++) @@ -3322,7 +3323,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->percpu_cluster = alloc_percpu(struct percpu_cluster); if (!p->percpu_cluster) { error = -ENOMEM; - goto bad_swap; + goto bad_swap_unlock_inode; } for_each_possible_cpu(cpu) { struct percpu_cluster *cluster; @@ -3336,13 +3337,13 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = swap_cgroup_swapon(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode;
nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map, cluster_info, maxpages, &span); if (unlikely(nr_extents < 0)) { error = nr_extents; - goto bad_swap; + goto bad_swap_unlock_inode; } /* frontswap enabled? set up bit-per-page map for frontswap */ if (IS_ENABLED(CONFIG_FRONTSWAP)) @@ -3382,7 +3383,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = init_swap_address_space(p->type, maxpages); if (error) - goto bad_swap; + goto bad_swap_unlock_inode;
/* * Flush any pending IO and dirty mappings before we start using this @@ -3392,7 +3393,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap; + goto bad_swap_unlock_inode; }
mutex_lock(&swapon_mutex); @@ -3417,6 +3418,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0; goto out; +bad_swap_unlock_inode: + inode_unlock(inode); bad_swap: free_percpu(p->percpu_cluster); p->percpu_cluster = NULL; @@ -3424,6 +3427,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) set_blocksize(p->bdev, p->old_block_size); blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); } + inode = NULL; destroy_swap_extents(p); swap_cgroup_swapoff(p->type); spin_lock(&swap_lock); @@ -3435,13 +3439,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) kvfree(frontswap_map); if (inced_nr_rotate_swap) atomic_dec(&nr_rotate_swap); - if (swap_file) { - if (inode) { - inode_unlock(inode); - inode = NULL; - } + if (swap_file) filp_close(swap_file, NULL); - } out: if (page && !IS_ERR(page)) { kunmap(page);
From: Domenico Andreoli domenico.andreoli@linux.com
mainline inclusion from mainline-5.7-rc1 commit 56939e014a6c212b317414faa307029e2e80c3b9 category: bugfix bugzilla: 50612 CVE: NA ---------------------------
It turns out that there is one use case for programs being able to write to swap devices, and that is the userspace hibernation code.
Quick fix: disable the S_SWAPFILE check if hibernation is configured.
Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files") Reported-by: Domenico Andreoli domenico.andreoli@linux.com Reported-by: Marian Klein mkleinsoft@gmail.com Signed-off-by: Domenico Andreoli domenico.andreoli@linux.com Reviewed-by: Darrick J. Wong darrick.wong@oracle.com Signed-off-by: Darrick J. Wong darrick.wong@oracle.com Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/block_dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/fs/block_dev.c b/fs/block_dev.c index 5f58e1a604a05..a90bfc36c6da2 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -34,6 +34,7 @@ #include <linux/task_io_accounting_ops.h> #include <linux/falloc.h> #include <linux/uaccess.h> +#include <linux/suspend.h> #include "internal.h"
struct bdev_inode { @@ -2007,7 +2008,8 @@ ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM;
- if (IS_SWAPFILE(bd_inode)) + /* uswsusp needs write permission to the swap */ + if (IS_SWAPFILE(bd_inode) && !hibernation_available()) return -ETXTBSY;
if (!iov_iter_count(from))
From: Miaohe Lin linmiaohe@huawei.com
mainline inclusion from mainline-5.10-rc1 commit 822bca52ee7eb279acfba261a423ed7ac47d6f73 category: bugfix bugzilla: 50612 CVE: NA ---------------------------
If we failed to drain inode, we would forget to free the swap address space allocated by init_swap_address_space() above.
Fixes: dc617f29dbe5 ("vfs: don't allow writes to swap files") Signed-off-by: Miaohe Lin linmiaohe@huawei.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Reviewed-by: Darrick J. Wong darrick.wong@oracle.com Link: https://lkml.kernel.org/r/20200930101803.53884-1-linmiaohe@huawei.com Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: zhangyi (F) yi.zhang@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/swapfile.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/mm/swapfile.c b/mm/swapfile.c index c2a6723014108..c54b0afd8c874 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3393,7 +3393,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) error = inode_drain_writes(inode); if (error) { inode->i_flags &= ~S_SWAPFILE; - goto bad_swap_unlock_inode; + goto free_swap_address_space; }
mutex_lock(&swapon_mutex); @@ -3418,6 +3418,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
error = 0; goto out; +free_swap_address_space: + exit_swap_address_space(p->type); bad_swap_unlock_inode: inode_unlock(inode); bad_swap: