From: Yufen Yu yuyufen@huawei.com
hulk inclusion category: bugfix bugzilla: 30109 CVE: NA ---------------------------
We reported kernel crash:
[201962.639350] Call trace: [201962.644403] string+0x28/0xa0 [201962.650501] vsnprintf+0x5f0/0x748 [201962.657472] seq_vprintf+0x70/0x98 [201962.664442] seq_printf+0x7c/0xa0 [201962.671238] __blkg_prfill_rwstat+0x84/0x128 [201962.679949] blkg_prfill_rwstat_field+0x94/0xc0 [201962.689182] blkcg_print_blkgs+0xcc/0x140 [201962.697370] blkg_print_stat_bytes+0x4c/0x60 [201962.706083] cgroup_seqfile_show+0x58/0xc0 [201962.714446] kernfs_seq_show+0x44/0x50 [201962.722112] seq_read+0xd4/0x4a8 [201962.728732] kernfs_fop_read+0x16c/0x218 [201962.736748] __vfs_read+0x60/0x188 [201962.743717] vfs_read+0x94/0x150 [201962.750338] ksys_read+0x6c/0xd8 [201962.756958] __arm64_sys_read+0x24/0x30 [201962.764800] el0_svc_common+0x78/0x130 [201962.772466] el0_svc_handler+0x38/0x78 [201962.780131] el0_svc+0x8/0xc
__blkg_prfill_rwstat() tried to get the device name by 'bdi->dev', while the 'dev' have been freed by bdi_release(). The race as following:
blkg_print_stat_bytes __scsi_remove_device del_gendisk bdi_unregister
put_device(bdi->dev) kfree(bdi->dev)
__blkg_prfill_rwstat blkg_dev_name //use the freed bdi->dev dev_name(blkg->q->backing_dev_info->dev)
bdi->dev = NULL
Since blkg_dev_name() have been coverd by rcu_read_lock/unlock(), we wait all rcu reader before free 'bdi->dev' to avoid use-after-free.
Link: https://lore.kernel.org/linux-block/20200211140038.146629-1-yuyufen@huawei.c... Signed-off-by: Yufen Yu yuyufen@huawei.com Reviewed-by: Jason Yan yanaijie@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- block/blk-cgroup.c | 7 ++++-- include/linux/backing-dev-defs.h | 5 +++- include/linux/device.h | 5 ++++ mm/backing-dev.c | 49 ++++++++++++++++++++++++++++++++++++---- 4 files changed, 58 insertions(+), 8 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a06547f..e592167 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -473,8 +473,11 @@ static int blkcg_reset_stats(struct cgroup_subsys_state *css, const char *blkg_dev_name(struct blkcg_gq *blkg) { /* some drivers (floppy) instantiate a queue w/o disk registered */ - if (blkg->q->backing_dev_info->dev) - return dev_name(blkg->q->backing_dev_info->dev); + struct rcu_device *rcu_dev; + + rcu_dev = rcu_dereference(blkg->q->backing_dev_info->rcu_dev); + if (rcu_dev) + return dev_name(&rcu_dev->dev); return NULL; } EXPORT_SYMBOL_GPL(blkg_dev_name); diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 3cfe7de..73fb6bc 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -201,7 +201,10 @@ struct backing_dev_info { #endif wait_queue_head_t wb_waitq;
- struct device *dev; + union { + struct rcu_device *rcu_dev; + struct device *dev; + }; struct device *owner;
struct timer_list laptop_mode_wb_timer; diff --git a/include/linux/device.h b/include/linux/device.h index 23a1529..0d6960c 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1073,6 +1073,11 @@ struct device { KABI_RESERVE(16) };
+struct rcu_device { + struct device dev; + struct rcu_head rcu_head; +}; + static inline struct device *kobj_to_dev(struct kobject *kobj) { return container_of(kobj, struct device, kobj); diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 00277c8..040d778 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -872,19 +872,43 @@ struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id) } EXPORT_SYMBOL(bdi_alloc_node);
+static void bdi_device_release(struct device *dev) +{ + struct rcu_device *rcu_dev = container_of(dev, + struct rcu_device, dev); + kfree(rcu_dev); +} + int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) { struct device *dev; + struct rcu_device *rcu_dev; + int retval = -ENODEV;
if (bdi->dev) /* The driver needs to use separate queues per device */ return 0;
- dev = device_create_vargs(bdi_class, NULL, MKDEV(0, 0), bdi, fmt, args); - if (IS_ERR(dev)) - return PTR_ERR(dev); + rcu_dev = kzalloc(sizeof(struct rcu_device), GFP_KERNEL); + if (!rcu_dev) + return -ENOMEM; + + /* initialize device */ + dev = &rcu_dev->dev; + device_initialize(dev); + dev->class = bdi_class; + dev->release = bdi_device_release; + dev->devt = MKDEV(0, 0); + dev_set_drvdata(dev, (void *)bdi); + retval = kobject_set_name_vargs(&dev->kobj, fmt, args); + if (retval) + goto error; + + retval = device_add(dev); + if (retval) + goto error;
cgwb_bdi_register(bdi); - bdi->dev = dev; + bdi->rcu_dev = rcu_dev;
bdi_debug_register(bdi, dev_name(dev)); set_bit(WB_registered, &bdi->wb.state); @@ -895,6 +919,10 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
trace_writeback_bdi_register(bdi); return 0; + +error: + kfree(rcu_dev); + return retval; } EXPORT_SYMBOL(bdi_register_va);
@@ -937,17 +965,28 @@ static void bdi_remove_from_list(struct backing_dev_info *bdi) synchronize_rcu_expedited(); }
+static void bdi_put_device_rcu(struct rcu_head *rcu) +{ + struct rcu_device *rcu_dev = container_of(rcu, struct rcu_device, + rcu_head); + put_device(&rcu_dev->dev); +} void bdi_unregister(struct backing_dev_info *bdi) { /* make sure nobody finds us on the bdi_list anymore */ + struct rcu_device *rcu_dev = bdi->rcu_dev; bdi_remove_from_list(bdi); wb_shutdown(&bdi->wb); cgwb_bdi_unregister(bdi);
if (bdi->dev) { bdi_debug_unregister(bdi); + get_device(bdi->dev); device_unregister(bdi->dev); - bdi->dev = NULL; + rcu_assign_pointer(bdi->dev, NULL); + + /* wait all rcu reader of bdi->dev before free dev */ + call_rcu(&rcu_dev->rcu_head, bdi_put_device_rcu); }
if (bdi->owner) {