From: Ming Lei ming.lei@redhat.com
mainline inclusion from mainline-5.4-rc1 commit b89f625e28d44552083f43752f62d8621ded0a04 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6HOKY CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?h...
---------------------------
cecf5d87ff20 ("block: split .sysfs_lock into two locks") starts to release & acquire sysfs_lock before registering/un-registering elevator queue during switching elevator for avoiding potential deadlock from showing & storing 'queue/iosched' attributes and removing elevator's kobject.
Turns out there isn't such deadlock because 'q->sysfs_lock' isn't required in .show & .store of queue/iosched's attributes, and just elevator's sysfs lock is acquired in elv_iosched_store() and elv_iosched_show(). So it is safe to hold queue's sysfs lock when registering/un-registering elevator queue.
The biggest issue is that commit cecf5d87ff20 assumes that concurrent write on 'queue/scheduler' can't happen. However, this assumption isn't true, because kernfs_fop_write() only guarantees that concurrent write aren't called on the same open file, but the write could be from different open on the file. So we can't release & re-acquire queue's sysfs lock during switching elevator, otherwise use-after-free on elevator could be triggered.
Fixes the issue by not releasing queue's sysfs lock during switching elevator.
Fixes: cecf5d87ff20 ("block: split .sysfs_lock into two locks") Cc: Christoph Hellwig hch@infradead.org Cc: Hannes Reinecke hare@suse.com Cc: Greg KH gregkh@linuxfoundation.org Cc: Mike Snitzer snitzer@redhat.com Reviewed-by: Bart Van Assche bvanassche@acm.org Signed-off-by: Ming Lei ming.lei@redhat.com Signed-off-by: Jens Axboe axboe@kernel.dk Conflict: block/blk-sysfs.c block/elevator.c Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yongqiang Liu liuyongqiang13@huawei.com --- block/blk-sysfs.c | 13 ++++--------- block/elevator.c | 31 +------------------------------ 2 files changed, 5 insertions(+), 39 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index a663dbcbdd19..711724a41fc7 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -959,13 +959,11 @@ int blk_register_queue(struct gendisk *disk) blk_mq_debugfs_register(q); }
- /* - * The flag of QUEUE_FLAG_REGISTERED isn't set yet, so elevator - * switch won't happen at all. - */ + mutex_lock(&q->sysfs_lock); if (q->request_fn || (q->mq_ops && q->elevator)) { ret = elv_register_queue(q, false); if (ret) { + mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); kobject_del(&q->kobj); blk_trace_remove_sysfs(dev); @@ -975,7 +973,6 @@ int blk_register_queue(struct gendisk *disk) has_elevator = true; }
- mutex_lock(&q->sysfs_lock); blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); wbt_enable_default(q); blk_throtl_register_queue(q); @@ -1049,12 +1046,10 @@ void blk_unregister_queue(struct gendisk *disk)
blk_trace_remove_sysfs(disk_to_dev(disk));
- /* - * q->kobj has been removed, so it is safe to check if elevator - * exists without holding q->sysfs_lock. - */ + mutex_lock(&q->sysfs_lock); if (q->request_fn || (q->mq_ops && q->elevator)) elv_unregister_queue(q); + mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock);
/* Now that we've deleted all child objects, we can delete the queue. */ diff --git a/block/elevator.c b/block/elevator.c index a1e62af8bf33..77fde865bf7a 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -864,11 +864,9 @@ int elv_register_queue(struct request_queue *q, bool uevent) if (uevent) kobject_uevent(&e->kobj, KOBJ_ADD);
- mutex_lock(&q->sysfs_lock); e->registered = 1; if (!e->uses_mq && e->type->ops.sq.elevator_registered_fn) e->type->ops.sq.elevator_registered_fn(q); - mutex_unlock(&q->sysfs_lock); } return error; } @@ -886,9 +884,7 @@ void elv_unregister_queue(struct request_queue *q) kobject_uevent(&e->kobj, KOBJ_REMOVE); kobject_del(&e->kobj);
- mutex_lock(&q->sysfs_lock); e->registered = 0; - mutex_unlock(&q->sysfs_lock); } }
@@ -959,32 +955,11 @@ int elevator_switch_mq(struct request_queue *q, lockdep_assert_held(&q->sysfs_lock);
if (q->elevator) { - if (q->elevator->registered) { - mutex_unlock(&q->sysfs_lock); - - /* - * Concurrent elevator switch can't happen becasue - * sysfs write is always exclusively on same file. - * - * Also the elevator queue won't be freed after - * sysfs_lock is released becasue kobject_del() in - * blk_unregister_queue() waits for completion of - * .store & .show on its attributes. - */ + if (q->elevator->registered) elv_unregister_queue(q);
- mutex_lock(&q->sysfs_lock); - } ioc_clear_queue(q); elevator_exit(q, q->elevator); - - /* - * sysfs_lock may be dropped, so re-check if queue is - * unregistered. If yes, don't switch to new elevator - * any more - */ - if (!test_bit(QUEUE_FLAG_REGISTERED, &q->queue_flags)) - return 0; }
ret = blk_mq_init_sched(q, new_e); @@ -992,11 +967,7 @@ int elevator_switch_mq(struct request_queue *q, goto out;
if (new_e) { - mutex_unlock(&q->sysfs_lock); - ret = elv_register_queue(q, true); - - mutex_lock(&q->sysfs_lock); if (ret) { elevator_exit(q, q->elevator); goto out;