From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v5.19-rc4 commit 5cf9c91ba927119fc6606b938b1895bb2459d3bc category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAGRKP CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demand, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it.
To make the teardown code a little simpler, declare all debugfs dentry pointers, and not just the main one, unconditionally in blkdev.h.
Move debugfs_mutex next to the dentries that it protects and document what it is used for.
Signed-off-by: Christoph Hellwig hch@lst.de Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de Signed-off-by: Jens Axboe axboe@kernel.dk Conflicts: block/blk-mq-debugfs.h block/blk-mq-debugfs.c block/blk-mq-sched.c block/blk-rq-qos.c block/blk-rq-qos.h block/blk-sysfs.c block/blk-io-hierarchy/debugfs.c block/blk-io-hierarchy/stats.c include/linux/blkdev.h kernel/trace/blktrace.c [Context conflicts] Signed-off-by: Yu Kuai yukuai3@huawei.com --- block/blk-io-hierarchy/debugfs.c | 6 ++++++ block/blk-io-hierarchy/stats.c | 8 ++++++++ block/blk-mq-debugfs.c | 17 ++++++++++++----- block/blk-mq-debugfs.h | 5 ----- block/blk-mq-sched.c | 11 +++++++++++ block/blk-sysfs.c | 19 +++++++++---------- include/linux/blkdev.h | 7 ++++--- kernel/trace/blktrace.c | 3 --- 8 files changed, 50 insertions(+), 26 deletions(-)
diff --git a/block/blk-io-hierarchy/debugfs.c b/block/blk-io-hierarchy/debugfs.c index 29c17e116773..ba2f4af49d1d 100644 --- a/block/blk-io-hierarchy/debugfs.c +++ b/block/blk-io-hierarchy/debugfs.c @@ -198,6 +198,8 @@ void blk_mq_debugfs_register_hierarchy(struct request_queue *q, struct blk_io_hierarchy_stats *stats = queue_to_wrapper(q)->io_hierarchy_stats;
+ lockdep_assert_held(&q->debugfs_mutex); + if (!blk_mq_hierarchy_registered(q, stage) || !blk_mq_debugfs_enabled(q)) return; @@ -211,6 +213,8 @@ void blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, struct blk_io_hierarchy_stats *stats = queue_to_wrapper(q)->io_hierarchy_stats;
+ lockdep_assert_held(&q->debugfs_mutex); + if (!blk_mq_hierarchy_registered(q, stage) || !blk_mq_debugfs_enabled(q)) return; @@ -223,6 +227,8 @@ void blk_mq_debugfs_create_default_hierarchy_attr(struct request_queue *q) struct blk_io_hierarchy_stats *stats = queue_to_wrapper(q)->io_hierarchy_stats;
+ lockdep_assert_held(&q->debugfs_mutex); + if (!blk_mq_debugfs_enabled(q)) return;
diff --git a/block/blk-io-hierarchy/stats.c b/block/blk-io-hierarchy/stats.c index b9e79b435149..9b6b735fd5bf 100644 --- a/block/blk-io-hierarchy/stats.c +++ b/block/blk-io-hierarchy/stats.c @@ -33,6 +33,8 @@ void blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q) struct blk_io_hierarchy_stats *stats; enum stage_group stage;
+ lockdep_assert_held(&q->debugfs_mutex); + stats = queue_to_wrapper(q)->io_hierarchy_stats; if (!stats || !blk_mq_debugfs_enabled(q)) return; @@ -203,8 +205,10 @@ void blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage)
blk_mq_freeze_queue(q);
+ mutex_lock(&q->debugfs_mutex); WRITE_ONCE(stats->hstage[stage], hstage); blk_mq_debugfs_register_hierarchy(q, stage); + mutex_unlock(&q->debugfs_mutex);
blk_mq_unfreeze_queue(q); } @@ -220,6 +224,8 @@ void blk_mq_unregister_hierarchy(struct request_queue *q, if (!blk_mq_hierarchy_registered(q, stage)) return;
+ mutex_lock(&q->debugfs_mutex); + blk_mq_debugfs_unregister_hierarchy(q, stage); blk_io_hierarchy_iodump_exit(q, stage);
@@ -230,6 +236,8 @@ void blk_mq_unregister_hierarchy(struct request_queue *q, spin_unlock(&stats->hstage_lock);
kfree(hstage); + + mutex_unlock(&q->debugfs_mutex); } EXPORT_SYMBOL_GPL(blk_mq_unregister_hierarchy);
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 8bb6045d8f0d..5347620fe7c7 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -862,11 +862,6 @@ void blk_mq_debugfs_register(struct request_queue *q) blk_mq_debugfs_register_hierarchy_stats(q); }
-void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -900,6 +895,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; @@ -927,6 +924,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type;
+ lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -944,6 +943,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; } @@ -953,6 +954,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, { struct elevator_type *e = q->elevator->type;
+ lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent debugfs directory has not been created yet, return; * We will be called again later on with appropriate parent debugfs @@ -972,6 +975,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 5cbe1573d47b..4ef149cf5cfb 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -20,7 +20,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -46,10 +45,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { }
-static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 1c8befbe7b69..443d92e8982a 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -609,7 +609,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err;
+ mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex);
queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.mq.init_hctx) { @@ -621,7 +623,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); }
return 0; @@ -638,13 +642,20 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int i;
queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.mq.exit_hctx && hctx->sched_data) { e->type->ops.mq.exit_hctx(hctx, i); hctx->sched_data = NULL; } } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.mq.exit_sched) e->type->ops.mq.exit_sched(e); blk_mq_sched_tags_teardown(q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e1683d1a3b01..a2a24757fc4e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -895,14 +895,13 @@ static void __blk_release_queue(struct work_struct *work) blk_mq_release(q); }
- blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex);
- if (q->mq_ops) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split);
ida_simple_remove(&blk_queue_ida, q->id); @@ -972,17 +971,17 @@ int blk_register_queue(struct gendisk *disk) goto unlock; }
+ if (q->mq_ops) + __blk_mq_register_dev(dev, q); + mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (q->mq_ops) { - __blk_mq_register_dev(dev, q); + if (q->mq_ops) blk_mq_debugfs_register(q); - } + mutex_unlock(&q->debugfs_mutex);
- mutex_lock(&q->sysfs_lock); if (q->request_fn || (q->mq_ops && q->elevator)) { ret = elv_register_queue(q, false); if (ret) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eda670342060..9948b3248e96 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -641,7 +641,6 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -690,9 +689,11 @@ struct request_queue { struct bio_set bio_split;
struct dentry *debugfs_dir; -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex;
bool mq_sysfs_init_done;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 897890e8f774..8310695efd3d 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -779,12 +779,9 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); - - mutex_unlock(&q->debugfs_mutex); }
#ifdef CONFIG_BLK_CGROUP