From: Luis Chamberlain mcgrof@kernel.org
mainline inclusion from mainline-v5.9-rc1 commit 85e0cbbb8a79537dbc465e9deb449a08b2b092a6 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAGRKP CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=85e0cbbb8a79537dbc465e9deb449a08b2b092a6
--------------------------------
We were creating the request_queue debugfs_dir only for make_request block drivers (multiqueue), but never for request-based block drivers. We did this because the only additional non-blktrace debugfs files we created in that directory were for make_request drivers. However, since blktrace *always* creates that directory anyway, we special-case the use of that directory on blktrace. Other than this being an eyesore, this exposes request-based block drivers to the same fragile debugfs race that used to exist with make_request block drivers: if we start adding files to that directory, we can later race with a double removal of dentries on the directory unless we deal with this carefully in blktrace.
Instead, just simplify things by always creating the request_queue debugfs_dir on request_queue registration. Also rename the mutex to reflect the fact that it is used outside of the blktrace context.
Signed-off-by: Luis Chamberlain mcgrof@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Jens Axboe axboe@kernel.dk Conflicts: block/blk-core.c block/blk-sysfs.c block/blk.h include/linux/blkdev.h kernel/trace/blktrace.c [Context conflicts] Signed-off-by: Yu Kuai yukuai3@huawei.com --- block/blk-core.c | 8 +------ block/blk-mq-debugfs.c | 5 ----- block/blk-sysfs.c | 9 ++++++++ block/blk.h | 2 -- include/linux/blkdev.h | 4 ++-- kernel/trace/blktrace.c | 49 +++++++++++++++++++---------------------- 6 files changed, 35 insertions(+), 42 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c index 0c74101424dc..835496adf694 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -45,9 +45,7 @@ #include "blk-rq-qos.h" #include "blk-io-hierarchy/stats.h"
-#ifdef CONFIG_DEBUG_FS struct dentry *blk_debugfs_root; -#endif
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); @@ -1346,9 +1344,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id,
kobject_init(&q->kobj, &blk_queue_ktype);
-#ifdef CONFIG_BLK_DEV_IO_TRACE - mutex_init(&q->blk_trace_mutex); -#endif + mutex_init(&q->debugfs_mutex); mutex_init(&q->sysfs_lock); mutex_init(&q_wrapper->sysfs_dir_lock); spin_lock_init(&q->__queue_lock); @@ -4307,9 +4303,7 @@ int __init blk_dev_init(void)
init_blk_queue_async_dispatch();
-#ifdef CONFIG_DEBUG_FS blk_debugfs_root = debugfs_create_dir("block", NULL); -#endif
return 0; } diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e14f3e4500ff..231b58534d6f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -841,9 +841,6 @@ void blk_mq_debugfs_register(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i;
- q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), - blk_debugfs_root); - debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
/* @@ -867,9 +864,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
void blk_mq_debugfs_unregister(struct request_queue *q) { - debugfs_remove_recursive(q->debugfs_dir); q->sched_debugfs_dir = NULL; - q->debugfs_dir = NULL; }
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 719687a394ea..e1683d1a3b01 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -11,6 +11,7 @@ #include <linux/blktrace_api.h> #include <linux/blk-mq.h> #include <linux/blk-cgroup.h> +#include <linux/debugfs.h> #include <linux/atomic.h>
#include "blk.h" @@ -895,6 +896,9 @@ static void __blk_release_queue(struct work_struct *work) }
blk_trace_shutdown(q); + mutex_lock(&q->debugfs_mutex); + debugfs_remove_recursive(q->debugfs_dir); + mutex_unlock(&q->debugfs_mutex);
if (q->mq_ops) blk_mq_debugfs_unregister(q); @@ -968,6 +972,11 @@ int blk_register_queue(struct gendisk *disk) goto unlock; }
+ mutex_lock(&q->debugfs_mutex); + q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), + blk_debugfs_root); + mutex_unlock(&q->debugfs_mutex); + if (q->mq_ops) { __blk_mq_register_dev(dev, q); blk_mq_debugfs_register(q); diff --git a/block/blk.h b/block/blk.h index 99a57be83765..30146e2099e3 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,9 +15,7 @@ /* Max future timer expiry for timeouts */ #define BLK_MAX_TIMEOUT (5 * HZ)
-#ifdef CONFIG_DEBUG_FS extern struct dentry *blk_debugfs_root; -#endif
struct blk_flush_queue { unsigned int flush_queue_delayed:1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 241f59eb5b64..eda670342060 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -641,9 +641,9 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; - struct mutex blk_trace_mutex; #endif /* * for flush operations @@ -689,8 +689,8 @@ struct request_queue { struct list_head tag_set_list; struct bio_set bio_split;
-#ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; #endif
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index ab824d742bfd..897890e8f774 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -381,7 +381,7 @@ static int __blk_trace_remove(struct request_queue *q) struct blk_trace *bt;
bt = rcu_replace_pointer(q->blk_trace, NULL, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (!bt) return -EINVAL;
@@ -394,9 +394,9 @@ int blk_trace_remove(struct request_queue *q) { int ret;
- mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_remove(q); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex);
return ret; } @@ -515,14 +515,11 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct dentry *dir = NULL; int ret;
- lockdep_assert_held(&q->blk_trace_mutex); + lockdep_assert_held(&q->debugfs_mutex);
if (!buts->buf_size || !buts->buf_nr) return -EINVAL;
- if (!blk_debugfs_root) - return -ENOENT; - strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0';
@@ -537,7 +534,7 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, * we can be. */ if (rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex))) { + lockdep_is_held(&q->debugfs_mutex))) { pr_warn("Concurrent blktraces are not allowed on %s\n", buts->name); return -EBUSY; @@ -653,9 +650,9 @@ int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, { int ret;
- mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_setup(q, name, dev, bdev, arg); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex);
return ret; } @@ -700,7 +697,7 @@ static int __blk_trace_startstop(struct request_queue *q, int start) struct blk_trace *bt;
bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL;
@@ -714,9 +711,9 @@ int blk_trace_startstop(struct request_queue *q, int start) { int ret;
- mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); ret = __blk_trace_startstop(q, start); - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex);
return ret; } @@ -745,7 +742,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) if (!q) return -ENXIO;
- mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex);
switch (cmd) { case BLKTRACESETUP: @@ -771,7 +768,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) break; }
- mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); return ret; }
@@ -782,12 +779,12 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex))) + lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q);
- mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); }
#ifdef CONFIG_BLK_CGROUP @@ -1671,7 +1668,7 @@ static int blk_trace_remove_queue(struct request_queue *q) struct blk_trace *bt;
bt = rcu_replace_pointer(q->blk_trace, NULL, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (bt == NULL) return -EINVAL;
@@ -1848,10 +1845,10 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, if (q == NULL) goto out_bdput;
- mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { ret = sprintf(buf, "%u\n", !!bt); goto out_unlock_bdev; @@ -1869,7 +1866,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ret = sprintf(buf, "%llu\n", bt->end_lba);
out_unlock_bdev: - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); out_bdput: bdput(bdev); out: @@ -1912,10 +1909,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (q == NULL) goto out_bdput;
- mutex_lock(&q->blk_trace_mutex); + mutex_lock(&q->debugfs_mutex);
bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); if (attr == &dev_attr_enable) { if (!!value == !!bt) { ret = 0; @@ -1932,7 +1929,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, if (bt == NULL) { ret = blk_trace_setup_queue(q, bdev); bt = rcu_dereference_protected(q->blk_trace, - lockdep_is_held(&q->blk_trace_mutex)); + lockdep_is_held(&q->debugfs_mutex)); }
if (ret == 0) { @@ -1947,7 +1944,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev, }
out_unlock_bdev: - mutex_unlock(&q->blk_trace_mutex); + mutex_unlock(&q->debugfs_mutex); out_bdput: bdput(bdev); out: