hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S CVE: NA
----------------------------------------
When a disk is created, its debugfs inodes and dentries are created along with it, and the memory they use cannot be freed until the disk is removed.
The number of debugfs inodes and dentries depends on the number of CPUs and hctxs. For example, in a test on a 128-core environment with default module parameters, each loop device costs 1679KB of memory, of which debugfs accounts for 336KB (20%).
The debugfs memory cost of a single disk looks small, but on a big machine with thousands of disks the total can grow to the order of gigabytes. This overhead can be avoided by disabling CONFIG_BLK_DEBUG_FS, but that is a build-time choice that removes blk-mq debugfs for every disk.
This patch adds a disk-level switch (a queue sysfs attribute named "debugfs") that enables or disables debugfs dynamically, so that users who care about the memory overhead can disable debugfs and enable it again on demand.
Signed-off-by: Yu Kuai yukuai3@huawei.com --- block/blk-mq-debugfs.c | 64 +++++++++++++++++++++++++++++++++++++----- block/blk-mq-debugfs.h | 5 ++++ block/blk-sysfs.c | 54 +++++++++++++++++++++++++++++++++++ include/linux/blkdev.h | 4 ++- 4 files changed, 119 insertions(+), 8 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index e2b8942311c9..f600ac9677a5 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -859,11 +859,20 @@ static void debugfs_create_files(struct dentry *parent, void *data, (void *)attr, &blk_mq_debugfs_fops); }
+static bool blk_mq_debugfs_enabled(struct request_queue *q) +{ + return !IS_ERR_OR_NULL(q->debugfs_dir) && + test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); +} + void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i;
+ if (!blk_mq_debugfs_enabled(q)) + return; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
/* @@ -892,6 +901,46 @@ void blk_mq_debugfs_register(struct request_queue *q) } }
+static void debugfs_remove_files(struct dentry *parent, + const struct blk_mq_debugfs_attr *attr) +{ + if (IS_ERR_OR_NULL(parent)) + return; + + for (; attr->name; attr++) + debugfs_lookup_and_remove(attr->name, parent); +} + +void blk_mq_debugfs_unregister(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + spin_lock(&q->queue_lock); + if (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + + while (rqos) { + rqos->debugfs_dir = NULL; + rqos = rqos->next; + } + } + spin_unlock(&q->queue_lock); + + debugfs_remove_recursive(q->rqos_debugfs_dir); + q->rqos_debugfs_dir = NULL; + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->debugfs_dir) + blk_mq_debugfs_unregister_hctx(hctx); + } + + if (q->sched_debugfs_dir) + blk_mq_debugfs_unregister_sched(q); + + debugfs_remove_files(q->debugfs_dir, blk_mq_debugfs_queue_attrs); +} + static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -913,7 +962,7 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
lockdep_assert_held(&q->debugfs_mutex);
- if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return;
snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); @@ -929,7 +978,7 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex);
- if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; @@ -968,7 +1017,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. */ - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return;
if (!e->queue_debugfs_attrs) @@ -991,7 +1040,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { lockdep_assert_held(&rqos->q->debugfs_mutex);
- if (!rqos->q->debugfs_dir) + if (!blk_mq_debugfs_enabled(rqos->q)) return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; @@ -1004,7 +1053,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
lockdep_assert_held(&q->debugfs_mutex);
- if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs || + !blk_mq_debugfs_enabled(q)) return;
if (!q->rqos_debugfs_dir) @@ -1024,7 +1074,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
lockdep_assert_held(&q->debugfs_mutex);
- if (!e->hctx_debugfs_attrs) + if (!e->hctx_debugfs_attrs || !blk_mq_debugfs_enabled(q)) return;
hctx->sched_debugfs_dir = debugfs_create_dir("sched", @@ -1037,7 +1087,7 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex);
- if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index f6898560b1f3..3a2c43a9a0ae 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,6 +19,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
void blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -38,6 +39,10 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { }
+static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index dbe5d15c511f..ea2d62ace573 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -619,6 +619,57 @@ QUEUE_RW_ENTRY(queue_iostats, "iostats"); QUEUE_RW_ENTRY(queue_random, "add_random"); QUEUE_RW_ENTRY(queue_stable_writes, "stable_writes");
+#ifdef CONFIG_BLK_DEBUG_FS +static ssize_t queue_debugfs_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags), + page); +} + +static ssize_t queue_debugfs_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long val; + ssize_t ret; + bool enabled; + int err; + + if (!queue_is_mq(q)) + return count; + + if (!blk_queue_registered(q)) + return -ENODEV; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + err = blk_queue_enter(q, 0); + if (err) + return err; + + mutex_lock(&q->debugfs_mutex); + enabled = test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); + if (!!val == enabled) + goto unlock; + + if (val) { + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); + blk_mq_debugfs_register(q); + } else { + blk_mq_debugfs_unregister(q); + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); + } + +unlock: + mutex_unlock(&q->debugfs_mutex); + blk_queue_exit(q); + return ret; +} + +QUEUE_RW_ENTRY(queue_debugfs, "debugfs"); +#endif + static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -661,6 +712,9 @@ static struct attribute *queue_attrs[] = { &queue_io_timeout_entry.attr, #ifdef CONFIG_BLK_DEV_THROTTLING_LOW &blk_throtl_sample_time_entry.attr, +#endif +#ifdef CONFIG_BLK_DEBUG_FS + &queue_debugfs_entry.attr, #endif NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8208922552b5..5ccb9d5f1c29 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -643,10 +643,12 @@ struct request_queue { #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ /*at least one blk-mq hctx can't get driver tag */ #define QUEUE_FLAG_HCTX_WAIT 30 +#define QUEUE_FLAG_DEBUGFS 31 /* supports debugfs */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_NOWAIT)) + (1 << QUEUE_FLAG_NOWAIT) | \ + (1UL << QUEUE_FLAG_DEBUGFS))
void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);