Changes in v6:
- fix a typo
Changes in v5:
- rebase
Changes in v4:
- just resend
Changes in v3:
- modify openeuler_defconfig to disable the new configs from patches 1 and 2
Changes in v2:
- fix heading in commit message
Christoph Hellwig (2):
  block: serialize all debugfs operations using q->debugfs_mutex
  block: remove per-disk debugfs files in blk_unregister_queue
Yu Kuai (7):
  block: add a switch to enable hungtask check for io
  blk-throttle: add a config to control hierarchical throttle in cgroup v1
  blk-throttle: fix missing prefix "CONFIG_"
  block: protect blk_mq_debugfs_register/unregister_hctx() with 'debugfs_mutex'
  block: shutdown blktrace in blk_release_queue()
  block: support enable/disable blk-mq debugfs dynamically
  block: fix kabi broken in struct request_queue
 arch/arm64/configs/openeuler_defconfig   |  3 +
 arch/powerpc/configs/openeuler_defconfig |  2 +
 arch/x86/configs/openeuler_defconfig     |  3 +
 block/Kconfig                            | 37 +++++++++
 block/bio.c                              |  2 +-
 block/blk-core.c                         | 13 +++-
 block/blk-exec.c                         |  2 +-
 block/blk-mq-debugfs.c                   | 97 +++++++++++++++++++++---
 block/blk-mq-debugfs.h                   |  5 --
 block/blk-mq-sched.c                     | 11 +++
 block/blk-mq.c                           | 11 +++
 block/blk-rq-qos.c                       |  2 -
 block/blk-rq-qos.h                       |  7 +-
 block/blk-sysfs.c                        | 82 +++++++++++++++++---
 block/blk-throttle.c                     | 17 ++++-
 block/blk.h                              |  1 +
 include/linux/blkdev.h                   |  7 +-
 kernel/trace/blktrace.c                  |  3 -
 18 files changed, 264 insertions(+), 41 deletions(-)
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
--------------------------------
Because slow io or an io timeout handler can take a long time, the hungtask check is disabled in order to prevent false positive warnings. However, this also causes the kernel to stay silent if io really does hang.

It's quite complicated to distinguish whether io is merely slow or actually hung. This patch adds a switch to enable the hungtask check; the switch is enabled by default and can be turned off by:
1) disable config BLK_IO_HUNG_TASK_CHECK
2) add blk_core.io_hung_task_check=0 to boot cmd
3) echo 0 > /sys/module/blk_core/parameters/io_hung_task_check
Note that users have to be careful when using this together with hungtask panic enabled, since there could be false positive hungtask warnings.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig   |  1 +
 arch/powerpc/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig     |  1 +
 block/Kconfig                            | 11 +++++++++++
 block/bio.c                              |  2 +-
 block/blk-core.c                         | 13 ++++++++++++-
 block/blk-exec.c                         |  2 +-
 block/blk.h                              |  1 +
 8 files changed, 29 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 05b50ca381b1..2b1f1355e61f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -964,6 +964,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y CONFIG_BLK_BIO_DISPATCH_ASYNC=y
diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index cbc315cc4dc1..fc0ca355dcaa 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -687,6 +687,7 @@ CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set
# # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f3b810d0cf47..7903ccb27e66 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -942,6 +942,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HUNG_TASK_CHECK is not set CONFIG_BLK_DEV_DUMPINFO=y CONFIG_BLK_BIO_DISPATCH_ASYNC=y
diff --git a/block/Kconfig b/block/Kconfig index 24c6bb87727d..1b8220766e3a 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -220,6 +220,17 @@ config BLK_BIO_DISPATCH_ASYNC the pressure on the busy CPUs. If unsure, say N.
+config BLK_IO_HUNG_TASK_CHECK + bool "Enable io hung task check" + default n + depends on DETECT_HUNG_TASK + help + Enabling this lets the block layer detect hungtask for io, noted + if this is set, hungtask will complain about slow io even if such + io is not hanged. Be careful to enable hungtask panic in this case. + + If unsure, say N. + menu "Partition Types"
source "block/partitions/Kconfig" diff --git a/block/bio.c b/block/bio.c index 8c64c93e96c8..123b44ba17cb 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1169,7 +1169,7 @@ int submit_bio_wait(struct bio *bio) submit_bio(bio);
/* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = sysctl_hung_task_timeout_secs && !io_hung_task_check; if (hang_check) while (!wait_for_completion_io_timeout(&done, hang_check * (HZ/2))) diff --git a/block/blk-core.c b/block/blk-core.c index a1ebbf96d19a..048724bb4ae4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -77,6 +77,17 @@ static int __init precise_iostat_setup(char *str) } __setup("precise_iostat=", precise_iostat_setup);
+/* + * Noted if this is set, hungtask will complain about slow io even if such io is + * not hanged. Be careful to enable hungtask panic in this case. + */ +#ifdef CONFIG_BLK_IO_HUNG_TASK_CHECK +bool io_hung_task_check = true; +#else +bool io_hung_task_check; +#endif +module_param_named(io_hung_task_check, io_hung_task_check, bool, 0644); + /* * For queue allocation */ @@ -2115,7 +2126,7 @@ void blk_io_schedule(void) /* Prevent hang_check timer from firing at us during very long I/O */ unsigned long timeout = sysctl_hung_task_timeout_secs * HZ / 2;
- if (timeout) + if (timeout && !io_hung_task_check) io_schedule_timeout(timeout); else io_schedule(); diff --git a/block/blk-exec.c b/block/blk-exec.c index b2676de4c6a5..497aa52cd51e 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -87,7 +87,7 @@ blk_status_t blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, blk_execute_rq_nowait(q, bd_disk, rq, at_head, blk_end_sync_rq);
/* Prevent hang_check timer from firing at us during very long I/O */ - hang_check = sysctl_hung_task_timeout_secs; + hang_check = sysctl_hung_task_timeout_secs && !io_hung_task_check; if (hang_check) while (!wait_for_completion_io_timeout(&wait, hang_check * (HZ/2))); else diff --git a/block/blk.h b/block/blk.h index 5e7c00356ddc..5e756746387b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -15,6 +15,7 @@ #define BLK_MAX_TIMEOUT (5 * HZ)
extern struct dentry *blk_debugfs_root; +extern bool io_hung_task_check;
struct blk_flush_queue { unsigned int flush_pending_idx:1;
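For illustration only, the gating pattern used in submit_bio_wait() and blk_execute_rq() boils down to the sketch below. The helper name wait_for_io_done() is made up; the real code keeps its pre-existing chunked-wait details.

static void wait_for_io_done(struct completion *done)
{
	unsigned long hang_check = sysctl_hung_task_timeout_secs;

	if (hang_check && !io_hung_task_check) {
		/* chunked waits keep the hung task detector quiet */
		while (!wait_for_completion_io_timeout(done,
						hang_check * (HZ / 2)))
			;
	} else {
		/* plain wait: the hung task detector may flag a stuck io */
		wait_for_completion_io(done);
	}
}

In other words, enabling io_hung_task_check trades possible false positives for a hung task report (with stack traces) when an io never completes.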
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
-------------------------------
The feature that enables the default hierarchy for io throttling in cgroup v1 can now only be enabled with CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 block/Kconfig                          | 11 +++++++++++
 block/blk-throttle.c                   | 17 ++++++++++++++---
 4 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 2b1f1355e61f..0cfc294d0b7f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -955,6 +955,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 7903ccb27e66..39b9252f156a 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -933,6 +933,7 @@ CONFIG_BLK_DEV_INTEGRITY_T10=m CONFIG_BLK_DEV_ZONED=y CONFIG_BLK_DEV_THROTTLING=y # CONFIG_BLK_DEV_THROTTLING_LOW is not set +CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT=y # CONFIG_BLK_CMDLINE_PARSER is not set CONFIG_BLK_WBT=y # CONFIG_BLK_CGROUP_IOLATENCY is not set diff --git a/block/Kconfig b/block/Kconfig index 1b8220766e3a..a1026e1b8f4d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -114,6 +114,17 @@ config BLK_DEV_THROTTLING_LOW
Note, this is an experimental interface and could be changed someday.
+config BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT + bool "Block layer global limit in cgroup v1" + depends on BLK_DEV_THROTTLING=y + default n + help + blkio subsytem is not under default hierarchy in cgroup v1 by default, + Enabling this will support globlal limit in cgroup v1. + + Note, a cmdline "blkcg_global_limit=1" is still required to enabled this + feature. + config BLK_CMDLINE_PARSER bool "Block device command line partition parser" help diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 050ddf0ad002..e5da0664b16c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,9 +43,15 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue;
+#ifdef BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit;
+static inline bool blkcg_global_limit_enabled(void) +{ + return global_limit; +} + static int __init setup_global_limit(char *str) { if (!strcmp(str, "1") || !strcmp(str, "Y") || !strcmp(str, "y")) @@ -55,7 +61,12 @@ static int __init setup_global_limit(char *str) }
__setup("blkcg_global_limit=", setup_global_limit); - +#else +static inline bool blkcg_global_limit_enabled(void) +{ + return false; +} +#endif /* * To implement hierarchical throttling, throtl_grps form a tree and bios * are dispatched upwards level by level until they reach the top and get @@ -571,8 +582,8 @@ static void throtl_pd_init(struct blkg_policy_data *pd) * regardless of the position of the group in the hierarchy. */ sq->parent_sq = &td->service_queue; - if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || global_limit) && - blkg->parent) + if ((cgroup_subsys_on_dfl(io_cgrp_subsys) || + blkcg_global_limit_enabled()) && blkg->parent) sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue; tg->td = td; }
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
-------------------------------
The global limit of blk-throttle is broken because commit a633be8a278f ("[Huawei] blk-throttle: add a config to control hierarchical throttle in cgroup v1") missed the "CONFIG_" prefix in the #ifdef macro.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-throttle.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e5da0664b16c..9fa00b8b1ac2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -43,7 +43,7 @@ static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue;
-#ifdef BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT +#ifdef CONFIG_BLK_DEV_SUPPORT_LEGACY_GLOBAL_LIMIT /* True if global limit is enabled in cgroup v1 */ static bool global_limit;
From: Christoph Hellwig <hch@lst.de>
mainline inclusion
from mainline-v5.19-rc4
commit 5cf9c91ba927119fc6606b938b1895bb2459d3bc
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------
Various places like I/O schedulers or the QOS infrastructure try to register debugfs files on demand, which can race with creating and removing the main queue debugfs directory. Use the existing debugfs_mutex to serialize all debugfs operations that rely on q->debugfs_dir or the directories hanging off it.
To make the teardown code a little simpler, declare all debugfs dentry pointers, and not just the main one, unconditionally in blkdev.h.
Move debugfs_mutex next to the dentries that it protects and document what it is used for.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220614074827.458955-3-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts:
	block/blk-sysfs.c
	include/linux/blkdev.h
	kernel/trace/blktrace.c
	block/blk-mq-debugfs.c
	block/blk-mq-sched.c
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-debugfs.c  | 25 ++++++++++++++++++++-----
 block/blk-mq-debugfs.h  |  5 -----
 block/blk-mq-sched.c    | 11 +++++++++++
 block/blk-rq-qos.c      |  2 ++
 block/blk-rq-qos.h      |  7 ++++++-
 block/blk-sysfs.c       | 20 ++++++++++----------
 include/linux/blkdev.h  |  8 ++++----
 kernel/trace/blktrace.c |  3 ---
 8 files changed, 53 insertions(+), 28 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index b5b17c6ee650..5903f94acf80 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -893,11 +893,6 @@ void blk_mq_debugfs_register(struct request_queue *q) } }
-void blk_mq_debugfs_unregister(struct request_queue *q) -{ - q->sched_debugfs_dir = NULL; -} - static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -931,6 +926,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; hctx->debugfs_dir = NULL; @@ -958,6 +955,8 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) { struct elevator_type *e = q->elevator->type;
+ lockdep_assert_held(&q->debugfs_mutex); + /* * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. @@ -975,12 +974,18 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
void blk_mq_debugfs_unregister_sched(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->sched_debugfs_dir); q->sched_debugfs_dir = NULL; }
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { + lockdep_assert_held(&rqos->q->debugfs_mutex); + + if (!rqos->q->debugfs_dir) + return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; } @@ -990,6 +995,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) struct request_queue *q = rqos->q; const char *dir_name = rq_qos_id_to_name(rqos->id);
+ lockdep_assert_held(&q->debugfs_mutex); + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) return;
@@ -1005,6 +1012,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) { + lockdep_assert_held(&q->debugfs_mutex); + debugfs_remove_recursive(q->rqos_debugfs_dir); q->rqos_debugfs_dir = NULL; } @@ -1014,6 +1023,8 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, { struct elevator_type *e = q->elevator->type;
+ lockdep_assert_held(&q->debugfs_mutex); + if (!e->hctx_debugfs_attrs) return;
@@ -1025,6 +1036,10 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + + if (!hctx->queue->debugfs_dir) + return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; } diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index a68aa6041a10..891c3af6f611 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,7 +19,6 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
void blk_mq_debugfs_register(struct request_queue *q); -void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -40,10 +39,6 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { }
-static inline void blk_mq_debugfs_unregister(struct request_queue *q) -{ -} - static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index c92d25b71a72..8620a5d75c62 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -610,7 +610,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) if (ret) goto err_free_map_and_rqs;
+ mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched(q); + mutex_unlock(&q->debugfs_mutex);
queue_for_each_hw_ctx(q, hctx, i) { if (e->ops.init_hctx) { @@ -623,7 +625,9 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e) return ret; } } + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_sched_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); }
return 0; @@ -664,14 +668,21 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e) unsigned int flags = 0;
queue_for_each_hw_ctx(q, hctx, i) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_hctx && hctx->sched_data) { e->type->ops.exit_hctx(hctx, i); hctx->sched_data = NULL; } flags = hctx->flags; } + + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_sched(q); + mutex_unlock(&q->debugfs_mutex); + if (e->type->ops.exit_sched) e->type->ops.exit_sched(e); blk_mq_sched_tags_teardown(q, flags); diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index e83af7bc7591..249a6f05dd3b 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,7 +294,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
void rq_qos_exit(struct request_queue *q) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_queue_rqos(q); + mutex_unlock(&q->debugfs_mutex);
while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 37c59d7d6ba7..af1c2ca157d7 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -118,8 +118,11 @@ static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
blk_mq_unfreeze_queue(q);
- if (rqos->ops->debugfs_attrs) + if (rqos->ops->debugfs_attrs) { + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_register_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); + }
return 0; ebusy: @@ -150,7 +153,9 @@ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
blk_mq_unfreeze_queue(q);
+ mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_rqos(rqos); + mutex_unlock(&q->debugfs_mutex); }
typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 53598eb6affd..3fb52934ba6b 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -863,14 +863,13 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q);
- blk_trace_shutdown(q); mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex);
- if (queue_is_mq(q)) - blk_mq_debugfs_unregister(q); - bioset_exit(&q->bio_split);
ida_simple_remove(&blk_queue_ida, q->id); @@ -940,17 +939,18 @@ int blk_register_queue(struct gendisk *disk) goto unlock; }
+ if (queue_is_mq(q)) + __blk_mq_register_dev(dev, q); + mutex_lock(&q->sysfs_lock); + mutex_lock(&q->debugfs_mutex); q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent), blk_debugfs_root); - mutex_unlock(&q->debugfs_mutex); - - if (queue_is_mq(q)) { - __blk_mq_register_dev(dev, q); + if (queue_is_mq(q)) blk_mq_debugfs_register(q); - }
- mutex_lock(&q->sysfs_lock); + mutex_unlock(&q->debugfs_mutex); + if (q->elevator) { ret = elv_register_queue(q, false); if (ret) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4ddfa95f266a..fbe7146b63fe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,7 +540,6 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; - struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -588,11 +587,12 @@ struct request_queue { struct bio_set bio_split;
struct dentry *debugfs_dir; - -#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; -#endif + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex;
bool mq_sysfs_init_done;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index bc98d5e4e033..246ed13b49ca 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -773,12 +773,9 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) **/ void blk_trace_shutdown(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); if (rcu_dereference_protected(q->blk_trace, lockdep_is_held(&q->debugfs_mutex))) __blk_trace_remove(q); - - mutex_unlock(&q->debugfs_mutex); }
#ifdef CONFIG_BLK_CGROUP
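The convention this patch establishes can be summed up in a short sketch; example_register_sched() is a hypothetical caller, the real callers are blk_mq_init_sched(), rq_qos_add() and friends:

static void example_register_sched(struct request_queue *q)
{
	mutex_lock(&q->debugfs_mutex);
	/* asserts debugfs_mutex is held, bails out if debugfs_dir is not there yet */
	blk_mq_debugfs_register_sched(q);
	mutex_unlock(&q->debugfs_mutex);
}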
From: Christoph Hellwig <hch@lst.de>
mainline inclusion
from mainline-v5.19-rc4
commit 99d055b4fd4bbb309c6cdb51a0d420669f777944
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------
The block debugfs files are created in blk_register_queue, which is called by add_disk and uses a naming scheme based on the disk name. After del_gendisk returns, that name can be reused, and thus we must not leave these debugfs files around; otherwise the kernel is unhappy and spews messages like:
Directory XXXXX with parent 'block' already present!
and the newly created devices will not have working debugfs files.
Move the unregistration to blk_unregister_queue instead (which matches the sysfs unregistration) to make sure the debugfs life time rules match those of the disk name.
As part of the move also make sure the whole debugfs unregistration is inside a single debugfs_mutex critical section.
Note that this breaks blktests block/002, which checks that the debugfs directory has not been removed while blktests is running, but that particular check should simply be removed from the test case.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20220614074827.458955-4-hch@lst.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-debugfs.c |  8 --------
 block/blk-mq-debugfs.h |  5 -----
 block/blk-rq-qos.c     |  4 ----
 block/blk-sysfs.c      | 16 ++++++++--------
 4 files changed, 8 insertions(+), 25 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 5903f94acf80..f2eb958eb050 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -1010,14 +1010,6 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs); }
-void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ - lockdep_assert_held(&q->debugfs_mutex); - - debugfs_remove_recursive(q->rqos_debugfs_dir); - q->rqos_debugfs_dir = NULL; -} - void blk_mq_debugfs_register_sched_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 891c3af6f611..f6898560b1f3 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -33,7 +33,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); -void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q); #else static inline void blk_mq_debugfs_register(struct request_queue *q) { @@ -80,10 +79,6 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos) static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { } - -static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q) -{ -} #endif
#ifdef CONFIG_BLK_DEBUG_FS_ZONED diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 249a6f05dd3b..d3a75693adbf 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -294,10 +294,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
void rq_qos_exit(struct request_queue *q) { - mutex_lock(&q->debugfs_mutex); - blk_mq_debugfs_unregister_queue_rqos(q); - mutex_unlock(&q->debugfs_mutex); - while (q->rq_qos) { struct rq_qos *rqos = q->rq_qos; q->rq_qos = rqos->next; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3fb52934ba6b..0bbd6bec1b3e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -863,13 +863,6 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q);
- mutex_lock(&q->debugfs_mutex); - blk_trace_shutdown(q); - debugfs_remove_recursive(q->debugfs_dir); - q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - mutex_unlock(&q->debugfs_mutex); - bioset_exit(&q->bio_split);
ida_simple_remove(&blk_queue_ida, q->id); @@ -1032,8 +1025,15 @@ void blk_unregister_queue(struct gendisk *disk) /* Now that we've deleted all child objects, we can delete the queue. */ kobject_uevent(&q->kobj, KOBJ_REMOVE); kobject_del(&q->kobj); - mutex_unlock(&q->sysfs_dir_lock);
+ mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + debugfs_remove_recursive(q->debugfs_dir); + q->debugfs_dir = NULL; + q->sched_debugfs_dir = NULL; + q->rqos_debugfs_dir = NULL; + mutex_unlock(&q->debugfs_mutex); + kobject_put(&disk_to_dev(disk)->kobj); }
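For reference, the debugfs teardown that now sits at the end of blk_unregister_queue() condenses to the following (illustration only, matching the hunk above):

static void example_debugfs_teardown(struct request_queue *q)
{
	mutex_lock(&q->debugfs_mutex);
	blk_trace_shutdown(q);
	debugfs_remove_recursive(q->debugfs_dir);
	q->debugfs_dir = NULL;
	q->sched_debugfs_dir = NULL;
	q->rqos_debugfs_dir = NULL;
	mutex_unlock(&q->debugfs_mutex);
}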
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
----------------------------------------
All operations to create and remove files under 'q->debugfs_dir' should be protected by 'q->debugfs_mutex'.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-debugfs.c | 8 ++++++++
 block/blk-mq.c         | 2 ++
 2 files changed, 10 insertions(+)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index f2eb958eb050..9f55fe0d15a3 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -912,6 +912,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i;
+ lockdep_assert_held(&q->debugfs_mutex); + if (!q->debugfs_dir) return;
@@ -926,6 +928,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { + lockdep_assert_held(&hctx->queue->debugfs_mutex); + if (!hctx->queue->debugfs_dir) return; debugfs_remove_recursive(hctx->debugfs_dir); @@ -938,8 +942,10 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i;
+ mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_register_hctx(q, hctx); + mutex_unlock(&q->debugfs_mutex); }
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) @@ -947,8 +953,10 @@ void blk_mq_debugfs_unregister_hctxs(struct request_queue *q) struct blk_mq_hw_ctx *hctx; int i;
+ mutex_lock(&q->debugfs_mutex); queue_for_each_hw_ctx(q, hctx, i) blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); }
void blk_mq_debugfs_register_sched(struct request_queue *q) diff --git a/block/blk-mq.c b/block/blk-mq.c index a28957dfb757..5670dfeac85a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2873,7 +2873,9 @@ static void blk_mq_exit_hw_queues(struct request_queue *q, queue_for_each_hw_ctx(q, hctx, i) { if (i == nr_queue) break; + mutex_lock(&q->debugfs_mutex); blk_mq_debugfs_unregister_hctx(hctx); + mutex_unlock(&q->debugfs_mutex); blk_mq_exit_hctx(q, set, hctx, i); } }
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
----------------------------------------
Commit 99d055b4fd4b ("block: remove per-disk debugfs files in blk_unregister_queue") moved blk_trace_shutdown() from blk_release_queue() to blk_unregister_queue(). However, blktrace can still be enabled through the ioctl after blk_unregister_queue(), and in that case the blktrace data will be leaked.
Fix the problem by calling blk_trace_shutdown() in blk_release_queue().
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-sysfs.c | 4 ++++
 1 file changed, 4 insertions(+)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0bbd6bec1b3e..f4e33203a8ee 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -863,6 +863,10 @@ static void blk_release_queue(struct kobject *kobj) if (queue_is_mq(q)) blk_mq_release(q);
+ mutex_lock(&q->debugfs_mutex); + blk_trace_shutdown(q); + mutex_unlock(&q->debugfs_mutex); + bioset_exit(&q->bio_split);
ida_simple_remove(&blk_queue_ida, q->id);
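The tail of blk_release_queue() after this patch looks roughly like the sketch below (example_release_queue_tail() is a made-up name): blk_trace_shutdown() runs once more under debugfs_mutex, so a trace that was armed via the ioctl after blk_unregister_queue() is still freed before the queue goes away.

static void example_release_queue_tail(struct request_queue *q)
{
	mutex_lock(&q->debugfs_mutex);
	blk_trace_shutdown(q);	/* frees any blktrace armed after unregister */
	mutex_unlock(&q->debugfs_mutex);
}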
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
----------------------------------------
After a disk is created, its debugfs inodes and dentries are created along with it, and the memory used for debugfs can't be freed until the disk is removed.

The number of debugfs inodes and dentries depends on how many cpus and hctxs there are. For example, testing on a 128-core environment with default module parameters, each loop device costs 1679KB of memory, of which debugfs accounts for 336KB (20%).

The memory cost of debugfs for a single disk seems small, but if a big machine contains thousands of disks, the cost adds up to xxGB. This memory overhead can be avoided by disabling CONFIG_BLK_DEBUG_FS.

This patch adds a disk-level switch that can enable/disable debugfs dynamically, so that users can disable debugfs if they care about the memory overhead; in the meantime, debugfs can be enabled again on demand.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig   |  1 +
 arch/powerpc/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig     |  1 +
 block/Kconfig                            | 15 +++++
 block/blk-mq-debugfs.c                   | 74 +++++++++++++++++++++---
 block/blk-mq-debugfs.h                   |  5 ++
 block/blk-mq.c                           |  9 +++
 block/blk-sysfs.c                        | 54 +++++++++++++++++
 include/linux/blkdev.h                   |  3 +
 9 files changed, 156 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 0cfc294d0b7f..3c45090368f7 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -963,6 +963,7 @@ CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # CONFIG_BLK_IO_HUNG_TASK_CHECK is not set diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index fc0ca355dcaa..c285107c6d97 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -684,6 +684,7 @@ CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y CONFIG_BLK_SED_OPAL=y CONFIG_BLK_INLINE_ENCRYPTION=y # CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK is not set diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 39b9252f156a..e657e4cfdbf9 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -941,6 +941,7 @@ CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y +CONFIG_BLK_DEBUG_FS_SWITCH=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set # CONFIG_BLK_IO_HUNG_TASK_CHECK is not set diff --git a/block/Kconfig b/block/Kconfig index a1026e1b8f4d..d01e418a55d9 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -189,6 +189,21 @@ config BLK_DEBUG_FS_ZONED bool default BLK_DEBUG_FS && BLK_DEV_ZONED
+config BLK_DEBUG_FS_SWITCH + bool "Disk level switch to enable/disable debugfs dynamically" + depends on BLK_DEBUG_FS + depends on 64BIT + default y + help + After a disk is created, debugfs inode and dentry will be created + together, and the memory used for debugfs can't be freed until disk + removal. + + Enabling this will add a disk level switch that can enable/disable + debugfs dynamically, so that user can disable debugfs if they care + about the memory overhead, in the meantime, debugfs can be enabled + again in demand. + config BLK_SED_OPAL bool "Logic for interfacing with Opal enabled SEDs" help diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9f55fe0d15a3..51d5bfdee655 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -132,6 +132,9 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(HCTX_ACTIVE), QUEUE_FLAG_NAME(NOWAIT), QUEUE_FLAG_NAME(DISPATCH_ASYNC), +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + QUEUE_FLAG_NAME(DEBUGFS), +#endif }; #undef QUEUE_FLAG_NAME
@@ -860,11 +863,27 @@ static void debugfs_create_files(struct dentry *parent, void *data, (void *)attr, &blk_mq_debugfs_fops); }
+static bool blk_mq_debugfs_enabled(struct request_queue *q) +{ + if (IS_ERR_OR_NULL(q->debugfs_dir)) + return false; + +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + if (!test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags)) + return false; +#endif + + return true; +} + void blk_mq_debugfs_register(struct request_queue *q) { struct blk_mq_hw_ctx *hctx; int i;
+ if (!blk_mq_debugfs_enabled(q)) + return; + debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
/* @@ -893,6 +912,46 @@ void blk_mq_debugfs_register(struct request_queue *q) } }
+static void debugfs_remove_files(struct dentry *parent, + const struct blk_mq_debugfs_attr *attr) +{ + if (IS_ERR_OR_NULL(parent)) + return; + + for (; attr->name; attr++) + debugfs_lookup_and_remove(attr->name, parent); +} + +void blk_mq_debugfs_unregister(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + unsigned long i; + + spin_lock(&q->queue_lock); + if (q->rq_qos) { + struct rq_qos *rqos = q->rq_qos; + + while (rqos) { + rqos->debugfs_dir = NULL; + rqos = rqos->next; + } + } + spin_unlock(&q->queue_lock); + + debugfs_remove_recursive(q->rqos_debugfs_dir); + q->rqos_debugfs_dir = NULL; + + queue_for_each_hw_ctx(q, hctx, i) { + if (hctx->debugfs_dir) + blk_mq_debugfs_unregister_hctx(hctx); + } + + if (q->sched_debugfs_dir) + blk_mq_debugfs_unregister_sched(q); + + debugfs_remove_files(q->debugfs_dir, blk_mq_debugfs_queue_attrs); +} + static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx) { @@ -914,7 +973,7 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
lockdep_assert_held(&q->debugfs_mutex);
- if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return;
snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); @@ -930,7 +989,7 @@ void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex);
- if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->debugfs_dir); hctx->sched_debugfs_dir = NULL; @@ -969,7 +1028,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q) * If the parent directory has not been created yet, return, we will be * called again later on and the directory/files will be created then. */ - if (!q->debugfs_dir) + if (!blk_mq_debugfs_enabled(q)) return;
if (!e->queue_debugfs_attrs) @@ -992,7 +1051,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos) { lockdep_assert_held(&rqos->q->debugfs_mutex);
- if (!rqos->q->debugfs_dir) + if (!blk_mq_debugfs_enabled(rqos->q)) return; debugfs_remove_recursive(rqos->debugfs_dir); rqos->debugfs_dir = NULL; @@ -1005,7 +1064,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
lockdep_assert_held(&q->debugfs_mutex);
- if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs) + if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs || + !blk_mq_debugfs_enabled(q)) return;
if (!q->rqos_debugfs_dir) @@ -1025,7 +1085,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
lockdep_assert_held(&q->debugfs_mutex);
- if (!e->hctx_debugfs_attrs) + if (!e->hctx_debugfs_attrs || !blk_mq_debugfs_enabled(q)) return;
hctx->sched_debugfs_dir = debugfs_create_dir("sched", @@ -1038,7 +1098,7 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx) { lockdep_assert_held(&hctx->queue->debugfs_mutex);
- if (!hctx->queue->debugfs_dir) + if (!blk_mq_debugfs_enabled(hctx->queue)) return; debugfs_remove_recursive(hctx->sched_debugfs_dir); hctx->sched_debugfs_dir = NULL; diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index f6898560b1f3..3a2c43a9a0ae 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -19,6 +19,7 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq); int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
void blk_mq_debugfs_register(struct request_queue *q); +void blk_mq_debugfs_unregister(struct request_queue *q); void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx); void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx); @@ -38,6 +39,10 @@ static inline void blk_mq_debugfs_register(struct request_queue *q) { }
+static inline void blk_mq_debugfs_unregister(struct request_queue *q) +{ +} + static inline void blk_mq_debugfs_register_hctx(struct request_queue *q, struct blk_mq_hw_ctx *hctx) { diff --git a/block/blk-mq.c b/block/blk-mq.c index 5670dfeac85a..407098e8f210 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -47,6 +47,11 @@ bool mq_unfair_dtag = true; module_param_named(unfair_dtag, mq_unfair_dtag, bool, 0444);
+#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +bool enable_debugfs = true; +module_param_named(enable_debugfs, enable_debugfs, bool, 0444); +#endif + static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
static void blk_mq_poll_stats_start(struct request_queue *q); @@ -3540,6 +3545,10 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, q->tag_set = set;
q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + if (enable_debugfs) + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); +#endif if (set->nr_maps > HCTX_TYPE_POLL && set->map[HCTX_TYPE_POLL].nr_queues) blk_queue_flag_set(QUEUE_FLAG_POLL, q); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index f4e33203a8ee..18dfd2fe3b3e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -673,6 +673,57 @@ static struct queue_sysfs_entry queue_dispatch_async_cpus_entry = { QUEUE_RW_ENTRY(queue_dispatch_async, "dispatch_async"); #endif
+#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +static ssize_t queue_debugfs_show(struct request_queue *q, char *page) +{ + return queue_var_show(test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags), + page); +} + +static ssize_t queue_debugfs_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long val; + ssize_t ret; + bool enabled; + int err; + + if (!queue_is_mq(q)) + return count; + + if (!blk_queue_registered(q)) + return -ENODEV; + + ret = queue_var_store(&val, page, count); + if (ret < 0) + return ret; + + err = blk_queue_enter(q, 0); + if (err) + return err; + + mutex_lock(&q->debugfs_mutex); + enabled = test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags); + if (!!val == enabled) + goto unlock; + + if (val) { + blk_queue_flag_set(QUEUE_FLAG_DEBUGFS, q); + blk_mq_debugfs_register(q); + } else { + blk_mq_debugfs_unregister(q); + blk_queue_flag_clear(QUEUE_FLAG_DEBUGFS, q); + } + +unlock: + mutex_unlock(&q->debugfs_mutex); + blk_queue_exit(q); + return ret; +} + +QUEUE_RW_ENTRY(queue_debugfs, "debugfs"); +#endif + static struct attribute *queue_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -719,6 +770,9 @@ static struct attribute *queue_attrs[] = { #endif #ifdef CONFIG_BLK_DEV_THROTTLING_LOW &blk_throtl_sample_time_entry.attr, +#endif +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH + &queue_debugfs_entry.attr, #endif NULL, }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fbe7146b63fe..eea753a46419 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -652,6 +652,9 @@ struct request_queue { #define QUEUE_FLAG_HCTX_WAIT 30 /* support to dispatch bio asynchronously */ #define QUEUE_FLAG_DISPATCH_ASYNC 31 +#ifdef CONFIG_BLK_DEBUG_FS_SWITCH +#define QUEUE_FLAG_DEBUGFS 32 /* supports debugfs */ +#endif
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \
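All register paths funnel through one check, mirrored by the sketch below (it restates the blk_mq_debugfs_enabled() helper added in this patch). Clearing QUEUE_FLAG_DEBUGFS through the new "debugfs" queue sysfs attribute therefore stops new entries from being created, while the store handler tears the existing ones down via blk_mq_debugfs_unregister().

static bool example_debugfs_enabled(struct request_queue *q)
{
	if (IS_ERR_OR_NULL(q->debugfs_dir))
		return false;

#ifdef CONFIG_BLK_DEBUG_FS_SWITCH
	/* per-disk switch, set at init from the enable_debugfs module
	 * parameter and toggled later through sysfs */
	if (!test_bit(QUEUE_FLAG_DEBUGFS, &q->queue_flags))
		return false;
#endif

	return true;
}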
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9QJ1S
CVE: NA
----------------------------------------
Move debugfs_mutex back to its old position to avoid breaking KABI.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-sysfs.c      |  4 ++--
 include/linux/blkdev.h | 10 ++++++----
 2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 18dfd2fe3b3e..d717cde56c4f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -1087,10 +1087,10 @@ void blk_unregister_queue(struct gendisk *disk)
mutex_lock(&q->debugfs_mutex); blk_trace_shutdown(q); + if (queue_is_mq(q)) + blk_mq_debugfs_unregister(q); debugfs_remove_recursive(q->debugfs_dir); q->debugfs_dir = NULL; - q->sched_debugfs_dir = NULL; - q->rqos_debugfs_dir = NULL; mutex_unlock(&q->debugfs_mutex);
kobject_put(&disk_to_dev(disk)->kobj); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eea753a46419..af6e6196cd27 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -540,6 +540,10 @@ struct request_queue { unsigned int sg_timeout; unsigned int sg_reserved_size; int node; + /* + * Serializes all debugfs metadata operations using the above dentries. + */ + struct mutex debugfs_mutex; #ifdef CONFIG_BLK_DEV_IO_TRACE struct blk_trace __rcu *blk_trace; #endif @@ -587,12 +591,10 @@ struct request_queue { struct bio_set bio_split;
struct dentry *debugfs_dir; +#ifdef CONFIG_BLK_DEBUG_FS struct dentry *sched_debugfs_dir; struct dentry *rqos_debugfs_dir; - /* - * Serializes all debugfs metadata operations using the above dentries. - */ - struct mutex debugfs_mutex; +#endif
bool mq_sysfs_init_done;
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/9309
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/3...