Offering: HULK
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IB4E8P
CVE: NA
--------------------------------
Support accounting the number of IOs, and the IO latency, for IOs that are throttled by blk-throttle.
Signed-off-by: Yu Kuai yukuai3@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + arch/powerpc/configs/openeuler_defconfig | 1 + arch/riscv/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 1 + block/Makefile | 1 + block/blk-io-hierarchy/Kconfig | 27 +++ block/blk-io-hierarchy/Makefile | 7 + block/blk-io-hierarchy/debugfs.c | 119 +++++++++++++ block/blk-io-hierarchy/stats.c | 216 +++++++++++++++++++++++ block/blk-io-hierarchy/stats.h | 137 ++++++++++++++ block/blk-mq-debugfs.c | 7 +- block/blk-mq-debugfs.h | 7 + block/blk-mq.c | 10 +- block/blk-throttle.c | 8 + include/linux/blk_types.h | 14 +- include/linux/blkdev.h | 4 + 17 files changed, 558 insertions(+), 4 deletions(-) create mode 100644 block/blk-io-hierarchy/Kconfig create mode 100644 block/blk-io-hierarchy/Makefile create mode 100644 block/blk-io-hierarchy/debugfs.c create mode 100644 block/blk-io-hierarchy/stats.c create mode 100644 block/blk-io-hierarchy/stats.h
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index ddbc29bf75f8..7ca156b31e51 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -952,6 +952,7 @@ CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_DEV_DETECT_WRITING_PART0=y CONFIG_BLK_DEV_WRITE_MOUNTED_DUMP=y CONFIG_BLK_IO_HUNG_TASK_CHECK=y +# CONFIG_BLK_IO_HIERARCHY_STATS is not set
# # Partition Types diff --git a/arch/powerpc/configs/openeuler_defconfig b/arch/powerpc/configs/openeuler_defconfig index 19754209e3c8..d725e7c82254 100644 --- a/arch/powerpc/configs/openeuler_defconfig +++ b/arch/powerpc/configs/openeuler_defconfig @@ -679,6 +679,7 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_INLINE_ENCRYPTION is not set +# CONFIG_BLK_IO_HIERARCHY_STATS is not set
# # Partition Types diff --git a/arch/riscv/configs/openeuler_defconfig b/arch/riscv/configs/openeuler_defconfig index 026582613f2c..295dc52b3994 100644 --- a/arch/riscv/configs/openeuler_defconfig +++ b/arch/riscv/configs/openeuler_defconfig @@ -646,6 +646,7 @@ CONFIG_BLK_INLINE_ENCRYPTION=y CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK=y # CONFIG_BLK_DEV_DETECT_WRITING_PART0 is not set # CONFIG_BLK_DEV_WRITE_MOUNTED_DUMP is not set +# CONFIG_BLK_IO_HIERARCHY_STATS is not set
# # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index a2b6381ef2c3..576d961c7a40 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -988,6 +988,7 @@ CONFIG_BLK_DEBUG_FS_ZONED=y CONFIG_BLK_DEV_DETECT_WRITING_PART0=y CONFIG_BLK_DEV_WRITE_MOUNTED_DUMP=y CONFIG_BLK_IO_HUNG_TASK_CHECK=y +# CONFIG_BLK_IO_HIERARCHY_STATS is not set
# # Partition Types diff --git a/block/Kconfig b/block/Kconfig index 04bb49f13176..8fd2a8cb539e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -275,6 +275,7 @@ config BLK_IO_HUNG_TASK_CHECK if this is set, hungtask will complain about slow io even if such io is not hanged. Be careful to enable hungtask panic in this case.
+source "block/blk-io-hierarchy/Kconfig" source "block/partitions/Kconfig"
config BLK_MQ_PCI diff --git a/block/Makefile b/block/Makefile index 46ada9dc8bbf..bfba1d2afc0e 100644 --- a/block/Makefile +++ b/block/Makefile @@ -40,3 +40,4 @@ obj-$(CONFIG_BLK_INLINE_ENCRYPTION) += blk-crypto.o blk-crypto-profile.o \ blk-crypto-sysfs.o obj-$(CONFIG_BLK_INLINE_ENCRYPTION_FALLBACK) += blk-crypto-fallback.o obj-$(CONFIG_BLOCK_HOLDER_DEPRECATED) += holder.o +obj-$(CONFIG_BLK_IO_HIERARCHY_STATS) += blk-io-hierarchy/ diff --git a/block/blk-io-hierarchy/Kconfig b/block/blk-io-hierarchy/Kconfig new file mode 100644 index 000000000000..a12476c73fa5 --- /dev/null +++ b/block/blk-io-hierarchy/Kconfig @@ -0,0 +1,27 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menuconfig BLK_IO_HIERARCHY_STATS + bool "Enable hierarchy io stats" + default n + depends on BLK_DEBUG_FS=y + help + Enabling this lets the block layer to record additional information + in different io stages. Such information can be helpful to debug + performance and problems like io hang. + + If unsure, say N. + +if BLK_IO_HIERARCHY_STATS + +config HIERARCHY_THROTTLE + bool "Enable hierarchy stats layer blk-throttle" + default n + depends on BLK_DEV_THROTTLING=y + help + Enabling this lets blk hierarchy stats to record additional information + for blk-throttle. Such information can be helpful to debug performance + and problems like io hang. + + If unsure, say N. + +endif diff --git a/block/blk-io-hierarchy/Makefile b/block/blk-io-hierarchy/Makefile new file mode 100644 index 000000000000..1fb663c75521 --- /dev/null +++ b/block/blk-io-hierarchy/Makefile @@ -0,0 +1,7 @@ +# +# Make file for blk_io_hierarchy_stats +# + +obj-$(CONFIG_BLK_IO_HIERARCHY_STATS) += blk_io_hierarchy_stats.o + +blk_io_hierarchy_stats-y := stats.o debugfs.o diff --git a/block/blk-io-hierarchy/debugfs.c b/block/blk-io-hierarchy/debugfs.c new file mode 100644 index 000000000000..9072a091c013 --- /dev/null +++ b/block/blk-io-hierarchy/debugfs.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023. 
Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/debugfs.h> + +#include "stats.h" + +static const char *stage_name[NR_STAGE_GROUPS] = { +#ifdef CONFIG_HIERARCHY_THROTTLE + [STAGE_THROTTLE] = "throtl", +#endif +}; + +const char *hierarchy_stage_name(enum stage_group stage) +{ + return stage_name[stage]; +} + +static int hierarchy_stats_show(void *data, struct seq_file *m) +{ + struct hierarchy_stage *hstage = data; + int cpu; + u64 dispatched[NR_STAT_GROUPS] = {0}; + u64 completed[NR_STAT_GROUPS] = {0}; + u64 latency[NR_STAT_GROUPS] = {0}; + + for_each_possible_cpu(cpu) { + int i; + struct hierarchy_stats *stat = per_cpu_ptr(hstage->hstats, cpu); + + for (i = 0; i < NR_STAT_GROUPS; ++i) { + dispatched[i] += stat->dispatched[i]; + completed[i] += stat->completed[i]; + latency[i] += stat->nsecs[i]; + } + } + + seq_printf(m, "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + dispatched[STAT_READ], completed[STAT_READ], + latency[STAT_READ], dispatched[STAT_WRITE], + completed[STAT_WRITE], latency[STAT_WRITE], + dispatched[STAT_DISCARD], completed[STAT_DISCARD], + latency[STAT_DISCARD], dispatched[STAT_FLUSH], + completed[STAT_FLUSH], latency[STAT_FLUSH]); + + return 0; +} + +static struct blk_mq_debugfs_attr hierarchy_debugfs_attrs[] = { + {"stats", 0400, hierarchy_stats_show}, + {}, +}; + +static void hierarchy_register_stage(struct blk_io_hierarchy_stats *stats, + enum stage_group stage) +{ + struct hierarchy_stage *hstage = stats->hstage[stage]; + struct dentry *dir; + + 
if (!stage_name[stage] || hstage->debugfs_dir) + return; + + dir = debugfs_create_dir(stage_name[stage], stats->debugfs_dir); + if (IS_ERR(dir)) + return; + + hstage->debugfs_dir = dir; + debugfs_create_files(dir, hstage, hierarchy_debugfs_attrs); +} + +static void hierarchy_unregister_stage(struct blk_io_hierarchy_stats *stats, + enum stage_group stage) +{ + struct hierarchy_stage *hstage = stats->hstage[stage]; + + if (!stage_name[stage] || !hstage->debugfs_dir) + return; + + debugfs_remove_recursive(hstage->debugfs_dir); + hstage->debugfs_dir = NULL; +} + +void blk_mq_debugfs_register_hierarchy(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = q->io_hierarchy_stats; + + lockdep_assert_held(&q->debugfs_mutex); + + if (!blk_mq_hierarchy_registered(q, stage) || + !blk_mq_debugfs_enabled(q)) + return; + + hierarchy_register_stage(stats, stage); +} + +void blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = q->io_hierarchy_stats; + + lockdep_assert_held(&q->debugfs_mutex); + + if (!blk_mq_hierarchy_registered(q, stage) || + !blk_mq_debugfs_enabled(q)) + return; + + hierarchy_unregister_stage(stats, stage); +} diff --git a/block/blk-io-hierarchy/stats.c b/block/blk-io-hierarchy/stats.c new file mode 100644 index 000000000000..f078ebc5f668 --- /dev/null +++ b/block/blk-io-hierarchy/stats.c @@ -0,0 +1,216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + */ + +#include <linux/debugfs.h> + +#include "stats.h" +#include "../blk.h" +#include "../blk-mq-debugfs.h" + +#define io_hierarchy_add(statsp, field, group, nr) \ + this_cpu_add((statsp)->field[group], nr) +#define io_hierarchy_inc(statsp, field, group) \ + io_hierarchy_add(statsp, field, group, 1) + +void blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats; + enum stage_group stage; + + lockdep_assert_held(&q->debugfs_mutex); + + stats = q->io_hierarchy_stats; + if (!stats || !blk_mq_debugfs_enabled(q)) + return; + + stats->debugfs_dir = debugfs_create_dir("blk_io_hierarchy", + q->debugfs_dir); + + for (stage = 0; stage < NR_STAGE_GROUPS; ++stage) + blk_mq_debugfs_register_hierarchy(q, stage); +} + +void blk_mq_debugfs_unregister_hierarchy_stats(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats; + enum stage_group stage; + + lockdep_assert_held(&q->debugfs_mutex); + + stats = q->io_hierarchy_stats; + if (!stats || !blk_mq_debugfs_enabled(q)) + return; + + for (stage = 0; stage < NR_STAGE_GROUPS; ++stage) + blk_mq_debugfs_unregister_hierarchy(q, stage); + + debugfs_remove_recursive(stats->debugfs_dir); + stats->debugfs_dir = NULL; +} + +int blk_io_hierarchy_stats_alloc(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats; + + if (!queue_is_mq(q)) + return 0; + + stats = kzalloc(sizeof(struct blk_io_hierarchy_stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + + stats->q = q; + q->io_hierarchy_stats = stats; + + return 0; +} + +void blk_io_hierarchy_stats_free(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats = q->io_hierarchy_stats; + + if (!stats) + return; + + q->io_hierarchy_stats = NULL; + kfree(stats); +} + +bool blk_mq_hierarchy_registered(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = q->io_hierarchy_stats; + + if (!stats) + return false; + + 
return stats->hstage[stage] != NULL; +} + +void blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = q->io_hierarchy_stats; + struct hierarchy_stage *hstage; + + if (!stats || !hierarchy_stage_name(stage)) + return; + + if (blk_mq_hierarchy_registered(q, stage)) { + pr_warn("blk-io-hierarchy: disk %s is registering stage %s again.", + q->disk->disk_name, hierarchy_stage_name(stage)); + return; + } + + /* + * Alloc memory before freeze queue, prevent deadlock if new IO is + * issued by memory reclaim. + */ + hstage = kmalloc(sizeof(*hstage), GFP_KERNEL); + if (!hstage) + return; + + hstage->hstats = alloc_percpu(struct hierarchy_stats); + if (!hstage->hstats) { + kfree(hstage); + return; + } + + hstage->stage = stage; + hstage->debugfs_dir = NULL; + + blk_mq_freeze_queue(q); + blk_mq_quiesce_queue(q); + + mutex_lock(&q->debugfs_mutex); + stats->hstage[stage] = hstage; + blk_mq_debugfs_register_hierarchy(q, stage); + mutex_unlock(&q->debugfs_mutex); + + blk_mq_unquiesce_queue(q); + blk_mq_unfreeze_queue(q); +} + +void blk_mq_unregister_hierarchy(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = q->io_hierarchy_stats; + struct hierarchy_stage *hstage; + + if (!blk_mq_hierarchy_registered(q, stage)) + return; + + mutex_lock(&q->debugfs_mutex); + + blk_mq_debugfs_unregister_hierarchy(q, stage); + + hstage = stats->hstage[stage]; + stats->hstage[stage] = NULL; + free_percpu(hstage->hstats); + kfree(hstage); + + mutex_unlock(&q->debugfs_mutex); +} + +static enum stat_group hierarchy_op(const struct bio *bio) +{ + if (op_is_discard(bio->bi_opf)) + return STAT_DISCARD; + + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return STAT_FLUSH; + + if (op_is_write(bio->bi_opf)) + return STAT_WRITE; + + return STAT_READ; +} + + +void bio_hierarchy_start_io_acct(struct bio *bio, enum stage_group stage) +{ + struct request_queue *q = bio->bi_bdev->bd_queue; + struct 
hierarchy_stage *hstage; + + if (!blk_mq_hierarchy_registered(q, stage)) + return; + + hstage = q->io_hierarchy_stats->hstage[stage]; + io_hierarchy_inc(hstage->hstats, dispatched, hierarchy_op(bio)); + bio->hierarchy_time = ktime_get_ns(); +} + +void bio_hierarchy_end_io_acct(struct bio *bio, enum stage_group stage, + u64 time) +{ + struct request_queue *q = bio->bi_bdev->bd_queue; + struct hierarchy_stage *hstage; + enum stat_group op; + + if (!blk_mq_hierarchy_registered(q, stage)) + return; + + op = hierarchy_op(bio); + hstage = q->io_hierarchy_stats->hstage[stage]; + io_hierarchy_inc(hstage->hstats, completed, op); + io_hierarchy_add(hstage->hstats, nsecs, op, time - bio->hierarchy_time); +} + +void bio_list_hierarchy_end_io_acct(struct bio_list *list, + enum stage_group stage) +{ + u64 time = ktime_get_ns(); + struct bio *bio; + + bio_list_for_each(bio, list) + bio_hierarchy_end_io_acct(bio, stage, time); +} diff --git a/block/blk-io-hierarchy/stats.h b/block/blk-io-hierarchy/stats.h new file mode 100644 index 000000000000..0a86d1235715 --- /dev/null +++ b/block/blk-io-hierarchy/stats.h @@ -0,0 +1,137 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#ifndef BLK_IO_HIERARCHY_STATS_H +#define BLK_IO_HIERARCHY_STATS_H + +#ifdef CONFIG_BLK_IO_HIERARCHY_STATS + +#include <linux/blkdev.h> +#include "../blk-mq-debugfs.h" + +struct bio_hierarchy_data { + u64 time; +}; + +struct hierarchy_stats { + u64 nsecs[NR_STAT_GROUPS]; + unsigned long dispatched[NR_STAT_GROUPS]; + unsigned long completed[NR_STAT_GROUPS]; +}; + +struct hierarchy_stage { + enum stage_group stage; + struct dentry *debugfs_dir; + struct hierarchy_stats __percpu *hstats; +}; + +struct blk_io_hierarchy_stats { + struct request_queue *q; + struct dentry *debugfs_dir; + struct hierarchy_stage *hstage[NR_STAGE_GROUPS]; +}; + +const char *hierarchy_stage_name(enum stage_group stage); +int blk_io_hierarchy_stats_alloc(struct request_queue *q); +void blk_io_hierarchy_stats_free(struct request_queue *q); + +/* APIs for stage registration */ +bool blk_mq_hierarchy_registered(struct request_queue *q, + enum stage_group stage); +void blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage); +void blk_mq_unregister_hierarchy(struct request_queue *q, + enum stage_group stage); + +/* APIs for disk level debugfs */ +void blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q); +void blk_mq_debugfs_unregister_hierarchy_stats(struct request_queue *q); + +/* APIs for stage level debugfs */ +void blk_mq_debugfs_register_hierarchy(struct request_queue *q, + enum stage_group stage); +void blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, + enum stage_group stage); + +/* APIs for bio based stage io accounting */ +void bio_hierarchy_start_io_acct(struct bio *bio, enum stage_group stage); +void bio_hierarchy_end_io_acct(struct bio *bio, enum stage_group stage, + u64 time); +void bio_list_hierarchy_end_io_acct(struct bio_list *list, + enum stage_group stage); +#else /* CONFIG_BLK_IO_HIERARCHY_STATS */ + +static inline int +blk_io_hierarchy_stats_alloc(struct request_queue *q) +{ + return 0; +} + +static inline void 
+blk_io_hierarchy_stats_free(struct request_queue *q) +{ +} + +static inline bool +blk_mq_hierarchy_registered(struct request_queue *q, enum stage_group stage) +{ + return false; +} + +static inline void +blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage) +{ +} + +static inline void +blk_mq_unregister_hierarchy(struct request_queue *q, enum stage_group stage) +{ +} + +static inline void +blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q) +{ +} + +static inline void +blk_mq_debugfs_unregister_hierarchy_stats(struct request_queue *q) +{ +} + +static inline void +blk_mq_debugfs_register_hierarchy(struct request_queue *q, + enum stage_group stage) +{ +} + +static inline void +blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, + enum stage_group stage) +{ +} + +static inline void +bio_hierarchy_start_io_acct(struct bio *bio, enum stage_group stage) +{ +} + +static inline void +bio_hierarchy_end_io_acct(struct bio *bio, enum stage_group stage, u64 time) +{ +} + +static inline void +bio_list_hierarchy_end_io_acct(struct bio_list *list, enum stage_group stage) +{ +} +#endif /* CONFIG_BLK_IO_HIERARCHY_STATS */ +#endif /* BLK_IO_HIERARCHY_STATS_H */ diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 9ad108307344..a955ee42765f 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -12,6 +12,7 @@ #include "blk-mq-debugfs.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" +#include "blk-io-hierarchy/stats.h"
static int queue_poll_stat_show(void *data, struct seq_file *m) { @@ -642,8 +643,8 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = { {}, };
-static void debugfs_create_files(struct dentry *parent, void *data, - const struct blk_mq_debugfs_attr *attr) +void debugfs_create_files(struct dentry *parent, void *data, + const struct blk_mq_debugfs_attr *attr) { if (IS_ERR_OR_NULL(parent)) return; @@ -686,6 +687,8 @@ void blk_mq_debugfs_register(struct request_queue *q) rqos = rqos->next; } } + + blk_mq_debugfs_register_hierarchy_stats(q); }
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx, diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h index 9c7d4b6117d4..4c422580ce84 100644 --- a/block/blk-mq-debugfs.h +++ b/block/blk-mq-debugfs.h @@ -35,6 +35,13 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_register_rqos(struct rq_qos *rqos); void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos); +void debugfs_create_files(struct dentry *parent, void *data, + const struct blk_mq_debugfs_attr *attr); + +static inline bool blk_mq_debugfs_enabled(struct request_queue *q) +{ + return !IS_ERR_OR_NULL(q->debugfs_dir); +} #else static inline void blk_mq_debugfs_register(struct request_queue *q) { diff --git a/block/blk-mq.c b/block/blk-mq.c index ed43c89132a7..630147c43a35 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -41,6 +41,7 @@ #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" +#include "blk-io-hierarchy/stats.h"
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd); @@ -4117,6 +4118,8 @@ void blk_mq_release(struct request_queue *q) struct blk_mq_hw_ctx *hctx, *next; unsigned long i;
+ blk_io_hierarchy_stats_free(q); + queue_for_each_hw_ctx(q, hctx, i) WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
@@ -4315,9 +4318,12 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* mark the queue as mq asap */ q->mq_ops = set->ops;
- if (blk_mq_alloc_ctxs(q)) + if (blk_io_hierarchy_stats_alloc(q)) goto err_exit;
+ if (blk_mq_alloc_ctxs(q)) + goto err_hierarchy_stats; + /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q);
@@ -4352,6 +4358,8 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
err_hctxs: blk_mq_release(q); +err_hierarchy_stats: + blk_io_hierarchy_stats_free(q); err_exit: q->mq_ops = NULL; return -ENOMEM; diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b1cbb3a1da0f..62662bcd2392 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -14,6 +14,7 @@ #include "blk-cgroup-rwstat.h" #include "blk-stat.h" #include "blk-throttle.h" +#include "blk-io-hierarchy/stats.h"
/* Max dispatch from a group in 1 round */ #define THROTL_GRP_QUANTUM 8 @@ -1291,6 +1292,8 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work) bio_list_add(&bio_list_on_stack, bio); spin_unlock_irq(&q->queue_lock);
+ bio_list_hierarchy_end_io_acct(&bio_list_on_stack, STAGE_THROTTLE); + if (!bio_list_empty(&bio_list_on_stack)) { blk_start_plug(&plug); while ((bio = bio_list_pop(&bio_list_on_stack))) @@ -2283,6 +2286,7 @@ bool __blk_throtl_bio(struct bio *bio) td->nr_queued[rw]++; throtl_add_bio_tg(bio, qn, tg); throttled = true; + bio_hierarchy_start_io_acct(bio, STAGE_THROTTLE);
/* * Update @tg's dispatch time and force schedule dispatch if @tg @@ -2443,6 +2447,8 @@ void blk_throtl_exit(struct gendisk *disk) del_timer_sync(&q->td->service_queue.pending_timer); throtl_shutdown_wq(q); blkcg_deactivate_policy(disk, &blkcg_policy_throtl); + blk_mq_unregister_hierarchy(q, STAGE_THROTTLE); + free_percpu(q->td->latency_buckets[READ]); free_percpu(q->td->latency_buckets[WRITE]); kfree(q->td); @@ -2477,6 +2483,8 @@ void blk_throtl_register(struct gendisk *disk) if (!td->track_bio_latency) blk_stat_enable_accounting(q); #endif + + blk_mq_register_hierarchy(q, STAGE_THROTTLE); }
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index ad92611a85f8..8739f169a99e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -332,8 +332,11 @@ struct bio {
struct bio_set *bi_pool;
- +#ifdef CONFIG_BLK_IO_HIERARCHY_STATS + KABI_USE(1, u64 hierarchy_time) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4) @@ -498,6 +501,15 @@ enum stat_group { NR_STAT_GROUPS };
+enum stage_group { +#ifdef CONFIG_BLK_DEV_THROTTLING + STAGE_THROTTLE, +#endif + STAGE_RESERVE, + NR_BIO_STAGE_GROUPS, + NR_STAGE_GROUPS = NR_BIO_STAGE_GROUPS, +}; + static inline enum req_op bio_op(const struct bio *bio) { return bio->bi_opf & REQ_OP_MASK; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a41707b09d8d..bf58ae46cc59 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -575,7 +575,11 @@ struct request_queue {
bool mq_sysfs_init_done;
+#ifdef CONFIG_BLK_IO_HIERARCHY_STATS + KABI_USE(1, struct blk_io_hierarchy_stats *io_hierarchy_stats) +#else KABI_RESERVE(1) +#endif KABI_RESERVE(2) KABI_RESERVE(3) KABI_RESERVE(4)