hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAGRKP CVE: NA
--------------------------------
Include the main structure definitions and provide helpers for the different IO stages to record IO stats.
Signed-off-by: Yu Kuai <yukuai3@huawei.com> --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + block/Kconfig | 2 + block/Makefile | 1 + block/bio.c | 2 + block/blk-flush.c | 2 + block/blk-io-hierarchy/Kconfig | 12 + block/blk-io-hierarchy/Makefile | 7 + block/blk-io-hierarchy/debugfs.c | 194 +++++++++++++ block/blk-io-hierarchy/stats.c | 388 +++++++++++++++++++++++++ block/blk-io-hierarchy/stats.h | 309 ++++++++++++++++++++ block/blk-mq.c | 2 + block/blk-mq.h | 5 + block/blk.h | 7 +- include/linux/blk_types.h | 15 +- 15 files changed, 945 insertions(+), 3 deletions(-) create mode 100644 block/blk-io-hierarchy/Kconfig create mode 100644 block/blk-io-hierarchy/Makefile create mode 100644 block/blk-io-hierarchy/debugfs.c create mode 100644 block/blk-io-hierarchy/stats.c create mode 100644 block/blk-io-hierarchy/stats.h
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 4de42999f905..71e12eb64467 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -822,6 +822,7 @@ CONFIG_BLK_DEBUG_FS=y CONFIG_BLK_DEBUG_FS_ZONED=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_BIO_DISPATCH_ASYNC is not set +# CONFIG_BLK_IO_HIERARCHY_STATS is not set
# # Partition Types diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 928b4379af4d..7993f0f3e7a4 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -891,6 +891,7 @@ CONFIG_BLK_WBT_MQ=y CONFIG_BLK_DEBUG_FS=y # CONFIG_BLK_SED_OPAL is not set # CONFIG_BLK_BIO_DISPATCH_ASYNC is not set +# CONFIG_BLK_IO_HIERARCHY_STATS is not set
# # Partition Types diff --git a/block/Kconfig b/block/Kconfig index 9b512a000af7..8804f21df151 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -219,6 +219,8 @@ config BLK_BIO_DISPATCH_ASYNC feature will require special care in the driver to work. If unsure, say N here.
+source "block/blk-io-hierarchy/Kconfig" + menu "Partition Types"
source "block/partitions/Kconfig" diff --git a/block/Makefile b/block/Makefile index 572b33f32c07..bb711b0c307a 100644 --- a/block/Makefile +++ b/block/Makefile @@ -37,3 +37,4 @@ obj-$(CONFIG_BLK_WBT) += blk-wbt.o obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o obj-$(CONFIG_BLK_DEBUG_FS_ZONED)+= blk-mq-debugfs-zoned.o obj-$(CONFIG_BLK_SED_OPAL) += sed-opal.o +obj-$(CONFIG_BLK_IO_HIERARCHY_STATS) += blk-io-hierarchy/ diff --git a/block/bio.c b/block/bio.c index b32dc89bb704..c3aeae529dfd 100644 --- a/block/bio.c +++ b/block/bio.c @@ -33,6 +33,7 @@ #include <trace/events/block.h> #include "blk.h" #include "blk-rq-qos.h" +#include "blk-io-hierarchy/stats.h"
/* * Test patch to inline a certain number of bi_io_vec's inside the bio @@ -251,6 +252,7 @@ void bio_uninit(struct bio *bio) bio->pid = NULL; } #endif + bio_free_hierarchy_data(bio); } EXPORT_SYMBOL(bio_uninit);
diff --git a/block/blk-flush.c b/block/blk-flush.c index 2bc03d6f7d2a..6f08e1d87f47 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -75,6 +75,7 @@ #include "blk-mq.h" #include "blk-mq-tag.h" #include "blk-mq-sched.h" +#include "blk-io-hierarchy/stats.h"
/* PREFLUSH/FUA sequences */ enum { @@ -380,6 +381,7 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq, flush_rq->end_io = flush_end_io; blk_rq_init_bi_alloc_time(flush_rq, first_rq); blk_mq_get_alloc_task(flush_rq, first_rq->bio); + blk_rq_hierarchy_stats_init(flush_rq);
/* * Order WRITE ->end_io and WRITE rq->ref, and its pair is the one diff --git a/block/blk-io-hierarchy/Kconfig b/block/blk-io-hierarchy/Kconfig new file mode 100644 index 000000000000..2b2b725ba224 --- /dev/null +++ b/block/blk-io-hierarchy/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only + +menuconfig BLK_IO_HIERARCHY_STATS + bool "Enable hierarchy io stats" + default n + depends on BLK_DEBUG_FS=y + help + Enabling this lets the block layer record additional information + in different io stages. Such information can be helpful to debug + performance and problems like io hang. + + If unsure, say N. diff --git a/block/blk-io-hierarchy/Makefile b/block/blk-io-hierarchy/Makefile new file mode 100644 index 000000000000..1fb663c75521 --- /dev/null +++ b/block/blk-io-hierarchy/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for blk_io_hierarchy_stats +# + +obj-$(CONFIG_BLK_IO_HIERARCHY_STATS) += blk_io_hierarchy_stats.o + +blk_io_hierarchy_stats-y := stats.o debugfs.o diff --git a/block/blk-io-hierarchy/debugfs.c b/block/blk-io-hierarchy/debugfs.c new file mode 100644 index 000000000000..e4c8751371f1 --- /dev/null +++ b/block/blk-io-hierarchy/debugfs.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/debugfs.h> +#include <linux/blkdev.h> + +#include "../blk-mq-debugfs.h" +#include "stats.h" + +static const char *stage_name[NR_STAGE_GROUPS] = { +}; + +const char *hierarchy_stage_name(enum stage_group stage) +{ + return stage_name[stage]; +} + +static int __hierarchy_stats_show(struct hierarchy_stats_data *hstats_data, + struct seq_file *m, enum stage_group stage) +{ + u64 dispatched[NR_NEW_STAT_GROUPS] = {0}; + u64 completed[NR_NEW_STAT_GROUPS] = {0}; + u64 latency[NR_NEW_STAT_GROUPS] = {0}; + int cpu; + int i; + + for_each_possible_cpu(cpu) { + struct hierarchy_stats *stat = + per_cpu_ptr(hstats_data->hstats, cpu); + + for (i = 0; i < NR_NEW_STAT_GROUPS; ++i) { + dispatched[i] += stat->dispatched[i]; + completed[i] += stat->completed[i]; + latency[i] += stage_is_rq(stage) ? + stat->jiffies[i] : stat->nsecs[i]; + } + } + + if (stage_is_rq(stage)) + for (i = 0; i < NR_NEW_STAT_GROUPS; ++i) + latency[i] = + jiffies_to_msecs(latency[i]) * NSEC_PER_MSEC; + + seq_printf(m, "%llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu", + dispatched[STAT_READ], completed[STAT_READ], + latency[STAT_READ], dispatched[STAT_WRITE], + completed[STAT_WRITE], latency[STAT_WRITE], + dispatched[STAT_DISCARD], completed[STAT_DISCARD], + latency[STAT_DISCARD], dispatched[STAT_FLUSH], + completed[STAT_FLUSH], latency[STAT_FLUSH]); + + seq_putc(m, '\n'); + return 0; +} + +static void *hierarchy_stats_start(struct seq_file *m, loff_t *pos) +{ + enum stage_group stage = *pos; + + if (stage < 0 || stage >= NR_STAGE_GROUPS) + return NULL; + + return pos; +} + +static void *hierarchy_stats_next(struct seq_file *m, void *v, loff_t *pos) +{ + enum stage_group stage = ++(*pos); + + if (stage >= 0 && stage < NR_STAGE_GROUPS) + return pos; + + return NULL; +} + +static void hierarchy_stats_stop(struct seq_file *m, void *v) +{ +} + +static int hierarchy_stats_show(struct seq_file *m, void *v) +{ + enum stage_group stage = (*(loff_t *)v); + struct blk_io_hierarchy_stats 
*stats = m->private; + struct hierarchy_stats_data *hstats_data = + get_hstats_data(stats, stage); + + if (!hstats_data) + return 0; + + seq_printf(m, "%s ", hierarchy_stage_name(stage)); + __hierarchy_stats_show(hstats_data, m, stage); + put_hstats_data(stats, hstats_data); + return 0; +} + +static const struct seq_operations hierarchy_stats_ops = { + .start = hierarchy_stats_start, + .next = hierarchy_stats_next, + .stop = hierarchy_stats_stop, + .show = hierarchy_stats_show, +}; + +static int hierarchy_stats_show_single(void *v, struct seq_file *m) +{ + struct hierarchy_stage *hstage = v; + + return __hierarchy_stats_show(hstage->hstats_data, m, hstage->stage); +} + +static const struct blk_mq_debugfs_attr hierarchy_debugfs_attrs[] = { + {"stats", 0400, hierarchy_stats_show_single}, + {}, +}; + +static const struct blk_mq_debugfs_attr hierarchy_stats_attr[] = { + {"stats", 0400, .seq_ops = &hierarchy_stats_ops}, + {}, +}; + +static void hierarchy_register_stage(struct blk_io_hierarchy_stats *stats, + enum stage_group stage) +{ + struct hierarchy_stage *hstage = stats->hstage[stage]; + struct dentry *dir; + + if (!stage_name[stage] || hstage->debugfs_dir) + return; + + dir = debugfs_create_dir(stage_name[stage], stats->debugfs_dir); + if (IS_ERR(dir)) + return; + + hstage->debugfs_dir = dir; + debugfs_create_files(dir, hstage, hierarchy_debugfs_attrs); +} + +static void hierarchy_unregister_stage(struct blk_io_hierarchy_stats *stats, + enum stage_group stage) +{ + struct hierarchy_stage *hstage = stats->hstage[stage]; + + if (!stage_name[stage] || !hstage->debugfs_dir) + return; + + debugfs_remove_recursive(hstage->debugfs_dir); + hstage->debugfs_dir = NULL; +} + +void blk_mq_debugfs_register_hierarchy(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = + queue_to_wrapper(q)->io_hierarchy_stats; + + if (!blk_mq_hierarchy_registered(q, stage) || + !blk_mq_debugfs_enabled(q)) + return; + + hierarchy_register_stage(stats, 
stage); +} + +void blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = + queue_to_wrapper(q)->io_hierarchy_stats; + + if (!blk_mq_hierarchy_registered(q, stage) || + !blk_mq_debugfs_enabled(q)) + return; + + hierarchy_unregister_stage(stats, stage); +} + +void blk_mq_debugfs_create_default_hierarchy_attr(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats = + queue_to_wrapper(q)->io_hierarchy_stats; + + if (!blk_mq_debugfs_enabled(q)) + return; + + debugfs_create_files(stats->debugfs_dir, stats, hierarchy_stats_attr); +} diff --git a/block/blk-io-hierarchy/stats.c b/block/blk-io-hierarchy/stats.c new file mode 100644 index 000000000000..b173ac5e2410 --- /dev/null +++ b/block/blk-io-hierarchy/stats.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <linux/module.h> +#include <linux/debugfs.h> + +#include "stats.h" +#include "../blk.h" +#include "../blk-mq-debugfs.h" + +#define io_hierarchy_add(statsp, field, group, nr) \ + this_cpu_add((statsp)->hstats->field[group], nr) +#define io_hierarchy_inc(statsp, field, group) \ + io_hierarchy_add(statsp, field, group, 1) + +#define PRE_ALLOC_BIO_CNT 8 + +static mempool_t *hdata_pool; + +void blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats; + enum stage_group stage; + + stats = queue_to_wrapper(q)->io_hierarchy_stats; + if (!stats || !blk_mq_debugfs_enabled(q)) + return; + + stats->debugfs_dir = debugfs_create_dir("blk_io_hierarchy", + q->debugfs_dir); + blk_mq_debugfs_create_default_hierarchy_attr(q); + + for (stage = 0; stage < NR_STAGE_GROUPS; ++stage) + blk_mq_debugfs_register_hierarchy(q, stage); +} + +static void bio_alloc_hierarchy_data(struct bio *bio) +{ + if (!bio->hdata) { + struct bio_hierarchy_data *hdata = + mempool_alloc(hdata_pool, GFP_NOIO); + + bio->hdata = hdata; + } +} + +void bio_free_hierarchy_data(struct bio *bio) +{ + if (!bio->hdata) + return; + + mempool_free(bio->hdata, hdata_pool); + bio->hdata = NULL; +} + +void blk_mq_debugfs_unregister_hierarchy_stats(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats; + enum stage_group stage; + + stats = queue_to_wrapper(q)->io_hierarchy_stats; + if (!stats || !blk_mq_debugfs_enabled(q)) + return; + + for (stage = 0; stage < NR_STAGE_GROUPS; ++stage) + blk_mq_debugfs_unregister_hierarchy(q, stage); + + debugfs_remove_recursive(stats->debugfs_dir); + stats->debugfs_dir = NULL; +} + +int blk_io_hierarchy_stats_alloc(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats; + + if (!q->mq_ops) + return 0; + + stats = kzalloc(sizeof(struct blk_io_hierarchy_stats), GFP_KERNEL); + if (!stats) + return -ENOMEM; + + spin_lock_init(&stats->hstage_lock); + stats->q = q; + queue_to_wrapper(q)->io_hierarchy_stats 
= stats; + + return 0; +} + +void blk_io_hierarchy_stats_free(struct request_queue *q) +{ + struct blk_io_hierarchy_stats *stats = + queue_to_wrapper(q)->io_hierarchy_stats; + + if (!stats) + return; + + queue_to_wrapper(q)->io_hierarchy_stats = NULL; + kfree(stats); +} + +bool blk_mq_hierarchy_registered(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = + queue_to_wrapper(q)->io_hierarchy_stats; + + if (!stats) + return false; + + return stats->hstage[stage] != NULL; +} +EXPORT_SYMBOL_GPL(blk_mq_hierarchy_registered); + +static struct hierarchy_stats_data *alloc_hstats_data(void) +{ + struct hierarchy_stats_data *hstats_data; + + hstats_data = kmalloc(sizeof(*hstats_data), GFP_KERNEL); + if (!hstats_data) + return NULL; + + hstats_data->hstats = alloc_percpu(struct hierarchy_stats); + if (!hstats_data->hstats) { + kfree(hstats_data); + return NULL; + } + + hstats_data->ref = 1; + return hstats_data; +} + +struct hierarchy_stats_data *get_hstats_data( + struct blk_io_hierarchy_stats *stats, + enum stage_group stage) +{ + struct hierarchy_stage *hstage; + struct hierarchy_stats_data *hstats_data = NULL; + + spin_lock(&stats->hstage_lock); + hstage = stats->hstage[stage]; + if (hstage) { + hstats_data = hstage->hstats_data; + if (hstats_data) + hstats_data->ref++; + } + spin_unlock(&stats->hstage_lock); + + return hstats_data; +} + +static void __put_hstats_data(struct blk_io_hierarchy_stats *stats, + struct hierarchy_stats_data *hstats_data) +{ + if (--hstats_data->ref == 0) { + free_percpu(hstats_data->hstats); + kfree(hstats_data); + } +} + +void put_hstats_data(struct blk_io_hierarchy_stats *stats, + struct hierarchy_stats_data *hstats_data) +{ + spin_lock(&stats->hstage_lock); + __put_hstats_data(stats, hstats_data); + spin_unlock(&stats->hstage_lock); +} + +void blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = + 
queue_to_wrapper(q)->io_hierarchy_stats; + struct hierarchy_stage *hstage; + + if (!stats || !hierarchy_stage_name(stage)) + return; + + if (blk_mq_hierarchy_registered(q, stage)) { + pr_warn("blk-io-hierarchy: disk %s is registering stage %s again.", + kobject_name(q->kobj.parent), + hierarchy_stage_name(stage)); + return; + } + + /* + * Alloc memory before freeze queue, prevent deadlock if new IO is + * issued by memory reclaim. + */ + hstage = kmalloc(sizeof(*hstage), GFP_KERNEL); + if (!hstage) + return; + + hstage->hstats_data = alloc_hstats_data(); + if (!hstage->hstats_data) { + kfree(hstage); + return; + } + + hstage->stage = stage; + hstage->unbalanced_warned = false; + hstage->debugfs_dir = NULL; + + blk_mq_freeze_queue(q); + + WRITE_ONCE(stats->hstage[stage], hstage); + blk_mq_debugfs_register_hierarchy(q, stage); + + blk_mq_unfreeze_queue(q); +} +EXPORT_SYMBOL_GPL(blk_mq_register_hierarchy); + +void blk_mq_unregister_hierarchy(struct request_queue *q, + enum stage_group stage) +{ + struct blk_io_hierarchy_stats *stats = + queue_to_wrapper(q)->io_hierarchy_stats; + struct hierarchy_stage *hstage; + + if (!blk_mq_hierarchy_registered(q, stage)) + return; + + blk_mq_debugfs_unregister_hierarchy(q, stage); + + spin_lock(&stats->hstage_lock); + hstage = stats->hstage[stage]; + stats->hstage[stage] = NULL; + __put_hstats_data(stats, hstage->hstats_data); + spin_unlock(&stats->hstage_lock); + + kfree(hstage); +} +EXPORT_SYMBOL_GPL(blk_mq_unregister_hierarchy); + +static enum stat_group bio_hierarchy_op(struct bio *bio) +{ + if (op_is_discard(bio->bi_opf)) + return STAT_DISCARD; + + if (op_is_flush(bio->bi_opf) && !bio_sectors(bio)) + return STAT_FLUSH; + + if (op_is_write(bio->bi_opf)) + return STAT_WRITE; + + return STAT_READ; +} + + +void bio_hierarchy_start_io_acct(struct bio *bio, enum stage_group stage) +{ + struct request_queue *q = bio->bi_disk->queue; + struct hierarchy_stage *hstage; + + if (!blk_mq_hierarchy_registered(q, stage)) + return; + + hstage 
= queue_to_wrapper(q)->io_hierarchy_stats->hstage[stage]; + bio_alloc_hierarchy_data(bio); + io_hierarchy_inc(hstage->hstats_data, dispatched, + bio_hierarchy_op(bio)); + bio->hdata->time = blk_time_get_ns(); +} + +void __bio_hierarchy_end_io_acct(struct bio *bio, enum stage_group stage, + u64 time) +{ + struct request_queue *q = bio->bi_disk->queue; + struct hierarchy_stage *hstage; + u64 duration; + enum stat_group op; + + if (!blk_mq_hierarchy_registered(q, stage)) + return; + + op = bio_hierarchy_op(bio); + duration = time - bio->hdata->time; + hstage = queue_to_wrapper(q)->io_hierarchy_stats->hstage[stage]; + + io_hierarchy_inc(hstage->hstats_data, completed, op); + io_hierarchy_add(hstage->hstats_data, nsecs, op, duration); +} + +static enum stat_group rq_hierarchy_op(struct request *rq) +{ + if (op_is_discard(rq->cmd_flags)) + return STAT_DISCARD; + + if (is_flush_rq(rq)) + return STAT_FLUSH; + + if (op_is_write(rq->cmd_flags)) + return STAT_WRITE; + + return STAT_READ; +} + +static void rq_hierarchy_warn_unbalanced(struct request *rq, + struct hierarchy_stage *hstage, + enum stage_group old_stage, + enum stage_group new_stage) +{ + if (hstage->unbalanced_warned) + return; + + pr_warn("blk-io-hierarchy: disk %s stage %d(%s) -> %d(%s) unbalanced accounting.", + kobject_name(rq->q->kobj.parent), + old_stage, hierarchy_stage_name(old_stage), + new_stage, hierarchy_stage_name(new_stage)); + hstage->unbalanced_warned = true; +} + +void blk_rq_hierarchy_stats_complete(struct request *rq) +{ + struct hierarchy_stage *hstage; + enum stage_group stage; + + stage = request_to_wrapper(rq)->stage; + if (stage == NR_RQ_STAGE_GROUPS) + return; + + if (!blk_mq_hierarchy_registered(rq->q, stage)) + return; + + hstage = queue_to_wrapper(rq->q)->io_hierarchy_stats->hstage[stage]; + rq_hierarchy_warn_unbalanced(rq, hstage, stage, NR_RQ_STAGE_GROUPS); + __rq_hierarchy_end_io_acct(rq, hstage); +} + +void __rq_hierarchy_start_io_acct(struct request *rq, + struct hierarchy_stage 
*hstage) +{ + struct request_wrapper *rq_wrapper = request_to_wrapper(rq); + + blk_rq_hierarchy_stats_complete(rq); + io_hierarchy_inc(hstage->hstats_data, dispatched, rq_hierarchy_op(rq)); + WRITE_ONCE(rq_wrapper->hierarchy_time, jiffies); + + /* + * Paired with barrier in hierarchy_show_rq_fn(), make sure + * hierarchy_time is set before stage. + */ + smp_store_release(&rq_wrapper->stage, hstage->stage); +} +EXPORT_SYMBOL_GPL(__rq_hierarchy_start_io_acct); + +void __rq_hierarchy_end_io_acct(struct request *rq, + struct hierarchy_stage *hstage) +{ + enum stat_group op; + unsigned long duration; + struct request_wrapper *rq_wrapper; + + rq_wrapper = request_to_wrapper(rq); + if (rq_wrapper->stage != hstage->stage) { + rq_hierarchy_warn_unbalanced(rq, hstage, rq_wrapper->stage, + hstage->stage); + return; + } + + op = rq_hierarchy_op(rq); + duration = jiffies - rq_wrapper->hierarchy_time; + + io_hierarchy_inc(hstage->hstats_data, completed, op); + io_hierarchy_add(hstage->hstats_data, jiffies, op, duration); + WRITE_ONCE(rq_wrapper->stage, NR_RQ_STAGE_GROUPS); +} +EXPORT_SYMBOL_GPL(__rq_hierarchy_end_io_acct); + +static int __init hierarchy_stats_init(void) +{ + hdata_pool = mempool_create_kmalloc_pool(PRE_ALLOC_BIO_CNT, + sizeof(struct bio_hierarchy_data)); + if (!hdata_pool) + panic("Failed to create hdata_pool\n"); + + return 0; +} +module_init(hierarchy_stats_init); diff --git a/block/blk-io-hierarchy/stats.h b/block/blk-io-hierarchy/stats.h new file mode 100644 index 000000000000..5f2f0ce2e34c --- /dev/null +++ b/block/blk-io-hierarchy/stats.h @@ -0,0 +1,309 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024. Huawei Technologies Co., Ltd. All rights reserved. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef BLK_IO_HIERARCHY_STATS_H +#define BLK_IO_HIERARCHY_STATS_H + +#ifdef CONFIG_BLK_IO_HIERARCHY_STATS + +#include <linux/blkdev.h> +#include <linux/blk_types.h> +#include "../blk.h" + +struct bio_hierarchy_data { + u64 time; +}; + +struct hierarchy_stats { + union { + /* for bio based stages. */ + u64 nsecs[NR_NEW_STAT_GROUPS]; + /* for request based stages. */ + unsigned long jiffies[NR_NEW_STAT_GROUPS]; + }; + unsigned long dispatched[NR_NEW_STAT_GROUPS]; + unsigned long completed[NR_NEW_STAT_GROUPS]; +}; + +struct hierarchy_stats_data { + int ref; + struct hierarchy_stats __percpu *hstats; +}; + +struct hierarchy_stage { + enum stage_group stage; + bool unbalanced_warned; + struct dentry *debugfs_dir; + struct hierarchy_stats_data *hstats_data; +}; + +struct blk_io_hierarchy_stats { + struct request_queue *q; + struct dentry *debugfs_dir; + spinlock_t hstage_lock; + struct hierarchy_stage *hstage[NR_STAGE_GROUPS]; +}; + +static inline bool stage_is_bio(enum stage_group stage) +{ + return stage >= 0 && stage < NR_BIO_STAGE_GROUPS; +} + +static inline bool stage_is_rq(enum stage_group stage) +{ + return stage >= NR_BIO_STAGE_GROUPS && stage < NR_RQ_STAGE_GROUPS; +} + +const char *hierarchy_stage_name(enum stage_group stage); +int blk_io_hierarchy_stats_alloc(struct request_queue *q); +void blk_io_hierarchy_stats_free(struct request_queue *q); + +/* APIs for stage registration */ +bool blk_mq_hierarchy_registered(struct request_queue *q, + enum stage_group stage); +void blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage); +void blk_mq_unregister_hierarchy(struct request_queue *q, + enum stage_group stage); + +/* APIs for disk level debugfs */ +void 
blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q); +void blk_mq_debugfs_unregister_hierarchy_stats(struct request_queue *q); +void blk_mq_debugfs_create_default_hierarchy_attr(struct request_queue *q); + +/* APIs for stage level debugfs */ +void blk_mq_debugfs_register_hierarchy(struct request_queue *q, + enum stage_group stage); +void blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, + enum stage_group stage); +struct hierarchy_stats_data *get_hstats_data( + struct blk_io_hierarchy_stats *stats, + enum stage_group stage); +void put_hstats_data(struct blk_io_hierarchy_stats *stats, + struct hierarchy_stats_data *hstats_data); + +/* APIs for bio based stage io accounting */ +void bio_hierarchy_start_io_acct(struct bio *bio, enum stage_group stage); +void __bio_hierarchy_end_io_acct(struct bio *bio, enum stage_group stage, + u64 time); +void bio_free_hierarchy_data(struct bio *bio); + +static inline void bio_hierarchy_end_io_acct(struct bio *bio, + enum stage_group stage) +{ + __bio_hierarchy_end_io_acct(bio, stage, blk_time_get_ns()); +} + +static inline void bio_list_hierarchy_end_io_acct(struct bio_list *list, + enum stage_group stage) +{ + u64 time = blk_time_get_ns(); + struct bio *bio; + + bio_list_for_each(bio, list) + __bio_hierarchy_end_io_acct(bio, stage, time); +} + +/* APIs for request based stage io accounting */ +void blk_rq_hierarchy_stats_complete(struct request *rq); +void __rq_hierarchy_start_io_acct(struct request *rq, + struct hierarchy_stage *hstage); +void __rq_hierarchy_end_io_acct(struct request *rq, + struct hierarchy_stage *hstage); + +static inline void rq_hierarchy_start_io_acct(struct request *rq, + enum stage_group stage) +{ + if (!blk_mq_hierarchy_registered(rq->q, stage)) + return; + + __rq_hierarchy_start_io_acct(rq, + queue_to_wrapper(rq->q)->io_hierarchy_stats->hstage[stage]); +} + +static inline void rq_hierarchy_end_io_acct(struct request *rq, + enum stage_group stage) +{ + if 
(!blk_mq_hierarchy_registered(rq->q, stage)) + return; + + __rq_hierarchy_end_io_acct(rq, + queue_to_wrapper(rq->q)->io_hierarchy_stats->hstage[stage]); +} + +static inline void rq_list_hierarchy_start_io_acct(struct list_head *head, + enum stage_group stage) +{ + struct request *rq; + struct hierarchy_stage *hstage; + + if (list_empty(head)) + return; + + rq = list_first_entry(head, struct request, queuelist); + if (!blk_mq_hierarchy_registered(rq->q, stage)) + return; + + hstage = queue_to_wrapper(rq->q)->io_hierarchy_stats->hstage[stage]; + list_for_each_entry(rq, head, queuelist) + __rq_hierarchy_start_io_acct(rq, hstage); +} + +static inline void rq_list_hierarchy_end_io_acct(struct list_head *head, + enum stage_group stage) +{ + struct request *rq; + struct hierarchy_stage *hstage; + + if (list_empty(head)) + return; + + rq = list_first_entry(head, struct request, queuelist); + if (!blk_mq_hierarchy_registered(rq->q, stage)) + return; + + hstage = queue_to_wrapper(rq->q)->io_hierarchy_stats->hstage[stage]; + list_for_each_entry(rq, head, queuelist) + __rq_hierarchy_end_io_acct(rq, hstage); +} + +static inline void blk_rq_hierarchy_stats_init(struct request *rq) +{ + request_to_wrapper(rq)->stage = NR_RQ_STAGE_GROUPS; + request_to_wrapper(rq)->flush_done = false; +} + +static inline void blk_rq_hierarchy_set_flush_done(struct request *rq) +{ + request_to_wrapper(rq)->flush_done = true; +} + +static inline bool blk_rq_hierarchy_is_flush_done(struct request *rq) +{ + return request_to_wrapper(rq)->flush_done; +} + +#else /* CONFIG_BLK_IO_HIERARCHY_STATS */ + +static inline int +blk_io_hierarchy_stats_alloc(struct request_queue *q) +{ + return 0; +} + +static inline void +blk_io_hierarchy_stats_free(struct request_queue *q) +{ +} + +static inline bool +blk_mq_hierarchy_registered(struct request_queue *q, enum stage_group stage) +{ + return false; +} + +static inline void +blk_mq_register_hierarchy(struct request_queue *q, enum stage_group stage) +{ +} + +static 
inline void +blk_mq_unregister_hierarchy(struct request_queue *q, enum stage_group stage) +{ +} + +static inline void +blk_mq_debugfs_register_hierarchy_stats(struct request_queue *q) +{ +} + +static inline void +blk_mq_debugfs_unregister_hierarchy_stats(struct request_queue *q) +{ +} + +static inline void +blk_mq_debugfs_register_hierarchy(struct request_queue *q, + enum stage_group stage) +{ +} + +static inline void +blk_mq_debugfs_unregister_hierarchy(struct request_queue *q, + enum stage_group stage) +{ +} + +static inline void +bio_hierarchy_start_io_acct(struct bio *bio, enum stage_group stage) +{ +} + +static inline void +bio_hierarchy_end_io_acct(struct bio *bio, enum stage_group stage) +{ +} + +static inline void +bio_list_hierarchy_end_io_acct(struct bio_list *list, enum stage_group stage) +{ +} + +static inline void +bio_free_hierarchy_data(struct bio *bio) +{ +} + +static inline void +blk_rq_hierarchy_set_flush_done(struct request *rq) +{ +} + +static inline bool +blk_rq_hierarchy_is_flush_done(struct request *rq) +{ + return false; +} + +static inline void +blk_rq_hierarchy_stats_complete(struct request *rq) +{ +} + +static inline void +rq_hierarchy_start_io_acct(struct request *rq, enum stage_group stage) +{ +} + +static inline void +rq_hierarchy_end_io_acct(struct request *rq, enum stage_group stage) +{ +} + +static inline void +rq_list_hierarchy_start_io_acct(struct list_head *head, enum stage_group stage) +{ +} + +static inline void +rq_list_hierarchy_end_io_acct(struct list_head *head, enum stage_group stage) +{ +} + +static inline void +blk_rq_hierarchy_stats_init(struct request *rq) +{ +} + +#endif /* CONFIG_BLK_IO_HIERARCHY_STATS */ + +#endif /* BLK_IO_HIERARCHY_STATS_H */ diff --git a/block/blk-mq.c b/block/blk-mq.c index 539586b05646..955e80f4d0dc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -36,6 +36,7 @@ #include "blk-stat.h" #include "blk-mq-sched.h" #include "blk-rq-qos.h" +#include "blk-io-hierarchy/stats.h"
static bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); static void blk_mq_poll_stats_start(struct request_queue *q); @@ -369,6 +370,7 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, rq->start_time_ns = blk_time_get_ns(); blk_rq_init_bi_alloc_time(rq, NULL); blk_mq_get_alloc_task(rq, data->bio); + blk_rq_hierarchy_stats_init(rq);
rq->io_start_time_ns = 0; request_to_wrapper(rq)->io_end_time_ns = 0; diff --git a/block/blk-mq.h b/block/blk-mq.h index 6d860f6ddc30..b2a9efb43209 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -44,6 +44,11 @@ struct request_wrapper { #ifdef CONFIG_BLK_BIO_ALLOC_TASK struct pid *pid; #endif +#ifdef CONFIG_BLK_IO_HIERARCHY_STATS + bool flush_done; + enum stage_group stage; + unsigned long hierarchy_time; +#endif } ____cacheline_aligned_in_smp;
static inline struct request_wrapper *request_to_wrapper(void *rq) diff --git a/block/blk.h b/block/blk.h index b7af0eff95b7..9d8a59762843 100644 --- a/block/blk.h +++ b/block/blk.h @@ -55,10 +55,13 @@ struct request_queue_wrapper { int __percpu *last_dispatch_cpu; #endif struct mutex sysfs_dir_lock; +#ifdef CONFIG_BLK_IO_HIERARCHY_STATS + struct blk_io_hierarchy_stats *io_hierarchy_stats; +#endif };
-#define queue_to_wrapper(q) \ - container_of(q, struct request_queue_wrapper, q) +#define queue_to_wrapper(__q) \ + container_of((__q), struct request_queue_wrapper, q)
extern struct kmem_cache *blk_requestq_cachep; extern struct kmem_cache *request_cachep; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 416cf84a0624..0e1334c4a43e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -213,7 +213,11 @@ struct bio { #else KABI_RESERVE(2) #endif +#if defined(CONFIG_BLK_IO_HIERARCHY_STATS) && !defined(__GENKSYMS__) + struct bio_hierarchy_data *hdata; +#else KABI_RESERVE(3) +#endif
/* * We can inline a number of vecs at the end of the bio, to avoid @@ -376,7 +380,16 @@ enum stat_group { STAT_WRITE, STAT_DISCARD,
- NR_STAT_GROUPS + NR_STAT_GROUPS, + STAT_FLUSH = NR_STAT_GROUPS, + NR_NEW_STAT_GROUPS, +}; + +enum stage_group { + STAGE_BIO_RESERVE, + NR_BIO_STAGE_GROUPS, + NR_RQ_STAGE_GROUPS, + NR_STAGE_GROUPS, };
#define bio_op(bio) \