hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAGRKP
CVE: NA
--------------------------------
If blk-throtl is enabled, the following new debugfs entries will be created as well:
/sys/kernel/debug/block/sda/blk_io_hierarchy/
|-- throtl
|   |-- io_dump
|   |-- stats
|   `-- threshold
Users can use them to analyze how IO behaves in blk-throtl.
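For example, a sketch of how the entries might be inspected (assuming
debugfs is mounted at /sys/kernel/debug and 'sda' is the throttled
disk; the exact output format of 'stats' and 'io_dump' depends on the
blk-io-hierarchy debugfs implementation):

  $ ls /sys/kernel/debug/block/sda/blk_io_hierarchy/throtl
  io_dump  stats  threshold
  $ cat /sys/kernel/debug/block/sda/blk_io_hierarchy/throtl/stats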
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-io-hierarchy/Kconfig   | 11 +++++++++++
 block/blk-io-hierarchy/debugfs.c |  3 +++
 block/blk-mq.c                   | 11 +++++++++--
 block/blk-throttle.c             | 23 +++++++++++++++++++++++
 include/linux/blk_types.h       |  3 +++
 5 files changed, 49 insertions(+), 2 deletions(-)
diff --git a/block/blk-io-hierarchy/Kconfig b/block/blk-io-hierarchy/Kconfig
index f35f174835ef..2c15b5a7a006 100644
--- a/block/blk-io-hierarchy/Kconfig
+++ b/block/blk-io-hierarchy/Kconfig
@@ -26,4 +26,15 @@ config HIERARCHY_IO_DUMP
 
 	  If unsure, say N.
 
+config HIERARCHY_THROTTLE
+	bool "Enable hierarchy stats layer blk-throttle"
+	default n
+	depends on BLK_DEV_THROTTLING=y
+	help
+	  Enabling this lets blk hierarchy stats record additional information
+	  for blk-throttle. Such information can be helpful to debug
+	  performance issues and problems like io hang.
+
+	  If unsure, say N.
+
 endif
diff --git a/block/blk-io-hierarchy/debugfs.c b/block/blk-io-hierarchy/debugfs.c
index 1d35bf4e19c5..5b61646553ae 100644
--- a/block/blk-io-hierarchy/debugfs.c
+++ b/block/blk-io-hierarchy/debugfs.c
@@ -19,6 +19,9 @@
 #include "iodump.h"
 
 static const char *stage_name[NR_STAGE_GROUPS] = {
+#ifdef CONFIG_HIERARCHY_THROTTLE
+	[STAGE_THROTTLE] = "throtl",
+#endif
 };
 
 const char *hierarchy_stage_name(enum stage_group stage)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 955e80f4d0dc..8f23109c797c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2772,6 +2772,8 @@ void blk_mq_release(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx, *next;
 	int i;
 
+	blk_io_hierarchy_stats_free(q);
+
 	queue_for_each_hw_ctx(q, hctx, i)
 		WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list));
 
@@ -2909,14 +2911,17 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	/* mark the queue as mq asap */
 	q->mq_ops = set->ops;
 
+	if (blk_io_hierarchy_stats_alloc(q))
+		goto err_exit;
+
 	q->poll_cb = blk_stat_alloc_callback(blk_mq_poll_stats_fn,
 					     blk_mq_poll_stats_bkt,
 					     BLK_MQ_POLL_STATS_BKTS, q);
 	if (!q->poll_cb)
-		goto err_exit;
+		goto err_hierarchy_exit;
 
 	if (blk_mq_alloc_ctxs(q))
-		goto err_exit;
+		goto err_hierarchy_exit;
 
 	/* init q->mq_kobj and sw queues' kobjects */
 	blk_mq_sysfs_init(q);
@@ -2986,6 +2991,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	q->nr_hw_queues = 0;
 err_sys_init:
 	blk_mq_sysfs_deinit(q);
+err_hierarchy_exit:
+	blk_io_hierarchy_stats_free(q);
 err_exit:
 	q->mq_ops = NULL;
 	return ERR_PTR(-ENOMEM);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0795935574d3..a1867a2f4f18 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -14,6 +14,7 @@
 #include <linux/sched/signal.h>
 #include <linux/delay.h>
 #include "blk.h"
+#include "blk-io-hierarchy/stats.h"
 
 /* Max dispatch from a group in 1 round */
 static int throtl_grp_quantum = 8;
@@ -1350,6 +1351,8 @@ static void blk_throtl_dispatch_work_fn(struct work_struct *work)
 			bio_list_add(&bio_list_on_stack, bio);
 	spin_unlock_irq(q->queue_lock);
 
+	bio_list_hierarchy_end_io_acct(&bio_list_on_stack, STAGE_THROTTLE);
+
 	if (!bio_list_empty(&bio_list_on_stack)) {
 		blk_start_plug(&plug);
 		while((bio = bio_list_pop(&bio_list_on_stack)))
@@ -2333,6 +2336,20 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
 	tg->last_low_overflow_time[rw] = jiffies;
 
+	/*
+	 * This is the slow path now, and bio_hierarchy_start_io_acct() might
+	 * spend some time allocating memory. However, it's safe because 'tg'
+	 * is pinned by this bio, and the IO charge is still accurate because
+	 * the slice was already started in tg_may_dispatch().
+	 */
+	spin_unlock_irq(q->queue_lock);
+	rcu_read_unlock();
+
+	bio_hierarchy_start_io_acct(bio, STAGE_THROTTLE);
+
+	rcu_read_lock();
+	spin_lock_irq(q->queue_lock);
+
 	td->nr_queued[rw]++;
 	throtl_add_bio_tg(bio, qn, tg);
 	throttled = true;
@@ -2505,6 +2522,8 @@ void blk_throtl_drain(struct request_queue *q)
 			bio_list_add(&bio_list_on_stack, bio);
 	spin_unlock_irq(q->queue_lock);
 
+	bio_list_hierarchy_end_io_acct(&bio_list_on_stack, STAGE_THROTTLE);
+
 	if (!bio_list_empty(&bio_list_on_stack))
 		while ((bio = bio_list_pop(&bio_list_on_stack)))
 			generic_make_request(bio);
@@ -2561,6 +2580,8 @@ void blk_throtl_exit(struct request_queue *q)
 	del_timer_sync(&q->td->service_queue.pending_timer);
 	throtl_shutdown_wq(q);
 	blkcg_deactivate_policy(q, &blkcg_policy_throtl);
+	blk_mq_unregister_hierarchy(q, STAGE_THROTTLE);
+
 	free_percpu(q->td->latency_buckets[READ]);
 	free_percpu(q->td->latency_buckets[WRITE]);
 	kfree(q->td);
@@ -2593,6 +2614,8 @@ void blk_throtl_register_queue(struct request_queue *q)
 	td->track_bio_latency = !queue_is_rq_based(q);
 	if (!td->track_bio_latency)
 		blk_stat_enable_accounting(q);
+
+	blk_mq_register_hierarchy(q, STAGE_THROTTLE);
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0e1334c4a43e..ea5564ac6f11 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -386,6 +386,9 @@ enum stat_group {
 };
 
 enum stage_group {
+#ifdef CONFIG_BLK_DEV_THROTTLING
+	STAGE_THROTTLE,
+#endif
 	STAGE_BIO_RESERVE,
 	NR_BIO_STAGE_GROUPS,
 	NR_RQ_STAGE_GROUPS,