hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/release-management/issues/IB4E8P CVE: NA
--------------------------------
Like blk-throttle, if wbt is enabled, the following new debugfs entries will be created as well.
/sys/kernel/debug/block/sda/blk_io_hierarchy/ |-- wbt | |-- io_dump | |-- stats | `-- threshold
Users can use these entries to analyze how IO behaves in wbt.
Signed-off-by: Yu Kuai yukuai3@huawei.com --- block/blk-io-hierarchy/Kconfig | 11 +++++++++++ block/blk-iolatency.c | 2 +- block/blk-rq-qos.c | 9 ++++++++- block/blk-rq-qos.h | 3 ++- block/blk-wbt.c | 31 ++++++++++++++++++++++++------- include/linux/blk_types.h | 3 +++ 6 files changed, 49 insertions(+), 10 deletions(-)
diff --git a/block/blk-io-hierarchy/Kconfig b/block/blk-io-hierarchy/Kconfig index 2c15b5a7a006..ad1b7abc7610 100644 --- a/block/blk-io-hierarchy/Kconfig +++ b/block/blk-io-hierarchy/Kconfig @@ -37,4 +37,15 @@ config HIERARCHY_THROTTLE
If unsure, say N.
+config HIERARCHY_WBT + bool "Enable hierarchy stats layer blk-wbt" + default n + depends on BLK_WBT + help + Enabling this lets blk hierarchy stats to record additional information + for blk-wbt. Such information can be helpful to debug performance + and problems like io hang. + + If unsure, say N. + endif diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index ebb522788d97..b256043bbed6 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -306,7 +306,7 @@ static void __blkcg_iolatency_throttle(struct rq_qos *rqos, return; }
- rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb); + rq_qos_wait(rqw, iolat, iolat_acquire_inflight, iolat_cleanup_cb, NULL); }
#define SCALE_DOWN_FACTOR 2 diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index dc510f493ba5..6ea4bdf4f802 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -230,6 +230,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, * @private_data: caller provided specific data * @acquire_inflight_cb: inc the rqw->inflight counter if we can * @cleanup_cb: the callback to cleanup in case we race with a waker + * @io_acct_cb: the callback for io accounting * * This provides a uniform place for the rq_qos users to do their throttling. * Since you can end up with a lot of things sleeping at once, this manages the @@ -242,7 +243,7 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, */ void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, - cleanup_cb_t *cleanup_cb) + cleanup_cb_t *cleanup_cb, io_acct_cb_t *io_acct_cb) { struct rq_qos_wait_data data = { .wq = { @@ -260,6 +261,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) return;
+ if (io_acct_cb) + io_acct_cb(private_data, true); + has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { @@ -284,6 +288,9 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data, set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); + + if (io_acct_cb) + io_acct_cb(private_data, false); }
void rq_qos_exit(struct request_queue *q) diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 37245c97ee61..93d1ba692973 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -91,10 +91,11 @@ void rq_qos_del(struct rq_qos *rqos);
typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data); typedef void (cleanup_cb_t)(struct rq_wait *rqw, void *private_data); +typedef void (io_acct_cb_t)(void *private_data, bool start_acct);
void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, - cleanup_cb_t *cleanup_cb); + cleanup_cb_t *cleanup_cb, io_acct_cb_t *io_acct_cb); bool rq_wait_inc_below(struct rq_wait *rq_wait, unsigned int limit); bool rq_depth_scale_up(struct rq_depth *rqd); bool rq_depth_scale_down(struct rq_depth *rqd, bool hard_throttle); diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 4933a7738ebd..6b81f2c47279 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -32,6 +32,7 @@
#ifndef __GENKSYMS__ #include "blk.h" +#include "blk-io-hierarchy/stats.h" #endif
#define CREATE_TRACE_POINTS @@ -564,38 +565,51 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf) }
struct wbt_wait_data { + struct bio *bio; struct rq_wb *rwb; enum wbt_flags wb_acct; - blk_opf_t opf; };
static bool wbt_inflight_cb(struct rq_wait *rqw, void *private_data) { struct wbt_wait_data *data = private_data; - return rq_wait_inc_below(rqw, get_limit(data->rwb, data->opf)); + + return rq_wait_inc_below(rqw, get_limit(data->rwb, data->bio->bi_opf)); }
static void wbt_cleanup_cb(struct rq_wait *rqw, void *private_data) { struct wbt_wait_data *data = private_data; + wbt_rqw_done(data->rwb, rqw, data->wb_acct); }
+static void wbt_io_acct_cb(void *private_data, bool start) +{ + struct wbt_wait_data *data = private_data; + + if (start) + bio_hierarchy_start_io_acct(data->bio, STAGE_WBT); + else + bio_hierarchy_end_io_acct(data->bio, STAGE_WBT); +} + /* * Block if we will exceed our limit, or if we are currently waiting for * the timer to kick off queuing again. */ static void __wbt_wait(struct rq_wb *rwb, enum wbt_flags wb_acct, - blk_opf_t opf) + struct bio *bio) { struct rq_wait *rqw = get_rq_wait(rwb, wb_acct); struct wbt_wait_data data = { .rwb = rwb, .wb_acct = wb_acct, - .opf = opf, + .bio = bio, };
- rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb); + rq_qos_wait(rqw, &data, wbt_inflight_cb, wbt_cleanup_cb, + wbt_io_acct_cb); }
static inline bool wbt_should_throttle(struct bio *bio) @@ -659,7 +673,7 @@ static void wbt_wait(struct rq_qos *rqos, struct bio *bio) return; }
- __wbt_wait(rwb, flags, bio->bi_opf); + __wbt_wait(rwb, flags, bio);
if (!blk_stat_is_active(rwb->cb)) rwb_arm_timer(rwb); @@ -773,8 +787,10 @@ static void wbt_queue_depth_changed(struct rq_qos *rqos) static void wbt_exit(struct rq_qos *rqos) { struct rq_wb *rwb = RQWB(rqos); + struct request_queue *q = rqos->disk->queue;
- blk_stat_remove_callback(rqos->disk->queue, rwb->cb); + blk_mq_unregister_hierarchy(q, STAGE_WBT); + blk_stat_remove_callback(q, rwb->cb); blk_stat_free_callback(rwb->cb); kfree(rwb); } @@ -937,6 +953,7 @@ int wbt_init(struct gendisk *disk) goto err_free;
blk_stat_add_callback(q, rwb->cb); + blk_mq_register_hierarchy(q, STAGE_WBT);
return 0;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8c1d0790923..fe683cfbc157 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -479,6 +479,9 @@ enum stat_group { enum stage_group { #ifdef CONFIG_BLK_DEV_THROTTLING STAGE_THROTTLE, +#endif +#ifdef CONFIG_BLK_WBT + STAGE_WBT, #endif STAGE_RESERVE, NR_BIO_STAGE_GROUPS,