From: Tejun Heo <tj@kernel.org>
mainline inclusion
from mainline-5.4-rc1
commit 6f816b4b746c2241540e537682d30d8e9997d674
category: feature
bugzilla: 38688
CVE: NA
---------------------------
There are currently two start time timestamps - start_time_ns and io_start_time_ns. The former marks the request allocation and the latter the issue-to-device time. The planned io.weight controller needs to measure the total time bios take to execute after they leave rq_qos, including the time spent waiting for a request to become available, which can easily dominate on saturated devices.
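To illustrate the relationship between the three timestamps on a request that had to wait for a tag:

	allocation attempt starts         ->  rq->alloc_time_ns  (new)
	request is actually allocated     ->  rq->start_time_ns
	request is issued to the device   ->  rq->io_start_time_ns

so alloc_time_ns <= start_time_ns <= io_start_time_ns, and start_time_ns - alloc_time_ns is the allocation wait that the new field makes visible.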
This patch adds request->alloc_time_ns which records when the request allocation attempt started. As it isn't used for the usual stats, make it optional behind CONFIG_BLK_RQ_ALLOC_TIME and QUEUE_FLAG_RQ_ALLOC_TIME so that it can be compiled out when there are no users and it's active only on queues which need it even when compiled in.
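As a rough sketch of how a consumer might use this (hypothetical example, not taken from this patch; my_policy_account_wait() is a made-up placeholder, blk_queue_flag_set() is the existing queue-flag helper): the policy selects BLK_RQ_ALLOC_TIME in Kconfig, sets QUEUE_FLAG_RQ_ALLOC_TIME on the queues it manages, and reads the field once a request exists:

	/* hypothetical policy setup: opt the queue in */
	blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, q);

	/* later, e.g. at completion time, charge the allocation wait */
	if (blk_queue_rq_alloc_time(q))
		my_policy_account_wait(rq->start_time_ns - rq->alloc_time_ns);

When CONFIG_BLK_RQ_ALLOC_TIME is not set, blk_queue_rq_alloc_time() is constant false and alloc_time_ns is not present in struct request, so the field and any such branch compile away.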
v2: s/pre_start_time/alloc_time/ and add CONFIG_BLK_RQ_ALLOC_TIME gating as suggested by Jens.
Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflict:
	include/linux/blkdev.h
	block/Kconfig
	block/blk-mq.c
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Hou Tao <houtao1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 block/Kconfig          |  3 +++
 block/blk-mq.c         | 13 +++++++++++--
 include/linux/blkdev.h | 14 +++++++++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)
diff --git a/block/Kconfig b/block/Kconfig
index 1f2469a0123ce..4d42adb3b8d73 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -50,6 +50,9 @@ config LBDAF
 
 	  If unsure, say Y.
 
+config BLK_RQ_ALLOC_TIME
+	bool
+
 config BLK_SCSI_REQUEST
 	bool
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index ee07575b8e67d..64bc561cf99a1 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -275,7 +275,7 @@ bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 EXPORT_SYMBOL(blk_mq_can_queue);
 
 static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
-		unsigned int tag, unsigned int op)
+		unsigned int tag, unsigned int op, u64 alloc_time_ns)
 {
 	struct blk_mq_tags *tags = blk_mq_tags_from_data(data);
 	struct request *rq = tags->static_rqs[tag];
@@ -309,6 +309,9 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
 	RB_CLEAR_NODE(&rq->rb_node);
 	rq->rq_disk = NULL;
 	rq->part = NULL;
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	rq->alloc_time_ns = alloc_time_ns;
+#endif
 	rq->start_time_ns = ktime_get_ns();
 	rq->io_start_time_ns = 0;
 	rq->nr_phys_segments = 0;
@@ -344,8 +347,14 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 	struct request *rq;
 	unsigned int tag;
 	bool put_ctx_on_error = false;
+	u64 alloc_time_ns = 0;
 
 	blk_queue_enter_live(q);
+
+	/* alloc_time includes depth and tag waits */
+	if (blk_queue_rq_alloc_time(q))
+		alloc_time_ns = ktime_get_ns();
+
 	data->q = q;
 	if (likely(!data->ctx)) {
 		data->ctx = blk_mq_get_ctx(q);
@@ -381,7 +390,7 @@ static struct request *blk_mq_get_request(struct request_queue *q,
 		return NULL;
 	}
 
-	rq = blk_mq_rq_ctx_init(data, tag, op);
+	rq = blk_mq_rq_ctx_init(data, tag, op, alloc_time_ns);
 	if (!op_is_flush(op)) {
 		rq->elv.icq = NULL;
 		if (e && e->type->ops.mq.prepare_request) {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c8be98fc06c04..704901e80ee32 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -212,7 +212,11 @@ struct request {
 
 	struct gendisk *rq_disk;
 	struct hd_struct *part;
-	/* Time that I/O was submitted to the kernel. */
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+	/* Time that the first bio started allocating this request. */
+	u64 alloc_time_ns;
+#endif
+	/* Time that this request was allocated for this IO. */
 	u64 start_time_ns;
 	/* Time that I/O was submitted to the device. */
 	u64 io_start_time_ns;
@@ -720,6 +724,7 @@ struct request_queue {
 #define QUEUE_FLAG_REGISTERED  26	/* queue has been registered to a disk */
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27	/* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED    28	/* queue has been quiesced */
+#define QUEUE_FLAG_RQ_ALLOC_TIME 27	/* record rq->alloc_time_ns */
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_SAME_COMP)	|	\
@@ -753,6 +758,13 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
 #define blk_queue_scsi_passthrough(q)	\
 	test_bit(QUEUE_FLAG_SCSI_PASSTHROUGH, &(q)->queue_flags)
 
+#ifdef CONFIG_BLK_RQ_ALLOC_TIME
+#define blk_queue_rq_alloc_time(q)	\
+	test_bit(QUEUE_FLAG_RQ_ALLOC_TIME, &(q)->queue_flags)
+#else
+#define blk_queue_rq_alloc_time(q)	false
+#endif
+
 #define blk_noretry_request(rq) \
 	((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 			     REQ_FAILFAST_DRIVER))