From: Mikulas Patocka mpatocka@redhat.com
mainline inclusion from mainline-5.0-rc1 commit 5b18b5a737600fd20ba2045f320d5926ebbf341a category: bugfix bugzilla: 31388 CVE: NA ---------------------------
We want to convert to per-cpu in_flight counters.
The function part_round_stats needs the in_flight counter every jiffy; summing all the percpu variables every jiffy would be too costly, so the function must be deleted. part_round_stats is used to calculate two counters - time_in_queue and io_ticks.
time_in_queue can be calculated without part_round_stats, by adding the duration of the I/O when the I/O ends (the value is almost as exact as the previously calculated value, except that time for in-progress I/Os is not counted).
io_ticks can be approximated by increasing the value when I/O is started or ended and the jiffies value has changed. If the I/Os take less than a jiffy, the value is as exact as the previously calculated value. If the I/Os take more than a jiffy, io_ticks can drift behind the previously calculated value.
Signed-off-by: Mikulas Patocka mpatocka@redhat.com Signed-off-by: Mike Snitzer snitzer@redhat.com Signed-off-by: Jens Axboe axboe@kernel.dk Conflict: block/bio.c block/blk-core.c block/blk-merge.c block/genhd.c include/linux/genhd.h Signed-off-by: Yufen Yu yuyufen@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- block/bio.c | 23 ++++++++++++++++++++--- block/blk-core.c | 6 ++++-- block/blk-merge.c | 1 - block/genhd.c | 4 ---- block/partition-generic.c | 4 ---- include/linux/genhd.h | 1 + 6 files changed, 25 insertions(+), 14 deletions(-)
diff --git a/block/bio.c b/block/bio.c index 3d7570553..48a8cf5 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1668,13 +1668,28 @@ void bio_check_pages_dirty(struct bio *bio) } EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
+void update_io_ticks(int cpu, struct hd_struct *part, unsigned long now) +{ + unsigned long stamp; +again: + stamp = READ_ONCE(part->stamp); + if (unlikely(stamp != now)) { + if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) + __part_stat_add(cpu, part, io_ticks, 1); + } + if (part->partno) { + part = &part_to_disk(part)->part0; + goto again; + } +} + void generic_start_io_acct(struct request_queue *q, int op, unsigned long sectors, struct hd_struct *part) { const int sgrp = op_stat_group(op); int cpu = part_stat_lock();
- part_round_stats(q, cpu, part); + update_io_ticks(cpu, part, jiffies); part_stat_inc(cpu, part, ios[sgrp]); part_stat_add(cpu, part, sectors[sgrp], sectors); part_inc_in_flight(q, part, op_is_write(op)); @@ -1686,12 +1701,14 @@ void generic_start_io_acct(struct request_queue *q, int op, void generic_end_io_acct(struct request_queue *q, int req_op, struct hd_struct *part, unsigned long start_time) { - unsigned long duration = jiffies - start_time; + unsigned long now = jiffies; + unsigned long duration = now - start_time; const int sgrp = op_stat_group(req_op); int cpu = part_stat_lock();
+ update_io_ticks(cpu, part, now); part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration)); - part_round_stats(q, cpu, part); + part_stat_add(cpu, part, time_in_queue, duration); part_dec_in_flight(q, part, op_is_write(req_op));
part_stat_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index b64dec2..d9e3ee6 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2740,9 +2740,10 @@ void blk_account_io_done(struct request *req, u64 now) cpu = part_stat_lock(); part = req->part;
+ update_io_ticks(cpu, part, jiffies); part_stat_inc(cpu, part, ios[sgrp]); part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns); - part_round_stats(req->q, cpu, part); + part_stat_add(cpu, part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns)); part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part); @@ -2790,11 +2791,12 @@ void blk_account_io_start(struct request *rq, bool new_io) part_stat_inc(cpu, part, merges[rw]); } else { part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); - part_round_stats(rq->q, cpu, part); part_inc_in_flight(rq->q, part, rw); rq->part = part; }
+ update_io_ticks(cpu, part, jiffies); + part_stat_unlock(); }
diff --git a/block/blk-merge.c b/block/blk-merge.c index 7efa8c3..044bff9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -656,7 +656,6 @@ static void blk_account_io_merge(struct request *req) cpu = part_stat_lock(); part = req->part;
- part_round_stats(req->q, cpu, part); part_dec_in_flight(req->q, part, rq_data_dir(req));
hd_struct_put(part); diff --git a/block/genhd.c b/block/genhd.c index ff9d46d..862a2f3 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1342,7 +1342,6 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct hd_struct *hd; char buf[BDEVNAME_SIZE]; unsigned int inflight[2]; - int cpu;
/* if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) @@ -1354,9 +1353,6 @@ static int diskstats_show(struct seq_file *seqf, void *v)
disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { - cpu = part_stat_lock(); - part_round_stats(gp->queue, cpu, hd); - part_stat_unlock(); part_in_flight(gp->queue, hd, inflight); seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " diff --git a/block/partition-generic.c b/block/partition-generic.c index 8ad6dca..d86d794 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -121,11 +121,7 @@ ssize_t part_stat_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q = part_to_disk(p)->queue; unsigned int inflight[2]; - int cpu;
- cpu = part_stat_lock(); - part_round_stats(q, cpu, p); - part_stat_unlock(); part_in_flight(q, p, inflight); return sprintf(buf, "%8lu %8lu %8llu %8u " diff --git a/include/linux/genhd.h b/include/linux/genhd.h index df0d01d..666b23a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -422,6 +422,7 @@ static inline void free_part_info(struct hd_struct *part)
/* block/blk-core.c */ extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); +void update_io_ticks(int cpu, struct hd_struct *part, unsigned long now);
/* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk);
From: Yufen Yu yuyufen@huawei.com
hulk inclusion category: bugfix bugzilla: 31388 CVE: NA ---------------------------
After commit 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting") was introduced, the '%util' value reported by iostat overstates the real utilization. The device may in fact be nearly idle, yet iostat can show '%util' as a large number (e.g. 50%). This can be reproduced with fio:
fio --name=1 --direct=1 --bs=4k --rw=read --filename=/dev/sda \ --thinktime=4ms --runtime=180
We fix this by keeping part_round_stats() in the I/O start path.
fixes: 5b18b5a73760 ("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Yufen Yu yuyufen@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- block/bio.c | 4 ++-- block/blk-core.c | 8 ++------ 2 files changed, 4 insertions(+), 8 deletions(-)
diff --git a/block/bio.c b/block/bio.c index 48a8cf5..94d0f47 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1675,7 +1675,7 @@ void update_io_ticks(int cpu, struct hd_struct *part, unsigned long now) stamp = READ_ONCE(part->stamp); if (unlikely(stamp != now)) { if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) - __part_stat_add(cpu, part, io_ticks, 1); + __part_stat_add(cpu, part, io_ticks, now - stamp); } if (part->partno) { part = &part_to_disk(part)->part0; @@ -1689,7 +1689,7 @@ void generic_start_io_acct(struct request_queue *q, int op, const int sgrp = op_stat_group(op); int cpu = part_stat_lock();
- update_io_ticks(cpu, part, jiffies); + part_round_stats(q, cpu, part); part_stat_inc(cpu, part, ios[sgrp]); part_stat_add(cpu, part, sectors[sgrp], sectors); part_inc_in_flight(q, part, op_is_write(op)); diff --git a/block/blk-core.c b/block/blk-core.c index d9e3ee6..7afe44d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1674,11 +1674,8 @@ static void part_round_stats_single(struct request_queue *q, int cpu, struct hd_struct *part, unsigned long now, unsigned int inflight) { - if (inflight) { - __part_stat_add(cpu, part, time_in_queue, - inflight * (now - part->stamp)); + if (inflight) __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); - } part->stamp = now; }
@@ -2791,12 +2788,11 @@ void blk_account_io_start(struct request *rq, bool new_io) part_stat_inc(cpu, part, merges[rw]); } else { part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); + part_round_stats(rq->q, cpu, part); part_inc_in_flight(rq->q, part, rw); rq->part = part; }
- update_io_ticks(cpu, part, jiffies); - part_stat_unlock(); }