From: Li Ruilin liruilin4@huawei.com
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4LOJ6 CVE: NA
------------------------------
Add a list to track all in-flight prefetch requests. When an IO request arrives, check whether it overlaps any of the in-flight prefetch requests. If it does, block the IO request until the overlapping prefetch request completes.
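Add a per-device sysfs switch, inflight_block_enable (enabled by default), to control whether overlapping IO requests are blocked. When it is disabled, an overlapping IO request is not blocked; it is counted as a prefetch fake hit (cache_prefetch_fake_hits) for performance analysis.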
Signed-off-by: Li Ruilin liruilin4@huawei.com Reviewed-by: Luan Jianhai luanjianhai@huawei.com Reviewed-by: Peng Junyi pengjunyi1@huawei.com Acked-by: Xie Xiuqi xiexiuqi@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com Reviewed-by: Guangxing Deng dengguangxing@huawei.com Reviewed-by: chao song chao.song@huawei.com Reviewed-by: chao song chao.song@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/md/bcache/acache.c | 113 ++++++++++++++++++++++++++++++++++ drivers/md/bcache/acache.h | 10 +++ drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/request.c | 8 +++ drivers/md/bcache/stats.c | 13 ++++ drivers/md/bcache/stats.h | 3 + drivers/md/bcache/super.c | 1 + drivers/md/bcache/sysfs.c | 6 ++ include/trace/events/bcache.h | 11 ++++ 9 files changed, 166 insertions(+)
diff --git a/drivers/md/bcache/acache.c b/drivers/md/bcache/acache.c index ff3e120d9619..a3f5c4f1ba7c 100644 --- a/drivers/md/bcache/acache.c +++ b/drivers/md/bcache/acache.c @@ -31,6 +31,12 @@ int acache_prefetch_workers = 1000; module_param_named(prefetch_workers, acache_prefetch_workers, int, 0444); MODULE_PARM_DESC(prefetch_workers, "num of workers for processing prefetch requests");
+struct inflight_list_head { + struct list_head entry; + spinlock_t io_lock; + bool initialized; +}; + struct prefetch_worker { struct acache_info s; struct work_struct work; @@ -50,6 +56,8 @@ struct acache_device {
struct acache_circ *acache_info_circ;
+ struct inflight_list_head inflight_list; + struct workqueue_struct *wq; struct prefetch_worker *prefetch_workers; struct list_head prefetch_workers_free; @@ -295,6 +303,7 @@ int acache_dev_init(void) int major; struct device *dev;
+ inflight_list_ops.init(); major = alloc_chrdev_region(&adev.devno, 0, ACACHE_NR_DEVS, DEV_NAME); if (major < 0) { pr_err("failed to allocate chrdev region: %d\n", major); @@ -377,6 +386,7 @@ int acache_dev_init(void) fail_class: unregister_chrdev_region(adev.devno, ACACHE_NR_DEVS); fail_allocdev: + inflight_list_ops.exit(); return ret; }
@@ -395,9 +405,112 @@ void acache_dev_exit(void) kfree(adev.mem_regionp); unregister_chrdev_region(adev.devno, ACACHE_NR_DEVS); class_destroy(adev.class); + inflight_list_ops.exit(); kfree(adev.prefetch_workers); }
+static struct search *__inflight_list_lookup_locked(struct search *s) +{ + struct search *iter; + struct bio *bio, *sbio; + + if (!adev.inflight_list.initialized) + return NULL; + sbio = &s->bio.bio; + list_for_each_entry(iter, &adev.inflight_list.entry, list_node) { + bio = &iter->bio.bio; + if (sbio->bi_disk == bio->bi_disk && + sbio->bi_iter.bi_sector < bio_end_sector(bio) && + bio_end_sector(sbio) > bio->bi_iter.bi_sector) { + return iter; + } + } + return NULL; +} + +static void inflight_list_init(void) +{ + INIT_LIST_HEAD(&adev.inflight_list.entry); + spin_lock_init(&adev.inflight_list.io_lock); + adev.inflight_list.initialized = true; +} + +static void inflight_list_exit(void) +{ + if (!list_empty(&adev.inflight_list.entry)) + pr_err("existing with inflight list not empty\n"); +} + +static int inflight_list_insert(struct search *s) +{ + if (!adev.inflight_list.initialized) + return -1; + + init_waitqueue_head(&s->wqh); + spin_lock(&adev.inflight_list.io_lock); + list_add_tail(&s->list_node, &adev.inflight_list.entry); + spin_unlock(&adev.inflight_list.io_lock); + + trace_bcache_inflight_list_insert(s->d, s->orig_bio); + return 0; +} + +static int inflight_list_remove(struct search *s) +{ + if (!adev.inflight_list.initialized) + return -1; + + spin_lock(&adev.inflight_list.io_lock); + list_del_init(&s->list_node); + spin_unlock(&adev.inflight_list.io_lock); + + wake_up_interruptible_all(&s->wqh); + + trace_bcache_inflight_list_remove(s->d, s->orig_bio); + return 0; +} + +static bool inflight_list_wait(struct search *s) +{ + struct search *pfs = NULL; + struct cached_dev *dc; + DEFINE_WAIT(wqe); + + if (!adev.inflight_list.initialized) + return false; + + spin_lock(&adev.inflight_list.io_lock); + pfs = __inflight_list_lookup_locked(s); + if (pfs == NULL) { + spin_unlock(&adev.inflight_list.io_lock); + return false; + } + + dc = container_of(pfs->d, struct cached_dev, disk); + if (!dc->inflight_block_enable) { + spin_unlock(&adev.inflight_list.io_lock); + 
return true; + } + + prepare_to_wait(&pfs->wqh, &wqe, TASK_INTERRUPTIBLE); + + /* unlock here to ensure pfs not changed. */ + spin_unlock(&adev.inflight_list.io_lock); + schedule(); + + finish_wait(&pfs->wqh, &wqe); + + return true; +} + +const struct inflight_queue_ops inflight_list_ops = { + .init = inflight_list_init, + .exit = inflight_list_exit, + .insert = inflight_list_insert, + .remove = inflight_list_remove, + .wait = inflight_list_wait, +}; + struct cached_dev *get_cached_device_by_dev(dev_t dev) { struct cache_set *c, *tc; diff --git a/drivers/md/bcache/acache.h b/drivers/md/bcache/acache.h index dea6e8cb0a05..3c6453d0c4da 100644 --- a/drivers/md/bcache/acache.h +++ b/drivers/md/bcache/acache.h @@ -66,4 +66,14 @@ void acache_dev_exit(void); struct acache_info *fetch_circ_item(struct acache_circ *circ); void save_circ_item(struct acache_info *data);
+struct inflight_queue_ops { + void (*init)(void); + void (*exit)(void); + + int (*insert)(struct search *s); + int (*remove)(struct search *s); + bool (*wait)(struct search *s); +}; +extern const struct inflight_queue_ops inflight_list_ops; + #endif diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 8b10bd5df364..53e07c958924 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -376,6 +376,7 @@ struct cached_dev { unsigned char writeback_percent; unsigned int writeback_delay;
+ unsigned int inflight_block_enable; unsigned int read_bypass;
uint64_t writeback_rate_target; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 2e9ff76b877b..fd381da32464 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -706,6 +706,9 @@ void search_free(struct closure *cl) if (s->iop.bio) bio_put(s->iop.bio);
+ if (s->prefetch) + inflight_list_ops.remove(s); + bio_complete(s); closure_debug_destroy(cl); mempool_free(s, &s->iop.c->search); @@ -974,6 +977,11 @@ void cached_dev_read(struct cached_dev *dc, struct search *s) { struct closure *cl = &s->cl;
+ if (s->prefetch) + inflight_list_ops.insert(s); + else if (inflight_list_ops.wait(s)) + bch_mark_cache_prefetch_fake_hit(s->iop.c, s->d); + closure_call(&s->iop.cl, cache_lookup, NULL, cl); continue_at(cl, cached_dev_read_done_bh, NULL); } diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 503aafe188dc..c7a6c93aa9e9 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -48,6 +48,7 @@ read_attribute(cache_bypass_misses); read_attribute(cache_hit_ratio); read_attribute(cache_readaheads); read_attribute(cache_miss_collisions); +read_attribute(cache_prefetch_fake_hits); read_attribute(bypassed);
SHOW(bch_stats) @@ -66,6 +67,7 @@ SHOW(bch_stats)
var_print(cache_readaheads); var_print(cache_miss_collisions); + var_print(cache_prefetch_fake_hits); sysfs_hprint(bypassed, var(sectors_bypassed) << 9); #undef var return 0; @@ -88,6 +90,7 @@ static struct attribute *bch_stats_files[] = { &sysfs_cache_hit_ratio, &sysfs_cache_readaheads, &sysfs_cache_miss_collisions, + &sysfs_cache_prefetch_fake_hits, &sysfs_bypassed, NULL }; @@ -147,6 +150,7 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at) scale_stat(&stats->cache_bypass_misses); scale_stat(&stats->cache_readaheads); scale_stat(&stats->cache_miss_collisions); + scale_stat(&stats->cache_prefetch_fake_hits); scale_stat(&stats->sectors_bypassed); } } @@ -170,6 +174,7 @@ static void scale_accounting(struct timer_list *t) move_stat(cache_bypass_misses); move_stat(cache_readaheads); move_stat(cache_miss_collisions); + move_stat(cache_prefetch_fake_hits); move_stat(sectors_bypassed);
scale_stats(&acc->total, 0); @@ -225,6 +230,14 @@ void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d) atomic_inc(&c->accounting.collector.cache_miss_collisions); }
+void bch_mark_cache_prefetch_fake_hit(struct cache_set *c, struct bcache_device *d) +{ + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + + atomic_inc(&dc->accounting.collector.cache_prefetch_fake_hits); + atomic_inc(&c->accounting.collector.cache_prefetch_fake_hits); +} + void bch_mark_sectors_bypassed(struct cache_set *c, struct cached_dev *dc, int sectors) { diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h index abfaabf7e7fc..302b76e982b4 100644 --- a/drivers/md/bcache/stats.h +++ b/drivers/md/bcache/stats.h @@ -9,6 +9,7 @@ struct cache_stat_collector { atomic_t cache_bypass_misses; atomic_t cache_readaheads; atomic_t cache_miss_collisions; + atomic_t cache_prefetch_fake_hits; atomic_t sectors_bypassed; };
@@ -21,6 +22,7 @@ struct cache_stats { unsigned long cache_bypass_misses; unsigned long cache_readaheads; unsigned long cache_miss_collisions; + unsigned long cache_prefetch_fake_hits; unsigned long sectors_bypassed;
unsigned int rescale; @@ -58,6 +60,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d); void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d); +void bch_mark_cache_prefetch_fake_hit(struct cache_set *c, struct bcache_device *d); void bch_mark_sectors_bypassed(struct cache_set *c, struct cached_dev *dc, int sectors); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e96174ca10d1..38afb2a58f14 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1439,6 +1439,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
dc->sequential_cutoff = 4 << 20; + dc->inflight_block_enable = 1; dc->read_bypass = 0;
for (io = dc->io; io < dc->io + RECENT_IO; io++) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 39c1e7a544e5..515539520428 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -109,6 +109,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff); rw_attribute(read_bypass); +rw_attribute(inflight_block_enable); rw_attribute(data_csum); rw_attribute(cache_mode); rw_attribute(readahead_cache_policy); @@ -253,6 +254,7 @@ SHOW(__bch_cached_dev) var_printf(partial_stripes_expensive, "%u");
var_hprint(sequential_cutoff); + var_print(inflight_block_enable); var_print(read_bypass); var_hprint(readahead);
@@ -351,6 +353,9 @@ STORE(__cached_dev) sysfs_strtoul_clamp(read_bypass, dc->read_bypass, 0, 1); + sysfs_strtoul_clamp(inflight_block_enable, + dc->inflight_block_enable, + 0, 1); d_strtoi_h(readahead);
if (attr == &sysfs_clear_stats) @@ -517,6 +522,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_partial_stripes_expensive, &sysfs_sequential_cutoff, &sysfs_read_bypass, + &sysfs_inflight_block_enable, &sysfs_clear_stats, &sysfs_running, &sysfs_state, diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index f7be8c6e7cff..38986cdf52cc 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -81,6 +81,17 @@ DEFINE_EVENT(bcache_request, bcache_prefetch_request, TP_ARGS(d, bio) );
+/* acache.c */ +DEFINE_EVENT(bcache_request, bcache_inflight_list_insert, + TP_PROTO(struct bcache_device *d, struct bio *bio), + TP_ARGS(d, bio) +); + +DEFINE_EVENT(bcache_request, bcache_inflight_list_remove, + TP_PROTO(struct bcache_device *d, struct bio *bio), + TP_ARGS(d, bio) +); + /* request.c */
DEFINE_EVENT(bcache_request, bcache_request_start,