From: Li Ruilin <liruilin4@huawei.com>
euleros inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=26 CVE: NA
------------------------------
Add a list to track all in-flight prefetch requests. When an IO request arrives, check whether it overlaps with any of the in-flight prefetch requests. If it does, block the IO request until the overlapping prefetch request has finished.
Add a switch (the per-device sysfs attribute inflight_block_enable, enabled by default) to control whether overlapping IO requests are blocked. If not enabled, count the overlapped IO request as a fake hit for performance analysis.
Signed-off-by: Li Ruilin liruilin4@huawei.com Reviewed-by: Luan Jianhai luanjianhai@huawei.com Reviewed-by: Peng Junyi pengjunyi1@huawei.com Acked-by: Xie Xiuqi xiexiuqi@huawei.com Signed-off-by: Cheng Jian cj.chengjian@huawei.com --- drivers/md/bcache/acache.c | 113 ++++++++++++++++++++++++++++++++++ drivers/md/bcache/acache.h | 10 +++ drivers/md/bcache/bcache.h | 1 + drivers/md/bcache/request.c | 8 +++ drivers/md/bcache/stats.c | 13 ++++ drivers/md/bcache/stats.h | 3 + drivers/md/bcache/super.c | 1 + drivers/md/bcache/sysfs.c | 6 ++ include/trace/events/bcache.h | 11 ++++ 9 files changed, 166 insertions(+)
diff --git a/drivers/md/bcache/acache.c b/drivers/md/bcache/acache.c index 1f4b71370dee..e87c53d4d609 100644 --- a/drivers/md/bcache/acache.c +++ b/drivers/md/bcache/acache.c @@ -31,6 +31,12 @@ int acache_prefetch_workers = 1000; module_param_named(prefetch_workers, acache_prefetch_workers, int, 0444); MODULE_PARM_DESC(prefetch_workers, "num of workers for processing prefetch requests");
+struct inflight_list_head { + struct list_head entry; + spinlock_t io_lock; + bool initialized; +}; + struct prefetch_worker { struct acache_info s; struct work_struct work; @@ -50,6 +56,8 @@ struct acache_device {
struct acache_circ *acache_info_circ;
+ struct inflight_list_head inflight_list; + struct workqueue_struct *wq; struct prefetch_worker *prefetch_workers; struct list_head prefetch_workers_free; @@ -295,6 +303,7 @@ int acache_dev_init(void) int major; struct device *dev;
+ inflight_list_ops.init(); major = alloc_chrdev_region(&adev.devno, 0, ACACHE_NR_DEVS, DEV_NAME); if (major < 0) { pr_err("failed to allocate chrdev region: %d", major); @@ -377,6 +386,7 @@ int acache_dev_init(void) fail_class: unregister_chrdev_region(adev.devno, ACACHE_NR_DEVS); fail_allocdev: + inflight_list_ops.exit(); return ret; }
@@ -395,9 +405,112 @@ void acache_dev_exit(void) kfree(adev.mem_regionp); unregister_chrdev_region(adev.devno, ACACHE_NR_DEVS); class_destroy(adev.class); + inflight_list_ops.exit(); kfree(adev.prefetch_workers); }
+static struct search *__inflight_list_lookup_locked(struct search *s) +{ + struct search *iter; + struct bio *bio, *sbio; + + if (!adev.inflight_list.initialized) + return NULL; + sbio = &s->bio.bio; + list_for_each_entry(iter, &adev.inflight_list.entry, list_node) { + bio = &iter->bio.bio; + if (sbio->bi_disk == bio->bi_disk && + sbio->bi_iter.bi_sector < bio_end_sector(bio) && + bio_end_sector(sbio) > bio->bi_iter.bi_sector) { + return iter; + } + } + return NULL; +} + +static void inflight_list_init(void) +{ + INIT_LIST_HEAD(&adev.inflight_list.entry); + spin_lock_init(&adev.inflight_list.io_lock); + adev.inflight_list.initialized = true; +} + +static void inflight_list_exit(void) +{ + if (!list_empty(&adev.inflight_list.entry)) + pr_err("existing with inflight list not empty"); +} + +static int inflight_list_insert(struct search *s) +{ + if (!adev.inflight_list.initialized) + return -1; + + init_waitqueue_head(&s->wqh); + spin_lock(&adev.inflight_list.io_lock); + list_add_tail(&s->list_node, &adev.inflight_list.entry); + spin_unlock(&adev.inflight_list.io_lock); + + trace_bcache_inflight_list_insert(s->d, s->orig_bio); + return 0; +} + +static int inflight_list_remove(struct search *s) +{ + if (!adev.inflight_list.initialized) + return -1; + + spin_lock(&adev.inflight_list.io_lock); + list_del_init(&s->list_node); + spin_unlock(&adev.inflight_list.io_lock); + + wake_up_interruptible_all(&s->wqh); + + trace_bcache_inflight_list_remove(s->d, s->orig_bio); + return 0; +} + +static bool inflight_list_wait(struct search *s) +{ + struct search *pfs = NULL; + struct cached_dev *dc; + DEFINE_WAIT(wqe); + + if (!adev.inflight_list.initialized) + return false; + + spin_lock(&adev.inflight_list.io_lock); + pfs = __inflight_list_lookup_locked(s); + if (pfs == NULL) { + spin_unlock(&adev.inflight_list.io_lock); + return false; + } + + dc = container_of(pfs->d, struct cached_dev, disk); + if (!dc->inflight_block_enable) { + spin_unlock(&adev.inflight_list.io_lock); + return 
true; + } + + prepare_to_wait(&pfs->wqh, &wqe, TASK_INTERRUPTIBLE); + + /* unlock here to ensure pfs not changed. */ + spin_unlock(&adev.inflight_list.io_lock); + schedule(); + + finish_wait(&pfs->wqh, &wqe); + + return true; +} + +const struct inflight_queue_ops inflight_list_ops = { + .init = inflight_list_init, + .exit = inflight_list_exit, + .insert = inflight_list_insert, + .remove = inflight_list_remove, + .wait = inflight_list_wait, +}; + struct cached_dev *get_cached_device_by_dev(dev_t dev) { struct cache_set *c, *tc; diff --git a/drivers/md/bcache/acache.h b/drivers/md/bcache/acache.h index dea6e8cb0a05..3c6453d0c4da 100644 --- a/drivers/md/bcache/acache.h +++ b/drivers/md/bcache/acache.h @@ -66,4 +66,14 @@ void acache_dev_exit(void); struct acache_info *fetch_circ_item(struct acache_circ *circ); void save_circ_item(struct acache_info *data);
+struct inflight_queue_ops { + void (*init)(void); + void (*exit)(void); + + int (*insert)(struct search *s); + int (*remove)(struct search *s); + bool (*wait)(struct search *s); +}; +extern const struct inflight_queue_ops inflight_list_ops; + #endif diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index f2bb640b740f..3340f5911711 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -379,6 +379,7 @@ struct cached_dev { unsigned char writeback_percent; unsigned int writeback_delay;
+ unsigned int inflight_block_enable; unsigned int read_bypass;
uint64_t writeback_rate_target; diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 800a7ba00fbe..7eff3c6cf0f1 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -706,6 +706,9 @@ void search_free(struct closure *cl) if (s->iop.bio) bio_put(s->iop.bio);
+ if (s->prefetch) + inflight_list_ops.remove(s); + bio_complete(s); closure_debug_destroy(cl); mempool_free(s, &s->iop.c->search); @@ -974,6 +977,11 @@ void cached_dev_read(struct cached_dev *dc, struct search *s) { struct closure *cl = &s->cl;
+ if (s->prefetch) + inflight_list_ops.insert(s); + else if (inflight_list_ops.wait(s)) + bch_mark_cache_prefetch_fake_hit(s->iop.c, s->d); + closure_call(&s->iop.cl, cache_lookup, NULL, cl); continue_at(cl, cached_dev_read_done_bh, NULL); } diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c index 503aafe188dc..c7a6c93aa9e9 100644 --- a/drivers/md/bcache/stats.c +++ b/drivers/md/bcache/stats.c @@ -48,6 +48,7 @@ read_attribute(cache_bypass_misses); read_attribute(cache_hit_ratio); read_attribute(cache_readaheads); read_attribute(cache_miss_collisions); +read_attribute(cache_prefetch_fake_hits); read_attribute(bypassed);
SHOW(bch_stats) @@ -66,6 +67,7 @@ SHOW(bch_stats)
var_print(cache_readaheads); var_print(cache_miss_collisions); + var_print(cache_prefetch_fake_hits); sysfs_hprint(bypassed, var(sectors_bypassed) << 9); #undef var return 0; @@ -88,6 +90,7 @@ static struct attribute *bch_stats_files[] = { &sysfs_cache_hit_ratio, &sysfs_cache_readaheads, &sysfs_cache_miss_collisions, + &sysfs_cache_prefetch_fake_hits, &sysfs_bypassed, NULL }; @@ -147,6 +150,7 @@ static void scale_stats(struct cache_stats *stats, unsigned long rescale_at) scale_stat(&stats->cache_bypass_misses); scale_stat(&stats->cache_readaheads); scale_stat(&stats->cache_miss_collisions); + scale_stat(&stats->cache_prefetch_fake_hits); scale_stat(&stats->sectors_bypassed); } } @@ -170,6 +174,7 @@ static void scale_accounting(struct timer_list *t) move_stat(cache_bypass_misses); move_stat(cache_readaheads); move_stat(cache_miss_collisions); + move_stat(cache_prefetch_fake_hits); move_stat(sectors_bypassed);
scale_stats(&acc->total, 0); @@ -225,6 +230,14 @@ void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d) atomic_inc(&c->accounting.collector.cache_miss_collisions); }
+void bch_mark_cache_prefetch_fake_hit(struct cache_set *c, struct bcache_device *d) +{ + struct cached_dev *dc = container_of(d, struct cached_dev, disk); + + atomic_inc(&dc->accounting.collector.cache_prefetch_fake_hits); + atomic_inc(&c->accounting.collector.cache_prefetch_fake_hits); +} + void bch_mark_sectors_bypassed(struct cache_set *c, struct cached_dev *dc, int sectors) { diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h index abfaabf7e7fc..302b76e982b4 100644 --- a/drivers/md/bcache/stats.h +++ b/drivers/md/bcache/stats.h @@ -9,6 +9,7 @@ struct cache_stat_collector { atomic_t cache_bypass_misses; atomic_t cache_readaheads; atomic_t cache_miss_collisions; + atomic_t cache_prefetch_fake_hits; atomic_t sectors_bypassed; };
@@ -21,6 +22,7 @@ struct cache_stats { unsigned long cache_bypass_misses; unsigned long cache_readaheads; unsigned long cache_miss_collisions; + unsigned long cache_prefetch_fake_hits; unsigned long sectors_bypassed;
unsigned int rescale; @@ -58,6 +60,7 @@ void bch_mark_cache_accounting(struct cache_set *c, struct bcache_device *d, void bch_mark_cache_readahead(struct cache_set *c, struct bcache_device *d); void bch_mark_cache_miss_collision(struct cache_set *c, struct bcache_device *d); +void bch_mark_cache_prefetch_fake_hit(struct cache_set *c, struct bcache_device *d); void bch_mark_sectors_bypassed(struct cache_set *c, struct cached_dev *dc, int sectors); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 169e6ad4f16a..754e88895738 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1321,6 +1321,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size) bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
dc->sequential_cutoff = 4 << 20; + dc->inflight_block_enable = 1; dc->read_bypass = 0;
for (io = dc->io; io < dc->io + RECENT_IO; io++) { diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 4adc22b11287..e23c42622939 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -105,6 +105,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff); rw_attribute(read_bypass); +rw_attribute(inflight_block_enable); rw_attribute(data_csum); rw_attribute(cache_mode); rw_attribute(readahead_cache_policy); @@ -249,6 +250,7 @@ SHOW(__bch_cached_dev) var_printf(partial_stripes_expensive, "%u");
var_hprint(sequential_cutoff); + var_print(inflight_block_enable); var_print(read_bypass); var_hprint(readahead);
@@ -347,6 +349,9 @@ STORE(__cached_dev) sysfs_strtoul_clamp(read_bypass, dc->read_bypass, 0, 1); + sysfs_strtoul_clamp(inflight_block_enable, + dc->inflight_block_enable, + 0, 1); d_strtoi_h(readahead);
if (attr == &sysfs_clear_stats) @@ -513,6 +518,7 @@ static struct attribute *bch_cached_dev_files[] = { &sysfs_partial_stripes_expensive, &sysfs_sequential_cutoff, &sysfs_read_bypass, + &sysfs_inflight_block_enable, &sysfs_clear_stats, &sysfs_running, &sysfs_state, diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h index cb15af32291e..82283c23822a 100644 --- a/include/trace/events/bcache.h +++ b/include/trace/events/bcache.h @@ -81,6 +81,17 @@ DEFINE_EVENT(bcache_request, bcache_prefetch_request, TP_ARGS(d, bio) );
+/* interface.c */ +DEFINE_EVENT(bcache_request, bcache_inflight_list_insert, + TP_PROTO(struct bcache_device *d, struct bio *bio), + TP_ARGS(d, bio) +); + +DEFINE_EVENT(bcache_request, bcache_inflight_list_remove, + TP_PROTO(struct bcache_device *d, struct bio *bio), + TP_ARGS(d, bio) +); + /* request.c */
DEFINE_EVENT(bcache_request, bcache_request_start,