From: Xu Wei xuwei56@huawei.com
euleros inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=327 CVE: NA
Bcache will move all data, including clean and dirty data, in a bucket when gc is running. This causes large write amplification, which may reduce the cache device's lifetime. This patch provides a switch for gc to move only dirty data, which can reduce write amplification.
Signed-off-by: qinghaixiang xuweiqhx@163.com Signed-off-by: Xu Wei xuwei56@huawei.com Acked-by: Xie XiuQi xiexiuqi@huawei.com Reviewed-by: Li Ruilin liruilin4@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- drivers/md/bcache/alloc.c | 1 + drivers/md/bcache/bcache.h | 4 ++- drivers/md/bcache/btree.c | 13 +++++--- drivers/md/bcache/btree.h | 2 +- drivers/md/bcache/movinggc.c | 57 +++++++++++++++++++++++++----------- drivers/md/bcache/sysfs.c | 4 +++ 6 files changed, 58 insertions(+), 23 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c index 46794cac167e7..a6ce0636f3237 100644 --- a/drivers/md/bcache/alloc.c +++ b/drivers/md/bcache/alloc.c @@ -471,6 +471,7 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b) { SET_GC_MARK(b, 0); SET_GC_SECTORS_USED(b, 0); + SET_GC_DIRTY_SECTORS(b, 0);
if (ca->set->avail_nbuckets < ca->set->nbuckets) { ca->set->avail_nbuckets++; diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 70fbde8ca70c9..76d5026c924a4 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -202,7 +202,7 @@ struct bucket { uint16_t prio; uint8_t gen; uint8_t last_gc; /* Most out of date gen in the btree */ - uint16_t gc_mark; /* Bitfield used by GC. See below for field */ + uint32_t gc_mark; /* Bitfield used by GC. See below for field */ };
/* @@ -218,6 +218,7 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); #define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE)) BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE); BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1); +BITMASK(GC_DIRTY_SECTORS, struct bucket, gc_mark, 16, GC_SECTORS_USED_SIZE);
#include "journal.h" #include "stats.h" @@ -736,6 +737,7 @@ struct cache_set { unsigned int gc_always_rewrite:1; unsigned int shrinker_disabled:1; unsigned int copy_gc_enabled:1; + unsigned int gc_only_dirty_data:1;
#define BUCKET_HASH_BITS 12 struct hlist_head bucket_hash[1 << BUCKET_HASH_BITS]; diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 313a2f76213fd..f7c76efc97cdc 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -1257,12 +1257,16 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, c, "inconsistent ptrs: mark = %llu, level = %i", GC_MARK(g), level);
- if (level) + if (level) { SET_GC_MARK(g, GC_MARK_METADATA); - else if (KEY_DIRTY(k)) + } else if (KEY_DIRTY(k)) { SET_GC_MARK(g, GC_MARK_DIRTY); - else if (!GC_MARK(g)) + SET_GC_DIRTY_SECTORS(g, min_t(unsigned int, + GC_DIRTY_SECTORS(g) + KEY_SIZE(k), + MAX_GC_SECTORS_USED)); + } else if (!GC_MARK(g)) { SET_GC_MARK(g, GC_MARK_RECLAIMABLE); + }
/* guard against overflow */ SET_GC_SECTORS_USED(g, min_t(unsigned int, @@ -1746,6 +1750,7 @@ static void btree_gc_start(struct cache_set *c) if (!atomic_read(&b->pin)) { SET_GC_MARK(b, 0); SET_GC_SECTORS_USED(b, 0); + SET_GC_DIRTY_SECTORS(b, 0); } }
@@ -1860,7 +1865,7 @@ static void bch_btree_gc(struct cache_set *c)
trace_bcache_gc_end(c);
- bch_moving_gc(c); + bch_moving_gc(c, c->gc_only_dirty_data); }
static bool gc_should_run(struct cache_set *c) diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 7ddadcc485ea6..8bcca2beca986 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -263,7 +263,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
int bch_gc_thread_start(struct cache_set *c); void bch_initial_gc_finish(struct cache_set *c); -void bch_moving_gc(struct cache_set *c); +void bch_moving_gc(struct cache_set *c, bool only_move_dirty); int bch_btree_check(struct cache_set *c); void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k);
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 7891fb512736d..749422b927394 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -123,7 +123,7 @@ static void read_moving_submit(struct closure *cl) continue_at(cl, write_moving, io->op.wq); }
-static void read_moving(struct cache_set *c) +static void read_moving(struct cache_set *c, bool only_move_dirty) { struct keybuf_key *w; struct moving_io *io; @@ -140,7 +140,8 @@ static void read_moving(struct cache_set *c) if (!w) break;
- if (ptr_stale(c, &w->key, 0)) { + if (ptr_stale(c, &w->key, 0) || + (only_move_dirty && (!KEY_DIRTY(&w->key)))) { bch_keybuf_del(&c->moving_gc_keys, w); continue; } @@ -187,22 +188,43 @@ static bool bucket_cmp(struct bucket *l, struct bucket *r) return GC_SECTORS_USED(l) < GC_SECTORS_USED(r); }
-static unsigned int bucket_heap_top(struct cache *ca) +static bool bucket_cmp_dirty(struct bucket *l, struct bucket *r) +{ + return GC_DIRTY_SECTORS(l) < GC_DIRTY_SECTORS(r); +} + +static unsigned int bucket_heap_top(struct cache *ca, bool only_dirty) { struct bucket *b; + if (only_dirty) + return (b = heap_peek(&ca->heap)) ? GC_DIRTY_SECTORS(b) : 0; + else + return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; +}
- return (b = heap_peek(&ca->heap)) ? GC_SECTORS_USED(b) : 0; +static unsigned bucket_sectors(struct bucket *b, bool only_dirty) +{ + if (only_dirty) + return GC_DIRTY_SECTORS(b); + else + return GC_SECTORS_USED(b); }
-void bch_moving_gc(struct cache_set *c) +void bch_moving_gc(struct cache_set *c, bool only_move_dirty) { struct cache *ca; struct bucket *b; unsigned int i; + bool (*cmp)(struct bucket*, struct bucket*);
if (!c->copy_gc_enabled) return;
+ if (only_move_dirty) + cmp = &bucket_cmp_dirty; + else + cmp = &bucket_cmp; + mutex_lock(&c->bucket_lock);
for_each_cache(ca, c, i) { @@ -214,29 +236,30 @@ void bch_moving_gc(struct cache_set *c)
for_each_bucket(b, ca) { if (GC_MARK(b) == GC_MARK_METADATA || - !GC_SECTORS_USED(b) || - GC_SECTORS_USED(b) == ca->sb.bucket_size || + !bucket_sectors(b, only_move_dirty) || + ((!only_move_dirty) && + (GC_SECTORS_USED(b) == ca->sb.bucket_size)) || atomic_read(&b->pin)) continue;
if (!heap_full(&ca->heap)) { - sectors_to_move += GC_SECTORS_USED(b); - heap_add(&ca->heap, b, bucket_cmp); - } else if (bucket_cmp(b, heap_peek(&ca->heap))) { - sectors_to_move -= bucket_heap_top(ca); - sectors_to_move += GC_SECTORS_USED(b); + sectors_to_move += bucket_sectors(b, only_move_dirty); + heap_add(&ca->heap, b, (*cmp)); + } else if ((*cmp)(b, heap_peek(&ca->heap))) { + sectors_to_move -= bucket_heap_top(ca, only_move_dirty); + sectors_to_move += bucket_sectors(b, only_move_dirty);
ca->heap.data[0] = b; - heap_sift(&ca->heap, 0, bucket_cmp); + heap_sift(&ca->heap, 0, (*cmp)); } }
while (sectors_to_move > reserve_sectors) { - heap_pop(&ca->heap, b, bucket_cmp); - sectors_to_move -= GC_SECTORS_USED(b); + heap_pop(&ca->heap, b, (*cmp)); + sectors_to_move -= bucket_sectors(b, only_move_dirty); }
- while (heap_pop(&ca->heap, b, bucket_cmp)) + while (heap_pop(&ca->heap, b, (*cmp))) SET_GC_MOVE(b, 1); }
@@ -244,7 +267,7 @@ void bch_moving_gc(struct cache_set *c)
c->moving_gc_keys.last_scanned = ZERO_KEY;
- read_moving(c); + read_moving(c, only_move_dirty); }
void bch_moving_init_cache_set(struct cache_set *c) diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 4c693ac29b0e0..178a66455481e 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -153,6 +153,7 @@ rw_attribute(expensive_debug_checks); rw_attribute(cache_replacement_policy); rw_attribute(btree_shrinker_disabled); rw_attribute(copy_gc_enabled); +rw_attribute(gc_only_dirty_data); rw_attribute(size);
static ssize_t bch_snprint_string_list(char *buf, @@ -770,6 +771,7 @@ SHOW(__bch_cache_set) sysfs_printf(gc_always_rewrite, "%i", c->gc_always_rewrite); sysfs_printf(btree_shrinker_disabled, "%i", c->shrinker_disabled); sysfs_printf(copy_gc_enabled, "%i", c->copy_gc_enabled); + sysfs_printf(gc_only_dirty_data, "%i", c->gc_only_dirty_data); sysfs_printf(io_disable, "%i", test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -898,6 +900,7 @@ STORE(__bch_cache_set) sysfs_strtoul(gc_always_rewrite, c->gc_always_rewrite); sysfs_strtoul(btree_shrinker_disabled, c->shrinker_disabled); sysfs_strtoul(copy_gc_enabled, c->copy_gc_enabled); + sysfs_strtoul(gc_only_dirty_data, c->gc_only_dirty_data);
return size; } @@ -978,6 +981,7 @@ static struct attribute *bch_cache_set_internal_files[] = { &sysfs_gc_always_rewrite, &sysfs_btree_shrinker_disabled, &sysfs_copy_gc_enabled, + &sysfs_gc_only_dirty_data, &sysfs_io_disable, &sysfs_gc_sectors, &sysfs_traffic_policy_start,