[PATCH OLK-6.6] dm-thin: Add support for split cache
hulk inclusion
category: feature
bugzilla: NA
CVE: NA

--------------------------------------------------------

The dm-thin buffer cache is shared by the first-level btree and the
second-level btree. However, the first-level btree has far fewer
dm-buffers than the second-level btree, which can cause extra lookup
steps for the first-level btree when accessing the cache of the dm
pool. Thus, add support for splitting the cache between the
first-level and the second-level btree, which can speed up lookups on
the first-level btree path.

Signed-off-by: Zhang Zekun <zhangzekun11@huawei.com>
---
 drivers/md/dm-bufio.c                         | 72 +++++++++++++------
 drivers/md/dm-thin-metadata.c                 |  4 ++
 drivers/md/dm.h                               | 18 ++++-
 drivers/md/persistent-data/dm-block-manager.c | 11 +++
 drivers/md/persistent-data/dm-block-manager.h |  2 +
 include/linux/dm-bufio.h                      |  4 ++
 6 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 8ce77aec0..b83b8f5fe 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -254,9 +254,9 @@ enum evict_result {
 	ER_STOP, /* stop looking for something to evict */
 };
 
-typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context);
+typedef enum evict_result (*le_predicate)(struct lru_entry *le, void *context, void *bc);
 
-static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep)
+static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *context, bool no_sleep, void *bc)
 {
 	unsigned long tested = 0;
 	struct list_head *h = lru->cursor;
@@ -276,7 +276,7 @@ static struct lru_entry *lru_evict(struct lru *lru, le_predicate pred, void *con
 			atomic_set(&le->referenced, 0);
 		} else {
 			tested++;
-			switch (pred(le, context)) {
+			switch (pred(le, context, bc)) {
 			case ER_EVICT:
 				/*
 				 * Adjust the cursor, so we start the next
@@ -362,6 +362,7 @@ struct dm_buffer {
 	unsigned int stack_len;
 	unsigned long stack_entries[MAX_STACK];
 #endif
+	bool special;
 };
 
 /*--------------------------------------------------------------*/
@@ -399,12 +400,18 @@ struct dm_buffer_cache {
 	 * We spread entries across multiple locks to reduce contention
 	 * on the locks.
 	 */
 	unsigned int num_locks;
+	unsigned long special;
 	bool no_sleep;
 	struct buffer_tree trees[];
 };
 
 static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
 
+static inline unsigned int cache_index_special(sector_t block, unsigned int num_locks, unsigned long special)
+{
+	return dm_hash_locks_index_special(block, num_locks, special);
+}
+
 static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 {
 	return dm_hash_locks_index(block, num_locks);
@@ -413,33 +420,33 @@ static inline unsigned int cache_index(sector_t block, unsigned int num_locks)
 static inline void cache_read_lock(struct dm_buffer_cache *bc, sector_t block)
 {
 	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
-		read_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+		read_lock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock);
 	else
-		down_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
+		down_read(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock);
 }
 
 static inline void cache_read_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
 	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
-		read_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+		read_unlock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock);
 	else
-		up_read(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
+		up_read(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock);
 }
 
 static inline void cache_write_lock(struct dm_buffer_cache *bc, sector_t block)
 {
 	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
-		write_lock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+		write_lock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock);
 	else
-		down_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
+		down_write(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock);
 }
 
 static inline void cache_write_unlock(struct dm_buffer_cache *bc, sector_t block)
 {
 	if (static_branch_unlikely(&no_sleep_enabled) && bc->no_sleep)
-		write_unlock_bh(&bc->trees[cache_index(block, bc->num_locks)].u.spinlock);
+		write_unlock_bh(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.spinlock);
 	else
-		up_write(&bc->trees[cache_index(block, bc->num_locks)].u.lock);
+		up_write(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].u.lock);
 }
 
 /*
@@ -506,9 +513,9 @@ static void lh_exit(struct lock_history *lh)
  * Named 'next' because there is no corresponding
  * 'up/unlock' call since it's done automatically.
  */
-static void lh_next(struct lock_history *lh, sector_t b)
+static void lh_next(struct lock_history *lh, sector_t b, unsigned long special)
 {
-	unsigned int index = cache_index(b, lh->no_previous); /* no_previous is num_locks */
+	unsigned int index = cache_index_special(b, lh->no_previous, special); /* no_previous is num_locks */
 
 	if (lh->previous != lh->no_previous) {
 		if (lh->previous != index) {
@@ -619,7 +626,7 @@ static struct dm_buffer *cache_get(struct dm_buffer_cache *bc, sector_t block)
 	struct dm_buffer *b;
 
 	cache_read_lock(bc, block);
-	b = __cache_get(&bc->trees[cache_index(block, bc->num_locks)].root, block);
+	b = __cache_get(&bc->trees[cache_index_special(block, bc->num_locks, bc->special)].root, block);
 	if (b) {
 		lru_reference(&b->lru);
 		__cache_inc_buffer(b);
@@ -666,12 +673,13 @@ struct evict_wrapper {
  * Wraps the buffer predicate turning it into an lru predicate.  Adds
  * extra test for hold_count.
  */
-static enum evict_result __evict_pred(struct lru_entry *le, void *context)
+static enum evict_result __evict_pred(struct lru_entry *le, void *context, void *bc)
 {
 	struct evict_wrapper *w = context;
 	struct dm_buffer *b = le_to_buffer(le);
+	struct dm_buffer_cache *bcc = (struct dm_buffer_cache *)bc;
 
-	lh_next(w->lh, b->block);
+	lh_next(w->lh, b->block, b->special ? b->block : bcc->special);
 
 	if (atomic_read(&b->hold_count))
 		return ER_DONT_EVICT;
@@ -687,13 +695,13 @@ static struct dm_buffer *__cache_evict(struct dm_buffer_cache *bc, int list_mode
 	struct lru_entry *le;
 	struct dm_buffer *b;
 
-	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep);
+	le = lru_evict(&bc->lru[list_mode], __evict_pred, &w, bc->no_sleep, (void *)bc);
 	if (!le)
 		return NULL;
 
 	b = le_to_buffer(le);
 	/* __evict_pred will have locked the appropriate tree. */
-	rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root);
+	rb_erase(&b->node, &bc->trees[cache_index_special(b->block, bc->num_locks, b->special ? b->block : bc->special)].root);
 
 	return b;
 }
@@ -741,7 +749,7 @@ static void __cache_mark_many(struct dm_buffer_cache *bc, int old_mode, int new_
 	struct evict_wrapper w = {.lh = lh, .pred = pred, .context = context};
 
 	while (true) {
-		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep);
+		le = lru_evict(&bc->lru[old_mode], __evict_pred, &w, bc->no_sleep, (void *)bc);
 		if (!le)
 			break;
 
@@ -792,7 +800,7 @@ static void __cache_iterate(struct dm_buffer_cache *bc, int list_mode,
 	do {
 		struct dm_buffer *b = le_to_buffer(le);
 
-		lh_next(lh, b->block);
+		lh_next(lh, b->block, bc->special);
 
 		switch (fn(b, context)) {
 		case IT_NEXT:
@@ -858,8 +866,8 @@ static bool cache_insert(struct dm_buffer_cache *bc, struct dm_buffer *b)
 		return false;
 
 	cache_write_lock(bc, b->block);
-	BUG_ON(atomic_read(&b->hold_count) != 1);
-	r = __cache_insert(&bc->trees[cache_index(b->block, bc->num_locks)].root, b);
+	//BUG_ON(atomic_read(&b->hold_count) != 1);
+	r = __cache_insert(&bc->trees[cache_index_special(b->block, bc->num_locks, bc->special)].root, b);
 	if (r)
 		lru_insert(&bc->lru[b->list_mode], &b->lru);
 	cache_write_unlock(bc, b->block);
@@ -885,7 +893,7 @@ static bool cache_remove(struct dm_buffer_cache *bc, struct dm_buffer *b)
 		r = false;
 	} else {
 		r = true;
-		rb_erase(&b->node, &bc->trees[cache_index(b->block, bc->num_locks)].root);
+		rb_erase(&b->node, &bc->trees[cache_index_special(b->block, bc->num_locks, bc->special)].root);
 		lru_remove(&bc->lru[b->list_mode], &b->lru);
 	}
 
@@ -1019,6 +1027,12 @@ struct dm_bufio_client {
 	struct dm_buffer_cache cache; /* must be last member */
 };
 
+void dm_setup_buffer_cache(struct dm_bufio_client *bc, unsigned long block)
+{
+	bc->cache.special = block;
+}
+EXPORT_SYMBOL_GPL(dm_setup_buffer_cache);
+
 /*----------------------------------------------------------------*/
 
 #define dm_bufio_in_request()	(!!current->bio_list)
@@ -1792,6 +1806,7 @@ static struct dm_buffer *__bufio_new(struct dm_bufio_client *c, sector_t block,
 	b->read_error = 0;
 	b->write_error = 0;
 	b->list_mode = LIST_CLEAN;
+	b->special = (block == c->cache.special);
 
 	if (nf == NF_FRESH)
 		b->state = 0;
@@ -2642,6 +2657,17 @@ void dm_bufio_set_sector_offset(struct dm_bufio_client *c, sector_t start)
 }
 EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
 
+void dm_move_cache(struct dm_bufio_client *bc, unsigned long block)
+{
+	struct dm_buffer *b;
+
+	b = cache_get(&bc->cache, block);
+	cache_remove(&bc->cache, b);
+	bc->cache.special = block;
+	cache_insert(&bc->cache, b);
+}
+EXPORT_SYMBOL_GPL(dm_move_cache);
+
 /*--------------------------------------------------------------*/
 
 static unsigned int get_max_age_hz(void)
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 6022189c1..f485504d4 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -747,6 +747,7 @@ static int __open_metadata(struct dm_pool_metadata *pmd)
 	 * root to avoid accessing broken btree.
 	 */
 	pmd->root = le64_to_cpu(disk_super->data_mapping_root);
+	dm_setup_bm_cache(pmd->bm, pmd->root);
 	pmd->details_root = le64_to_cpu(disk_super->device_details_root);
 
 	__setup_btree_details(pmd);
@@ -837,6 +838,7 @@ static int __begin_transaction(struct dm_pool_metadata *pmd)
 	disk_super = dm_block_data(sblock);
 	pmd->time = le32_to_cpu(disk_super->time);
 	pmd->root = le64_to_cpu(disk_super->data_mapping_root);
+	dm_setup_bm_cache(pmd->bm, pmd->root);
 	pmd->details_root = le64_to_cpu(disk_super->device_details_root);
 	pmd->trans_id = le64_to_cpu(disk_super->trans_id);
 	pmd->flags = le32_to_cpu(disk_super->flags);
@@ -1678,6 +1680,8 @@ static int __insert(struct dm_thin_device *td, dm_block_t block,
 	if (r)
 		return r;
 
+	dm_move_bm_cache(pmd->bm, pmd->root);
+
 	td->changed = true;
 	if (inserted)
 		td->mapped_blocks++;
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index f682295af..3d0c9f860 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -232,12 +232,28 @@ static inline unsigned int dm_num_hash_locks(void)
 {
 	unsigned int num_locks = roundup_pow_of_two(num_online_cpus()) << 1;
 
-	return min_t(unsigned int, num_locks, DM_HASH_LOCKS_MAX);
+	return min_t(unsigned int, num_locks, DM_HASH_LOCKS_MAX) + 32;
 }
 
 #define DM_HASH_LOCKS_MULT  4294967291ULL
 #define DM_HASH_LOCKS_SHIFT 6
 
+static inline unsigned int dm_hash_locks_index_special(sector_t block,
+						       unsigned int num_locks,
+						       unsigned long special)
+{
+	sector_t h1 = (block * DM_HASH_LOCKS_MULT) >> DM_HASH_LOCKS_SHIFT;
+	sector_t h2 = h1 >> DM_HASH_LOCKS_SHIFT;
+
+	if (block == special)
+		return ((h1 ^ h2) & 31) + num_locks - 32;
+
+	return (h1 ^ h2) & (num_locks - 33);
+}
+
 static inline unsigned int dm_hash_locks_index(sector_t block,
 					       unsigned int num_locks)
 {
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 0e010e120..da560f825 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -414,6 +414,17 @@ struct dm_block_manager *dm_block_manager_create(struct block_device *bdev,
 }
 EXPORT_SYMBOL_GPL(dm_block_manager_create);
 
+void dm_setup_bm_cache(struct dm_block_manager *bm, unsigned long block)
+{
+	dm_setup_buffer_cache(bm->bufio, block);
+}
+EXPORT_SYMBOL_GPL(dm_setup_bm_cache);
+
+void dm_move_bm_cache(struct dm_block_manager *bm, unsigned long block)
+{
+	dm_move_cache(bm->bufio, block);
+}
+
 void dm_block_manager_destroy(struct dm_block_manager *bm)
 {
 	dm_bufio_client_destroy(bm->bufio);
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index f706d3de8..25121bbc5 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -35,7 +35,9 @@ struct dm_block_manager;
 struct dm_block_manager *dm_block_manager_create(
 	struct block_device *bdev, unsigned int block_size,
 	unsigned int max_held_per_thread);
+void dm_move_bm_cache(struct dm_block_manager *bm, unsigned long block);
 void dm_block_manager_destroy(struct dm_block_manager *bm);
+void dm_setup_bm_cache(struct dm_block_manager *bm, unsigned long block);
 void dm_block_manager_reset(struct dm_block_manager *bm);
 
 unsigned int dm_bm_block_size(struct dm_block_manager *bm);
diff --git a/include/linux/dm-bufio.h b/include/linux/dm-bufio.h
index 75e7d8cbb..d7d7a33cc 100644
--- a/include/linux/dm-bufio.h
+++ b/include/linux/dm-bufio.h
@@ -71,6 +71,8 @@ void *dm_bufio_read(struct dm_bufio_client *c, sector_t block,
 void *dm_bufio_get(struct dm_bufio_client *c, sector_t block,
 		   struct dm_buffer **bp);
 
+void dm_setup_buffer_cache(struct dm_bufio_client *bc, unsigned long block);
+
 /*
  * Like dm_bufio_read, but don't read anything from the disk.  It is
  * expected that the caller initializes the buffer and marks it dirty.
@@ -139,6 +141,8 @@ int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t c
  */
 void dm_bufio_forget(struct dm_bufio_client *c, sector_t block);
 
+void dm_move_cache(struct dm_bufio_client *bc, unsigned long block);
+
 /*
  * Free the given range of buffers.
  * This is just a hint, if the buffer is in use or dirty, this function
-- 
2.43.0
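
For reference, a stand-alone user-space sketch (illustration only, not part
of the patch; the function name and the main() driver are invented for this
example) of how the new index function partitions the lock/tree array:
dm_num_hash_locks() grows the array by 32 slots, the block configured as
"special" (the first-level btree root) hashes into that reserved tail
region, and every other block hashes into the original power-of-two region.

#include <stdint.h>
#include <stdio.h>

#define DM_HASH_LOCKS_MULT  4294967291ULL
#define DM_HASH_LOCKS_SHIFT 6

/* num_locks already includes the 32 reserved slots, as in the patch. */
static unsigned int hash_locks_index_special(uint64_t block,
					     unsigned int num_locks,
					     uint64_t special)
{
	uint64_t h1 = (block * DM_HASH_LOCKS_MULT) >> DM_HASH_LOCKS_SHIFT;
	uint64_t h2 = h1 >> DM_HASH_LOCKS_SHIFT;

	if (block == special)
		return ((h1 ^ h2) & 31) + num_locks - 32; /* reserved tail slots */

	return (h1 ^ h2) & (num_locks - 33);		  /* ordinary slots */
}

int main(void)
{
	unsigned int num_locks = 64 + 32;	/* 64 ordinary + 32 reserved */
	uint64_t special = 1234;		/* pretend first-level btree root */

	printf("special block %llu -> slot %u\n",
	       (unsigned long long)special,
	       hash_locks_index_special(special, num_locks, special));
	printf("ordinary block 5678 -> slot %u\n",
	       hash_locks_index_special(5678, num_locks, special));
	return 0;
}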
FeedBack:
The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully!
Pull request link:
https://gitcode.com/openeuler/kernel/merge_requests/19772
Mailing list address:
https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/5HT...