mainline inclusion
from v5.19-rc1
commit 80db4e4707e78cb22287da7d058d7274bd4cb370
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I59A5L?from=project-issue
CVE: N/A
After making bch_sectors_dirty_init() multithreaded, the existing incremental dirty sector counting in bch_root_node_dirty_init() doesn't release its hold on the btree after iterating 500000 (INIT_KEYS_EACH_TIME) bkeys. Because a read lock is taken on the btree root node to prevent the btree from being split during the dirty sector counting, other I/O requesters get no chance to take the write lock, even when bcache_btree() restarts.
That is to say, the incremental dirty sector counting is incompatible with the multithreaded bch_sectors_dirty_init(). We have to choose one and drop the other.
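The starvation can be modeled in userspace (a minimal sketch with pthreads, not the bcache code: the rwlock stands in for the btree root's lock, usleep() for iterating one batch of bkeys, and the loop for the -EAGAIN restart). With several workers re-taking the read lock right after every restart, a writer waiting for the write lock may never get in under glibc's default reader-preferring rwlock:

  /*
   * Userspace model of the lock starvation (assumption: simplified,
   * not the kernel code). Build with: cc model.c -lpthread
   */
  #include <pthread.h>
  #include <stdio.h>
  #include <unistd.h>

  static pthread_rwlock_t root_lock = PTHREAD_RWLOCK_INITIALIZER;

  static void *dirty_init_worker(void *arg)
  {
      int restart;

      (void)arg;
      for (restart = 0; restart < 1000; restart++) {
          pthread_rwlock_rdlock(&root_lock);  /* read-lock the "root"  */
          usleep(1000);                       /* "iterate" one batch   */
          pthread_rwlock_unlock(&root_lock);  /* -EAGAIN style restart */
      }
      return NULL;
  }

  static void *io_writer(void *arg)
  {
      (void)arg;
      pthread_rwlock_wrlock(&root_lock);      /* may wait the whole run */
      puts("writer finally got the write lock");
      pthread_rwlock_unlock(&root_lock);
      return NULL;
  }

  int main(void)
  {
      pthread_t w[4], io;
      int i;

      for (i = 0; i < 4; i++)
          pthread_create(&w[i], NULL, dirty_init_worker, NULL);
      pthread_create(&io, NULL, io_writer, NULL);

      for (i = 0; i < 4; i++)
          pthread_join(w[i], NULL);
      pthread_join(io, NULL);
      return 0;
  }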
In my testing, with 512-byte random writes, I generated 1.2TB of dirty data and a btree with 400K nodes. With a single thread and incremental dirty sector counting, it takes more than 30 minutes to register the backing device. With multithreaded dirty sector counting, backing device registration completes within 2 minutes.
The difference between 30+ minutes and under 2 minutes made me decide to keep the multithreaded bch_sectors_dirty_init() and drop the incremental dirty sector counting. This is what this patch does.
But INIT_KEYS_EACH_TIME is kept: in sectors_dirty_init_fn(), the CPU is released by cond_resched() after every INIT_KEYS_EACH_TIME keys are iterated. This avoids the watchdog reporting a bogus soft lockup warning.
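The retained pattern, one uninterrupted pass that yields the CPU every INIT_KEYS_EACH_TIME keys, can be sketched in userspace as follows (sched_yield() approximates the kernel's cond_resched(); the key loop is a stand-in for the btree map callback):

  #include <sched.h>
  #include <stdio.h>

  #define INIT_KEYS_EACH_TIME 500000

  int main(void)
  {
      unsigned long nkeys = 5000000UL, count = 0, key;

      for (key = 0; key < nkeys; key++) {
          /* ... count dirty sectors for this bkey ... */
          if (!(++count % INIT_KEYS_EACH_TIME))
              sched_yield();  /* cond_resched() analogue */
      }
      printf("iterated %lu keys in one pass\n", count);
      return 0;
  }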
Fixes: b144e45fc576 ("bcache: make bch_sectors_dirty_init() to be multithreaded")
Signed-off-by: Coly Li <colyli@suse.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20220524102336.10684-4-colyli@suse.de
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/writeback.c | 41 +++++++++++------------------------
 1 file changed, 13 insertions(+), 28 deletions(-)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 7ca91d1eb83b..2b6cb308f73a 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -805,13 +805,11 @@ static int bch_writeback_thread(void *arg)
 
 /* Init */
 #define INIT_KEYS_EACH_TIME	500000
-#define INIT_KEYS_SLEEP_MS	100
 
 struct sectors_dirty_init {
 	struct btree_op	op;
 	unsigned int	inode;
 	size_t		count;
-	struct bkey	start;
 };
 
 static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
@@ -827,11 +825,8 @@ static int sectors_dirty_init_fn(struct btree_op *_op, struct btree *b,
 					     KEY_START(k), KEY_SIZE(k));
 
 	op->count++;
-	if (atomic_read(&b->c->search_inflight) &&
-	    !(op->count % INIT_KEYS_EACH_TIME)) {
-		bkey_copy_key(&op->start, k);
-		return -EAGAIN;
-	}
+	if (!(op->count % INIT_KEYS_EACH_TIME))
+		cond_resched();
 
 	return MAP_CONTINUE;
 }
@@ -846,24 +841,16 @@ static int bch_root_node_dirty_init(struct cache_set *c,
 	bch_btree_op_init(&op.op, -1);
 	op.inode = d->id;
 	op.count = 0;
-	op.start = KEY(op.inode, 0, 0);
-
-	do {
-		ret = bcache_btree(map_keys_recurse,
-				   k,
-				   c->root,
-				   &op.op,
-				   &op.start,
-				   sectors_dirty_init_fn,
-				   0);
-		if (ret == -EAGAIN)
-			schedule_timeout_interruptible(
-				msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
-		else if (ret < 0) {
-			pr_warn("sectors dirty init failed, ret=%d!\n", ret);
-			break;
-		}
-	} while (ret == -EAGAIN);
+
+	ret = bcache_btree(map_keys_recurse,
+			   k,
+			   c->root,
+			   &op.op,
+			   &KEY(op.inode, 0, 0),
+			   sectors_dirty_init_fn,
+			   0);
+	if (ret < 0)
+		pr_warn("sectors dirty init failed, ret=%d!\n", ret);
 
 	return ret;
 }
@@ -907,7 +894,6 @@ static int bch_dirty_init_thread(void *arg)
 			goto out;
 		}
 		skip_nr--;
-		cond_resched();
 	}
 
 	if (p) {
@@ -917,7 +903,6 @@ static int bch_dirty_init_thread(void *arg)
 
 		p = NULL;
 		prev_idx = cur_idx;
-		cond_resched();
 	}
 
 out:
@@ -956,11 +941,11 @@ void bch_sectors_dirty_init(struct bcache_device *d)
 		bch_btree_op_init(&op.op, -1);
 		op.inode = d->id;
 		op.count = 0;
-		op.start = KEY(op.inode, 0, 0);
 
 		for_each_key_filter(&c->root->keys, k, &iter,
 				    bch_ptr_invalid)
 			sectors_dirty_init_fn(&op.op, c->root, k);
 
+		rw_unlock(0, c->root);
 		return;
 	}