From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v6.9-rc1 commit 30deff8531f469453ccc0981f14eceb0a2ea68d6 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9DN5Z CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The ->map_blocks interface returns a valid range for writeback, but we still call back into it for every block, which is a bit inefficient.
Change iomap_writepage_map to use the valid range in the map until the end of the folio or the dirty range inside the folio instead of calling back into every block.
Note that the range is not used over folio boundaries as we need to be able to check the mapping sequence count under the folio lock.
Signed-off-by: Christoph Hellwig hch@lst.de Link: https://lore.kernel.org/r/20231207072710.176093-14-hch@lst.de Signed-off-by: Christian Brauner brauner@kernel.org Signed-off-by: Zhang Yi yi.zhang@huawei.com --- fs/iomap/buffered-io.c | 116 ++++++++++++++++++++++++++++------------- include/linux/iomap.h | 7 +++ 2 files changed, 88 insertions(+), 35 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 32e177782597..cee23ede46e5 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat, Inc. - * Copyright (C) 2016-2019 Christoph Hellwig. + * Copyright (C) 2016-2023 Christoph Hellwig. */ #include <linux/module.h> #include <linux/compiler.h> @@ -93,6 +93,44 @@ static inline bool ifs_block_is_dirty(struct folio *folio, return test_bit(block + blks_per_folio, ifs->state); }
+static unsigned ifs_find_dirty_range(struct folio *folio, + struct iomap_folio_state *ifs, u64 *range_start, u64 range_end) +{ + struct inode *inode = folio->mapping->host; + unsigned start_blk = + offset_in_folio(folio, *range_start) >> inode->i_blkbits; + unsigned end_blk = min_not_zero( + offset_in_folio(folio, range_end) >> inode->i_blkbits, + i_blocks_per_folio(inode, folio)); + unsigned nblks = 1; + + while (!ifs_block_is_dirty(folio, ifs, start_blk)) + if (++start_blk == end_blk) + return 0; + + while (start_blk + nblks < end_blk) { + if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks)) + break; + nblks++; + } + + *range_start = folio_pos(folio) + (start_blk << inode->i_blkbits); + return nblks << inode->i_blkbits; +} + +static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start, + u64 range_end) +{ + struct iomap_folio_state *ifs = folio->private; + + if (*range_start >= range_end) + return 0; + + if (ifs) + return ifs_find_dirty_range(folio, ifs, range_start, range_end); + return range_end - *range_start; +} + static void ifs_clear_range_dirty(struct folio *folio, struct iomap_folio_state *ifs, size_t off, size_t len) { @@ -1698,10 +1736,9 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos) */ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct folio *folio, - struct inode *inode, loff_t pos) + struct inode *inode, loff_t pos, unsigned len) { struct iomap_folio_state *ifs = folio->private; - unsigned len = i_blocksize(inode); size_t poff = offset_in_folio(folio, pos); int error;
@@ -1725,29 +1762,41 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc, struct writeback_control *wbc, struct folio *folio, - struct inode *inode, u64 pos, unsigned *count) + struct inode *inode, u64 pos, unsigned dirty_len, + unsigned *count) { int error;
- error = wpc->ops->map_blocks(wpc, inode, pos); - if (error) - goto fail; - trace_iomap_writepage_map(inode, &wpc->iomap); - - switch (wpc->iomap.type) { - case IOMAP_INLINE: - WARN_ON_ONCE(1); - error = -EIO; - break; - case IOMAP_HOLE: - break; - default: - error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos); - if (!error) - (*count)++; - } + do { + unsigned map_len; + + error = wpc->ops->map_blocks(wpc, inode, pos); + if (error) + break; + trace_iomap_writepage_map(inode, &wpc->iomap); + + map_len = min_t(u64, dirty_len, + wpc->iomap.offset + wpc->iomap.length - pos); + WARN_ON_ONCE(!folio->private && map_len < dirty_len); + + switch (wpc->iomap.type) { + case IOMAP_INLINE: + WARN_ON_ONCE(1); + error = -EIO; + break; + case IOMAP_HOLE: + break; + default: + error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos, + map_len); + if (!error) + (*count)++; + break; + } + dirty_len -= map_len; + pos += map_len; + } while (dirty_len && !error);
-fail: /* * We cannot cancel the ioend directly here on error. We may have * already set other pages under writeback and hence we have to run I/O @@ -1814,7 +1863,7 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode, * beyond i_size. */ folio_zero_segment(folio, poff, folio_size(folio)); - *end_pos = isize; + *end_pos = round_up(isize, i_blocksize(inode)); }
return true; @@ -1825,12 +1874,11 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, { struct iomap_folio_state *ifs = folio->private; struct inode *inode = folio->mapping->host; - unsigned len = i_blocksize(inode); - unsigned nblocks = i_blocks_per_folio(inode, folio); u64 pos = folio_pos(folio); u64 end_pos = pos + folio_size(folio); unsigned count = 0; - int error = 0, i; + int error = 0; + u32 rlen;
WARN_ON_ONCE(!folio_test_locked(folio)); WARN_ON_ONCE(folio_test_dirty(folio)); @@ -1844,7 +1892,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, } WARN_ON_ONCE(end_pos <= pos);
- if (nblocks > 1) { + if (i_blocks_per_folio(inode, folio) > 1) { if (!ifs) { ifs = ifs_alloc(inode, folio, 0); iomap_set_range_dirty(folio, 0, end_pos - pos); @@ -1867,18 +1915,16 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc, folio_start_writeback(folio);
/* - * Walk through the folio to find areas to write back. If we - * run off the end of the current map or find the current map - * invalid, grab a new one. + * Walk through the folio to find dirty areas to write back. */ - for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) { - if (ifs && !ifs_block_is_dirty(folio, ifs, i)) - continue; - error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos, - &count); + while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) { + error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, + pos, rlen, &count); if (error) break; + pos += rlen; } + if (count) wpc->nr_folios++;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h index b8d3b658ad2b..49d93f538785 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -309,6 +309,13 @@ struct iomap_writeback_ops { /* * Required, maps the blocks so that writeback can be performed on * the range starting at offset. + * + * Can return arbitrarily large regions, but we need to call into it at + * least once per folio to allow the file systems to synchronize with + * the write path that could be invalidating mappings. + * + * An existing mapping from a previous call to this method can be reused + * by the file system if it is still valid. */ int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode, loff_t offset);