From: John Garry <john.g.garry@oracle.com>
maillist inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I9VTE3 CVE: NA
Reference: https://lore.kernel.org/all/20240326133813.3224593-1-john.g.garry@oracle.com...
--------------------------------
For FS_XFLAG_FORCEALIGN support, we want to treat any sub-extent IO like sub-fsblock DIO, in that we will zero the sub-extent when the mapping is unwritten.
This will be important for atomic writes support, in that atomically writing over a partially written extent would mean that we would need to do the unwritten extent conversion write separately, and the write could no longer be atomic.
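It is the task of the FS to set iomap.extent_shift per iter to indicate that sub-extent zeroing is required. The zeroing granularity is then i_blocksize(inode) << iomap.extent_shift, so an extent_shift of 0 keeps the existing per-fsblock zeroing.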
Maybe a macro like i_blocksize() should be introduced for extent sizes, instead of using extent_shift.
Signed-off-by: John Garry john.g.garry@oracle.com Signed-off-by: Long Li leo.lilong@huawei.com --- fs/iomap/direct-io.c | 23 ++++++++++++++++------- include/linux/iomap.h | 1 + 2 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 892a4f8109e5..6cdacdd141f6 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -210,15 +210,22 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, struct page *page = ZERO_PAGE(0); int flags = REQ_SYNC | REQ_IDLE; struct bio *bio; + unsigned size; + unsigned nr_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- bio = bio_alloc(GFP_KERNEL, 1); + bio = bio_alloc(GFP_KERNEL, nr_pages); bio_set_dev(bio, iomap->bdev); bio->bi_iter.bi_sector = iomap_sector(iomap, pos); bio->bi_private = dio; bio->bi_end_io = iomap_dio_bio_end_io;
- get_page(page); - __bio_add_page(bio, page, len, 0); + while (len > 0) { + size = len > PAGE_SIZE ? PAGE_SIZE : len; + get_page(page); + __bio_add_page(bio, page, size, 0); + len -= size; + pos += size; + } bio_set_op_attrs(bio, REQ_OP_WRITE, flags); iomap_dio_submit_bio(dio, iomap, bio, pos); } @@ -228,7 +235,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, struct iomap_dio *dio, struct iomap *iomap) { unsigned int blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); - unsigned int fs_block_size = i_blocksize(inode), pad; + unsigned int zeroing_size, pad; unsigned int align = iov_iter_alignment(dio->submit.iter); struct bio *bio; bool need_zeroout = false; @@ -237,6 +244,8 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, size_t copied = 0; size_t orig_count;
+ zeroing_size = i_blocksize(inode) << iomap->extent_shift; + if ((pos | length | align) & ((1 << blkbits) - 1)) return -EINVAL;
@@ -280,7 +289,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
if (need_zeroout) { /* zero out from the start of the block to the write offset */ - pad = pos & (fs_block_size - 1); + pad = pos & (zeroing_size - 1); if (pad) iomap_dio_zero(dio, iomap, pos - pad, pad); } @@ -345,9 +354,9 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length, if (need_zeroout || ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) { /* zero out from the end of the write to the end of the block */ - pad = pos & (fs_block_size - 1); + pad = pos & (zeroing_size - 1); if (pad) - iomap_dio_zero(dio, iomap, pos, fs_block_size - pad); + iomap_dio_zero(dio, iomap, pos, zeroing_size - pad); } out: /* Undo iter limitation to current extent */ diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0965d5f12858..d14a729d40ce 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -93,6 +93,7 @@ struct iomap { u64 length; /* length of mapping, bytes */ u16 type; /* type of mapping */ u16 flags; /* flags for mapping */ + unsigned int extent_shift; struct block_device *bdev; /* block device for I/O */ struct dax_device *dax_dev; /* dax_dev for dax operations */ void *inline_data;