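This series fixes potential stale-data exposure when ext4_truncate() zeroes the partial tail block. Patch 1 moves the ext4_jbd2_inode_add_write() call out of __ext4_block_zero_page_range() and into ext4_truncate(), so that data is ordered only for a partial truncate and no longer for zero range or hole punching. Patch 2 handles the iomap buffered I/O path, which does not order data, by writing out the zeroed range with filemap_write_and_wait_range() before the i_disksize update transaction is committed.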
Yongjian Sun (2): ext4: do not always order data when partial zeroing out a block ext4: fix potential memory exposure issues during truncate in iomap mode.
fs/ext4/inode.c | 75 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 16 deletions(-)
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5WC2 CVE: NA
--------------------------------
When zeroing out a partial block during a partial truncate, a zero range, or a hole punch, the data only needs to be ordered in the partial truncate case, because only that case risks exposing stale data. Consider a scenario in which a crash occurs just after the i_disksize update transaction has been committed but before the zeroed data has been written out. In this case, the tail block still retains stale data, which could be exposed by the next expanding truncate. Partial zero range and hole punching do not have this risk. Therefore, move the ext4_jbd2_inode_add_write() call out of __ext4_block_zero_page_range() and into ext4_truncate(), so that data is ordered only for a partial truncate.
Fixes: 5721968224e0 ("ext4: implement zero_range iomap path") Signed-off-by: Yongjian Sun <sunyongjian1@huawei.com> --- fs/ext4/inode.c | 50 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f626f1c509c2..bbf0258062a7 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4220,7 +4220,9 @@ void ext4_set_aops(struct inode *inode) }
static int __ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length) + struct address_space *mapping, + loff_t from, loff_t length, + bool *did_zero) { ext4_fsblk_t index = from >> PAGE_SHIFT; unsigned offset = from & (PAGE_SIZE-1); @@ -4300,14 +4302,16 @@ static int __ext4_block_zero_page_range(handle_t *handle,
if (ext4_should_journal_data(inode)) { err = ext4_dirty_journalled_data(handle, bh); + if (err) + goto unlock; } else { err = 0; mark_buffer_dirty(bh); - if (ext4_should_order_data(inode)) - err = ext4_jbd2_inode_add_write(handle, inode, from, - length); }
+ if (did_zero) + *did_zero = true; + unlock: folio_unlock(folio); folio_put(folio); @@ -4329,7 +4333,9 @@ static int ext4_iomap_zero_range(struct inode *inode, * that corresponds to 'from' */ static int ext4_block_zero_page_range(handle_t *handle, - struct address_space *mapping, loff_t from, loff_t length) + struct address_space *mapping, + loff_t from, loff_t length, + bool *did_zero) { struct inode *inode = mapping->host; unsigned offset = from & (PAGE_SIZE-1); @@ -4349,7 +4355,8 @@ static int ext4_block_zero_page_range(handle_t *handle, } else if (ext4_test_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP)) { return ext4_iomap_zero_range(inode, from, length); } - return __ext4_block_zero_page_range(handle, mapping, from, length); + return __ext4_block_zero_page_range(handle, mapping, from, length, + did_zero); }
/* @@ -4359,12 +4366,15 @@ static int ext4_block_zero_page_range(handle_t *handle, * of that block so it doesn't yield old data if the file is later grown. */ static int ext4_block_truncate_page(handle_t *handle, - struct address_space *mapping, loff_t from) + struct address_space *mapping, loff_t from, + loff_t *zero_len) { unsigned offset = from & (PAGE_SIZE-1); unsigned length; unsigned blocksize; struct inode *inode = mapping->host; + bool did_zero = false; + int ret;
/* If we are processing an encrypted inode during orphan list handling */ if (IS_ENCRYPTED(inode) && !fscrypt_has_encryption_key(inode)) @@ -4373,7 +4383,13 @@ static int ext4_block_truncate_page(handle_t *handle, blocksize = inode->i_sb->s_blocksize; length = blocksize - (offset & (blocksize - 1));
- return ext4_block_zero_page_range(handle, mapping, from, length); + ret = ext4_block_zero_page_range(handle, mapping, from, length, + &did_zero); + if (ret) + return ret; + + *zero_len = length; + return 0; }
int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, @@ -4396,13 +4412,14 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, if (start == end && (partial_start || (partial_end != sb->s_blocksize - 1))) { err = ext4_block_zero_page_range(handle, mapping, - lstart, length); + lstart, length, NULL); return err; } /* Handle partial zero out on the start of the range */ if (partial_start) { err = ext4_block_zero_page_range(handle, mapping, - lstart, sb->s_blocksize); + lstart, sb->s_blocksize, + NULL); if (err) return err; } @@ -4410,7 +4427,7 @@ int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, if (partial_end != sb->s_blocksize - 1) err = ext4_block_zero_page_range(handle, mapping, byte_end - partial_end, - partial_end + 1); + partial_end + 1, NULL); return err; }
@@ -4705,6 +4722,7 @@ int ext4_truncate(struct inode *inode) int err = 0, err2; handle_t *handle; struct address_space *mapping = inode->i_mapping; + loff_t zero_len = 0;
/* * There is a possibility that we're either freeing the inode @@ -4748,7 +4766,15 @@ int ext4_truncate(struct inode *inode) }
if (inode->i_size & (inode->i_sb->s_blocksize - 1)) - ext4_block_truncate_page(handle, mapping, inode->i_size); + ext4_block_truncate_page(handle, mapping, inode->i_size, + &zero_len); + + if (zero_len && ext4_should_order_data(inode)) { + err = ext4_jbd2_inode_add_write(handle, inode, inode->i_size, + zero_len); + if (err) + goto out_stop; + }
/* * We add the inode to the orphan list, so that if this
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB5WC2 CVE: NA
--------------------------------
Since the iomap buffered I/O path does not order data, the zeroed data must be written out before the i_disksize update transaction is committed. Otherwise, stale data may be left over in the last block, which could be exposed by the next expanding truncate. So, after zeroing the tail block, call filemap_write_and_wait_range() on the zeroed range in the iomap path of ext4_truncate() to write it out and wait for the write to complete.
Fixes: 5721968224e0 ("ext4: implement zero_range iomap path") Signed-off-by: Yongjian Sun <sunyongjian1@huawei.com> --- fs/ext4/inode.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bbf0258062a7..51927bbe4350 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4318,10 +4318,10 @@ static int __ext4_block_zero_page_range(handle_t *handle, return err; }
-static int ext4_iomap_zero_range(struct inode *inode, - loff_t from, loff_t length) +static int ext4_iomap_zero_range(struct inode *inode, loff_t from, + loff_t length, bool *did_zero) { - return iomap_zero_range(inode, from, length, NULL, + return iomap_zero_range(inode, from, length, did_zero, &ext4_iomap_buffered_read_ops); }
@@ -4353,7 +4353,7 @@ static int ext4_block_zero_page_range(handle_t *handle, return dax_zero_range(inode, from, length, NULL, &ext4_iomap_ops); } else if (ext4_test_inode_state(inode, EXT4_STATE_BUFFERED_IOMAP)) { - return ext4_iomap_zero_range(inode, from, length); + return ext4_iomap_zero_range(inode, from, length, did_zero); } return __ext4_block_zero_page_range(handle, mapping, from, length, did_zero); @@ -4769,6 +4769,23 @@ int ext4_truncate(struct inode *inode) ext4_block_truncate_page(handle, mapping, inode->i_size, &zero_len);
+ /* + * inode with an iomap buffered I/O path does not order data, + * so it is necessary to write out zeroed data before the + * updating i_disksize transaction is committed. Otherwise, + * stale data may remain in the last block, which could be + * exposed during the next expand truncate operation. + */ + if (zero_len && ext4_test_inode_state(inode, + EXT4_STATE_BUFFERED_IOMAP)) { + loff_t zero_end = inode->i_size + zero_len; + + err = filemap_write_and_wait_range(mapping, + inode->i_size, zero_end - 1); + if (err) + goto out_stop; + } + if (zero_len && ext4_should_order_data(inode)) { err = ext4_jbd2_inode_add_write(handle, inode, inode->i_size, zero_len);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/13953 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/F...
Feedback: The patch(es) you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://gitee.com/openeuler/kernel/pulls/13953 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/F...