From: Zhang Yi yi.zhang@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I476C7 CVE: NA ---------------------------
No EIO simulation is required if the buffer is uptodate, so move the simulation behind read bio completeion just like inode/block bitmap simulation does.
Link: https://lore.kernel.org/linux-ext4/20210821065450.1397451-2-yi.zhang@huawei.... Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/inode.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 800e6e3de40aa..1b2ccf61354f3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4635,8 +4635,6 @@ static int __ext4_get_inode_loc(struct inode *inode, bh = sb_getblk(sb, block); if (unlikely(!bh)) return -ENOMEM; - if (ext4_simulate_fail(sb, EXT4_SIM_INODE_EIO)) - goto simulate_eio; if (!buffer_uptodate(bh)) { lock_buffer(bh);
@@ -4721,8 +4719,8 @@ static int __ext4_get_inode_loc(struct inode *inode, trace_ext4_load_inode(inode); ext4_read_bh_nowait(bh, REQ_META | REQ_PRIO, NULL); wait_on_buffer(bh); + ext4_simulate_fail_bh(sb, bh, EXT4_SIM_INODE_EIO); if (!buffer_uptodate(bh)) { - simulate_eio: ext4_error_inode_block(inode, block, EIO, "unable to read itable block"); brelse(bh);
From: Zhang Yi yi.zhang@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I476C7 CVE: NA ---------------------------
Now that ext4_do_update_inode() return error before filling the whole inode data if we fail to set inode blocks in ext4_inode_blocks_set(). This error should never happen in theory since sb->s_maxbytes should not have allowed this, we have already init sb->s_maxbytes according to this feature in ext4_fill_super(). So even through that could only happen due to the filesystem corruption, we'd better to return after we finish updating the inode because it may left an uninitialized buffer and we could read this buffer later in "errors=continue" mode.
This patch make the updating inode data procedure atomic, call EXT4_ERROR_INODE() after we dropping i_raw_lock after something bad happened, make sure that the inode is integrated, and also drop a BUG_ON and do some small cleanups.
Link: https://lore.kernel.org/linux-ext4/20210821065450.1397451-4-yi.zhang@huawei.... Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/inode.c | 44 ++++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 16 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 1b2ccf61354f3..8b01b73367a2b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5171,8 +5171,14 @@ static int ext4_inode_blocks_set(handle_t *handle, ext4_clear_inode_flag(inode, EXT4_INODE_HUGE_FILE); return 0; } + + /* + * This should never happen since sb->s_maxbytes should not have + * allowed this, sb->s_maxbytes was set according to the huge_file + * feature in ext4_fill_super(). + */ if (!ext4_has_feature_huge_file(sb)) - return -EFBIG; + return -EFSCORRUPTED;
if (i_blocks <= 0xffffffffffffULL) { /* @@ -5279,16 +5285,14 @@ static int ext4_do_update_inode(handle_t *handle,
spin_lock(&ei->i_raw_lock);
- /* For fields not tracked in the in-memory inode, - * initialise them to zero for new inodes. */ + /* + * For fields not tracked in the in-memory inode, initialise them + * to zero for new inodes. + */ if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
err = ext4_inode_blocks_set(handle, raw_inode, ei); - if (err) { - spin_unlock(&ei->i_raw_lock); - goto out_brelse; - }
raw_inode->i_mode = cpu_to_le16(inode->i_mode); i_uid = i_uid_read(inode); @@ -5297,10 +5301,11 @@ static int ext4_do_update_inode(handle_t *handle, if (!(test_opt(inode->i_sb, NO_UID32))) { raw_inode->i_uid_low = cpu_to_le16(low_16_bits(i_uid)); raw_inode->i_gid_low = cpu_to_le16(low_16_bits(i_gid)); -/* - * Fix up interoperability with old kernels. Otherwise, old inodes get - * re-used with the upper 16 bits of the uid/gid intact - */ + /* + * Fix up interoperability with old kernels. Otherwise, + * old inodes get re-used with the upper 16 bits of the + * uid/gid intact. + */ if (ei->i_dtime && list_empty(&ei->i_orphan)) { raw_inode->i_uid_high = 0; raw_inode->i_gid_high = 0; @@ -5369,8 +5374,9 @@ static int ext4_do_update_inode(handle_t *handle, } }
- BUG_ON(!ext4_has_feature_project(inode->i_sb) && - i_projid != EXT4_DEF_PROJID); + if (i_projid != EXT4_DEF_PROJID && + !ext4_has_feature_project(inode->i_sb)) + err = err ?: -EFSCORRUPTED;
if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE && EXT4_FITS_IN_INODE(raw_inode, ei, i_projid)) @@ -5378,6 +5384,11 @@ static int ext4_do_update_inode(handle_t *handle,
ext4_inode_csum_set(inode, raw_inode, ei); spin_unlock(&ei->i_raw_lock); + if (err) { + EXT4_ERROR_INODE(inode, "corrupted inode contents"); + goto out_brelse; + } + if (inode->i_sb->s_flags & SB_LAZYTIME) ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, bh->b_data); @@ -5385,13 +5396,13 @@ static int ext4_do_update_inode(handle_t *handle, BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); err = ext4_handle_dirty_metadata(handle, NULL, bh); if (err) - goto out_brelse; + goto out_error; ext4_clear_inode_state(inode, EXT4_STATE_NEW); if (set_large_file) { BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) - goto out_brelse; + goto out_error; lock_buffer(EXT4_SB(sb)->s_sbh); ext4_set_feature_large_file(sb); ext4_superblock_csum_set(sb); @@ -5401,9 +5412,10 @@ static int ext4_do_update_inode(handle_t *handle, EXT4_SB(sb)->s_sbh); } ext4_update_inode_fsync_trans(handle, inode, need_datasync); +out_error: + ext4_std_error(inode->i_sb, err); out_brelse: brelse(bh); - ext4_std_error(inode->i_sb, err); return err; }
From: Zhang Yi yi.zhang@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I476C7 CVE: NA ---------------------------
In ext4_get_inode_loc(), we may skip IO and get an zero && uptodate inode buffer when the inode monopolize an inode block for performance reason. For most cases, ext4_mark_iloc_dirty() will fill the inode buffer to make it fine, but we could miss this call if something bad happened. Finally, __ext4_get_inode_loc_noinmem() may probably get an empty inode buffer and trigger ext4 error.
For example, if we remove a nonexistent xattr on inode A, ext4_xattr_set_handle() will return ENODATA before invoking ext4_mark_iloc_dirty(), it will left an uptodate but zero buffer. We will get checksum error message in ext4_iget() when getting inode again.
EXT4-fs error (device sda): ext4_lookup:1784: inode #131074: comm cat: iget: checksum invalid
Even worse, if we allocate another inode B at the same inode block, it will corrupt the inode A on disk when write back inode B.
So this patch postpone the initialization and mark buffer uptodate logic until shortly before we fill correct inode data in ext4_do_update_inode() if skip read I/O, ensure the buffer is really uptodate.
Link: https://lore.kernel.org/linux-ext4/20210821065450.1397451-5-yi.zhang@huawei.... Signed-off-by: Zhang Yi yi.zhang@huawei.com Reviewed-by: Jan Kara jack@suse.cz Reviewed-by: Yang Erkun yangerkun@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/ext4/inode.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-)
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8b01b73367a2b..b2866f38eec20 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4677,9 +4677,12 @@ static int __ext4_get_inode_loc(struct inode *inode, } brelse(bitmap_bh); if (i == start + inodes_per_block) { - /* all other inodes are free, so skip I/O */ - memset(bh->b_data, 0, bh->b_size); - set_buffer_uptodate(bh); + /* + * All other inodes are free, skip I/O. Return + * uninitialized buffer immediately, initialization + * is postponed until shortly before we fill inode + * contents. + */ unlock_buffer(bh); goto has_buffer; } @@ -5283,6 +5286,24 @@ static int ext4_do_update_inode(handle_t *handle, gid_t i_gid; projid_t i_projid;
+ /* + * If the buffer is not uptodate, it means all information of the + * inode is in memory and we got this buffer without reading the + * block. We must be cautious that once we mark the buffer as + * uptodate, we rely on filling in the correct inode data later + * in this function. Otherwise if we left uptodate buffer without + * copying proper inode contents, we could corrupt the inode on + * disk after allocating another inode in the same block. + */ + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + spin_lock(&ei->i_raw_lock);
/*