hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IBALAU CVE: NA
--------------------------------
When inactiving an unlinked inode and it's attrs, if xlog is shutdown either during or just after the process of recurse deleting attribute nodes/leafs in xfs_attr3_root_inactive(), the log will records some buffer cancel items, but doesn't contain the corresponding extent entries and inode updates, this is incomplete and inconsistent. Because of the inactiving process is not completed and the unlinked inode is still in the agi_unlinked table, it will continue to be inactived after replaying the log on the next mount, the attr node/leaf blocks' created record before the cancel items could not be replayed but the inode does. So we could get corrupted data when reading the canceled blocks.
XFS (pmem0): Metadata corruption detected at xfs_da3_node_read_verify+0x53/0x220, xfs_da3_node block 0x78 XFS (pmem0): Unmount and run xfs_repair XFS (pmem0): First 128 bytes of corrupted metadata buffer: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000040: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000050: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000060: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00000070: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ XFS (pmem0): metadata I/O error in "xfs_da_read_buf+0x104/0x190" at daddr 0x78 len 8 error 117
To fix this issue, put the index node/leaf block entries and the canceling of node/leaf blocks in the same transaction. For non-root blocks, it's straightforward. For root blocks, we need to put block cancel and the unmap of root block in the same transaction, so we split attr bmap truncation into two parts.
During attr inactive process, even with any interruption, empty attr node blocks should never be indexed, as this is a corrupted image for xfs, while empty attr leaf blocks are allowed. To prevent empty root node blocks in the attr fork, we convert them to root leaf blocks in such scenarios.
Fixes: 8fd6df1c9632 ("xfs: atomic drop extent entries when inactiving attr") Link: https://patchwork.kernel.org/project/xfs/patch/20230613030434.2944173-3-yi.z... Signed-off-by: Long Li leo.lilong@huawei.com --- fs/xfs/xfs_attr_inactive.c | 94 +++++++++++++++++++++----------------- fs/xfs/xfs_inode.c | 3 +- 2 files changed, 53 insertions(+), 44 deletions(-)
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 5993960d288f..1dd3c0aa7012 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -139,7 +139,7 @@ xfs_attr3_node_inactive( xfs_daddr_t parent_blkno, child_blkno; struct xfs_buf *child_bp; struct xfs_da3_icnode_hdr ichdr; - int error, i; + int error;
/* * Since this code is recursive (gasp!) we must protect ourselves. @@ -165,7 +165,7 @@ xfs_attr3_node_inactive( * over the leaves removing all of them. If this is higher up * in the tree, recurse downward. */ - for (i = 0; i < ichdr.count; i++) { + while (ichdr.count > 0) { /* * Read the subsidiary block to see what we have to work with. * Don't do this in a transaction. This is a depth-first @@ -215,29 +215,32 @@ xfs_attr3_node_inactive( xfs_trans_binval(*trans, child_bp); child_bp = NULL;
+ error = xfs_da3_node_read_mapped(*trans, dp, + parent_blkno, &bp, XFS_ATTR_FORK); + if (error) + return error; + /* - * If we're not done, re-read the parent to get the next - * child block number. + * Remove entry form parent node, prevents being indexed to. */ - if (i + 1 < ichdr.count) { - struct xfs_da3_icnode_hdr phdr; + xfs_da3_node_entry_remove(*trans, dp, bp, 0); + + xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr); + bp = NULL; + + if (ichdr.count > 0) { + /* + * If we're not done, get the next child block number. + */ + child_fsb = be32_to_cpu(ichdr.btree[0].before);
- error = xfs_da3_node_read_mapped(*trans, dp, - parent_blkno, &bp, XFS_ATTR_FORK); + /* + * Atomically commit the whole invalidate stuff. + */ + error = xfs_trans_roll_inode(trans, dp); if (error) - return error; - xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr, - bp->b_addr); - child_fsb = be32_to_cpu(phdr.btree[i + 1].before); - xfs_trans_brelse(*trans, bp); - bp = NULL; + return error; } - /* - * Atomically commit the whole invalidate stuff. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; }
return 0; @@ -254,10 +257,8 @@ xfs_attr3_root_inactive( struct xfs_trans **trans, struct xfs_inode *dp) { - struct xfs_mount *mp = dp->i_mount; struct xfs_da_blkinfo *info; struct xfs_buf *bp; - xfs_daddr_t blkno; int error;
/* @@ -269,7 +270,6 @@ xfs_attr3_root_inactive( error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK); if (error) return error; - blkno = bp->b_bn;
/* * Invalidate the tree, even if the "tree" is only a single leaf block. @@ -280,6 +280,16 @@ xfs_attr3_root_inactive( case cpu_to_be16(XFS_DA_NODE_MAGIC): case cpu_to_be16(XFS_DA3_NODE_MAGIC): error = xfs_attr3_node_inactive(trans, dp, bp, 1); + if (error) + return error; + + /* + * Empty root node blocks are not allowed, convert it to leaf. + */ + error = xfs_attr3_leaf_init(*trans, dp, 0); + if (error) + return error; + error = xfs_trans_roll_inode(trans, dp); break; case cpu_to_be16(XFS_ATTR_LEAF_MAGIC): case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): @@ -291,26 +301,6 @@ xfs_attr3_root_inactive( xfs_trans_brelse(*trans, bp); break; } - if (error) - return error; - - /* - * Invalidate the incore copy of the root block. - */ - error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno, - XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp); - if (error) - return error; - error = bp->b_error; - if (error) { - xfs_trans_brelse(*trans, bp); - return error; - } - xfs_trans_binval(*trans, bp); /* remove from cache */ - /* - * Commit the invalidate and start the next transaction. - */ - error = xfs_trans_roll_inode(trans, dp);
return error; } @@ -329,6 +319,7 @@ xfs_attr_inactive( { struct xfs_trans *trans; struct xfs_mount *mp; + struct xfs_buf *bp; int lock_mode = XFS_ILOCK_SHARED; int error = 0;
@@ -365,10 +356,27 @@ xfs_attr_inactive( * removal below. */ if (dp->i_af.if_nextents > 0) { + /* + * Invalidate and truncate all blocks but leave the root block. + */ error = xfs_attr3_root_inactive(&trans, dp); if (error) goto out_shutdown;
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, + XFS_FSB_TO_B(mp, mp->m_attr_geo->fsbcount)); + if (error) + goto out_cancel; + + /* + * Invalidate and truncate the root block. + */ + error = xfs_da_get_buf(trans, dp, 0, &bp, XFS_ATTR_FORK); + if (error) + goto out_cancel; + + xfs_trans_binval(trans, bp); + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); if (error) goto out_cancel; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 268bbc2d978b..99261829916a 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1500,7 +1500,8 @@ xfs_itruncate_extents_flags( ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); ASSERT(!atomic_read(&VFS_I(ip)->i_count) || xfs_isilocked(ip, XFS_IOLOCK_EXCL)); - ASSERT(new_size <= XFS_ISIZE(ip)); + if (whichfork == XFS_DATA_FORK) + ASSERT(new_size <= XFS_ISIZE(ip)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(ip->i_itemp != NULL); ASSERT(ip->i_itemp->ili_lock_flags == 0);