From: Dave Chinner dchinner@redhat.com
mainline inclusion from mainline-v6.4-rc6 commit b742d7b4f0e03df25c2a772adcded35044b625ca category: bugfix bugzilla: 188883, https://gitee.com/openeuler/kernel/issues/I76JSK CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Btrees that aren't freespace management trees use the normal extent allocation and freeing routines for their blocks. Hence when a btree block is freed, a direct call to xfs_free_extent() is made and the extent is immediately freed. This puts the entire free space management btrees under this path, so we are stacking btrees on btrees in the call stack. The inobt, finobt and refcount btrees all do this.
However, the bmap btree does not do this - it calls xfs_free_extent_later() to defer the extent free operation via an XEFI and hence it gets processed in deferred operation processing during the commit of the primary transaction (i.e. via intent chaining).
We need to change xfs_free_extent() to behave in a non-blocking manner so that we can avoid deadlocks with busy extents near ENOSPC in transactions that free multiple extents. Inserting or removing a record from a btree can cause a multi-level tree merge operation and that will free multiple blocks from the btree in a single transaction. i.e. we can call xfs_free_extent() multiple times, and hence the btree manipulation transaction is vulnerable to this busy extent deadlock vector.
To fix this, convert all the remaining callers of xfs_free_extent() to use xfs_free_extent_later() to queue XEFIs and hence defer processing of the extent frees to a context that can be safely restarted if a deadlock condition is detected.
Signed-off-by: Dave Chinner dchinner@redhat.com Reviewed-by: Darrick J. Wong djwong@kernel.org Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Chandan Babu R chandan.babu@oracle.com
Conflicts: fs/xfs/libxfs/xfs_ag.c fs/xfs/libxfs/xfs_alloc.c fs/xfs/libxfs/xfs_ialloc_btree.c fs/xfs/libxfs/xfs_refcount_btree.c fs/xfs/xfs_extfree_item.c Signed-off-by: Long Li leo.lilong@huawei.com --- fs/xfs/libxfs/xfs_alloc.c | 4 ++++ fs/xfs/libxfs/xfs_alloc.h | 8 +++++--- fs/xfs/libxfs/xfs_bmap.c | 8 +++++--- fs/xfs/libxfs/xfs_bmap_btree.c | 3 ++- fs/xfs/libxfs/xfs_ialloc.c | 8 ++++---- fs/xfs/libxfs/xfs_ialloc_btree.c | 6 ++++-- fs/xfs/libxfs/xfs_refcount.c | 9 ++++++--- fs/xfs/libxfs/xfs_refcount_btree.c | 9 ++------- fs/xfs/xfs_extfree_item.c | 3 ++- fs/xfs/xfs_reflink.c | 3 ++- 10 files changed, 36 insertions(+), 25 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index ebfaf5f48010..2e7d442c117b 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2476,6 +2476,7 @@ xfs_defer_agfl_block( new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); new->xefi_blockcount = 1; new->xefi_owner = oinfo->oi_owner; + new->xefi_agresv = XFS_AG_RESV_AGFL;
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, new->xefi_startblock))) return -EFSCORRUPTED; @@ -2496,6 +2497,7 @@ __xfs_free_extent_later( xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type, bool skip_discard) { struct xfs_extent_free_item *new; /* new element */ @@ -2516,6 +2518,7 @@ __xfs_free_extent_later( ASSERT(agbno + len <= mp->m_sb.sb_agblocks); #endif ASSERT(xfs_extfree_item_cache != NULL); + ASSERT(type != XFS_AG_RESV_AGFL);
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len))) return -EFSCORRUPTED; @@ -2524,6 +2527,7 @@ __xfs_free_extent_later( GFP_KERNEL | __GFP_NOFAIL); new->xefi_startblock = bno; new->xefi_blockcount = (xfs_extlen_t)len; + new->xefi_agresv = type; if (skip_discard) new->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 0fd02e889216..cfacb88e22d3 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -248,7 +248,7 @@ xfs_buf_to_agfl_bno(
int __xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, const struct xfs_owner_info *oinfo, - bool skip_discard); + enum xfs_ag_resv_type type, bool skip_discard);
/* * List of extents to be free "later". @@ -260,6 +260,7 @@ struct xfs_extent_free_item { xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ unsigned int xefi_flags; + enum xfs_ag_resv_type xefi_agresv; };
#define XFS_EFI_SKIP_DISCARD (1U << 0) /* don't issue discard */ @@ -271,9 +272,10 @@ xfs_free_extent_later( struct xfs_trans *tp, xfs_fsblock_t bno, xfs_filblks_t len, - const struct xfs_owner_info *oinfo) + const struct xfs_owner_info *oinfo, + enum xfs_ag_resv_type type) { - return __xfs_free_extent_later(tp, bno, len, oinfo, false); + return __xfs_free_extent_later(tp, bno, len, oinfo, type, false); }
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 3eb1d31df6e1..c5d784daed8b 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -572,7 +572,8 @@ xfs_bmap_btree_to_extents( return error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); - error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, cbno, 1, &oinfo, + XFS_AG_RESV_NONE); if (error) return error;
@@ -5194,8 +5195,9 @@ xfs_bmap_del_extent_real( } else { error = __xfs_free_extent_later(tp, del->br_startblock, del->br_blockcount, NULL, - (bflags & XFS_BMAPI_NODISCARD) || - del->br_state == XFS_EXT_UNWRITTEN); + XFS_AG_RESV_NONE, + ((bflags & XFS_BMAPI_NODISCARD) || + del->br_state == XFS_EXT_UNWRITTEN)); if (error) goto done; } diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 021afb43c56b..4118c4c1443a 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -287,7 +287,8 @@ xfs_bmbt_free_block( int error;
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork); - error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo); + error = xfs_free_extent_later(cur->bc_tp, fsbno, 1, &oinfo, + XFS_AG_RESV_NONE); if (error) return error; ip->i_d.di_nblocks--; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index d26f966b0901..1861fc71f028 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1870,8 +1870,8 @@ xfs_difree_inode_chunk( /* not sparse, calculate extent info directly */ return xfs_free_extent_later(tp, XFS_AGB_TO_FSB(mp, agno, sagbno), - M_IGEO(mp)->ialloc_blks, - &XFS_RMAP_OINFO_INODES); + M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES, + XFS_AG_RESV_NONE); }
/* holemask is only 16-bits (fits in an unsigned long) */ @@ -1916,8 +1916,8 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); error = xfs_free_extent_later(tp, - XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk, &XFS_RMAP_OINFO_INODES); + XFS_AGB_TO_FSB(mp, agno, agbno), contigblk, + &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE); if (error) return error;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 0733270062f6..5150010ca4c6 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -154,9 +154,11 @@ __xfs_inobt_free_block( struct xfs_buf *bp, enum xfs_ag_resv_type resv) { + xfs_fsblock_t fsbno; + xfs_inobt_mod_blockcount(cur, -1); - return xfs_free_extent(cur->bc_tp, - XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)), 1, + fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp)); + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_INOBT, resv); }
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 0e1a9f047b12..4191548b4736 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -975,7 +975,8 @@ xfs_refcount_adjust_extents( cur->bc_ag.agno, tmp.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, - tmp.rc_blockcount, NULL); + tmp.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_error; } @@ -1022,7 +1023,8 @@ xfs_refcount_adjust_extents( cur->bc_ag.agno, ext.rc_startblock); error = xfs_free_extent_later(cur->bc_tp, fsbno, - ext.rc_blockcount, NULL); + ext.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_error; } @@ -1751,7 +1753,8 @@ xfs_refcount_recover_cow_leftovers(
/* Free the block. */ error = xfs_free_extent_later(tp, fsb, - rr->rr_rrec.rc_blockcount, NULL); + rr->rr_rrec.rc_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) goto out_trans;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index 59b1fee4f680..3e0d82f1802b 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -103,18 +103,13 @@ xfs_refcountbt_free_block( struct xfs_buf *agbp = cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp)); - int error;
trace_xfs_refcountbt_free_block(cur->bc_mp, cur->bc_ag.agno, XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno), 1); be32_add_cpu(&agf->agf_refcount_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_REFCOUNT_BLOCKS); - error = xfs_free_extent(cur->bc_tp, fsbno, 1, &XFS_RMAP_OINFO_REFC, - XFS_AG_RESV_METADATA); - if (error) - return error; - - return error; + return xfs_free_extent_later(cur->bc_tp, fsbno, 1, + &XFS_RMAP_OINFO_REFC, XFS_AG_RESV_METADATA); }
STATIC int diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 100a21fe0527..c9c648a618d6 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -383,7 +383,7 @@ xfs_trans_free_extent( free->xefi_blockcount);
error = __xfs_free_extent(tp, free->xefi_startblock, - free->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, + free->xefi_blockcount, &oinfo, free->xefi_agresv, free->xefi_flags & XFS_EFI_SKIP_DISCARD); /* * Mark the transaction dirty, even on error. This ensures the @@ -631,6 +631,7 @@ xfs_efi_item_recover( for (i = 0; i < efip->efi_format.efi_nextents; i++) { struct xfs_extent_free_item fake = { .xefi_owner = XFS_RMAP_OWN_UNKNOWN, + .xefi_agresv = XFS_AG_RESV_NONE, };
extp = &efip->efi_format.efi_extents[i]; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e20aa87fed21..551cc92b049d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -485,7 +485,8 @@ xfs_reflink_cancel_cow_blocks( del.br_blockcount);
error = xfs_free_extent_later(*tpp, del.br_startblock, - del.br_blockcount, NULL); + del.br_blockcount, NULL, + XFS_AG_RESV_NONE); if (error) break;