From: Dave Chinner dchinner@redhat.com
stable inclusion from stable-v5.10.129 commit b261cd005ab980c4018634a849f77e036bfd4f80 category: bugfix bugzilla: 188251,https://gitee.com/openeuler/kernel/issues/I5YNDQ CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 756b1c343333a5aefcc26b0409f3fd16f72281bf upstream.
Because the iomap code using PF_MEMALLOC_NOFS to detect transaction recursion in XFS is just wrong. Remove it from the iomap code and replace it with XFS specific internal checks using current->journal_info instead.
[djwong: This change also realigns the lifetime of NOFS flag changes to match the incore transaction, instead of the inconsistent scheme we have now.]
Fixes: 9070733b4efa ("xfs: abstract PF_FSTRANS to PF_MEMALLOC_NOFS") Signed-off-by: Dave Chinner dchinner@redhat.com Reviewed-by: Darrick J. Wong djwong@kernel.org Signed-off-by: Darrick J. Wong djwong@kernel.org Reviewed-by: Christoph Hellwig hch@lst.de Signed-off-by: Amir Goldstein amir73il@gmail.com Acked-by: Darrick J. Wong djwong@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org
Conflicts: fs/xfs/xfs_aops.c fs/xfs/xfs_trans.c fs/xfs/xfs_trans.h
Signed-off-by: yangerkun yangerkun@huawei.com Reviewed-by: Zhang Yi yi.zhang@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- fs/iomap/buffered-io.c | 7 ------- fs/xfs/libxfs/xfs_btree.c | 12 ++++++++++-- fs/xfs/xfs_aops.c | 15 ++++++++++++++- fs/xfs/xfs_trans.c | 20 +++++--------------- fs/xfs/xfs_trans.h | 30 ++++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 25 deletions(-)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index fbc52a65af99..d9b71c2f20f1 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -1455,13 +1455,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data) PF_MEMALLOC)) goto redirty;
- /* - * Given that we do not allow direct reclaim to call us, we should - * never be called in a recursive filesystem reclaim context. - */ - if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) - goto redirty; - /* * Is this page beyond the end of the file? * diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 98c82f4935e1..24c7d30e41df 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2811,7 +2811,7 @@ xfs_btree_split_worker( struct xfs_btree_split_args *args = container_of(work, struct xfs_btree_split_args, work); unsigned long pflags; - unsigned long new_pflags = PF_MEMALLOC_NOFS; + unsigned long new_pflags = 0;
/* * we are in a transaction context here, but may also be doing work @@ -2823,12 +2823,20 @@ xfs_btree_split_worker( new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
current_set_flags_nested(&pflags, new_pflags); + xfs_trans_set_context(args->cur->bc_tp);
args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, args->key, args->curp, args->stat); - complete(args->done);
+ xfs_trans_clear_context(args->cur->bc_tp); current_restore_flags_nested(&pflags, new_pflags); + + /* + * Do not access args after complete() has run here. We don't own args + * and the owner may run and free args before we return here. + */ + complete(args->done); + }
/* diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 7aa67d95c578..e341d6531e68 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -98,7 +98,7 @@ xfs_setfilesize_ioend( * thus we need to mark ourselves as being in a transaction manually. * Similarly for freeze protection. */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + xfs_trans_set_context(tp); __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
/* we abort the update if there was an IO error */ @@ -538,6 +538,12 @@ xfs_vm_writepage( { struct xfs_writepage_ctx wpc = { };
+ if (WARN_ON_ONCE(current->journal_info)) { + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return 0; + } + return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); }
@@ -548,6 +554,13 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { };
+ /* + * Writing back data in a transaction context can result in recursive + * transactions. This is bad, so issue a warning and get out of here. + */ + if (WARN_ON_ONCE(current->journal_info)) + return 0; + xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index e8a9967e7194..8836bb02d82d 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -72,6 +72,7 @@ xfs_trans_free( xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_); + xfs_trans_clear_context(tp); if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT)) sb_end_intwrite(tp->t_mountp->m_super); xfs_trans_free_dqinfo(tp); @@ -123,7 +124,8 @@ xfs_trans_dup(
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; tp->t_rtx_res = tp->t_rtx_res_used; - ntp->t_pflags = tp->t_pflags; + + xfs_trans_switch_context(tp, ntp);
/* move deferred ops over to the new tp */ xfs_defer_move(ntp, tp); @@ -157,9 +159,6 @@ xfs_trans_reserve( int error = 0; bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
- /* Mark this thread as being in a transaction */ - current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - /* * Attempt to reserve the needed disk blocks by decrementing * the number needed from the number available. This will @@ -167,10 +166,8 @@ xfs_trans_reserve( */ if (blocks > 0) { error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd); - if (error != 0) { - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); + if (error != 0) return -ENOSPC; - } tp->t_blk_res += blocks; }
@@ -244,9 +241,6 @@ xfs_trans_reserve( xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd); tp->t_blk_res = 0; } - - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - return error; }
@@ -272,6 +266,7 @@ xfs_trans_alloc( tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL); if (!(flags & XFS_TRANS_NO_WRITECOUNT)) sb_start_intwrite(mp->m_super); + xfs_trans_set_context(tp);
/* * Zero-reservation ("empty") transactions can't modify anything, so @@ -893,7 +888,6 @@ __xfs_trans_commit(
xlog_cil_commit(mp->m_log, tp, &commit_seq, regrant);
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free(tp);
/* @@ -925,7 +919,6 @@ __xfs_trans_commit( xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket); tp->t_ticket = NULL; } - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); xfs_trans_free_items(tp, !!error); xfs_trans_free(tp);
@@ -985,9 +978,6 @@ xfs_trans_cancel( tp->t_ticket = NULL; }
- /* mark this thread as no longer being in a transaction */ - current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); - xfs_trans_free_items(tp, dirty); xfs_trans_free(tp); } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index f95566fe981b..50da47f23a07 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -266,4 +266,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp, struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force, struct xfs_trans **tpp);
+static inline void +xfs_trans_set_context( + struct xfs_trans *tp) +{ + ASSERT(current->journal_info == NULL); + tp->t_pflags = memalloc_nofs_save(); + current->journal_info = tp; +} + +static inline void +xfs_trans_clear_context( + struct xfs_trans *tp) +{ + if (current->journal_info == tp) { + memalloc_nofs_restore(tp->t_pflags); + current->journal_info = NULL; + } +} + +static inline void +xfs_trans_switch_context( + struct xfs_trans *old_tp, + struct xfs_trans *new_tp) +{ + ASSERT(current->journal_info == old_tp); + new_tp->t_pflags = old_tp->t_pflags; + old_tp->t_pflags = 0; + current->journal_info = new_tp; +} + #endif /* __XFS_TRANS_H__ */