[PATCH OLK-6.6] pNFS: Fix a deadlock when returning a delegation during open()
From: Trond Myklebust <trond.myklebust@hammerspace.com> mainline inclusion from mainline-v6.19-rc6 commit 857bf9056291a16785ae3be1d291026b2437fc48 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/13583 CVE: CVE-2026-23050 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Ben Coddington reports seeing a hang in the following stack trace: 0 [ffffd0b50e1774e0] __schedule at ffffffff9ca05415 1 [ffffd0b50e177548] schedule at ffffffff9ca05717 2 [ffffd0b50e177558] bit_wait at ffffffff9ca061e1 3 [ffffd0b50e177568] __wait_on_bit at ffffffff9ca05cfb 4 [ffffd0b50e1775c8] out_of_line_wait_on_bit at ffffffff9ca05ea5 5 [ffffd0b50e177618] pnfs_roc at ffffffffc154207b [nfsv4] 6 [ffffd0b50e1776b8] _nfs4_proc_delegreturn at ffffffffc1506586 [nfsv4] 7 [ffffd0b50e177788] nfs4_proc_delegreturn at ffffffffc1507480 [nfsv4] 8 [ffffd0b50e1777f8] nfs_do_return_delegation at ffffffffc1523e41 [nfsv4] 9 [ffffd0b50e177838] nfs_inode_set_delegation at ffffffffc1524a75 [nfsv4] 10 [ffffd0b50e177888] nfs4_process_delegation at ffffffffc14f41dd [nfsv4] 11 [ffffd0b50e1778a0] _nfs4_opendata_to_nfs4_state at ffffffffc1503edf [nfsv4] 12 [ffffd0b50e1778c0] _nfs4_open_and_get_state at ffffffffc1504e56 [nfsv4] 13 [ffffd0b50e177978] _nfs4_do_open at ffffffffc15051b8 [nfsv4] 14 [ffffd0b50e1779f8] nfs4_do_open at ffffffffc150559c [nfsv4] 15 [ffffd0b50e177a80] nfs4_atomic_open at ffffffffc15057fb [nfsv4] 16 [ffffd0b50e177ad0] nfs4_file_open at ffffffffc15219be [nfsv4] 17 [ffffd0b50e177b78] do_dentry_open at ffffffff9c09e6ea 18 [ffffd0b50e177ba8] vfs_open at ffffffff9c0a082e 19 [ffffd0b50e177bd0] dentry_open at ffffffff9c0a0935 The issue is that the delegreturn is being asked to wait for a layout return that cannot complete because a state recovery was initiated. The state recovery cannot complete until the open() finishes processing the delegations it was given. The solution is to propagate the existing flags that indicate a non-blocking call to the function pnfs_roc(), so that it knows not to wait in this situation. Reported-by: Benjamin Coddington <bcodding@hammerspace.com> Fixes: 29ade5db1293 ("pNFS: Wait on outstanding layoutreturns to complete in pnfs_roc()") Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com> Conflicts: fs/nfs/pnfs.c [Commit 5468fc8298a9 ("NFSv4/pNFS: Do layout state recovery upon reboot") add definition of pnfs_layout_return_on_reboot() before pnfs_roc().] Signed-off-by: Li Lingfeng <lilingfeng3@huawei.com> --- fs/nfs/nfs4proc.c | 6 ++--- fs/nfs/pnfs.c | 58 +++++++++++++++++++++++++++++++++-------------- fs/nfs/pnfs.h | 17 ++++++-------- 3 files changed, 51 insertions(+), 30 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4410e2ad8f39..54ce1f41eb53 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3783,8 +3783,8 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait) calldata->res.seqid = calldata->arg.seqid; calldata->res.server = server; calldata->res.lr_ret = -NFS4ERR_NOMATCHING_LAYOUT; - calldata->lr.roc = pnfs_roc(state->inode, - &calldata->lr.arg, &calldata->lr.res, msg.rpc_cred); + calldata->lr.roc = pnfs_roc(state->inode, &calldata->lr.arg, + &calldata->lr.res, msg.rpc_cred, wait); if (calldata->lr.roc) { calldata->arg.lr_args = &calldata->lr.arg; calldata->res.lr_res = &calldata->lr.res; @@ -6729,7 +6729,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, data->inode = nfs_igrab_and_active(inode); if (data->inode || issync) { data->lr.roc = pnfs_roc(inode, &data->lr.arg, &data->lr.res, - cred); + cred, issync); if (data->lr.roc) { data->args.lr_args = &data->lr.arg; data->res.lr_res = &data->lr.res; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0737d9a15d86..6dbfae295f76 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1426,10 +1426,9 @@ pnfs_commit_and_return_layout(struct inode *inode) return ret; } -bool pnfs_roc(struct inode *ino, - struct nfs4_layoutreturn_args *args, - struct nfs4_layoutreturn_res *res, - const struct cred *cred) +bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, const struct cred *cred, + bool sync) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_open_context *ctx; @@ -1440,7 +1439,7 @@ bool pnfs_roc(struct inode *ino, nfs4_stateid stateid; enum pnfs_iomode iomode = 0; bool layoutreturn = false, roc = false; - bool skip_read = false; + bool skip_read; if (!nfs_have_layout(ino)) return false; @@ -1453,20 +1452,14 @@ bool pnfs_roc(struct inode *ino, lo = NULL; goto out_noroc; } - pnfs_get_layout_hdr(lo); - if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { - spin_unlock(&ino->i_lock); - rcu_read_unlock(); - wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, - TASK_UNINTERRUPTIBLE); - pnfs_put_layout_hdr(lo); - goto retry; - } /* no roc if we hold a delegation */ + skip_read = false; if (nfs4_check_delegation(ino, FMODE_READ)) { - if (nfs4_check_delegation(ino, FMODE_WRITE)) + if (nfs4_check_delegation(ino, FMODE_WRITE)) { + lo = NULL; goto out_noroc; + } skip_read = true; } @@ -1475,12 +1468,43 @@ bool pnfs_roc(struct inode *ino, if (state == NULL) continue; /* Don't return layout if there is open file state */ - if (state->state & FMODE_WRITE) + if (state->state & FMODE_WRITE) { + lo = NULL; goto out_noroc; + } if (state->state & FMODE_READ) skip_read = true; } + if (skip_read) { + bool writes = false; + + list_for_each_entry(lseg, &lo->plh_segs, pls_list) { + if (lseg->pls_range.iomode != IOMODE_READ) { + writes = true; + break; + } + } + if (!writes) { + lo = NULL; + goto out_noroc; + } + } + + pnfs_get_layout_hdr(lo); + if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) { + if (!sync) { + pnfs_set_plh_return_info( + lo, skip_read ? IOMODE_RW : IOMODE_ANY, 0); + goto out_noroc; + } + spin_unlock(&ino->i_lock); + rcu_read_unlock(); + wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN, + TASK_UNINTERRUPTIBLE); + pnfs_put_layout_hdr(lo); + goto retry; + } list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) { if (skip_read && lseg->pls_range.iomode == IOMODE_READ) @@ -1520,7 +1544,7 @@ bool pnfs_roc(struct inode *ino, out_noroc: spin_unlock(&ino->i_lock); rcu_read_unlock(); - pnfs_layoutcommit_inode(ino, true); + pnfs_layoutcommit_inode(ino, sync); if (roc) { struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; if (ld->prepare_layoutreturn) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 79996d7dad0f..a613466f6f22 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -295,10 +295,9 @@ int pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, u32 seq); int pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo, struct list_head *lseg_list); -bool pnfs_roc(struct inode *ino, - struct nfs4_layoutreturn_args *args, - struct nfs4_layoutreturn_res *res, - const struct cred *cred); +bool pnfs_roc(struct inode *ino, struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, const struct cred *cred, + bool sync); int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp, struct nfs4_layoutreturn_res **respp, int *ret); void pnfs_roc_release(struct nfs4_layoutreturn_args *args, @@ -769,12 +768,10 @@ pnfs_layoutcommit_outstanding(struct inode *inode) return false; } - -static inline bool -pnfs_roc(struct inode *ino, - struct nfs4_layoutreturn_args *args, - struct nfs4_layoutreturn_res *res, - const struct cred *cred) +static inline bool pnfs_roc(struct inode *ino, + struct nfs4_layoutreturn_args *args, + struct nfs4_layoutreturn_res *res, + const struct cred *cred, bool sync) { return false; } -- 2.52.0
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/20662 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/NHX... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/20662 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/NHX...
participants (2)
-
Li Lingfeng -
patchwork bot