From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.99 commit 8ce3c27633f1bd0b9c3c466953e98f070ba2bc62
--------------------------------
[ Upstream commit 5085607d209102b37b169bc94d0aa39566a9842a ]
If a bulk layout recall or a metadata server reboot coincides with a umount, then holding a reference to an inode is unsafe unless we also hold a reference to the super block.
Fixes: fd9a8d7160937 ("NFSv4.1: Fix bulk recall and destroy of layouts") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 33 +++++++++++++++++++++++---------- fs/nfs/pnfs.h | 1 + 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 26c1be5900c39..8dd7bffebfbb6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -787,22 +787,35 @@ static int pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct nfs_server *server, struct list_head *layout_list) + __must_hold(&clp->cl_lock) + __must_hold(RCU) { struct pnfs_layout_hdr *lo, *next; struct inode *inode;
list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { - if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) || + !list_empty(&lo->plh_bulk_destroy)) continue; + /* If the sb is being destroyed, just bail */ + if (!nfs_sb_active(server->super)) + break; inode = igrab(lo->plh_inode); - if (inode == NULL) - continue; - list_del_init(&lo->plh_layouts); - if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) - continue; - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); + if (inode != NULL) { + list_del_init(&lo->plh_layouts); + if (pnfs_layout_add_bulk_destroy_list(inode, + layout_list)) + continue; + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + iput(inode); + } else { + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); + } + nfs_sb_deactive(server->super); spin_lock(&clp->cl_lock); rcu_read_lock(); return -EAGAIN; @@ -840,7 +853,7 @@ pnfs_layout_free_bulk_destroy_list(struct list_head *layout_list, /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); - iput(inode); + nfs_iput_and_deactive(inode); } return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 670b1de3b5ebd..80fafa29e567a 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -104,6 +104,7 @@ enum { NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ };
enum layoutdriver_policy_flags {
From: Trond Myklebust trondmy@gmail.com
stable inclusion from linux-4.19.104 commit 056d1656704016990311ecae5a751c91942192c8
--------------------------------
commit 221203ce6406273cf00e5c6397257d986c003ee6 upstream.
Instead of making assumptions about the commit verifier contents, change the commit code to ensure we always check that the verifier was set by the XDR code.
Fixes: f54bcf2ecee9 ("pnfs: Prepare for flexfiles by pulling out common code") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/direct.c | 4 ++-- fs/nfs/nfs3xdr.c | 5 ++++- fs/nfs/nfs4xdr.c | 5 ++++- fs/nfs/pnfs_nfs.c | 7 +++---- fs/nfs/write.c | 4 +++- 5 files changed, 16 insertions(+), 9 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index bb8a984028ae2..e5da9d7fb69e9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -261,10 +261,10 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, data->ds_commit_index);
/* verifier not set so always fail */ - if (verfp->committed < 0) + if (verfp->committed < 0 || data->res.verf->committed <= NFS_UNSTABLE) return 1;
- return nfs_direct_cmp_verf(verfp, &data->verf); + return nfs_direct_cmp_verf(verfp, data->res.verf); }
/** diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9761f97e2c08b..0ed419bb02b0f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2381,6 +2381,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, void *data) { struct nfs_commitres *result = data; + struct nfs_writeverf *verf = result->verf; enum nfs_stat status; int error;
@@ -2393,7 +2394,9 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req, result->op_status = status; if (status != NFS3_OK) goto out_status; - error = decode_writeverf3(xdr, &result->verf->verifier); + error = decode_writeverf3(xdr, &verf->verifier); + if (!error) + verf->committed = NFS_FILE_SYNC; out: return error; out_status: diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 6d3a70b66dd93..6236daec4cc6c 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4443,11 +4443,14 @@ static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifi
static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res) { + struct nfs_writeverf *verf = res->verf; int status;
status = decode_op_hdr(xdr, OP_COMMIT); if (!status) - status = decode_write_verifier(xdr, &res->verf->verifier); + status = decode_write_verifier(xdr, &verf->verifier); + if (!status) + verf->committed = NFS_FILE_SYNC; return status; }
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index d5e4d3cd8c7f1..acfb52bc0007d 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -30,12 +30,11 @@ EXPORT_SYMBOL_GPL(pnfs_generic_rw_release); /* Fake up some data that will cause nfs_commit_release to retry the writes. */ void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data) { - struct nfs_page *first = nfs_list_entry(data->pages.next); + struct nfs_writeverf *verf = data->res.verf;
data->task.tk_status = 0; - memcpy(&data->verf.verifier, &first->wb_verf, - sizeof(data->verf.verifier)); - data->verf.verifier.data[0]++; /* ensure verifier mismatch */ + memset(&verf->verifier, 0, sizeof(verf->verifier)); + verf->committed = NFS_UNSTABLE; } EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 4a5728acd6de5..d419d89b91f7c 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1821,6 +1821,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
static void nfs_commit_release_pages(struct nfs_commit_data *data) { + const struct nfs_writeverf *verf = data->res.verf; struct nfs_page *req; int status = data->task.tk_status; struct nfs_commit_info cinfo; @@ -1847,7 +1848,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Okay, COMMIT succeeded, apparently. Check the verifier * returned by the server against all stored verfs. */ - if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) { + if (verf->committed > NFS_UNSTABLE && + !nfs_write_verifier_cmp(&req->wb_verf, &verf->verifier)) { /* We have a match */ if (req->wb_page) nfs_inode_remove_request(req);
From: Olga Kornievskaia kolga@netapp.com
stable inclusion from linux-4.19.105 commit 32865d65c4d232033f73e75bd9a003233df2b066
--------------------------------
commit cd1b659d8ce7697ee9799b64f887528315b9097b upstream.
Turning caching off for writes on the server should improve performance.
Fixes: fba83f34119a ("NFS: Pass "privileged" value to nfs4_init_sequence()") Signed-off-by: Olga Kornievskaia kolga@netapp.com Reviewed-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 672cef2c7ce71..73228c5e06b88 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5110,7 +5110,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; - nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 1, 0); + nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr); }
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.118 commit 401876dbcf6be94b31a957ccccb8e028e9d3d9cc
--------------------------------
[ Upstream commit d911c57a19551c6bef116a3b55c6b089901aacb0 ]
Make sure to test the stateid for validity so that we catch instances where the server may have been reusing stateids in nfs_layout_find_inode_by_stateid().
Fixes: 7b410d9ce460 ("pNFS: Delay getting the layout header in CB_LAYOUTRECALL handlers") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/callback_proc.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 3159673549540..bcc51f131a496 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -130,6 +130,8 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!pnfs_layout_is_valid(lo)) + continue; if (stateid != NULL && !nfs4_stateid_match_other(stateid, &lo->plh_stateid)) continue;
From: Andreas Gruenbacher agruenba@redhat.com
stable inclusion from linux-4.19.121 commit 7b4e9bfa245fa6b0c149326e1c8abb14cae5e9b8
--------------------------------
commit 7648f939cb919b9d15c21fff8cd9eba908d595dc upstream.
nfs3_set_acl keeps track of the acl it allocated locally to determine if an acl needs to be released at the end. This results in a memory leak when the function allocates an acl as well as a default acl. Fix by releasing acls that differ from the acl originally passed into nfs3_set_acl.
Fixes: b7fa0554cf1b ("[PATCH] NFS: Add support for NFSv3 ACLs") Reported-by: Xiyu Yang xiyuyang19@fudan.edu.cn Signed-off-by: Andreas Gruenbacher agruenba@redhat.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs3acl.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-)
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index c5c3fc6e6c600..26c94b32d6f49 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -253,37 +253,45 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type) { - struct posix_acl *alloc = NULL, *dfacl = NULL; + struct posix_acl *orig = acl, *dfacl = NULL, *alloc; int status;
if (S_ISDIR(inode->i_mode)) { switch(type) { case ACL_TYPE_ACCESS: - alloc = dfacl = get_acl(inode, ACL_TYPE_DEFAULT); + alloc = get_acl(inode, ACL_TYPE_DEFAULT); if (IS_ERR(alloc)) goto fail; + dfacl = alloc; break;
case ACL_TYPE_DEFAULT: - dfacl = acl; - alloc = acl = get_acl(inode, ACL_TYPE_ACCESS); + alloc = get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(alloc)) goto fail; + dfacl = acl; + acl = alloc; break; } }
if (acl == NULL) { - alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + alloc = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); if (IS_ERR(alloc)) goto fail; + acl = alloc; } status = __nfs3_proc_setacls(inode, acl, dfacl); - posix_acl_release(alloc); +out: + if (acl != orig) + posix_acl_release(acl); + if (dfacl != orig) + posix_acl_release(dfacl); return status;
fail: - return PTR_ERR(alloc); + status = PTR_ERR(alloc); + goto out; }
const struct xattr_handler *nfs3_xattr_handlers[] = {
From: Olga Kornievskaia olga.kornievskaia@gmail.com
stable inclusion from linux-4.19.130 commit e80980e5f3b14455c196e414c6d9bf969228eba8
--------------------------------
[ Upstream commit 1c709b766e73e54d64b1dde1b7cfbcf25bcb15b9 ]
Fixes: 02a95dee8cf0 ("NFS add callback_ops to nfs4_proc_bind_conn_to_session_callback") Signed-off-by: Olga Kornievskaia kolga@netapp.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs4proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 73228c5e06b88..5d57c4a5f5785 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7615,7 +7615,7 @@ nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata) }
static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = { - .rpc_call_done = &nfs4_bind_one_conn_to_session_done, + .rpc_call_done = nfs4_bind_one_conn_to_session_done, };
/*
From: Olga Kornievskaia olga.kornievskaia@gmail.com
stable inclusion from linux-4.19.131 commit e66a37c80e8ca9457d5dbd71e7d34091b894cfa1
--------------------------------
commit d03727b248d0dae6199569a8d7b629a681154633 upstream.
Figuring out the root cause for the REMOVE/CLOSE race and suggesting the solution was done by Neil Brown.
Currently what happens is that direct IO calls hold a reference on the open context which is decremented as an asynchronous task in nfs_direct_complete(). Before the reference is decremented, control is returned to the application which is free to close the file. When close is being processed, it decrements its reference on the open_context but since direct IO still holds one, it doesn't send a close on the wire. It returns control to the application which is free to do other operations. For instance, it can delete a file. Direct IO then finally releases its reference, triggering an asynchronous close, which races with the REMOVE. On the server, REMOVE can be processed before the CLOSE, failing the REMOVE with EACCES as the file is still opened.
Signed-off-by: Olga Kornievskaia kolga@netapp.com Suggested-by: Neil Brown neilb@suse.com CC: stable@vger.kernel.org Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/direct.c | 13 +++++++++---- fs/nfs/file.c | 1 + 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index e5da9d7fb69e9..1e883df26d4aa 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -396,8 +396,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) { struct inode *inode = dreq->inode;
- inode_dio_end(inode); - if (dreq->iocb) { long res = (long) dreq->error; if (dreq->count != 0) { @@ -409,7 +407,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
+ igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); }
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) @@ -539,8 +540,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; }
@@ -957,8 +960,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - inode_dio_end(inode); + igrab(inode); nfs_direct_req_release(dreq); + inode_dio_end(inode); + iput(inode); return result < 0 ? result : -EIO; }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index d8f25b1ae2332..f4d69aecb613a 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -82,6 +82,7 @@ nfs_file_release(struct inode *inode, struct file *filp) dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE); + inode_dio_wait(inode); nfs_file_clear_open_context(filp); return 0; }
From: Olga Kornievskaia kolga@netapp.com
stable inclusion from linux-4.19.135 commit 8f0d0b36cec3dc51e978af069574eb1efac04ec9
--------------------------------
commit 65caafd0d2145d1dd02072c4ced540624daeab40 upstream.
Reverting commit d03727b248d0 "NFSv4 fix CLOSE not waiting for direct IO compeletion". This patch made it so that fput(), by calling inode_dio_wait() in nfs_file_release(), would wait uninterruptibly for any outstanding direct IO to the file (but that wait on IO should be killable).
The problem the patch was also trying to address, REMOVE returning ERR_ACCESS because the file is still opened, is supposed to be resolved by the server returning ERR_FILE_OPEN and not ERR_ACCESS.
Signed-off-by: Olga Kornievskaia kolga@netapp.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/direct.c | 13 ++++--------- fs/nfs/file.c | 1 - 2 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1e883df26d4aa..e5da9d7fb69e9 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -396,6 +396,8 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq) { struct inode *inode = dreq->inode;
+ inode_dio_end(inode); + if (dreq->iocb) { long res = (long) dreq->error; if (dreq->count != 0) { @@ -407,10 +409,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
- igrab(inode); nfs_direct_req_release(dreq); - inode_dio_end(inode); - iput(inode); }
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) @@ -540,10 +539,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - igrab(inode); - nfs_direct_req_release(dreq); inode_dio_end(inode); - iput(inode); + nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; }
@@ -960,10 +957,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * generic layer handle the completion. */ if (requested_bytes == 0) { - igrab(inode); - nfs_direct_req_release(dreq); inode_dio_end(inode); - iput(inode); + nfs_direct_req_release(dreq); return result < 0 ? result : -EIO; }
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index f4d69aecb613a..d8f25b1ae2332 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -82,7 +82,6 @@ nfs_file_release(struct inode *inode, struct file *filp) dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE); - inode_dio_wait(inode); nfs_file_clear_open_context(filp); return 0; }
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.140 commit ceefd5f9a60e25a89f6acd8f00bd4d1ecc229b00
--------------------------------
commit ff041727e9e029845857cac41aae118ead5e261b upstream.
If the layout segment is still in use for a read or a write, we should not move it to the layout plh_return_segs list. If we do, we can end up returning the layout while I/O is still in progress.
Fixes: e0b7d420f72a ("pNFS: Don't discard layout segments that are marked for return") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8dd7bffebfbb6..90f379493b080 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2327,16 +2327,6 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) return ERR_PTR(-EAGAIN); }
-static int -mark_lseg_invalid_or_return(struct pnfs_layout_segment *lseg, - struct list_head *tmp_list) -{ - if (!mark_lseg_invalid(lseg, tmp_list)) - return 0; - pnfs_cache_lseg_for_layoutreturn(lseg->pls_layout, lseg); - return 1; -} - /** * pnfs_mark_matching_lsegs_return - Free or return matching layout segments * @lo: pointer to layout header @@ -2373,7 +2363,7 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo, lseg, lseg->pls_range.iomode, lseg->pls_range.offset, lseg->pls_range.length); - if (mark_lseg_invalid_or_return(lseg, tmp_list)) + if (mark_lseg_invalid(lseg, tmp_list)) continue; remaining++; set_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags);
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.140 commit 5052b997592af482f29c5441b8bc39831015818c
--------------------------------
commit d474f96104bd4377573526ebae2ee212205a6839 upstream.
If the NFS_LAYOUT_RETURN_REQUESTED flag is set, we want to return the layout as soon as possible, meaning that the affected layout segments should be marked as invalid, and should no longer be in use for I/O.
Fixes: f0b429819b5f ("pNFS: Ignore non-recalled layouts in pnfs_layout_need_return()") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 90f379493b080..46ca5592b8b0d 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1210,31 +1210,27 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, const nfs4_stateid *stateid, return status; }
+static bool +pnfs_layout_segments_returnable(struct pnfs_layout_hdr *lo, + enum pnfs_iomode iomode, + u32 seq) +{ + struct pnfs_layout_range recall_range = { + .length = NFS4_MAX_UINT64, + .iomode = iomode, + }; + return pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &recall_range, seq) != -EBUSY; +} + /* Return true if layoutreturn is needed */ static bool pnfs_layout_need_return(struct pnfs_layout_hdr *lo) { - struct pnfs_layout_segment *s; - enum pnfs_iomode iomode; - u32 seq; - if (!test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags)) return false; - - seq = lo->plh_return_seq; - iomode = lo->plh_return_iomode; - - /* Defer layoutreturn until all recalled lsegs are done */ - list_for_each_entry(s, &lo->plh_segs, pls_list) { - if (seq && pnfs_seqid_is_newer(s->pls_seq, seq)) - continue; - if (iomode != IOMODE_ANY && s->pls_range.iomode != iomode) - continue; - if (test_bit(NFS_LSEG_LAYOUTRETURN, &s->pls_flags)) - return false; - } - - return true; + return pnfs_layout_segments_returnable(lo, lo->plh_return_iomode, + lo->plh_return_seq); }
static void pnfs_layoutreturn_before_put_layout_hdr(struct pnfs_layout_hdr *lo)
From: Olga Kornievskaia kolga@netapp.com
stable inclusion from linux-4.19.147 commit c642341b09f367b5d85c97e30e45afc00241f1f9
--------------------------------
[ Upstream commit 3d7a9520f0c3e6a68b6de8c5812fc8b6d7a52626 ]
A client should be able to handle getting an ERR_DELAY error while doing a LOCK call to reclaim state due to delegation being recalled. This is a transient error that can happen due to server moving its volumes and invalidating its file location cache and upon reference to it during the LOCK call needing to do an expensive lookup (leading to an ERR_DELAY error on a PUTFH).
Signed-off-by: Olga Kornievskaia kolga@netapp.com Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs4proc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5d57c4a5f5785..901c9e2b70733 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6999,7 +6999,12 @@ int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, err = nfs4_set_lock_state(state, fl); if (err != 0) return err; - err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + do { + err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW); + if (err != -NFS4ERR_DELAY) + break; + ssleep(1); + } while (err == -NFS4ERR_DELAY); return nfs4_handle_delegation_recall_error(server, state, stateid, fl, err); }
From: Ashish Sangwan ashishsangwan2@gmail.com
stable inclusion from linux-4.19.155 commit dd7992a5f7448ba33892151784c26b93fe005cfe
--------------------------------
commit 247db73560bc3e5aef6db50c443c3c0db115bc93 upstream.
We are generating incorrect path in case of rename retry because we are restarting from wrong dentry. We should restart from the dentry which was received in the call to nfs_path.
CC: stable@vger.kernel.org Signed-off-by: Ashish Sangwan ashishsangwan2@gmail.com Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/namespace.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e5686be67be8d..d57d453aecc2d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -30,9 +30,9 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; /* * nfs_path - reconstruct the path given an arbitrary dentry * @base - used to return pointer to the end of devname part of path - * @dentry - pointer to dentry + * @dentry_in - pointer to dentry * @buffer - result buffer - * @buflen - length of buffer + * @buflen_in - length of buffer * @flags - options (see below) * * Helper function for constructing the server pathname @@ -47,15 +47,19 @@ int nfs_mountpoint_expiry_timeout = 500 * HZ; * the original device (export) name * (if unset, the original name is returned verbatim) */ -char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen, - unsigned flags) +char *nfs_path(char **p, struct dentry *dentry_in, char *buffer, + ssize_t buflen_in, unsigned flags) { char *end; int namelen; unsigned seq; const char *base; + struct dentry *dentry; + ssize_t buflen;
rename_retry: + buflen = buflen_in; + dentry = dentry_in; end = buffer+buflen; *--end = '\0'; buflen--;
From: Olga Kornievskaia kolga@netapp.com
stable inclusion from linux-4.19.155 commit 3c040b924988be85e99819dd48c1f3130ae683cc
--------------------------------
commit 8c39076c276be0b31982e44654e2c2357473258a upstream.
RFC 7862 introduced a new flag that either client or server is allowed to set: EXCHGID4_FLAG_SUPP_FENCE_OPS.
Client needs to update its bitmask to allow for this flag value.
v2: changed minor version argument to unsigned int
Signed-off-by: Olga Kornievskaia kolga@netapp.com CC: stable@vger.kernel.org Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs4proc.c | 9 ++++++--- include/uapi/linux/nfs4.h | 3 +++ 2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 901c9e2b70733..e79c38e542412 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7591,9 +7591,11 @@ int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, * both PNFS and NON_PNFS flags set, and not having one of NON_PNFS, PNFS, or * DS flags set. */ -static int nfs4_check_cl_exchange_flags(u32 flags) +static int nfs4_check_cl_exchange_flags(u32 flags, u32 version) { - if (flags & ~EXCHGID4_FLAG_MASK_R) + if (version >= 2 && (flags & ~EXCHGID4_2_FLAG_MASK_R)) + goto out_inval; + else if (version < 2 && (flags & ~EXCHGID4_FLAG_MASK_R)) goto out_inval; if ((flags & EXCHGID4_FLAG_USE_PNFS_MDS) && (flags & EXCHGID4_FLAG_USE_NON_PNFS)) @@ -7988,7 +7990,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, if (status != 0) goto out;
- status = nfs4_check_cl_exchange_flags(resp->flags); + status = nfs4_check_cl_exchange_flags(resp->flags, + clp->cl_mvops->minor_version); if (status != 0) goto out;
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h index 8572930cf5b00..54a78529c8b38 100644 --- a/include/uapi/linux/nfs4.h +++ b/include/uapi/linux/nfs4.h @@ -136,6 +136,8 @@
#define EXCHGID4_FLAG_UPD_CONFIRMED_REC_A 0x40000000 #define EXCHGID4_FLAG_CONFIRMED_R 0x80000000 + +#define EXCHGID4_FLAG_SUPP_FENCE_OPS 0x00000004 /* * Since the validity of these bits depends on whether * they're set in the argument or response, have separate @@ -143,6 +145,7 @@ */ #define EXCHGID4_FLAG_MASK_A 0x40070103 #define EXCHGID4_FLAG_MASK_R 0x80070103 +#define EXCHGID4_2_FLAG_MASK_R 0x80070107
#define SEQ4_STATUS_CB_PATH_DOWN 0x00000001 #define SEQ4_STATUS_CB_GSS_CONTEXTS_EXPIRING 0x00000002
From: Olga Kornievskaia kolga@netapp.com
stable inclusion from linux-4.19.164 commit 2cdde54b6099025db7e870d81ff6e4bb8c98e950
--------------------------------
[ Upstream commit 05ad917561fca39a03338cb21fe9622f998b0f9c ]
Currently, the client will always ask for security_labels if the server returns that it supports that feature, regardless of whether any LSM modules (such as SELinux) are enforcing a security policy. This adds a performance penalty to the READDIR operation.
The client adjusts the superblock's support for security_label based on the server's support and also on the client's current LSM module configuration. Thus, prior to using the default bitmask in READDIR, this patch checks the server's capabilities and, when NFS_CAP_SECURITY_LABEL is not set, instructs READDIR to remove FATTR4_WORD2_SECURITY_LABEL from the bitmask.
v5: fixing silly mistakes of the rushed v4
v4: simplifying logic
v3: changing label's initialization per Ondrej's comment
v2: dropping selinux hook and using the sb cap.
Suggested-by: Ondrej Mosnacek omosnace@redhat.com Suggested-by: Scott Mayhew smayhew@redhat.com Signed-off-by: Olga Kornievskaia kolga@netapp.com Fixes: 2b0143b5c986 ("VFS: normal filesystems (and lustre): d_inode() annotations") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/nfs4proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index e79c38e542412..d6abca23406e5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4678,12 +4678,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), .pages = pages, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask, .plus = plus, }; struct nfs4_readdir_res res; @@ -4698,9 +4698,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, dentry, (unsigned long long)cookie); + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + args.bitmask = server->attr_bitmask_nl; + else + args.bitmask = server->attr_bitmask; + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); if (status >= 0) { memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase;
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.173 commit 8a9ccffb3c1a6ecbfa2c811fe329f8220696dbc0
--------------------------------
[ Upstream commit 814b84971388cd5fb182f2e914265b3827758455 ]
If the server returns a new stateid that does not match the one in our cache, then pnfs_layout_process() will leak the layout segments returned by pnfs_mark_layout_stateid_invalid().
Fixes: 9888d837f3cf ("pNFS: Force a retry of LAYOUTGET if the stateid doesn't match our cache") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 46ca5592b8b0d..4b165aa5a2561 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2320,6 +2320,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) spin_unlock(&ino->i_lock); lseg->pls_layout = lo; NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); + pnfs_free_lseg_list(&free_me); return ERR_PTR(-EAGAIN); }
From: Trond Myklebust trond.myklebust@hammerspace.com
stable inclusion from linux-4.19.176 commit 682821d905f77005b1b85684608dfc75e75422a9
--------------------------------
[ Upstream commit 08bd8dbe88825760e953759d7ec212903a026c75 ]
If the server returns a new stateid that does not match the one in our cache, then try to return the one we hold instead of just invalidating it on the client side. This ensures that both client and server will agree that the stateid is invalid.
Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4b165aa5a2561..55965e8e9a2ed 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2301,7 +2301,13 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) * We got an entirely new state ID. Mark all segments for the * inode invalid, and retry the layoutget */ - pnfs_mark_layout_stateid_invalid(lo, &free_me); + struct pnfs_layout_range range = { + .iomode = IOMODE_ANY, + .length = NFS4_MAX_UINT64, + }; + pnfs_set_plh_return_info(lo, IOMODE_ANY, 0); + pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs, + &range, 0); goto out_forget; }
From: Timo Rothenpieler timo@rothenpieler.org
stable inclusion from linux-4.19.184 commit 208142a84d57b11f8da9cbf710a54a205277eea7
--------------------------------
[ Upstream commit a0590473c5e6c4ef17c3132ad08fbad170f72d55 ]
This follows what was done in 8c2fabc6542d9d0f8b16bd1045c2eda59bdcde13. With the default being m, it's impossible to build the module into the kernel.
Signed-off-by: Timo Rothenpieler timo@rothenpieler.org Signed-off-by: Anna Schumaker Anna.Schumaker@Netapp.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com Reviewed-by: Hou Tao houtao1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index ac3e06367cb68..e55f86713948b 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -127,7 +127,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate depends on NFS_V4_1 && NFS_V3 - default m + default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain"