From: Trond Myklebust trond.myklebust@hammerspace.com
[ Upstream commit 5085607d209102b37b169bc94d0aa39566a9842a ]
If a bulk layout recall or a metadata server reboot coincides with a umount, then holding a reference to an inode is unsafe unless we also hold a reference to the super block.
Fixes: fd9a8d7160937 ("NFSv4.1: Fix bulk recall and destroy of layouts") Signed-off-by: Trond Myklebust trond.myklebust@hammerspace.com Signed-off-by: Sasha Levin sashal@kernel.org Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- fs/nfs/pnfs.c | 33 +++++++++++++++++++++++---------- fs/nfs/pnfs.h | 1 + 2 files changed, 24 insertions(+), 10 deletions(-)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c818f98..66f699e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -758,22 +758,35 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) pnfs_layout_bulk_destroy_byserver_locked(struct nfs_client *clp, struct nfs_server *server, struct list_head *layout_list) + __must_hold(&clp->cl_lock) + __must_hold(RCU) { struct pnfs_layout_hdr *lo, *next; struct inode *inode;
list_for_each_entry_safe(lo, next, &server->layouts, plh_layouts) { - if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags)) + if (test_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags) || + test_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags) || + !list_empty(&lo->plh_bulk_destroy)) continue; + /* If the sb is being destroyed, just bail */ + if (!nfs_sb_active(server->super)) + break; inode = igrab(lo->plh_inode); - if (inode == NULL) - continue; - list_del_init(&lo->plh_layouts); - if (pnfs_layout_add_bulk_destroy_list(inode, layout_list)) - continue; - rcu_read_unlock(); - spin_unlock(&clp->cl_lock); - iput(inode); + if (inode != NULL) { + list_del_init(&lo->plh_layouts); + if (pnfs_layout_add_bulk_destroy_list(inode, + layout_list)) + continue; + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + iput(inode); + } else { + rcu_read_unlock(); + spin_unlock(&clp->cl_lock); + set_bit(NFS_LAYOUT_INODE_FREEING, &lo->plh_flags); + } + nfs_sb_deactive(server->super); spin_lock(&clp->cl_lock); rcu_read_lock(); return -EAGAIN; @@ -811,7 +824,7 @@ static bool pnfs_seqid_is_newer(u32 s1, u32 s2) /* Free all lsegs that are attached to commit buckets */ nfs_commit_inode(inode, 0); pnfs_put_layout_hdr(lo); - iput(inode); + nfs_iput_and_deactive(inode); } return ret; } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index ece367e..3ba4481 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -104,6 +104,7 @@ enum { NFS_LAYOUT_RETURN_REQUESTED, /* Return this layout ASAP */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ + NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */ };
enum layoutdriver_policy_flags {