From: Yang Erkun <yangerkun@huawei.com>
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/IB4H3O CVE: NA
--------------------------------
Our syztester reported a hung task as below:
INFO: task NFSv4 callback:93188 blocked for more than 1310 seconds. Not tainted 5.10.0-00574-gc310db27923f-dirty #16 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. task:NFSv4 callback state:D stack: 0 pid:93188 ppid: 2 flags:0x00000208 Call trace: __switch_to+0x98/0xdc arch/arm64/kernel/process.c:639 context_switch kernel/sched/core.c:4055 [inline] __schedule+0x690/0xf0c kernel/sched/core.c:5003 schedule+0xd8/0x220 kernel/sched/core.c:5139 schedule_timeout+0x390/0x42c kernel/time/timer.c:2126 do_wait_for_common kernel/sched/completion.c:85 [inline] __wait_for_common kernel/sched/completion.c:106 [inline] wait_for_common+0x148/0x240 kernel/sched/completion.c:117 wait_for_completion+0x20/0x30 kernel/sched/completion.c:138 kthread_stop+0xf8/0x39c kernel/kthread.c:671 svc_stop_kthreads+0x138/0x260 net/sunrpc/svc.c:797 svc_set_num_threads+0xc8/0xe4 net/sunrpc/svc.c:818 nfs_callback_down+0x120/0x200 fs/nfs/callback.c:325 nfs4_destroy_callback fs/nfs/nfs4client.c:279 [inline] nfs4_shutdown_client+0x138/0x150 fs/nfs/nfs4client.c:287 nfs4_free_client+0x20/0x40 fs/nfs/nfs4client.c:303 nfs_put_client+0x238/0x340 fs/nfs/client.c:273 nfs4_callback_compound+0x2cc/0x630 fs/nfs/callback_xdr.c:981 nfs_callback_dispatch+0x44/0x6c fs/nfs/callback_xdr.c:995 svc_process_common+0x9a0/0xdf0 net/sunrpc/svc.c:1400 svc_process+0x138/0x1c4 net/sunrpc/svc.c:1542 nfs4_callback_svc+0x58/0x90 fs/nfs/callback.c:89 kthread+0x1e0/0x220 kernel/kthread.c:328 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:1727
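When cl_count of an nfs_client drops to zero, nfs_put_client triggers nfs4_free_client; svc_set_num_threads in nfs_callback_down then signals all callback threads and waits until they have exited. So if the last cl_count reference is put from nfs4_callback_compound, which itself runs in a callback thread (see nfs4_callback_svc in the trace above), that thread ends up in kthread_stop waiting for itself to exit, and we deadlock.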
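Fix it by dropping the reference asynchronously in nfs4_callback_compound: the new nfs_async_put_client behaves like nfs_put_client, but when the last reference is dropped it queues a work item on nfsiod_workqueue to call free_client instead of calling it directly from the callback thread.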
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Yang Erkun <yangerkun@huawei.com>
Conflicts: fs/nfs/callback_xdr.c fs/nfs/internal.h [Commit 2bb50aabb6f3("NFS4: Report callback authentication errors") separates the judgment of cps.clp and check_gss_callback_principal; commit 10b7a70cbb81("NFS: Cleanup - add nfs_clients_exit to mirror nfs_clients_init") remove nfs_cleanup_cb_ident_idr from fs/nfs/internal.h.] Signed-off-by: Li Lingfeng lilingfeng3@huawei.com Reviewed-by: Yang Erkun yangerkun@huawei.com --- fs/nfs/callback_xdr.c | 6 +++--- fs/nfs/client.c | 33 +++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 1 + include/linux/nfs_fs_sb.h | 1 + 4 files changed, 38 insertions(+), 3 deletions(-)
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 2f84c612838c..2d7dd9b65a8c 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -952,7 +952,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident); if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp)) { if (cps.clp) - nfs_put_client(cps.clp); + nfs_async_put_client(cps.clp); goto out_invalidcred; } } @@ -962,7 +962,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) hdr_res.tag = hdr_arg.tag; if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0) { if (cps.clp) - nfs_put_client(cps.clp); + nfs_async_put_client(cps.clp); return rpc_system_err; } while (status == 0 && nops != hdr_arg.nops) { @@ -982,7 +982,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) *hdr_res.status = status; *hdr_res.nops = htonl(nops); nfs4_cb_free_slot(&cps); - nfs_put_client(cps.clp); + nfs_async_put_client(cps.clp); return rpc_success;
out_invalidcred: diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 7d02dc52209d..a05ee74d6443 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -283,6 +283,39 @@ void nfs_put_client(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs_put_client);
+static void nfs_free_client_work(struct work_struct *work) +{ + struct nfs_client *clp = + container_of(work, struct nfs_client, free_work); + + clp->rpc_ops->free_client(clp); +} + +/* + * Similar to nfs_put_client, but call free_client with async mode + */ +void nfs_async_put_client(struct nfs_client *clp) +{ + struct nfs_net *nn; + + if (!clp) + return; + + nn = net_generic(clp->cl_net, nfs_net_id); + + if (refcount_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { + list_del(&clp->cl_share_link); + nfs_cb_idr_remove_locked(clp); + spin_unlock(&nn->nfs_client_lock); + + WARN_ON_ONCE(!list_empty(&clp->cl_superblocks)); + + INIT_WORK(&clp->free_work, nfs_free_client_work); + queue_work(nfsiod_workqueue, &clp->free_work); + } +} +EXPORT_SYMBOL_GPL(nfs_async_put_client); + /* * Find an nfs_client on the list that matches the initialisation data * that is supplied. diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ad938771813c..44ca5300e6e2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -180,6 +180,7 @@ void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *);
extern void nfs_cleanup_cb_ident_idr(struct net *); extern void nfs_put_client(struct nfs_client *); +extern void nfs_async_put_client(struct nfs_client *clp); extern void nfs_free_client(struct nfs_client *); extern struct nfs_client *nfs4_find_client_ident(struct net *, int); extern struct nfs_client * diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 7023ae64e3d7..14a40789be5d 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -52,6 +52,7 @@ struct nfs_client { char * cl_acceptor; /* GSSAPI acceptor name */ struct list_head cl_share_link; /* link in global client list */ struct list_head cl_superblocks; /* List of nfs_server structs */ + struct work_struct free_work;
struct rpc_clnt * cl_rpcclient; const struct nfs_rpc_ops *rpc_ops; /* NFS protocol vector */