From: Peter Zijlstra <peterz@infradead.org>
stable inclusion from stable-v6.6.11 commit 39d04e558882ca317c5e64cc99c2a03494047257 bugzilla: https://gitee.com/openeuler/kernel/issues/I99TJK
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=39d04e558882ca317c5e64cc99c2a03494047257
--------------------------------
[ Upstream commit 85d68222ddc5f4522e456d97d201166acb50f716 ]
Commit 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") added a kfree() call to free any user-provided affinity mask, if present. It was changed later to use kfree_rcu() in commit 9a5418bc48ba ("sched/core: Use kfree_rcu() in do_set_cpus_allowed()") to avoid a circular locking dependency problem.
It turns out that even kfree_rcu() isn't safe from circular locking problems. As reported by the kernel test robot, the following circular locking dependency now exists:
&rdp->nocb_lock --> rcu_node_0 --> &rq->__lock
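Solve this by moving the resched_cpu() call out from under the rcu_node lock, which breaks the rcu_node_0 --> &rq->__lock chain: rcu_implicit_dynticks_qs() now returns a negative value when a CPU needs a forced reschedule, and force_qs_rnp() collects those CPUs in a mask and calls resched_cpu() on them only after releasing the rcu_node lock.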
[peterz: heavily borrowed from Waiman's Changelog] [paulmck: applied Z qiang feedback]
Fixes: 851a723e45d1 ("sched: Always clear user_cpus_ptr in do_set_cpus_allowed()") Reported-by: kernel test robot <oliver.sang@intel.com> Acked-by: Waiman Long <longman@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/oe-lkp/202310302207.a25f1a30-oliver.sang@intel.com Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org> Signed-off-by: ZhangPeng <zhangpeng362@huawei.com> --- kernel/rcu/tree.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-)
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 7b4517dc4657..92a090e16186 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -755,14 +755,19 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) }
/* - * Return true if the specified CPU has passed through a quiescent - * state by virtue of being in or having passed through an dynticks - * idle state since the last call to dyntick_save_progress_counter() - * for this same CPU, or by virtue of having been offline. + * Returns positive if the specified CPU has passed through a quiescent state + * by virtue of being in or having passed through an dynticks idle state since + * the last call to dyntick_save_progress_counter() for this same CPU, or by + * virtue of having been offline. + * + * Returns negative if the specified CPU needs a force resched. + * + * Returns zero otherwise. */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { unsigned long jtsq; + int ret = 0; struct rcu_node *rnp = rdp->mynode;
/* @@ -848,8 +853,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) || rcu_state.cbovld)) { WRITE_ONCE(rdp->rcu_urgent_qs, true); - resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); + ret = -1; }
/* @@ -862,8 +867,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) if (time_after(jiffies, rcu_state.jiffies_resched)) { if (time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq)) { - resched_cpu(rdp->cpu); WRITE_ONCE(rdp->last_fqs_resched, jiffies); + ret = -1; } if (IS_ENABLED(CONFIG_IRQ_WORK) && !rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq && @@ -892,7 +897,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) } }
- return 0; + return ret; }
/* Trace-event wrapper function for trace_rcu_future_grace_period. */ @@ -2270,15 +2275,15 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) { int cpu; unsigned long flags; - unsigned long mask; - struct rcu_data *rdp; struct rcu_node *rnp;
rcu_state.cbovld = rcu_state.cbovldnext; rcu_state.cbovldnext = false; rcu_for_each_leaf_node(rnp) { + unsigned long mask = 0; + unsigned long rsmask = 0; + cond_resched_tasks_rcu_qs(); - mask = 0; raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.cbovldnext |= !!rnp->cbovldmask; if (rnp->qsmask == 0) { @@ -2296,11 +2301,17 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) continue; } for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) { + struct rcu_data *rdp; + int ret; + rdp = per_cpu_ptr(&rcu_data, cpu); - if (f(rdp)) { + ret = f(rdp); + if (ret > 0) { mask |= rdp->grpmask; rcu_disable_urgency_upon_qs(rdp); } + if (ret < 0) + rsmask |= rdp->grpmask; } if (mask != 0) { /* Idle/offline CPUs, report (releases rnp->lock). */ @@ -2309,6 +2320,9 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp)) /* Nothing to do here, so just drop the lock. */ raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } + + for_each_leaf_node_cpu_mask(rnp, cpu, rsmask) + resched_cpu(cpu); } }