From: Peter Zijlstra <peterz@infradead.org>
mainline inclusion
from mainline-v5.14-rc1
commit d2dfa17bc7de67e99685c4d6557837bf801a102c
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I5OOWG
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d2dfa17bc7de67e99685c4d6557837bf801a102c
--------------------------------------------------------------------------
When a sibling is forced-idle to match the core-cookie, search the other CPUs in its sched domains for tasks with a matching cookie to fill the core.
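rcu_read_unlock() can incur an infrequent deadlock in sched_core_balance(). Fix this by using the RCU-sched flavor instead: sched_core_balance() keeps preemption disabled (preempt_disable()/preempt_enable()) around its rcu_read_lock()/rcu_read_unlock() section.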
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Don Hiatt <dhiatt@digitalocean.com>
Tested-by: Hongyu Ning <hongyu.ning@linux.intel.com>
Tested-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210422123308.800048269@infradead.org
Signed-off-by: Lin Shengwang <linshengwang1@huawei.com>
Reviewed-by: lihua <hucool.lihua@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 include/linux/sched.h |   1 +
 kernel/sched/core.c   | 130 +++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/idle.c   |   1 +
 kernel/sched/sched.h  |   6 ++
 4 files changed, 137 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index d2a399e0cd9b..dd1feff73c7d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,6 +734,7 @@ struct task_struct { #ifdef CONFIG_SCHED_CORE struct rb_node core_node; unsigned long core_cookie; + unsigned int core_occupation; #endif
#ifdef CONFIG_CGROUP_SCHED diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 78401da6e597..d4598582240b 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -195,6 +195,21 @@ static struct task_struct *sched_core_find(struct rq *rq, unsigned long cookie) return __node_2_sc(node); }
+static struct task_struct *sched_core_next(struct task_struct *p, unsigned long cookie) +{ + struct rb_node *node = &p->core_node; + + node = rb_next(node); + if (!node) + return NULL; + + p = container_of(node, struct task_struct, core_node); + if (p->core_cookie != cookie) + return NULL; + + return p; +} + /* * Magic required such that: * @@ -4824,8 +4839,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) const struct sched_class *class; const struct cpumask *smt_mask; bool fi_before = false; + int i, j, cpu, occ = 0; bool need_sync; - int i, j, cpu;
if (!sched_core_enabled(rq)) return __pick_next_task(rq, prev, rf); @@ -4947,6 +4962,9 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (!p) continue;
+ if (!is_task_rq_idle(p)) + occ++; + rq_i->core_pick = p; if (rq_i->idle == p && rq_i->nr_running) { rq->core->core_forceidle = true; @@ -4978,6 +4996,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
cpu_rq(j)->core_pick = NULL; } + occ = 1; goto again; } } @@ -5023,6 +5042,8 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) if (!(fi_before && rq->core->core_forceidle)) task_vruntime_update(rq_i, rq_i->core_pick, rq->core->core_forceidle);
+ rq_i->core_pick->core_occupation = occ; + if (i == cpu) { rq_i->core_pick = NULL; continue; @@ -5044,6 +5065,113 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) return next; }
+static bool try_steal_cookie(int this, int that) +{ + struct rq *dst = cpu_rq(this), *src = cpu_rq(that); + struct task_struct *p; + unsigned long cookie; + bool success = false; + + local_irq_disable(); + double_rq_lock(dst, src); + + cookie = dst->core->core_cookie; + if (!cookie) + goto unlock; + + if (dst->curr != dst->idle) + goto unlock; + + p = sched_core_find(src, cookie); + if (p == src->idle) + goto unlock; + + do { + if (p == src->core_pick || p == src->curr) + goto next; + + if (!cpumask_test_cpu(this, &p->cpus_mask)) + goto next; + + if (p->core_occupation > dst->idle->core_occupation) + goto next; + + p->on_rq = TASK_ON_RQ_MIGRATING; + deactivate_task(src, p, 0); + set_task_cpu(p, this); + activate_task(dst, p, 0); + p->on_rq = TASK_ON_RQ_QUEUED; + + resched_curr(dst); + + success = true; + break; + +next: + p = sched_core_next(p, cookie); + } while (p); + +unlock: + double_rq_unlock(dst, src); + local_irq_enable(); + + return success; +} + +static bool steal_cookie_task(int cpu, struct sched_domain *sd) +{ + int i; + + for_each_cpu_wrap(i, sched_domain_span(sd), cpu) { + if (i == cpu) + continue; + + if (need_resched()) + break; + + if (try_steal_cookie(cpu, i)) + return true; + } + + return false; +} + +static void sched_core_balance(struct rq *rq) +{ + struct sched_domain *sd; + int cpu = cpu_of(rq); + + preempt_disable(); + rcu_read_lock(); + raw_spin_rq_unlock_irq(rq); + for_each_domain(cpu, sd) { + if (need_resched()) + break; + + if (steal_cookie_task(cpu, sd)) + break; + } + raw_spin_rq_lock_irq(rq); + rcu_read_unlock(); + preempt_enable(); +} + +static DEFINE_PER_CPU(struct callback_head, core_balance_head); + +void queue_core_balance(struct rq *rq) +{ + if (!sched_core_enabled(rq)) + return; + + if (!rq->core->core_cookie) + return; + + if (!rq->nr_running) /* not forced idle */ + return; + + queue_balance_callback(rq, &per_cpu(core_balance_head, rq->cpu), sched_core_balance); +} + static inline void sched_core_cpu_starting(unsigned int 
cpu) { const struct cpumask *smt_mask = cpu_smt_mask(cpu); diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 3c6396d61a04..58bcb9517dfe 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -430,6 +430,7 @@ static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool fir { update_idle_core(rq); schedstat_inc(rq->sched_goidle); + queue_core_balance(rq); }
#ifdef CONFIG_SMP diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ff709a805815..4fe48700e926 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1208,6 +1208,8 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq)
bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
+extern void queue_core_balance(struct rq *rq); + #else /* !CONFIG_SCHED_CORE */
static inline bool sched_core_enabled(struct rq *rq) @@ -1230,6 +1232,10 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *rq) return &rq->__lock; }
+static inline void queue_core_balance(struct rq *rq) +{ +} + #endif /* CONFIG_SCHED_CORE */
static inline void lockdep_assert_rq_held(struct rq *rq)