From: Viresh Kumar viresh.kumar@linaro.org
mainline inclusion from mainline-v5.4-rc1 commit 3c29e651e16dd3b3179cfb2d055ee9538e37515c category: feature bugzilla: 38260 CVE: NA ---------------------------
We try to find an idle CPU to run the next task, but in case we don't find an idle CPU it is better to pick a CPU which will run the task the soonest, for performance reason.
A CPU which isn't idle but has only SCHED_IDLE activity queued on it should be a good target based on this criteria as any normal fair task will most likely preempt the currently running SCHED_IDLE task immediately. In fact, choosing a SCHED_IDLE CPU over a fully idle one shall give better results as it should be able to run the task sooner than an idle CPU (which requires to be woken up from an idle state).
This patch updates both fast and slow paths with this optimization.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Daniel Lezcano daniel.lezcano@linaro.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Vincent Guittot vincent.guittot@linaro.org Cc: chris.redpath@arm.com Cc: quentin.perret@linaro.org Cc: songliubraving@fb.com Cc: steven.sistare@oracle.com Cc: subhra.mazumdar@oracle.com Cc: tkjos@google.com Link: https://lkml.kernel.org/r/eeafa25fdeb6f6edd5b2da716bc8f0ba7708cbcf.156152354... Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Cheng Jian cj.chengjian@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/sched/fair.c | 47 +++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index c48d47fbb8fc..bebc176f4d40 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5379,6 +5379,15 @@ static struct {
#endif /* CONFIG_NO_HZ_COMMON */
+/* CPU only has SCHED_IDLE tasks enqueued */ +static int sched_idle_cpu(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + + return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running && + rq->nr_running); +} + /** * __cpu_load_update - update the rq->cpu_load[] statistics * @this_rq: The rq to update statistics for @@ -5938,7 +5947,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this unsigned int min_exit_latency = UINT_MAX; u64 latest_idle_timestamp = 0; int least_loaded_cpu = this_cpu; - int shallowest_idle_cpu = -1; + int shallowest_idle_cpu = -1, si_cpu = -1; int i;
/* Check if we have any choice: */ @@ -5969,7 +5978,12 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; } - } else if (shallowest_idle_cpu == -1) { + } else if (shallowest_idle_cpu == -1 && si_cpu == -1) { + if (sched_idle_cpu(i)) { + si_cpu = i; + continue; + } + load = weighted_cpuload(cpu_rq(i)); if (load < min_load) { min_load = load; @@ -5978,7 +5992,11 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this } }
- return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; + if (shallowest_idle_cpu != -1) + return shallowest_idle_cpu; + if (si_cpu != -1) + return si_cpu; + return least_loaded_cpu; }
static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p, @@ -6131,7 +6149,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int */ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { - int cpu; + int cpu, si_cpu = -1;
if (!static_branch_likely(&sched_smt_present)) return -1; @@ -6141,9 +6159,11 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t continue; if (available_idle_cpu(cpu)) return cpu; + if (si_cpu == -1 && sched_idle_cpu(cpu)) + si_cpu = cpu; }
- return -1; + return si_cpu; }
#else /* CONFIG_SCHED_SMT */ @@ -6171,11 +6191,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t u64 avg_cost, avg_idle; u64 time, cost; s64 delta; - int cpu, nr = INT_MAX; + int cpu, nr = INT_MAX, si_cpu = -1;
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); if (!this_sd) - return -1; + return si_cpu;
/* * Due to large variance we need a large fuzz factor; hackbench in @@ -6185,7 +6205,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t avg_cost = this_sd->avg_scan_cost + 1;
if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost) - return -1; + return si_cpu;
if (sched_feat(SIS_PROP)) { u64 span_avg = sd->span_weight * avg_idle; @@ -6199,11 +6219,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, sched_domain_span(sd), target) { if (!--nr) - return -1; + return si_cpu; if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; if (available_idle_cpu(cpu)) break; + if (si_cpu == -1 && sched_idle_cpu(cpu)) + si_cpu = cpu; }
time = local_clock() - time; @@ -6222,13 +6244,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) struct sched_domain *sd; int i, recent_used_cpu;
- if (available_idle_cpu(target)) + if (available_idle_cpu(target) || sched_idle_cpu(target)) return target;
/* * If the previous CPU is cache affine and idle, don't be stupid: */ - if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev)) + if (prev != target && cpus_share_cache(prev, target) && + (available_idle_cpu(prev) || sched_idle_cpu(prev))) return prev;
/* Check a recently used CPU as a potential idle candidate: */ @@ -6236,7 +6259,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) if (recent_used_cpu != prev && recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && - available_idle_cpu(recent_used_cpu) && + (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) { /* * Replace recent_used_cpu with prev as it is a potential