From: Viresh Kumar viresh.kumar@linaro.org
mainline inclusion from mainline-v5.6-rc1 commit 17346452b25b98acfb395d2a82ec2e4ad0cb7a01 category: feature bugzilla: 38260 CVE: NA ---------------------------
There are instances where we keep searching for an idle CPU despite already having a sched-idle CPU (in find_idlest_group_cpu(), select_idle_smt() and select_idle_cpu() and then there are places where we don't necessarily do that and return a sched-idle CPU as soon as we find one (in select_idle_sibling()). This looks a bit inconsistent and it may be worth having the same policy everywhere.
On the other hand, choosing a sched-idle CPU over a idle one shall be beneficial from performance and power point of view as well, as we don't need to get the CPU online from a deep idle state which wastes quite a lot of time and energy and delays the scheduling of the newly woken up task.
This patch tries to simplify code around sched-idle CPU selection and make it consistent throughout.
Testing is done with the help of rt-app on hikey board (ARM64 octa-core, 2 clusters, 0-3 and 4-7). The cpufreq governor was set to performance to avoid any side affects from CPU frequency. Following are the tests performed:
Test 1: 1-cfs-task:
A single SCHED_NORMAL task is pinned to CPU5 which runs for 2333 us out of 7777 us (so gives time for the cluster to go in deep idle state).
Test 2: 1-cfs-1-idle-task:
A single SCHED_NORMAL task is pinned on CPU5 and single SCHED_IDLE task is pinned on CPU6 (to make sure cluster 1 doesn't go in deep idle state).
Test 3: 1-cfs-8-idle-task:
A single SCHED_NORMAL task is pinned on CPU5 and eight SCHED_IDLE tasks are created which run forever (not pinned anywhere, so they run on all CPUs). Checked with kernelshark that as soon as NORMAL task sleeps, the SCHED_IDLE task starts running on CPU5.
And here are the results on mean latency (in us), using the "st" tool.
$ st 1-cfs-task/rt-app-cfs_thread-0.log N min max sum mean stddev 642 90 592 197180 307.134 109.906
$ st 1-cfs-1-idle-task/rt-app-cfs_thread-0.log N min max sum mean stddev 642 67 311 113850 177.336 41.4251
$ st 1-cfs-8-idle-task/rt-app-cfs_thread-0.log N min max sum mean stddev 643 29 173 41364 64.3297 13.2344
The mean latency when we need to:
- wakeup from deep idle state is 307 us. - wakeup from shallow idle state is 177 us. - preempt a SCHED_IDLE task is 64 us.
Signed-off-by: Viresh Kumar viresh.kumar@linaro.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Reviewed-by: Vincent Guittot vincent.guittot@linaro.org Cc: Linus Torvalds torvalds@linux-foundation.org Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Link: https://lkml.kernel.org/r/b90cbcce608cef4e02a7bbfe178335f76d201bab.157372834... Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Cheng Jian cj.chengjian@huawei.com Reviewed-by: Xie XiuQi xiexiuqi@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- kernel/sched/fair.c | 38 ++++++++++++++------------------------ 1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b0a6a42e9a9a..9d0db0c89c3e 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5947,7 +5947,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this unsigned int min_exit_latency = UINT_MAX; u64 latest_idle_timestamp = 0; int least_loaded_cpu = this_cpu; - int shallowest_idle_cpu = -1, si_cpu = -1; + int shallowest_idle_cpu = -1; int i;
/* Check if we have any choice: */ @@ -5956,6 +5956,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
/* Traverse only the allowed CPUs */ for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) { + if (sched_idle_cpu(i)) + return i; + if (available_idle_cpu(i)) { struct rq *rq = cpu_rq(i); struct cpuidle_state *idle = idle_get_state(rq); @@ -5978,12 +5981,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this latest_idle_timestamp = rq->idle_stamp; shallowest_idle_cpu = i; } - } else if (shallowest_idle_cpu == -1 && si_cpu == -1) { - if (sched_idle_cpu(i)) { - si_cpu = i; - continue; - } - + } else if (shallowest_idle_cpu == -1) { load = weighted_cpuload(cpu_rq(i)); if (load < min_load) { min_load = load; @@ -5992,11 +5990,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this } }
- if (shallowest_idle_cpu != -1) - return shallowest_idle_cpu; - if (si_cpu != -1) - return si_cpu; - return least_loaded_cpu; + return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu; }
static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p, @@ -6149,7 +6143,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int */ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target) { - int cpu, si_cpu = -1; + int cpu;
if (!static_branch_likely(&sched_smt_present)) return -1; @@ -6157,13 +6151,11 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t for_each_cpu(cpu, cpu_smt_mask(target)) { if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) continue; - if (available_idle_cpu(cpu)) + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) return cpu; - if (si_cpu == -1 && sched_idle_cpu(cpu)) - si_cpu = cpu; }
- return si_cpu; + return -1; }
#else /* CONFIG_SCHED_SMT */ @@ -6192,11 +6184,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t u64 avg_cost, avg_idle; u64 time, cost; s64 delta; - int cpu, nr = INT_MAX, si_cpu = -1; + int cpu, nr = INT_MAX;
this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc)); if (!this_sd) - return si_cpu; + return -1;
/* * Due to large variance we need a large fuzz factor; hackbench in @@ -6206,7 +6198,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t avg_cost = this_sd->avg_scan_cost + 1;
if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost) - return si_cpu; + return -1;
if (sched_feat(SIS_PROP)) { u64 span_avg = sd->span_weight * avg_idle; @@ -6222,11 +6214,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
for_each_cpu_wrap(cpu, cpus, target) { if (!--nr) - return si_cpu; - if (available_idle_cpu(cpu)) + return -1; + if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) break; - if (si_cpu == -1 && sched_idle_cpu(cpu)) - si_cpu = cpu; }
time = local_clock() - time;