From: Viresh Kumar <viresh.kumar@linaro.org>
mainline inclusion
from mainline-v5.0-rc1
commit 1da1843f9f0334e2428308945d396ffecc2acfe1
category: feature
bugzilla: 38260
CVE: NA

---------------------------
We already have task_has_rt_policy() and task_has_dl_policy() helpers, create task_has_idle_policy() as well and update sched core to start using it.
While at it, use task_has_dl_policy() at one more place.
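For illustration only (this is not the kernel code, which follows in the diff below): a standalone sketch of the helper pattern the patch describes, where call sites ask a task_has_*_policy() style helper about the task instead of open-coding the p->policy comparison. The struct and constants here are simplified stand-ins.

#include <stdio.h>

#define SCHED_NORMAL	0
#define SCHED_IDLE	5

struct task { int policy; };

static inline int idle_policy(int policy)
{
	return policy == SCHED_IDLE;
}

/* Mirrors the new helper: ask about the task, not its raw policy field. */
static inline int task_has_idle_policy(const struct task *p)
{
	return idle_policy(p->policy);
}

int main(void)
{
	struct task p = { .policy = SCHED_IDLE };

	if (task_has_idle_policy(&p))
		printf("task runs with SCHED_IDLE policy\n");
	return 0;
}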
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Link: http://lkml.kernel.org/r/ce3915d5b490fc81af926a3b6bfb775e7188e005.1541416894...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/core.c  |  4 ++--
 kernel/sched/debug.c |  2 +-
 kernel/sched/fair.c  | 10 +++++-----
 kernel/sched/sched.h |  5 +++++
 4 files changed, 13 insertions(+), 8 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0fbdc620697b..c1513905c0de 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -724,7 +724,7 @@ static void set_load_weight(struct task_struct *p, bool update_load)
 	/*
 	 * SCHED_IDLE tasks get minimal weight:
 	 */
-	if (idle_policy(p->policy)) {
+	if (task_has_idle_policy(p)) {
 		load->weight = scale_load(WEIGHT_IDLEPRIO);
 		load->inv_weight = WMULT_IDLEPRIO;
 		return;
@@ -4270,7 +4270,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	 * Treat SCHED_IDLE as nice 20. Only allow a switch to
 	 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it.
 	 */
-	if (idle_policy(p->policy) && !idle_policy(policy)) {
+	if (task_has_idle_policy(p) && !idle_policy(policy)) {
 		if (!can_nice(p, task_nice(p)))
 			return -EPERM;
 	}
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c542d84dafce..f12382877390 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -1012,7 +1012,7 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
 #endif
 	P(policy);
 	P(prio);
-	if (p->policy == SCHED_DEADLINE) {
+	if (task_has_dl_policy(p)) {
 		P(dl.runtime);
 		P(dl.deadline);
 	}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 09b0d83d5bbc..807c7fb78b6f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6609,7 +6609,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
-	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+	if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
 		return;
 
 	for_each_sched_entity(se) {
@@ -6621,7 +6621,7 @@ static void set_last_buddy(struct sched_entity *se)
 
 static void set_next_buddy(struct sched_entity *se)
 {
-	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+	if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
 		return;
 
 	for_each_sched_entity(se) {
@@ -6679,8 +6679,8 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 
 	/* Idle tasks are by definition preempted by non-idle tasks. */
-	if (unlikely(curr->policy == SCHED_IDLE) &&
-	    likely(p->policy != SCHED_IDLE))
+	if (unlikely(task_has_idle_policy(curr)) &&
+	    likely(!task_has_idle_policy(p)))
 		goto preempt;
 
 	/*
@@ -7090,7 +7090,7 @@ static int task_hot(struct task_struct *p, struct lb_env *env)
 	if (p->sched_class != &fair_sched_class)
 		return 0;
 
-	if (unlikely(p->policy == SCHED_IDLE))
+	if (unlikely(task_has_idle_policy(p)))
 		return 0;
 
 	/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 955abd645ff9..fbe3b3b8a19f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -184,6 +184,11 @@ static inline bool valid_policy(int policy)
 		rt_policy(policy) || dl_policy(policy);
 }
 
+static inline int task_has_idle_policy(struct task_struct *p)
+{
+	return idle_policy(p->policy);
+}
+
 static inline int task_has_rt_policy(struct task_struct *p)
 {
 	return rt_policy(p->policy);
From: Viresh Kumar <viresh.kumar@linaro.org>
mainline inclusion
from mainline-v5.4-rc1
commit 43e9f7f231e40e4534fc3a735da152911a085c16
category: feature
bugzilla: 38260
CVE: NA

---------------------------
Track how many tasks are present with SCHED_IDLE policy in each cfs_rq. This will be used by later commits.
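As a rough standalone sketch (the real bookkeeping is in the diff below): the idea is to keep a second counter next to h_nr_running that only counts SCHED_IDLE tasks and is adjusted on every enqueue and dequeue. The structure and function names here are simplified stand-ins for the kernel's cfs_rq handling.

#include <assert.h>

struct cfs_rq_sketch {
	unsigned int h_nr_running;	/* all fair tasks */
	unsigned int idle_h_nr_running;	/* SCHED_IDLE tasks only */
};

static void enqueue(struct cfs_rq_sketch *cfs_rq, int task_is_sched_idle)
{
	cfs_rq->h_nr_running++;
	cfs_rq->idle_h_nr_running += task_is_sched_idle;	/* 0 or 1 */
}

static void dequeue(struct cfs_rq_sketch *cfs_rq, int task_is_sched_idle)
{
	cfs_rq->h_nr_running--;
	cfs_rq->idle_h_nr_running -= task_is_sched_idle;
}

int main(void)
{
	struct cfs_rq_sketch rq = { 0, 0 };

	enqueue(&rq, 1);	/* one SCHED_IDLE task */
	enqueue(&rq, 0);	/* one SCHED_NORMAL task */
	assert(rq.h_nr_running == 2 && rq.idle_h_nr_running == 1);

	dequeue(&rq, 0);
	assert(rq.h_nr_running == 1 && rq.idle_h_nr_running == 1);
	return 0;
}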
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: chris.redpath@arm.com
Cc: quentin.perret@linaro.org
Cc: songliubraving@fb.com
Cc: steven.sistare@oracle.com
Cc: subhra.mazumdar@oracle.com
Cc: tkjos@google.com
Link: https://lkml.kernel.org/r/0d3cdc427fc68808ad5bccc40e86ed0bf9da8bb4.156152354...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/fair.c  | 14 ++++++++++++--
 kernel/sched/sched.h |  3 ++-
 2 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 807c7fb78b6f..c48d47fbb8fc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4484,7 +4484,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
-	long task_delta, dequeue = 1;
+	long task_delta, idle_task_delta, dequeue = 1;
 	bool empty;
 
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
@@ -4495,6 +4495,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	rcu_read_unlock();
 
 	task_delta = cfs_rq->h_nr_running;
+	idle_task_delta = cfs_rq->idle_h_nr_running;
 	for_each_sched_entity(se) {
 		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 		/* throttled entity or throttle-on-deactivate */
@@ -4504,6 +4505,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 		if (dequeue)
 			dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
 		qcfs_rq->h_nr_running -= task_delta;
+		qcfs_rq->idle_h_nr_running -= idle_task_delta;
 
 		if (qcfs_rq->load.weight)
 			dequeue = 0;
@@ -4543,7 +4545,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	int enqueue = 1;
-	long task_delta;
+	long task_delta, idle_task_delta;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
 
@@ -4563,6 +4565,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 		return;
 
 	task_delta = cfs_rq->h_nr_running;
+	idle_task_delta = cfs_rq->idle_h_nr_running;
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			enqueue = 0;
@@ -4571,6 +4574,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 		if (enqueue)
 			enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
 		cfs_rq->h_nr_running += task_delta;
+		cfs_rq->idle_h_nr_running += idle_task_delta;
 
 		if (cfs_rq_throttled(cfs_rq))
 			break;
@@ -5156,6 +5160,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int idle_h_nr_running = task_has_idle_policy(p);
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
@@ -5188,6 +5193,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 		cfs_rq->h_nr_running++;
+		cfs_rq->idle_h_nr_running += idle_h_nr_running;
 
 		flags = ENQUEUE_WAKEUP;
 	}
@@ -5195,6 +5201,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		cfs_rq->h_nr_running++;
+		cfs_rq->idle_h_nr_running += idle_h_nr_running;
 
 		if (cfs_rq_throttled(cfs_rq))
 			break;
@@ -5238,6 +5245,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
+	int idle_h_nr_running = task_has_idle_policy(p);
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
@@ -5252,6 +5260,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 		cfs_rq->h_nr_running--;
+		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight) {
@@ -5271,6 +5280,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		cfs_rq->h_nr_running--;
+		cfs_rq->idle_h_nr_running -= idle_h_nr_running;
 
 		if (cfs_rq_throttled(cfs_rq))
 			break;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fbe3b3b8a19f..b499bb3e4cec 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -495,7 +495,8 @@ struct cfs_rq {
 	struct load_weight	load;
 	unsigned long		runnable_weight;
 	unsigned int		nr_running;
-	unsigned int		h_nr_running;
+	unsigned int		h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
+	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
 
 	u64			exec_clock;
 	u64			min_vruntime;
From: Viresh Kumar <viresh.kumar@linaro.org>
mainline inclusion
from mainline-v5.4-rc1
commit 3c29e651e16dd3b3179cfb2d055ee9538e37515c
category: feature
bugzilla: 38260
CVE: NA

---------------------------
We try to find an idle CPU to run the next task, but if we don't find one it is better, for performance reasons, to pick a CPU which will run the task the soonest.
A CPU which isn't idle but has only SCHED_IDLE activity queued on it should be a good target by this criterion, as any normal fair task will most likely preempt the currently running SCHED_IDLE task immediately. In fact, choosing a SCHED_IDLE CPU over a fully idle one should give better results, as it can start running the task sooner than an idle CPU (which first has to be woken up from an idle state).
This patch updates both fast and slow paths with this optimization.
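A standalone sketch of the selection rule described above (the real implementation is in the diff below): a CPU counts as "sched-idle" when everything runnable on it is SCHED_IDLE, and such a CPU is preferred over a merely lightly loaded one when no fully idle CPU exists. The rq layout and the pick_cpu() helper are simplified stand-ins, not kernel code.

#include <stdio.h>

struct rq_sketch {
	unsigned int nr_running;	/* all runnable tasks on this CPU */
	unsigned int idle_h_nr_running;	/* runnable SCHED_IDLE tasks */
};

/* CPU only has SCHED_IDLE tasks enqueued (mirrors the new sched_idle_cpu()). */
static int sched_idle_rq(const struct rq_sketch *rq)
{
	return rq->nr_running && rq->nr_running == rq->idle_h_nr_running;
}

/* Preference order: fully idle CPU > sched-idle CPU > least loaded CPU. */
static int pick_cpu(const struct rq_sketch *rqs, int nr_cpus)
{
	int cpu, si_cpu = -1, least_loaded = 0;

	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (!rqs[cpu].nr_running)
			return cpu;
		if (si_cpu == -1 && sched_idle_rq(&rqs[cpu]))
			si_cpu = cpu;
		if (rqs[cpu].nr_running < rqs[least_loaded].nr_running)
			least_loaded = cpu;
	}
	return si_cpu != -1 ? si_cpu : least_loaded;
}

int main(void)
{
	/* CPU0 busy with normal tasks, CPU1 has only SCHED_IDLE work. */
	struct rq_sketch rqs[] = { { 3, 0 }, { 2, 2 } };

	printf("picked CPU%d\n", pick_cpu(rqs, 2));	/* expect CPU1 */
	return 0;
}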
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Daniel Lezcano <daniel.lezcano@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: chris.redpath@arm.com
Cc: quentin.perret@linaro.org
Cc: songliubraving@fb.com
Cc: steven.sistare@oracle.com
Cc: subhra.mazumdar@oracle.com
Cc: tkjos@google.com
Link: https://lkml.kernel.org/r/eeafa25fdeb6f6edd5b2da716bc8f0ba7708cbcf.156152354...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/fair.c | 47 +++++++++++++++++++++++++++++++++------------
 1 file changed, 35 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c48d47fbb8fc..bebc176f4d40 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5379,6 +5379,15 @@ static struct {
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
+/* CPU only has SCHED_IDLE tasks enqueued */
+static int sched_idle_cpu(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+
+	return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
+			rq->nr_running);
+}
+
 /**
  * __cpu_load_update - update the rq->cpu_load[] statistics
  * @this_rq: The rq to update statistics for
@@ -5938,7 +5947,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 	unsigned int min_exit_latency = UINT_MAX;
 	u64 latest_idle_timestamp = 0;
 	int least_loaded_cpu = this_cpu;
-	int shallowest_idle_cpu = -1;
+	int shallowest_idle_cpu = -1, si_cpu = -1;
 	int i;
 
 	/* Check if we have any choice: */
@@ -5969,7 +5978,12 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 				latest_idle_timestamp = rq->idle_stamp;
 				shallowest_idle_cpu = i;
 			}
-		} else if (shallowest_idle_cpu == -1) {
+		} else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
+			if (sched_idle_cpu(i)) {
+				si_cpu = i;
+				continue;
+			}
+
 			load = weighted_cpuload(cpu_rq(i));
 			if (load < min_load) {
 				min_load = load;
@@ -5978,7 +5992,11 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 		}
 	}
 
-	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
+	if (shallowest_idle_cpu != -1)
+		return shallowest_idle_cpu;
+	if (si_cpu != -1)
+		return si_cpu;
+	return least_loaded_cpu;
 }
 
 static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
@@ -6131,7 +6149,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu;
+	int cpu, si_cpu = -1;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6141,9 +6159,11 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 			continue;
 		if (available_idle_cpu(cpu))
 			return cpu;
+		if (si_cpu == -1 && sched_idle_cpu(cpu))
+			si_cpu = cpu;
 	}
 
-	return -1;
+	return si_cpu;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6171,11 +6191,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu, nr = INT_MAX;
+	int cpu, nr = INT_MAX, si_cpu = -1;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
-		return -1;
+		return si_cpu;
 
 	/*
 	 * Due to large variance we need a large fuzz factor; hackbench in
@@ -6185,7 +6205,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	avg_cost = this_sd->avg_scan_cost + 1;
 
 	if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
-		return -1;
+		return si_cpu;
 
 	if (sched_feat(SIS_PROP)) {
 		u64 span_avg = sd->span_weight * avg_idle;
@@ -6199,11 +6219,13 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
 		if (!--nr)
-			return -1;
+			return si_cpu;
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
 		if (available_idle_cpu(cpu))
 			break;
+		if (si_cpu == -1 && sched_idle_cpu(cpu))
+			si_cpu = cpu;
 	}
 
 	time = local_clock() - time;
@@ -6222,13 +6244,14 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	struct sched_domain *sd;
 	int i, recent_used_cpu;
 
-	if (available_idle_cpu(target))
+	if (available_idle_cpu(target) || sched_idle_cpu(target))
 		return target;
 
 	/*
	 * If the previous CPU is cache affine and idle, don't be stupid:
	 */
-	if (prev != target && cpus_share_cache(prev, target) && available_idle_cpu(prev))
+	if (prev != target && cpus_share_cache(prev, target) &&
+	    (available_idle_cpu(prev) || sched_idle_cpu(prev)))
 		return prev;
 
 	/* Check a recently used CPU as a potential idle candidate: */
@@ -6236,7 +6259,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    available_idle_cpu(recent_used_cpu) &&
+	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
 	    cpumask_test_cpu(p->recent_used_cpu, &p->cpus_allowed)) {
 		/*
 		 * Replace recent_used_cpu with prev as it is a potential
From: Cheng Jian <cj.chengjian@huawei.com>
mainline inclusion
from mainline-v5.6-rc1
commit 60588bfa223ff675b95f866249f90616613fbe31
category: bugfix
bugzilla: 38260
CVE: NA

---------------------------
select_idle_cpu() will scan the LLC domain for idle CPUs, which is always expensive. So the next commit:
1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
introduces a way to limit how many CPUs we scan.
But the scan may spend some of the 'nr' attempts on CPUs that are not allowed for the task and thus waste them. The function can then return nr_cpumask_bits without finding a CPU our task is allowed to run on.

The cpumask may be too big to put on the stack, so, similar to select_idle_core(), use the per-CPU 'select_idle_mask' to prevent a stack overflow.
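A standalone sketch of the fix (the real change is in the diff below): intersect the scan domain with the task's allowed CPUs before the loop, so the limited attempt budget 'nr' is only spent on CPUs the task can actually use. A plain bit mask stands in for the kernel's cpumask and 'select_idle_mask'; the function and values are illustrative only.

#include <stdio.h>

#define NR_CPUS 8

static int scan_for_idle(unsigned int domain_mask, unsigned int allowed_mask,
			 unsigned int idle_mask, int nr)
{
	unsigned int cpus = domain_mask & allowed_mask;	/* pre-filter */
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		if (!(cpus & (1u << cpu)))
			continue;
		if (!--nr)
			return -1;	/* attempt budget exhausted */
		if (idle_mask & (1u << cpu))
			return cpu;	/* found an idle, allowed CPU */
	}
	return -1;
}

int main(void)
{
	/* Domain = CPUs 0-7, task allowed only on CPUs 4-7, CPU6 is idle. */
	printf("found CPU%d\n", scan_for_idle(0xff, 0xf0, 1u << 6, 4));
	return 0;
}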
Fixes: 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()")
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Reviewed-by: Valentin Schneider <valentin.schneider@arm.com>
Link: https://lkml.kernel.org/r/20191213024530.28052-1-cj.chengjian@huawei.com
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/fair.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bebc176f4d40..b0a6a42e9a9a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6187,6 +6187,7 @@ static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd
  */
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
 	struct sched_domain *this_sd;
 	u64 avg_cost, avg_idle;
 	u64 time, cost;
@@ -6217,11 +6218,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	time = local_clock();
 
-	for_each_cpu_wrap(cpu, sched_domain_span(sd), target) {
+	cpumask_and(cpus, sched_domain_span(sd), &p->cpus_allowed);
+
+	for_each_cpu_wrap(cpu, cpus, target) {
 		if (!--nr)
 			return si_cpu;
-		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
-			continue;
 		if (available_idle_cpu(cpu))
 			break;
 		if (si_cpu == -1 && sched_idle_cpu(cpu))
From: Viresh Kumar <viresh.kumar@linaro.org>
mainline inclusion
from mainline-v5.6-rc1
commit 17346452b25b98acfb395d2a82ec2e4ad0cb7a01
category: feature
bugzilla: 38260
CVE: NA

---------------------------
There are instances where we keep searching for an idle CPU despite already having a sched-idle CPU (in find_idlest_group_cpu(), select_idle_smt() and select_idle_cpu()), and then there are places where we don't necessarily do that and return a sched-idle CPU as soon as we find one (in select_idle_sibling()). This looks a bit inconsistent, and it may be worth having the same policy everywhere.
On the other hand, choosing a sched-idle CPU over an idle one should be beneficial from a performance and power point of view as well, as we don't need to bring the CPU back from a deep idle state, which wastes quite a lot of time and energy and delays the scheduling of the newly woken-up task.
This patch tries to simplify code around sched-idle CPU selection and make it consistent throughout.
Testing is done with the help of rt-app on a HiKey board (ARM64 octa-core, 2 clusters, CPUs 0-3 and 4-7). The cpufreq governor was set to performance to avoid any side effects from CPU frequency scaling. The following tests were performed:
Test 1: 1-cfs-task:
A single SCHED_NORMAL task is pinned to CPU5; it runs for 2333 us out of every 7777 us (which gives the cluster time to go into a deep idle state).
Test 2: 1-cfs-1-idle-task:
A single SCHED_NORMAL task is pinned on CPU5 and a single SCHED_IDLE task is pinned on CPU6 (to make sure cluster 1 doesn't go into a deep idle state).
Test 3: 1-cfs-8-idle-task:
A single SCHED_NORMAL task is pinned on CPU5 and eight SCHED_IDLE tasks are created which run forever (not pinned anywhere, so they run on all CPUs). It was checked with kernelshark that, as soon as the NORMAL task sleeps, a SCHED_IDLE task starts running on CPU5.
And here are the results on mean latency (in us), using the "st" tool.
$ st 1-cfs-task/rt-app-cfs_thread-0.log
 N       min     max     sum     mean    stddev
 642     90      592     197180  307.134 109.906

$ st 1-cfs-1-idle-task/rt-app-cfs_thread-0.log
 N       min     max     sum     mean    stddev
 642     67      311     113850  177.336 41.4251

$ st 1-cfs-8-idle-task/rt-app-cfs_thread-0.log
 N       min     max     sum     mean    stddev
 643     29      173     41364   64.3297 13.2344
The mean latency when we need to:
 - wakeup from deep idle state is 307 us.
 - wakeup from shallow idle state is 177 us.
 - preempt a SCHED_IDLE task is 64 us.
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/b90cbcce608cef4e02a7bbfe178335f76d201bab.157372834...
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/fair.c | 38 ++++++++++++++------------------------
 1 file changed, 14 insertions(+), 24 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b0a6a42e9a9a..9d0db0c89c3e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5947,7 +5947,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 	unsigned int min_exit_latency = UINT_MAX;
 	u64 latest_idle_timestamp = 0;
 	int least_loaded_cpu = this_cpu;
-	int shallowest_idle_cpu = -1, si_cpu = -1;
+	int shallowest_idle_cpu = -1;
 	int i;
 
 	/* Check if we have any choice: */
@@ -5956,6 +5956,9 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 
 	/* Traverse only the allowed CPUs */
 	for_each_cpu_and(i, sched_group_span(group), &p->cpus_allowed) {
+		if (sched_idle_cpu(i))
+			return i;
+
 		if (available_idle_cpu(i)) {
 			struct rq *rq = cpu_rq(i);
 			struct cpuidle_state *idle = idle_get_state(rq);
@@ -5978,12 +5981,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 				latest_idle_timestamp = rq->idle_stamp;
 				shallowest_idle_cpu = i;
 			}
-		} else if (shallowest_idle_cpu == -1 && si_cpu == -1) {
-			if (sched_idle_cpu(i)) {
-				si_cpu = i;
-				continue;
-			}
-
+		} else if (shallowest_idle_cpu == -1) {
 			load = weighted_cpuload(cpu_rq(i));
 			if (load < min_load) {
 				min_load = load;
@@ -5992,11 +5990,7 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this
 		}
 	}
 
-	if (shallowest_idle_cpu != -1)
-		return shallowest_idle_cpu;
-	if (si_cpu != -1)
-		return si_cpu;
-	return least_loaded_cpu;
+	return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
 }
 
 static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
@@ -6149,7 +6143,7 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
  */
 static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
 {
-	int cpu, si_cpu = -1;
+	int cpu;
 
 	if (!static_branch_likely(&sched_smt_present))
 		return -1;
@@ -6157,13 +6151,11 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 	for_each_cpu(cpu, cpu_smt_mask(target)) {
 		if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
 			continue;
-		if (available_idle_cpu(cpu))
+		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
 			return cpu;
-		if (si_cpu == -1 && sched_idle_cpu(cpu))
-			si_cpu = cpu;
 	}
 
-	return si_cpu;
+	return -1;
 }
 
 #else /* CONFIG_SCHED_SMT */
@@ -6192,11 +6184,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	u64 avg_cost, avg_idle;
 	u64 time, cost;
 	s64 delta;
-	int cpu, nr = INT_MAX, si_cpu = -1;
+	int cpu, nr = INT_MAX;
 
 	this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
 	if (!this_sd)
-		return si_cpu;
+		return -1;
 
 	/*
 	 * Due to large variance we need a large fuzz factor; hackbench in
@@ -6206,7 +6198,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 	avg_cost = this_sd->avg_scan_cost + 1;
 
 	if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
-		return si_cpu;
+		return -1;
 
 	if (sched_feat(SIS_PROP)) {
 		u64 span_avg = sd->span_weight * avg_idle;
@@ -6222,11 +6214,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 
 	for_each_cpu_wrap(cpu, cpus, target) {
 		if (!--nr)
-			return si_cpu;
-		if (available_idle_cpu(cpu))
+			return -1;
+		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
 			break;
-		if (si_cpu == -1 && sched_idle_cpu(cpu))
-			si_cpu = cpu;
 	}
 
 	time = local_clock() - time;