From: Zheng Zengkai <zhengzengkai@huawei.com>
Scan for an idle sibling in a single pass
Mel Gorman (4):
  sched/fair: Remove SIS_AVG_CPU
  sched/fair: Move avg_scan_cost calculations under SIS_PROP
  sched/fair: Remove select_idle_smt()
  sched/fair: Merge select_idle_core/cpu()

 kernel/sched/fair.c     | 150 +++++++++++++++++++---------------------
 kernel/sched/features.h |   1 -
 2 files changed, 70 insertions(+), 81 deletions(-)
From: Mel Gorman <mgorman@techsingularity.net>

mainline inclusion
from mainline-5.12-rc1
commit 9fe1f127b913318c631d0041ecf71486e38c2c2d
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I41A4K
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Both select_idle_core() and select_idle_cpu() do a loop over the same cpumask. Observe that by clearing the already visited CPUs, we can fold the iteration and iterate a core at a time.
All we need to do is remember any non-idle CPU we encountered while scanning for an idle core. This way we'll only iterate every CPU once.
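To make the folded scan easier to picture before reading the diff, here is a
minimal standalone sketch of the idea — plain userspace C, not the kernel code
below; the 8-CPU topology, SMT_WIDTH and the cpu_idle[] table are invented for
illustration. Each core is tested as a whole, and the first idle CPU seen along
the way is remembered as a fallback in case no fully idle core exists:

#include <stdio.h>
#include <stdbool.h>

#define NR_CPUS   8
#define SMT_WIDTH 2     /* assumed: two SMT siblings per core */

static bool cpu_idle[NR_CPUS] = {
        false, false, true, false,      /* cores 0-1 are not fully idle */
        true,  true,  false, false,     /* core 2 (CPUs 4,5) is idle */
};

/* Test one core; remember the first idle CPU seen as a fallback. */
static int scan_core(int core, int *idle_cpu)
{
        bool idle = true;
        int cpu;

        for (cpu = core * SMT_WIDTH; cpu < (core + 1) * SMT_WIDTH; cpu++) {
                if (!cpu_idle[cpu])
                        idle = false;
                else if (*idle_cpu == -1)
                        *idle_cpu = cpu;        /* fallback candidate */
        }

        return idle ? core * SMT_WIDTH : -1;
}

int main(void)
{
        int core, i, idle_cpu = -1;

        /* One pass over the mask, a core at a time. */
        for (core = 0; core < NR_CPUS / SMT_WIDTH; core++) {
                i = scan_core(core, &idle_cpu);
                if (i >= 0) {
                        printf("idle core found, returning CPU %d\n", i);
                        return 0;
                }
        }

        printf("no idle core; fallback idle CPU is %d\n", idle_cpu);
        return 0;
}

The property being bought is the single pass: each CPU is inspected at most
once, where the old select_idle_core() followed by select_idle_cpu() could walk
the same cpumask twice.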
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210127135203.19633-5-mgorman@techsingularity.net
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Conflicts:
        kernel/sched/fair.c
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 kernel/sched/fair.c | 98 +++++++++++++++++++++++++++------------------
 1 file changed, 59 insertions(+), 39 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 526b60b752ff..3ead3c612ab8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6032,6 +6032,14 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p
         return new_cpu;
 }

+static inline int __select_idle_cpu(int cpu)
+{
+        if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+                return cpu;
+
+        return -1;
+}
+
 #ifdef CONFIG_SCHED_SMT
 DEFINE_STATIC_KEY_FALSE(sched_smt_present);
 EXPORT_SYMBOL_GPL(sched_smt_present);
@@ -6090,47 +6098,51 @@ void __update_idle_core(struct rq *rq)
  * there are no idle cores left in the system; tracked through
  * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
  */
-static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
 {
-        struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
-        int core, cpu;
+        bool idle = true;
+        int cpu;

         if (!static_branch_likely(&sched_smt_present))
-                return -1;
-
-        if (!test_idle_cores(target, false))
-                return -1;
-
-        cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
-
-        for_each_cpu_wrap(core, cpus, target) {
-                bool idle = true;
+                return __select_idle_cpu(core);

-                for_each_cpu(cpu, cpu_smt_mask(core)) {
-                        if (!available_idle_cpu(cpu)) {
-                                idle = false;
-                                break;
+        for_each_cpu(cpu, cpu_smt_mask(core)) {
+                if (!available_idle_cpu(cpu)) {
+                        idle = false;
+                        if (*idle_cpu == -1) {
+                                if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) {
+                                        *idle_cpu = cpu;
+                                        break;
+                                }
+                                continue;
                         }
+                        break;
                 }
-                cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
-
-                if (idle)
-                        return core;
+                if (*idle_cpu == -1 && cpumask_test_cpu(cpu, p->cpus_ptr))
+                        *idle_cpu = cpu;
         }

-        /*
-         * Failed to find an idle core; stop looking for one.
-         */
-        set_idle_cores(target, 0);
+        if (idle)
+                return core;

+        cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
         return -1;
 }
 #else /* CONFIG_SCHED_SMT */

-static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
+static inline void set_idle_cores(int cpu, int val)
 {
-        return -1;
+}
+
+static inline bool test_idle_cores(int cpu, bool def)
+{
+        return def;
+}
+
+static inline int select_idle_core(struct task_struct *p, int core, struct cpumask *cpus, int *idle_cpu)
+{
+        return __select_idle_cpu(core);
 }

 #endif /* CONFIG_SCHED_SMT */

@@ -6143,10 +6155,11 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
 static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
 {
         struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
+        int i, cpu, idle_cpu = -1, nr = INT_MAX;
+        bool smt = test_idle_cores(target, false);
+        int this = smp_processor_id();
         struct sched_domain *this_sd;
         u64 time;
-        int this = smp_processor_id();
-        int cpu, nr = INT_MAX;
         this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
         if (!this_sd)
@@ -6154,7 +6167,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t

         cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);

-        if (sched_feat(SIS_PROP)) {
+        if (sched_feat(SIS_PROP) && !smt) {
                 u64 avg_cost, avg_idle, span_avg;

                 /*
@@ -6174,18 +6187,29 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
         }

         for_each_cpu_wrap(cpu, cpus, target) {
-                if (!--nr)
-                        return -1;
-                if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
-                        break;
+                if (smt) {
+                        i = select_idle_core(p, cpu, cpus, &idle_cpu);
+                        if ((unsigned int)i < nr_cpumask_bits)
+                                return i;
+
+                } else {
+                        if (!--nr)
+                                return -1;
+                        idle_cpu = __select_idle_cpu(cpu);
+                        if ((unsigned int)idle_cpu < nr_cpumask_bits)
+                                break;
+                }
         }

-        if (sched_feat(SIS_PROP)) {
+        if (smt)
+                set_idle_cores(this, false);
+
+        if (sched_feat(SIS_PROP) && !smt) {
                 time = cpu_clock(this) - time;
                 update_avg(&this_sd->avg_scan_cost, time);
         }

-        return cpu;
+        return idle_cpu;
 }

 /*
@@ -6314,10 +6338,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
         if (!sd)
                 return target;

-        i = select_idle_core(p, sd, target);
-        if ((unsigned)i < nr_cpumask_bits)
-                return i;
-
         i = select_idle_cpu(p, sd, target);
         if ((unsigned)i < nr_cpumask_bits)
                 return i;
From: Mel Gorman <mgorman@techsingularity.net>

mainline inclusion
from mainline-5.12-rc1
commit 6cd56ef1df399a004f90ecb682427f9964969fc9
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I41A4K
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
In order to make the next patch more readable, and to quantify the actual effectiveness of this pass, start by removing select_idle_smt().
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210125085909.4600-4-mgorman@techsingularity.net
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 kernel/sched/fair.c | 30 ------------------------------
 1 file changed, 30 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7a70d97fb15d..526b60b752ff 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6126,27 +6126,6 @@ static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int
         return -1;
 }

-/*
- * Scan the local SMT mask for idle CPUs.
- */
-static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
-{
-        int cpu;
-
-        if (!static_branch_likely(&sched_smt_present))
-                return -1;
-
-        for_each_cpu(cpu, cpu_smt_mask(target)) {
-                if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
-                    !cpumask_test_cpu(cpu, sched_domain_span(sd)))
-                        continue;
-                if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
-                        return cpu;
-        }
-
-        return -1;
-}
-
 #else /* CONFIG_SCHED_SMT */

 static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
@@ -6154,11 +6133,6 @@ static inline int select_idle_core(struct task_struct *p, struct sched_domain *s
         return -1;
 }

-static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
-{
-        return -1;
-}
-
 #endif /* CONFIG_SCHED_SMT */

 /*
@@ -6348,10 +6322,6 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
         if ((unsigned)i < nr_cpumask_bits)
                 return i;

-        i = select_idle_smt(p, sd, target);
-        if ((unsigned)i < nr_cpumask_bits)
-                return i;
-
         return target;
 }
From: Mel Gorman <mgorman@techsingularity.net>

mainline inclusion
from mainline-5.12-rc1
commit e6e0dc2d5497f7f3ed970052917e2923c6f453f4
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I41A4K
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
SIS_AVG_CPU was introduced as a means of avoiding a search when the average search cost indicated that the search would likely fail. It was a blunt instrument and disabled by commit 4c77b18cf8b7 ("sched/fair: Make select_idle_cpu() more aggressive") and later replaced with a proportional search depth by commit 1ad3aaf3fcd2 ("sched/core: Implement new approach to scale select_idle_cpu()").
While there are corner cases where SIS_AVG_CPU is better, it has now been disabled for almost three years. As the intent of SIS_PROP is to reduce the time complexity of select_idle_cpu(), let's drop SIS_AVG_CPU and focus on SIS_PROP as a throttling mechanism.
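For context, the SIS_PROP throttle that remains picks a scan depth proportional
to the ratio of estimated idle time to average scan cost, with a floor of four
CPUs. A standalone sketch of just that calculation follows; the function name
and example inputs are ours for illustration, the real values come from
rq->avg_idle, sd->avg_scan_cost and sd->span_weight:

#include <stdio.h>
#include <stdint.h>

/* Proportional scan depth: a larger idle-time budget relative to the
 * cost of scanning one CPU permits a deeper search of the LLC. */
static uint64_t sis_prop_depth(uint64_t avg_idle_ns, uint64_t avg_scan_cost_ns,
                               unsigned int span_weight)
{
        uint64_t avg_idle = avg_idle_ns / 512;          /* large fuzz factor */
        uint64_t avg_cost = avg_scan_cost_ns + 1;       /* avoid div-by-zero */
        uint64_t span_avg = span_weight * avg_idle;

        if (span_avg > 4 * avg_cost)
                return span_avg / avg_cost;

        return 4;       /* minimum scan depth */
}

int main(void)
{
        /* e.g. 100us of average idle time, 2us scan cost, 16-CPU LLC */
        printf("nr = %llu\n",
               (unsigned long long)sis_prop_depth(100000, 2000, 16));
        return 0;
}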
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210125085909.4600-2-mgorman@techsingularity.net
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 kernel/sched/fair.c     | 20 +++++++++-----------
 kernel/sched/features.h |  1 -
 2 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a7d34016b6be..be1d3bcb6e11 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6170,7 +6170,6 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
 {
         struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
         struct sched_domain *this_sd;
-        u64 avg_cost, avg_idle;
         u64 time;
         int this = smp_processor_id();
         int cpu, nr = INT_MAX;
@@ -6179,18 +6178,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
         if (!this_sd)
                 return -1;

-        /*
-         * Due to large variance we need a large fuzz factor; hackbench in
-         * particularly is sensitive here.
-         */
-        avg_idle = this_rq()->avg_idle / 512;
-        avg_cost = this_sd->avg_scan_cost + 1;
+        if (sched_feat(SIS_PROP)) {
+                u64 avg_cost, avg_idle, span_avg;

-        if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
-                return -1;
+                /*
+                 * Due to large variance we need a large fuzz factor;
+                 * hackbench in particularly is sensitive here.
+                 */
+                avg_idle = this_rq()->avg_idle / 512;
+                avg_cost = this_sd->avg_scan_cost + 1;

-        if (sched_feat(SIS_PROP)) {
-                u64 span_avg = sd->span_weight * avg_idle;
+                span_avg = sd->span_weight * avg_idle;
                 if (span_avg > 4*avg_cost)
                         nr = div_u64(span_avg, avg_cost);
                 else
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index f1bf5e12d889..129ca8f50cf5 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -54,7 +54,6 @@ SCHED_FEAT(TTWU_QUEUE, true)
 /*
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
  */
-SCHED_FEAT(SIS_AVG_CPU, false)
 SCHED_FEAT(SIS_PROP, true)

 /*
From: Mel Gorman <mgorman@techsingularity.net>

mainline inclusion
from mainline-5.12-rc1
commit bae4ec13640b0915e7dd86da7e65c5d085160571
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I41A4K
CVE: NA

Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
As noted by Vincent Guittot, avg_scan_costs are calculated for SIS_PROP even if SIS_PROP is disabled. Move the time calculations under a SIS_PROP check and while we are at it, exclude the cost of initialising the CPU mask from the average scan cost.
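The measurement discipline here can be modeled outside the kernel: read the
clock only when the feature that needs the sample is enabled, keep setup work
outside the timed window, and fold the sample into a running average. A
standalone sketch under those assumptions — update_avg() is modeled on the
kernel's avg += (sample - avg) / 8, and the busy loops merely stand in for the
cpumask setup and the scan:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <time.h>

static const bool sis_prop = true;      /* stand-in for sched_feat(SIS_PROP) */

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* Running average, modeled on the kernel's update_avg(). */
static void update_avg(uint64_t *avg, uint64_t sample)
{
        *avg += (int64_t)(sample - *avg) / 8;
}

int main(void)
{
        uint64_t avg_scan_cost = 0, time = 0;
        volatile int sink = 0;
        int i;

        /* setup work (the cpumask_and() analogue) is not timed ... */
        for (i = 0; i < 100000; i++)
                sink += i;

        if (sis_prop)
                time = now_ns();

        /* ... only the scan loop falls inside the measured window */
        for (i = 0; i < 100000; i++)
                sink -= i;

        if (sis_prop) {
                time = now_ns() - time;
                update_avg(&avg_scan_cost, time);
        }

        printf("avg_scan_cost = %llu ns\n",
               (unsigned long long)avg_scan_cost);
        return 0;
}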
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20210125085909.4600-3-mgorman@techsingularity.net
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 kernel/sched/fair.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index be1d3bcb6e11..7a70d97fb15d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6178,6 +6178,8 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
         if (!this_sd)
                 return -1;

+        cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+
         if (sched_feat(SIS_PROP)) {
                 u64 avg_cost, avg_idle, span_avg;

@@ -6193,11 +6195,9 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
                         nr = div_u64(span_avg, avg_cost);
                 else
                         nr = 4;
-        }
-
-        time = cpu_clock(this);

-        cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
+                time = cpu_clock(this);
+        }

         for_each_cpu_wrap(cpu, cpus, target) {
                 if (!--nr)
@@ -6206,8 +6206,10 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int t
                         break;
         }

-        time = cpu_clock(this) - time;
-        update_avg(&this_sd->avg_scan_cost, time);
+        if (sched_feat(SIS_PROP)) {
+                time = cpu_clock(this) - time;
+                update_avg(&this_sd->avg_scan_cost, time);
+        }

         return cpu;
 }