From: tanghui tanghui20@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7NS6Y
--------------------------------
Compare the task group's 'util_avg' on the preferred CPUs with the capacity of the preferred CPUs, and dynamically adjust the CPU range used by the task wakeup process.
Signed-off-by: tanghui tanghui20@huawei.com --- include/linux/sched.h | 1 + include/linux/sched/sysctl.h | 3 + kernel/sched/fair.c | 133 +++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 11 +++ 4 files changed, 148 insertions(+)
diff --git a/include/linux/sched.h b/include/linux/sched.h index ce6208bfb530..238ac254af0d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1536,6 +1536,7 @@ struct task_struct {
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY cpumask_t *prefer_cpus; + const cpumask_t *select_cpus; #endif
/* diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 5a64582b086b..ede157a678f8 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -29,4 +29,7 @@ extern int sysctl_numa_balancing_mode; #define sysctl_numa_balancing_mode 0 #endif
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +extern int sysctl_sched_util_low_pct; +#endif #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 373ff5f55884..b5265b610a7d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6704,7 +6704,11 @@ find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this return cpumask_first(sched_group_span(group));
/* Traverse only the allowed CPUs */ +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + for_each_cpu_and(i, sched_group_span(group), p->select_cpus) { +#else for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) { +#endif struct rq *rq = cpu_rq(i);
if (!sched_core_cookie_match(rq, p)) @@ -6751,7 +6755,11 @@ static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p { int new_cpu = cpu;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_intersects(sched_domain_span(sd), p->select_cpus)) +#else if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr)) +#endif return prev_cpu;
/* @@ -6875,7 +6883,11 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu if (!available_idle_cpu(cpu)) { idle = false; if (*idle_cpu == -1) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->select_cpus)) { +#else if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, p->cpus_ptr)) { +#endif *idle_cpu = cpu; break; } @@ -6901,7 +6913,11 @@ static int select_idle_smt(struct task_struct *p, int target) { int cpu;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + for_each_cpu_and(cpu, cpu_smt_mask(target), p->select_cpus) { +#else for_each_cpu_and(cpu, cpu_smt_mask(target), p->cpus_ptr) { +#endif if (cpu == target) continue; if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) @@ -6949,7 +6965,11 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool struct sched_domain *this_sd = NULL; u64 time = 0;
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_and(cpus, sched_domain_span(sd), p->select_cpus); +#else cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr); +#endif
if (sched_feat(SIS_PROP) && !has_idle_core) { u64 avg_cost, avg_idle, span_avg; @@ -7122,6 +7142,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) lockdep_assert_irqs_disabled();
if ((available_idle_cpu(target) || sched_idle_cpu(target)) && +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(target, p->select_cpus) && +#endif asym_fits_cpu(task_util, util_min, util_max, target)) return target;
@@ -7130,6 +7153,9 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) */ if (prev != target && cpus_share_cache(prev, target) && (available_idle_cpu(prev) || sched_idle_cpu(prev)) && +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(prev, p->select_cpus) && +#endif asym_fits_cpu(task_util, util_min, util_max, prev)) return prev;
@@ -7156,7 +7182,11 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target) recent_used_cpu != target && cpus_share_cache(recent_used_cpu, target) && (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) && +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + cpumask_test_cpu(p->recent_used_cpu, p->select_cpus) && +#else cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) && +#endif asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) { return recent_used_cpu; } @@ -7638,6 +7668,82 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu) return target; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY +/* + * Low utilization threshold for CPU + * + * (default: 85%, units: percentage of CPU utilization) + */ +int sysctl_sched_util_low_pct = 85; + +static inline bool prefer_cpus_valid(struct task_struct *p) +{ + return p->prefer_cpus && + !cpumask_empty(p->prefer_cpus) && + !cpumask_equal(p->prefer_cpus, p->cpus_ptr) && + cpumask_subset(p->prefer_cpus, p->cpus_ptr); +} + +/* + * set_task_select_cpus: select the cpu range for task + * @p: the task whose available cpu range will be set + * @idlest_cpu: if not NULL, receives the idlest cpu found in prefer cpus (@sd_flag is not referenced in the body) + * + * If the sum of 'util_avg' among 'prefer_cpus' is no higher than the percentage + * 'sysctl_sched_util_low_pct' of 'prefer_cpus' capacity, or an idle preferred cpu is found, select + * 'prefer_cpus' range for task, otherwise select 'cpus_ptr' for task. + * + * The available cpu range is stored in p->select_cpus. The idlest cpu in preferred cpus + * is stored in @idlest_cpu, which is used as the wakeup cpu when the cpu chosen otherwise + * is not in p->select_cpus. + */ +static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu, + int sd_flag) +{ + unsigned long util_avg_sum = 0; + unsigned long tg_capacity = 0; + long min_util = INT_MIN; /* despite the name, holds the largest spare capacity seen so far */ + struct task_group *tg; + long spare; + int cpu; + + p->select_cpus = p->cpus_ptr; + if (!prefer_cpus_valid(p)) + return; + + rcu_read_lock(); + tg = task_group(p); + for_each_cpu(cpu, p->prefer_cpus) { + if (unlikely(!tg->se[cpu])) + continue; + + if (idlest_cpu && available_idle_cpu(cpu)) { + *idlest_cpu = cpu; + } else if (idlest_cpu) { + spare = (long)(capacity_of(cpu) - tg->se[cpu]->avg.util_avg); + if (spare > min_util) { + min_util = spare; + *idlest_cpu = cpu; + } + } + + if (available_idle_cpu(cpu)) { + rcu_read_unlock(); + p->select_cpus = p->prefer_cpus; + return; + } + + util_avg_sum += tg->se[cpu]->avg.util_avg; + tg_capacity += capacity_of(cpu); + } + rcu_read_unlock(); + + if (tg_capacity > cpumask_weight(p->prefer_cpus) && + util_avg_sum * 100 <= tg_capacity * 
sysctl_sched_util_low_pct) + p->select_cpus = p->prefer_cpus; +} +#endif + /* * select_task_rq_fair: Select target runqueue for the waking task in domains * that have the relevant SD flag set. In practice, this is SD_BALANCE_WAKE, @@ -7658,11 +7764,19 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) int want_affine = 0; /* SD_flags and WF_flags share the first nibble */ int sd_flag = wake_flags & 0xF; +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + int idlest_cpu = 0; +#endif
/* * required for stable ->cpus_allowed */ lockdep_assert_held(&p->pi_lock); + +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + set_task_select_cpus(p, &idlest_cpu, sd_flag); +#endif + if (wake_flags & WF_TTWU) { record_wakee(p);
@@ -7673,7 +7787,11 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) new_cpu = prev_cpu; }
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->select_cpus); +#else want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr); +#endif }
rcu_read_lock(); @@ -7684,7 +7802,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) */ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) && cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) { +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + new_cpu = cpu; + if (cpu != prev_cpu && + cpumask_test_cpu(prev_cpu, p->select_cpus)) +#else if (cpu != prev_cpu) +#endif new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
sd = NULL; /* Prefer wake_affine over balance flags */ @@ -7711,6 +7835,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) } rcu_read_unlock();
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_test_cpu(new_cpu, p->select_cpus)) + new_cpu = idlest_cpu; +#endif return new_cpu; }
@@ -9860,8 +9988,13 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) int local_group;
/* Skip over this group if it has no CPUs allowed */ +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + if (!cpumask_intersects(sched_group_span(group), + p->select_cpus)) +#else if (!cpumask_intersects(sched_group_span(group), p->cpus_ptr)) +#endif continue;
/* Skip over this group if no cookie matched */ diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bfe53e835524..acc20b417dc8 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2054,6 +2054,17 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_INT_MAX, }, +#endif +#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY + { + .procname = "sched_util_low_pct", + .data = &sysctl_sched_util_low_pct, + .maxlen = sizeof(sysctl_sched_util_low_pct), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE_HUNDRED, + }, #endif { } };