From: tanghui <tanghui20@huawei.com>
Optimise the way CPU utilization is obtained for dynamic affinity: instead of
weighing the task group's per-CPU util_avg against the capacity of the
preferred CPUs, sample each CPU's busy percentage from kcpustat at a
configurable interval and restrict the task to its preferred CPUs only when
their average utilization stays below sysctl_sched_util_low_pct.
Signed-off-by: tanghui <tanghui20@huawei.com>
Signed-off-by: Wang ShaoBo <bobo.shaobowang@huawei.com>
v2: make the utilization refresh interval used by the low_pct check tunable through sysctl.
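
The sampling below boils down to busy% = (delta_total - delta_idle) * 100 / delta_total
over the last refresh interval, where total is the sum of all kcpustat fields and idle
includes iowait. For illustration only, a minimal userspace sketch of the same arithmetic,
reading the aggregate "cpu" line of /proc/stat over a one-second window (the file, the
window length and the program itself are illustrative assumptions, not part of this patch):

#include <stdio.h>
#include <unistd.h>

struct snap { unsigned long long total, idle; };

/* Take one snapshot of the aggregate "cpu" line of /proc/stat. */
static int read_cpu(struct snap *s)
{
	unsigned long long user, nice, system, idle, iowait, irq, softirq, steal;
	FILE *f = fopen("/proc/stat", "r");

	if (!f)
		return -1;
	if (fscanf(f, "cpu %llu %llu %llu %llu %llu %llu %llu %llu",
		   &user, &nice, &system, &idle, &iowait, &irq,
		   &softirq, &steal) != 8) {
		fclose(f);
		return -1;
	}
	fclose(f);
	s->total = user + nice + system + idle + iowait + irq + softirq + steal;
	s->idle = idle + iowait;	/* idle + iowait, as cpu_idletime() does */
	return 0;
}

int main(void)
{
	struct snap a, b;
	unsigned long long dt, di;

	if (read_cpu(&a))
		return 1;
	sleep(1);			/* illustrative window; the patch uses jiffies */
	if (read_cpu(&b))
		return 1;
	dt = b.total - a.total;
	di = b.idle - a.idle;
	if (dt)				/* mirrors the !delta_systime bail-out */
		printf("vutil: %llu%%\n", (dt - di) * 100 / dt);
	return 0;
}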
---
 fs/proc/stat.c                | 4 ++
 include/linux/sched/cputime.h | 3 ++
 include/linux/sched/sysctl.h  | 2 +
 kernel/sched/fair.c           | 83 +++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h          | 1 +
 kernel/sysctl.c               | 9 ++++
 6 files changed, 102 insertions(+)
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 7e832b24847dd..3fe60a77b0b4d 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -63,7 +63,11 @@ u64 get_idle_time(int cpu)
 	return idle;
 }
 
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+u64 get_iowait_time(int cpu)
+#else
 static u64 get_iowait_time(int cpu)
+#endif
 {
 	u64 iowait, iowait_usecs = -1ULL;
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 6b1793606fc95..4a092e006f5b2 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -189,6 +189,9 @@ task_sched_runtime(struct task_struct *task);
 extern int use_sched_idle_time;
 extern int sched_idle_time_adjust(int cpu, u64 *utime, u64 *stime);
 extern unsigned long long sched_get_idle_time(int cpu);
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+extern u64 get_iowait_time(int cpu);
+#endif
 
 #ifdef CONFIG_PROC_FS
 extern u64 get_idle_time(int cpu);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 04eb5b127867b..8223a1fce176c 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -31,6 +31,8 @@ extern unsigned int sysctl_sched_latency;
 extern unsigned int sysctl_sched_min_granularity;
 extern unsigned int sysctl_sched_wakeup_granularity;
 extern unsigned int sysctl_sched_child_runs_first;
+extern int sysctl_sched_util_update_interval;
+extern int sysctl_sched_util_update_interval_max;
 
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
 extern int sysctl_sched_util_low_pct;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ad6a7923c9edb..af55a26d11fcb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6682,6 +6682,73 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
  */
 int sysctl_sched_util_low_pct = 85;
 
+struct cpu_timeinfo {
+	u64 systime;
+	u64 idletime;
+	unsigned long next_update;
+	int vutil;
+};
+
+/*
+ * The time interval to update CPU utilization
+ * (default 1ms, max 10min)
+ */
+int sysctl_sched_util_update_interval = 1;
+int sysctl_sched_util_update_interval_max = 600000;
+
+static DEFINE_PER_CPU(struct cpu_timeinfo, qos_cputime);
+
+static inline u64 cpu_systime(int cpu)
+{
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
+
+	user = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	system = kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	iowait = get_iowait_time(cpu);
+	irq = kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	softirq = kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	nice = kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+	steal = kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	idle = get_idle_time(cpu);
+
+	return user + system + iowait + irq + softirq + nice + idle + steal;
+}
+
+static inline u64 cpu_idletime(int cpu)
+{
+	return get_idle_time(cpu) + get_iowait_time(cpu);
+}
+
+static inline void update_cpu_vutil(void)
+{
+	struct cpu_timeinfo *cputime = per_cpu_ptr(&qos_cputime, smp_processor_id());
+	u64 delta_systime, delta_idle, systime, idletime;
+	int cpu = smp_processor_id();
+	unsigned long interval;
+
+	if (time_after(jiffies, cputime->next_update)) {
+		interval = msecs_to_jiffies(sysctl_sched_util_update_interval);
+		cputime->next_update = jiffies + interval;
+		systime = cpu_systime(cpu);
+		idletime = cpu_idletime(cpu);
+		delta_systime = systime - cputime->systime;
+		delta_idle = idletime - cputime->idletime;
+		if (!delta_systime)
+			return;
+
+		cputime->systime = systime;
+		cputime->idletime = idletime;
+		cputime->vutil = (delta_systime - delta_idle) * 100 / delta_systime;
+	}
+}
+
+static inline int cpu_vutil_of(int cpu)
+{
+	struct cpu_timeinfo *cputime = per_cpu_ptr(&qos_cputime, cpu);
+
+	return cputime->vutil;
+}
+
 static inline bool prefer_cpus_valid(struct task_struct *p)
 {
 	return p->prefer_cpus &&
@@ -6741,17 +6808,29 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
 			return;
 		}
 
+#if 0
 		util_avg_sum += tg->se[cpu]->avg.util_avg;
 		tg_capacity += capacity_of(cpu);
+#endif
+		util_avg_sum += cpu_vutil_of(cpu);
 	}
 	rcu_read_unlock();
 
+#if 0
 	if (tg_capacity > cpumask_weight(p->prefer_cpus) &&
 	    util_avg_sum * 100 <= tg_capacity * sysctl_sched_util_low_pct) {
 		p->select_cpus = p->prefer_cpus;
 		if (sd_flag & SD_BALANCE_WAKE)
 			schedstat_inc(p->se.dyn_affi_stats->nr_wakeups_preferred_cpus);
 	}
+#endif
+
+	if (util_avg_sum < sysctl_sched_util_low_pct *
+			cpumask_weight(p->prefer_cpus)) {
+		p->select_cpus = p->prefer_cpus;
+		if (sd_flag & SD_BALANCE_WAKE)
+			schedstat_inc(p->se.dyn_affi_stats->nr_wakeups_preferred_cpus);
+	}
 }
 #endif
@@ -10610,6 +10689,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
+
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+	update_cpu_vutil();
+#endif
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ae30681530938..045fbb3871bbe 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2354,3 +2354,4 @@ static inline void membarrier_switch_mm(struct rq *rq,
 {
 }
 #endif
+
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ad62ea156afd9..685f9881b8e23 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -352,6 +352,15 @@ static struct ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
+	{
+		.procname	= "sched_util_update_interval_ms",
+		.data		= &sysctl_sched_util_update_interval,
+		.maxlen		= sizeof(sysctl_sched_util_update_interval),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &sysctl_sched_util_update_interval_max,
+	},
 #ifdef CONFIG_SCHED_DEBUG
 	{
 		.procname	= "sched_min_granularity_ns",
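
Since the new entry is added to kern_table, it should surface as
/proc/sys/kernel/sched_util_update_interval_ms, with values clamped to
[1, sysctl_sched_util_update_interval_max] by proc_dointvec_minmax. A
hypothetical userspace sketch for reading and adjusting it, not part of
the patch (the proc path follows from the table placement above, and
the 10 ms value is just an example):

#include <stdio.h>

int main(void)
{
	const char *path = "/proc/sys/kernel/sched_util_update_interval_ms";
	FILE *f;
	int ms;

	/* Read the current refresh interval. */
	f = fopen(path, "r");
	if (!f || fscanf(f, "%d", &ms) != 1) {
		perror(path);
		return 1;
	}
	fclose(f);
	printf("current interval: %d ms\n", ms);

	/* Raise it to 10 ms (needs root); out-of-range writes are rejected. */
	f = fopen(path, "w");
	if (!f || fprintf(f, "10\n") < 0) {
		perror(path);
		return 1;
	}
	fclose(f);
	return 0;
}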