From: tanghui <tanghui20@huawei.com>
sched: optimise the way to get util, which can only be used when CONFIG_QOS_SCHED_DYNAMIC_AFFINITY is set

Instead of reading cpu_util() when dynamic affinity picks a preferred
CPU, derive a per-cpu utilization percentage from kcpustat (busy time
versus idle plus iowait time), refreshed from the scheduler tick at a
configurable interval exposed through the sched_util_update_interval_ms
sysctl.
Signed-off-by: tanghui <tanghui20@huawei.com>
---
 fs/proc/stat.c                |  4 ++
 include/linux/sched/cputime.h |  3 ++
 include/linux/sched/sysctl.h  |  2 +
 kernel/sched/fair.c           | 97 ++++++++++++++++++++++++++++++-----
 kernel/sched/sched.h          |  1 +
 kernel/sysctl.c               |  9 ++++
 6 files changed, 104 insertions(+), 12 deletions(-)
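
Note (illustrative sketch, not part of the patch): the new "vutil" value
is a plain busy-time percentage computed from two kcpustat samples taken
one update interval apart. The stand-alone program below mirrors that
arithmetic; the struct and function names are made up for the example.

#include <stdio.h>

struct sample {
	unsigned long long systime;	/* sum of all cputime buckets */
	unsigned long long idletime;	/* idle + iowait */
};

/* busy time as a percentage of total elapsed cpu time */
static int vutil_pct(struct sample prev, struct sample cur)
{
	unsigned long long delta_sys = cur.systime - prev.systime;
	unsigned long long delta_idle = cur.idletime - prev.idletime;

	if (!delta_sys)
		return -1;	/* nothing elapsed; caller keeps the old value */

	return (int)((delta_sys - delta_idle) * 100 / delta_sys);
}

int main(void)
{
	/* e.g. 10 ticks elapsed, 4 of them idle+iowait -> 60% utilisation */
	struct sample prev = { .systime = 1000, .idletime = 400 };
	struct sample cur = { .systime = 1010, .idletime = 404 };

	printf("vutil = %d%%\n", vutil_pct(prev, cur));
	return 0;
}

The sampling interval defaults to 1ms and can be raised through
/proc/sys/kernel/sched_util_update_interval_ms (capped at 10 minutes).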
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index c83a10e895f4..2eaba2b78f47 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -63,7 +63,11 @@ u64 get_idle_time(int cpu)
 	return idle;
 }
 
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+u64 get_iowait_time(int cpu)
+#else
 static u64 get_iowait_time(int cpu)
+#endif
 {
 	u64 iowait, iowait_usecs = -1ULL;
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 1ebbeec02051..f6244d48f357 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -190,5 +190,8 @@ extern int use_sched_idle_time;
 extern int sched_idle_time_adjust(int cpu, u64 *utime, u64 *stime);
 extern unsigned long long sched_get_idle_time(int cpu);
 extern u64 get_idle_time(int cpu);
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+extern u64 get_iowait_time(int cpu);
+#endif
 
 #endif /* _LINUX_SCHED_CPUTIME_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index f5031a607df8..386ef53017ca 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -28,6 +28,8 @@ extern unsigned int sysctl_sched_child_runs_first;
 extern int sysctl_sched_util_low_pct;
 extern int sysctl_sched_util_higher_pct;
 extern int sysctl_sched_load_higher_pct;
+extern int sysctl_sched_util_update_interval;
+extern unsigned long sysctl_sched_util_update_interval_max;
 #endif
 
 enum sched_tunable_scaling {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5344a68a463e..3217e3998fdf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6458,6 +6458,15 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
 }
 
 #ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+#define UTIL_PCT_HIGH	85
+
+struct cpu_timeinfo {
+	u64 systime;
+	u64 idletime;
+	unsigned long next_update;
+	int vutil;
+};
+
 /*
  * Light load threshold for CPU: just use cpu Utilization to measure
  *
@@ -6484,15 +6493,75 @@ int sysctl_sched_util_higher_pct = 100;
  */
 int sysctl_sched_load_higher_pct = 10;
 
+/*
+ * The time interval to update CPU utilization
+ * (default 1ms, max 10min)
+ */
+int sysctl_sched_util_update_interval = 1;
+unsigned long sysctl_sched_util_update_interval_max = 600000;
+
+static DEFINE_PER_CPU(struct cpu_timeinfo, qos_cputime);
+
+static inline u64 cpu_systime(int cpu)
+{
+	u64 user, nice, system, idle, iowait, irq, softirq, steal;
+
+	user = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	system = kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	iowait = get_iowait_time(cpu);
+	irq = kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	softirq = kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	nice = kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+	steal = kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	idle = get_idle_time(cpu);
+
+	return user + system + iowait + irq + softirq + nice + idle + steal;
+}
+
+static inline u64 cpu_idletime(int cpu)
+{
+	return get_idle_time(cpu) + get_iowait_time(cpu);
+}
+
+static inline void update_cpu_vutil(void)
+{
+	struct cpu_timeinfo *cputime = per_cpu_ptr(&qos_cputime, smp_processor_id());
+	u64 delta_systime, delta_idle, systime, idletime;
+	int cpu = smp_processor_id();
+	unsigned long interval;
+
+	if (time_after(jiffies, cputime->next_update)) {
+		interval = msecs_to_jiffies(sysctl_sched_util_update_interval);
+		cputime->next_update = jiffies + interval;
+		systime = cpu_systime(cpu);
+		idletime = cpu_idletime(cpu);
+		delta_systime = systime - cputime->systime;
+		delta_idle = idletime - cputime->idletime;
+		if (!delta_systime)
+			return;
+
+		cputime->systime = systime;
+		cputime->idletime = idletime;
+		cputime->vutil = (delta_systime - delta_idle) * 100 / delta_systime;
+	}
+}
+
+static inline int cpu_vutil_of(int cpu)
+{
+	struct cpu_timeinfo *cputime = per_cpu_ptr(&qos_cputime, cpu);
+
+	return cputime->vutil;
+}
+
 static inline bool prefer_cpu_util_low(int cpu)
 {
 	unsigned long capacity = capacity_of(cpu);
-	unsigned long util = cpu_util(cpu);
+	unsigned long util_pct = cpu_vutil_of(cpu);
 
-	if (util >= capacity || capacity <= 1)
+	if (util_pct >= 100 || capacity <= 1)
 		return sysctl_sched_util_low_pct == 100;
 
-	return util * 100 <= capacity * sysctl_sched_util_low_pct;
+	return util_pct <= sysctl_sched_util_low_pct;
 }
 
 /*
@@ -6504,8 +6573,8 @@ static inline int compare_cpu_util(int preferred_cpu, int external_cpu)
 {
 	unsigned long capacity_cpux = capacity_of(preferred_cpu);
 	unsigned long capacity_cpuy = capacity_of(external_cpu);
-	unsigned long cpu_util_x = cpu_util(preferred_cpu);
-	unsigned long cpu_util_y = cpu_util(external_cpu);
+	unsigned long cpu_util_x = cpu_vutil_of(preferred_cpu);
+	unsigned long cpu_util_y = cpu_vutil_of(external_cpu);
 	int ratio;
 
 	/*
@@ -6519,7 +6588,7 @@ static inline int compare_cpu_util(int preferred_cpu, int external_cpu)
 	if (capacity_cpux <= 1)
 		return 1;
 
-	if (cpu_util_x >= capacity_cpux && available_idle_cpu(external_cpu))
+	if (cpu_util_x >= UTIL_PCT_HIGH && available_idle_cpu(external_cpu))
 		return 1;
 
 	if (!cpu_util_x)
@@ -6529,12 +6598,12 @@ static inline int compare_cpu_util(int preferred_cpu, int external_cpu)
 	 * The lower the CPU utilization, the larger the ratio of
 	 * CPU utilization gap.
 	 */
-	ratio = cpu_util_x >= capacity_cpux ? 1 : capacity_cpux / cpu_util_x;
+	ratio = cpu_util_x >= 100 ? 0 : 100 / cpu_util_x;
 	if (ratio > 10)
 		ratio = 10;
 
-	return (sysctl_sched_util_higher_pct * ratio + 100) * cpu_util_y *
-		capacity_cpux < 100 * cpu_util_x * capacity_cpuy;
+	return (sysctl_sched_util_higher_pct * ratio + 100) *
+		cpu_util_y * capacity_cpux < 100 * cpu_util_x * capacity_cpuy;
 }
 
 static inline bool prefer_cpus_valid(struct task_struct *p)
@@ -6549,7 +6618,7 @@ static inline bool prefer_cpus_valid(struct task_struct *p)
  * @p: the task whose available cpu range will to set
  * @idlest_cpu: the cpu which is the idlest in prefer cpus
  *
- * x: the cpu of min util_avg from preferred set
+ * x: the cpu of min util from preferred set
  * y: the cpu from allowed set but exclude preferred set
  *
  * If x's utilization is low, select preferred cpu range for task
@@ -6578,7 +6647,7 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
 		goto cpus_allowed;
 
 	cpu = cpumask_first(p->se.prefer_cpus);
-	min_util = cpu_util(cpu);
+	min_util = cpu_vutil_of(cpu);
 	for_each_cpu(i, p->se.prefer_cpus) {
 		if (prefer_cpu_util_low(i) || available_idle_cpu(i)) {
 			cpu = i;
@@ -6589,7 +6658,7 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
 		if (capacity_of(i) <= 1)
 			continue;
 
-		c_util = cpu_util(i);
+		c_util = cpu_vutil_of(i);
 		if (min_util * capacity_of(i) > c_util * capacity_of(cpu)) {
 			min_util = c_util;
 			cpu = i;
@@ -10278,6 +10347,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 
 	if (static_branch_unlikely(&sched_numa_balancing))
 		task_tick_numa(rq, curr);
+
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+	update_cpu_vutil();
+#endif
 }
 
 /*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 55216a03d327..4de9c966da01 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2348,3 +2348,4 @@ static inline void membarrier_switch_mm(struct rq *rq,
 {
 }
 #endif
+
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b309aa206697..d52e003f3f56 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1274,6 +1274,15 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one_hundred,
 	},
+	{
+		.procname	= "sched_util_update_interval_ms",
+		.data		= &sysctl_sched_util_update_interval,
+		.maxlen		= sizeof(sysctl_sched_util_update_interval),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+		.extra2		= &sysctl_sched_util_update_interval_max,
+	},
 #endif
 #endif
 	{ }
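
Note (illustrative sketch, not part of the patch): with percentage-based
utilisation, compare_cpu_util() effectively requires the external CPU to
be markedly less busy than the preferred one before it wins, and the
required gap grows as the preferred CPU gets lighter. The small program
below replays that comparison with made-up numbers; names such as
external_wins() and higher_pct are invented for the example, and a true
result is read as "pick the external CPU", as suggested by the idle-CPU
shortcut in the fair.c hunk above.

#include <stdbool.h>
#include <stdio.h>

static bool external_wins(int util_x, int util_y,
			  unsigned long cap_x, unsigned long cap_y,
			  int higher_pct)
{
	/* the lighter the preferred CPU, the bigger the required gap */
	int ratio = util_x >= 100 ? 0 : 100 / util_x;

	if (ratio > 10)
		ratio = 10;

	return (higher_pct * ratio + 100) * util_y * cap_x <
	       100 * util_x * cap_y;
}

int main(void)
{
	/*
	 * Preferred CPU at 40%, external at 10%, equal capacity,
	 * higher_pct = 100: ratio = 2, so the external CPU must stay
	 * below 40 * 100 / 300 = ~13.3% to win -> true here.
	 */
	printf("%d\n", external_wins(40, 10, 1024, 1024, 100));
	return 0;
}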