From: tanghui <tanghui20@huawei.com>
sched: optimise the way to get CPU utilization, which is only used when
CONFIG_QOS_SCHED_DYNAMIC_AFFINITY is set.
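
Instead of reading cpu_util() on every dynamic-affinity CPU selection,
cache a per-cpu utilization percentage (vutil) computed from kcpustat and
idle/iowait time deltas. The cached value is refreshed from the scheduler
tick at most once per sysctl_sched_util_update_interval (default 1ms, max
10min, tunable via /proc/sys/kernel/sched_util_update_interval_ms) and is
consumed by prefer_cpu_util_low(), compare_cpu_util() and
set_task_select_cpus(). get_iowait_time() is made non-static so that
fair.c can use it.

For example (hypothetical numbers): if, since the previous update, the CPU
accumulated delta_systime = 10 ticks of total time, of which delta_idle = 7
ticks were idle + iowait, the cached value becomes
vutil = (delta_systime - delta_idle) * 100 / delta_systime
      = (10 - 7) * 100 / 10 = 30,
i.e. the CPU is treated as 30% utilized by the selection logic.
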
Signed-off-by: tanghui <tanghui20@huawei.com>
---
fs/proc/stat.c | 4 ++
include/linux/sched/cputime.h | 3 ++
include/linux/sched/sysctl.h | 2 +
kernel/sched/fair.c | 97 ++++++++++++++++++++++++++++++-----
kernel/sysctl.c | 9 ++++
5 files changed, 103 insertions(+), 12 deletions(-)
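
Note for reviewers (not part of the applied change): below is a minimal
user-space sketch of the same computation, assuming two snapshots of the
per-cpu cputime categories that cpu_systime()/cpu_idletime() sum up; all
struct names and sample values are hypothetical.

/*
 * Busy percentage over an interval, mirroring update_cpu_vutil():
 * vutil = (delta_total - delta_idle) * 100 / delta_total.
 */
#include <stdio.h>

struct cpu_sample {
        unsigned long long user, nice, system, idle, iowait, irq, softirq, steal;
};

static unsigned long long total_time(const struct cpu_sample *s)
{
        return s->user + s->nice + s->system + s->idle +
               s->iowait + s->irq + s->softirq + s->steal;
}

static unsigned long long idle_time(const struct cpu_sample *s)
{
        return s->idle + s->iowait;
}

/* Returns utilization in percent; 0 if no time elapsed between samples. */
static int util_pct(const struct cpu_sample *prev, const struct cpu_sample *cur)
{
        unsigned long long delta_total = total_time(cur) - total_time(prev);
        unsigned long long delta_idle = idle_time(cur) - idle_time(prev);

        if (!delta_total)
                return 0;
        return (int)((delta_total - delta_idle) * 100 / delta_total);
}

int main(void)
{
        /* Hypothetical samples, in clock ticks. */
        struct cpu_sample prev = { 100, 0, 50, 800, 50, 0, 0, 0 };
        struct cpu_sample cur  = { 160, 0, 80, 840, 60, 0, 0, 0 };

        printf("util over the interval: %d%%\n", util_pct(&prev, &cur));
        return 0;
}

With the samples above the interval spans 140 ticks, of which 50 are
idle + iowait, so the printed utilization is (140 - 50) * 100 / 140 = 64%.
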
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index c83a10e895f4..2eaba2b78f47 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -63,7 +63,11 @@ u64 get_idle_time(int cpu)
return idle;
}
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+u64 get_iowait_time(int cpu)
+#else
static u64 get_iowait_time(int cpu)
+#endif
{
u64 iowait, iowait_usecs = -1ULL;
diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h
index 1ebbeec02051..f6244d48f357 100644
--- a/include/linux/sched/cputime.h
+++ b/include/linux/sched/cputime.h
@@ -190,5 +190,8 @@ extern int use_sched_idle_time;
extern int sched_idle_time_adjust(int cpu, u64 *utime, u64 *stime);
extern unsigned long long sched_get_idle_time(int cpu);
extern u64 get_idle_time(int cpu);
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+extern u64 get_iowait_time(int cpu);
+#endif
#endif /* _LINUX_SCHED_CPUTIME_H */
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index f5031a607df8..386ef53017ca 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -28,6 +28,8 @@ extern unsigned int sysctl_sched_child_runs_first;
extern int sysctl_sched_util_low_pct;
extern int sysctl_sched_util_higher_pct;
extern int sysctl_sched_load_higher_pct;
+extern int sysctl_sched_util_update_interval;
+extern int sysctl_sched_util_update_interval_max;
#endif
enum sched_tunable_scaling {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5344a68a463e..3217e3998fdf 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6458,6 +6458,15 @@ static int wake_cap(struct task_struct *p, int cpu, int prev_cpu)
}
#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+#define UTIL_PCT_HIGH 85
+
+struct cpu_timeinfo {
+ u64 systime;
+ u64 idletime;
+ unsigned long next_update;
+ int vutil;
+};
+
/*
* Light load threshold for CPU: just use cpu Utilization to measure
*
@@ -6484,15 +6493,75 @@ int sysctl_sched_util_higher_pct = 100;
*/
int sysctl_sched_load_higher_pct = 10;
+/*
+ * The time interval to update CPU utilization
+ * (default 1ms, max 10min)
+ */
+int sysctl_sched_util_update_interval = 1;
+int sysctl_sched_util_update_interval_max = 600000;
+
+static DEFINE_PER_CPU(struct cpu_timeinfo, qos_cputime);
+
+static inline u64 cpu_systime(int cpu)
+{
+ u64 user, nice, system, idle, iowait, irq, softirq, steal;
+
+ user = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+ system = kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+ iowait = get_iowait_time(cpu);
+ irq = kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+ softirq = kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+ nice = kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+ steal = kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+ idle = get_idle_time(cpu);
+
+ return user + system + iowait + irq + softirq + nice + idle + steal;
+}
+
+static inline u64 cpu_idletime(int cpu)
+{
+ return get_idle_time(cpu) + get_iowait_time(cpu);
+}
+
+static inline void update_cpu_vutil(void)
+{
+ struct cpu_timeinfo *cputime = per_cpu_ptr(&qos_cputime, smp_processor_id());
+ u64 delta_systime, delta_idle, systime, idletime;
+ int cpu = smp_processor_id();
+ unsigned long interval;
+
+ if (time_after(jiffies, cputime->next_update)) {
+ interval = msecs_to_jiffies(sysctl_sched_util_update_interval);
+ cputime->next_update = jiffies + interval;
+ systime = cpu_systime(cpu);
+ idletime = cpu_idletime(cpu);
+ delta_systime = systime - cputime->systime;
+ delta_idle = idletime - cputime->idletime;
+ if (!delta_systime)
+ return;
+
+ cputime->systime = systime;
+ cputime->idletime = idletime;
+ cputime->vutil = (delta_systime - delta_idle) * 100 / delta_systime;
+ }
+}
+
+static inline int cpu_vutil_of(int cpu)
+{
+ struct cpu_timeinfo *cputime = per_cpu_ptr(&qos_cputime, cpu);
+
+ return cputime->vutil;
+}
+
static inline bool prefer_cpu_util_low(int cpu)
{
unsigned long capacity = capacity_of(cpu);
- unsigned long util = cpu_util(cpu);
+ unsigned long util_pct = cpu_vutil_of(cpu);
- if (util >= capacity || capacity <= 1)
+ if (util_pct >= 100 || capacity <= 1)
return sysctl_sched_util_low_pct == 100;
- return util * 100 <= capacity * sysctl_sched_util_low_pct;
+ return util_pct <= sysctl_sched_util_low_pct;
}
/*
@@ -6504,8 +6573,8 @@ static inline int compare_cpu_util(int preferred_cpu, int external_cpu)
{
unsigned long capacity_cpux = capacity_of(preferred_cpu);
unsigned long capacity_cpuy = capacity_of(external_cpu);
- unsigned long cpu_util_x = cpu_util(preferred_cpu);
- unsigned long cpu_util_y = cpu_util(external_cpu);
+ unsigned long cpu_util_x = cpu_vutil_of(preferred_cpu);
+ unsigned long cpu_util_y = cpu_vutil_of(external_cpu);
int ratio;
/*
@@ -6519,7 +6588,7 @@ static inline int compare_cpu_util(int preferred_cpu, int external_cpu)
if (capacity_cpux <= 1)
return 1;
- if (cpu_util_x >= capacity_cpux && available_idle_cpu(external_cpu))
+ if (cpu_util_x >= UTIL_PCT_HIGH && available_idle_cpu(external_cpu))
return 1;
if (!cpu_util_x)
@@ -6529,12 +6598,12 @@ static inline int compare_cpu_util(int preferred_cpu, int external_cpu)
* The lower the CPU utilization, the larger the ratio of
* CPU utilization gap.
*/
- ratio = cpu_util_x >= capacity_cpux ? 1 : capacity_cpux / cpu_util_x;
+ ratio = cpu_util_x >= 100 ? 0 : 100 / cpu_util_x;
if (ratio > 10)
ratio = 10;
- return (sysctl_sched_util_higher_pct * ratio + 100) * cpu_util_y *
- capacity_cpux < 100 * cpu_util_x * capacity_cpuy;
+ return (sysctl_sched_util_higher_pct * ratio + 100) *
+ cpu_util_y * capacity_cpux < 100 * cpu_util_x * capacity_cpuy;
}
static inline bool prefer_cpus_valid(struct task_struct *p)
@@ -6549,7 +6618,7 @@ static inline bool prefer_cpus_valid(struct task_struct *p)
* @p: the task whose available cpu range will to set
* @idlest_cpu: the cpu which is the idlest in prefer cpus
*
- * x: the cpu of min util_avg from preferred set
+ * x: the cpu of min util from preferred set
* y: the cpu from allowed set but exclude preferred set
*
* If x's utilization is low, select preferred cpu range for task
@@ -6578,7 +6647,7 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
goto cpus_allowed;
cpu = cpumask_first(p->se.prefer_cpus);
- min_util = cpu_util(cpu);
+ min_util = cpu_vutil_of(cpu);
for_each_cpu(i, p->se.prefer_cpus) {
if (prefer_cpu_util_low(i) || available_idle_cpu(i)) {
cpu = i;
@@ -6589,7 +6658,7 @@ static void set_task_select_cpus(struct task_struct *p, int *idlest_cpu,
if (capacity_of(i) <= 1)
continue;
- c_util = cpu_util(i);
+ c_util = cpu_vutil_of(i);
if (min_util * capacity_of(i) > c_util * capacity_of(cpu)) {
min_util = c_util;
cpu = i;
@@ -10278,6 +10347,10 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
+
+#ifdef CONFIG_QOS_SCHED_DYNAMIC_AFFINITY
+ update_cpu_vutil();
+#endif
}
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b309aa206697..d52e003f3f56 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1274,6 +1274,15 @@ static struct ctl_table kern_table[] = {
.extra1 = &zero,
.extra2 = &one_hundred,
},
+ {
+ .procname = "sched_util_update_interval_ms",
+ .data = &sysctl_sched_util_update_interval,
+ .maxlen = sizeof(sysctl_sched_util_update_interval),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &one,
+ .extra2 = &sysctl_sched_util_update_interval_max,
+ },
#endif
#endif
{ }
--
2.23.0