openEuler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CIJQ
CVE: NA
----------------------------------------------------------------------
By default, CPU utilization is calculated by checking which task is running at each tick, i.e. only once per tick interval. This leads to inaccurate CPU utilization results.
This problem can be solved by counting the idle time in the scheduler rather than per tick interval. We record the time just before the idle process starts executing and compute its execution time when the idle process is switched out. The idle time of each CPU is reported in the /proc/stat2 file. This approach accounts CPU idle time with higher precision than /proc/stat.
Signed-off-by: Hongyu Li <543306408@qq.com>
---
 fs/proc/Kconfig        |  7 ++++
 fs/proc/Makefile       |  1 +
 fs/proc/stat2.c        | 91 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/cputime.c | 34 ++++++++++++++++
 kernel/sched/idle.c    | 28 +++++++++++++
 5 files changed, 161 insertions(+)
 create mode 100644 fs/proc/stat2.c
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index c930001056f9..33588a37579e 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -107,3 +107,10 @@ config PROC_PID_ARCH_STATUS
config PROC_CPU_RESCTRL
	def_bool n
	depends on PROC_FS
+
+config PROC_IDLE
+	bool "include /proc/stat2 file"
+	depends on PROC_FS
+	default y
+	help
+	  Provide the CPU idle time in the /proc/stat2 file.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 8704d41dd67c..b0d5f2b347d7 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -34,5 +34,6 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o
+proc-$(CONFIG_PROC_IDLE) += stat2.o
obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o
obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o
diff --git a/fs/proc/stat2.c b/fs/proc/stat2.c
new file mode 100644
index 000000000000..6036a946c71d
--- /dev/null
+++ b/fs/proc/stat2.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/proc/stat2.c
+ *
+ * Copyright (C) 2007
+ *
+ * cpu idle time accounting: /proc/stat2 reports, for each CPU, how long
+ * its idle task has run according to the scheduler's own timestamps.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/sched/stat.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/irqnr.h>
+#include <linux/sched/cputime.h>
+#include <linux/tick.h>
+
+#define PROC_NAME "stat2"
+
+/* Defined in kernel/sched/cputime.c. */
+extern u64 cal_idle_sum_exec_runtime(int cpu);
+
+/*
+ * show_idle - emit the contents of /proc/stat2
+ * @p: seq_file being filled
+ * @v: unused
+ *
+ * Writes a "cpu" line holding the idle time summed over every possible
+ * CPU, then one "cpuN" line per online CPU. Values are passed through
+ * nsec_to_clock_t() before printing. Always returns 0.
+ */
+static int show_idle(struct seq_file *p, void *v)
+{
+	u64 total = 0;
+	int cpu;
+
+	/* Summary row: all possible CPUs, whether or not they are online. */
+	for_each_possible_cpu(cpu)
+		total += cal_idle_sum_exec_runtime(cpu);
+
+	seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(total));
+	seq_putc(p, '\n');
+
+	for_each_online_cpu(cpu) {
+		u64 idle = cal_idle_sum_exec_runtime(cpu);
+
+		seq_printf(p, "cpu%d", cpu);
+		seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+		seq_putc(p, '\n');
+	}
+
+	return 0;
+}
+
+/* Pre-size the seq_file buffer: 32 bytes plus 32 per online CPU. */
+static int idle_open(struct inode *inode, struct file *file)
+{
+	unsigned int size = 32 + 32 * num_online_cpus();
+
+	return single_open_size(file, show_idle, NULL, size);
+}
+
+static const struct proc_ops idle_procs_ops = {
+	.proc_open	= idle_open,
+	.proc_read_iter	= seq_read_iter,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+/* Register /proc/stat2; fail the initcall if the entry cannot be created. */
+static int __init proc_stat2_init(void)
+{
+	if (!proc_create(PROC_NAME, 0, NULL, &idle_procs_ops))
+		return -ENOMEM;
+
+	return 0;
+}
+
+fs_initcall(proc_stat2_init);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5a55d2300452..25218a8f822f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -19,6 +19,8 @@
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
+extern struct static_key_true proc_idle; + static int sched_clock_irqtime;
void enable_sched_clock_irqtime(void) @@ -1078,3 +1080,35 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+
+#ifdef CONFIG_PROC_IDLE
+/*
+ * cal_idle_sum_exec_runtime - idle time accumulated by @cpu's idle task
+ * @cpu: CPU whose runqueue is inspected
+ *
+ * Returns the idle task's sum_exec_runtime plus, when the idle task is
+ * the CPU's current task, the time elapsed since its exec_start stamp.
+ * Returns 0 while the proc_idle static key is disabled.
+ *
+ * This is a pure reader: it is called from the /proc/stat2 show path
+ * without the runqueue lock, so it does not write scheduler statistics.
+ */
+u64 cal_idle_sum_exec_runtime(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct sched_entity *idle_se = &rq->idle->se;
+	u64 idle;
+	s64 delta_exec;
+
+	if (!static_branch_likely(&proc_idle))
+		return 0ULL;
+
+	idle = idle_se->sum_exec_runtime;
+	if (rq->curr != rq->idle)
+		return idle;
+
+	/* Add the in-progress idle period unless the clock did not advance. */
+	delta_exec = sched_clock() - idle_se->exec_start;
+	return delta_exec > 0 ? idle + delta_exec : idle;
+}
+#endif /* CONFIG_PROC_IDLE */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 36b545f17206..15f076ab5823 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -10,6 +10,8 @@
#include <trace/events/power.h>
+DEFINE_STATIC_KEY_TRUE(proc_idle); + /* Linker adds these: start and end of __cpuidle functions */ extern char __cpuidle_text_start[], __cpuidle_text_end[];
@@ -424,6 +426,23 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
+#ifdef CONFIG_PROC_IDLE
+	/* Close the idle period opened in pick_next_task_idle(). */
+	struct sched_entity *idle_se = &rq->idle->se;
+	u64 delta_exec;
+
+	if (!static_branch_likely(&proc_idle))
+		return;
+
+	/* Ignore the sample if sched_clock() has not moved past exec_start. */
+	delta_exec = sched_clock() - idle_se->exec_start;
+	if (unlikely((s64)delta_exec <= 0))
+		return;
+
+	schedstat_set(idle_se->statistics.exec_max,
+		      max(delta_exec, idle_se->statistics.exec_max));
+	idle_se->sum_exec_runtime += delta_exec;
+#endif
}
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first) @@ -436,6 +455,15 @@ struct task_struct *pick_next_task_idle(struct rq *rq) { struct task_struct *next = rq->idle;
+#ifdef CONFIG_PROC_IDLE + if (static_branch_likely(&proc_idle)) { + struct sched_entity *idle_se = &rq->idle->se; + u64 now = sched_clock(); + + idle_se->exec_start = now; + } +#endif + set_next_task_idle(rq, next, true);
return next;
proc_idle static key 没有提供修改接口,你可以通过 启动参数或者 sysctl 对外提供接口,给用户使用。
________________________________
谢秀奇 Xie Xiuqi
Mobile: +86-50000020953 (For Welink, eSpace Calls)
Email: xiexiuqi@huawei.com

发件人:Hongyu Li 543306408@qq.com
收件人:kernel kernel@openeuler.org
抄 送:Xiexiuqi xiexiuqi@huawei.com;chengjian (D) cj.chengjian@huawei.com;Zhengzengkai zhengzengkai@huawei.com
时 间:2021-09-30 20:56:54
主 题:[PATCH openEuler-21.09 1/2] eulerfs: add the /proc/stat2 file.
openEuler inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4CIJQ
CVE: NA
________________________________ ________________________________
By default, CPU utilization is calculated by checking which task is running at each tick, i.e. only once per tick interval. This leads to inaccurate CPU utilization results.
This problem can be solved by counting the idle time in the scheduler rather than per tick interval. We record the time just before the idle process starts executing and compute its execution time when the idle process is switched out. The idle time of each CPU is reported in the /proc/stat2 file. This approach accounts CPU idle time with higher precision than /proc/stat.
Signed-off-by: Hongyu Li <543306408@qq.com>
---
 fs/proc/Kconfig        |  7 ++++
 fs/proc/Makefile       |  1 +
 fs/proc/stat2.c        | 91 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/cputime.c | 34 ++++++++++++++++
 kernel/sched/idle.c    | 28 +++++++++++++
 5 files changed, 161 insertions(+)
 create mode 100644 fs/proc/stat2.c
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index c930001056f9..33588a37579e 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -107,3 +107,10 @@ config PROC_PID_ARCH_STATUS
config PROC_CPU_RESCTRL
	def_bool n
	depends on PROC_FS
+
+config PROC_IDLE
+	bool "include /proc/stat2 file"
+	depends on PROC_FS
+	default y
+	help
+	  Provide the CPU idle time in the /proc/stat2 file.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 8704d41dd67c..b0d5f2b347d7 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -34,5 +34,6 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o
+proc-$(CONFIG_PROC_IDLE) += stat2.o
obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o
obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o
diff --git a/fs/proc/stat2.c b/fs/proc/stat2.c
new file mode 100644
index 000000000000..6036a946c71d
--- /dev/null
+++ b/fs/proc/stat2.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * linux/fs/proc/stat2.c
+ *
+ * Copyright (C) 2007
+ *
+ * cpu idle time accounting: /proc/stat2 reports, for each CPU, how long
+ * its idle task has run according to the scheduler's own timestamps.
+ */
+
+#include <linux/cpumask.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/sched/stat.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/irqnr.h>
+#include <linux/sched/cputime.h>
+#include <linux/tick.h>
+
+#define PROC_NAME "stat2"
+
+/* Defined in kernel/sched/cputime.c. */
+extern u64 cal_idle_sum_exec_runtime(int cpu);
+
+/*
+ * show_idle - emit the contents of /proc/stat2
+ * @p: seq_file being filled
+ * @v: unused
+ *
+ * Writes a "cpu" line holding the idle time summed over every possible
+ * CPU, then one "cpuN" line per online CPU. Values are passed through
+ * nsec_to_clock_t() before printing. Always returns 0.
+ */
+static int show_idle(struct seq_file *p, void *v)
+{
+	u64 total = 0;
+	int cpu;
+
+	/* Summary row: all possible CPUs, whether or not they are online. */
+	for_each_possible_cpu(cpu)
+		total += cal_idle_sum_exec_runtime(cpu);
+
+	seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(total));
+	seq_putc(p, '\n');
+
+	for_each_online_cpu(cpu) {
+		u64 idle = cal_idle_sum_exec_runtime(cpu);
+
+		seq_printf(p, "cpu%d", cpu);
+		seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
+		seq_putc(p, '\n');
+	}
+
+	return 0;
+}
+
+/* Pre-size the seq_file buffer: 32 bytes plus 32 per online CPU. */
+static int idle_open(struct inode *inode, struct file *file)
+{
+	unsigned int size = 32 + 32 * num_online_cpus();
+
+	return single_open_size(file, show_idle, NULL, size);
+}
+
+static const struct proc_ops idle_procs_ops = {
+	.proc_open	= idle_open,
+	.proc_read_iter	= seq_read_iter,
+	.proc_lseek	= seq_lseek,
+	.proc_release	= single_release,
+};
+
+/* Register /proc/stat2; fail the initcall if the entry cannot be created. */
+static int __init proc_stat2_init(void)
+{
+	if (!proc_create(PROC_NAME, 0, NULL, &idle_procs_ops))
+		return -ENOMEM;
+
+	return 0;
+}
+
+fs_initcall(proc_stat2_init);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 5a55d2300452..25218a8f822f 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -19,6 +19,8 @@
 */
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
+extern struct static_key_true proc_idle; + static int sched_clock_irqtime;
void enable_sched_clock_irqtime(void) @@ -1078,3 +1080,35 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+
+#ifdef CONFIG_PROC_IDLE
+/*
+ * cal_idle_sum_exec_runtime - idle time accumulated by @cpu's idle task
+ * @cpu: CPU whose runqueue is inspected
+ *
+ * Returns the idle task's sum_exec_runtime plus, when the idle task is
+ * the CPU's current task, the time elapsed since its exec_start stamp.
+ * Returns 0 while the proc_idle static key is disabled.
+ *
+ * This is a pure reader: it is called from the /proc/stat2 show path
+ * without the runqueue lock, so it does not write scheduler statistics.
+ */
+u64 cal_idle_sum_exec_runtime(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	struct sched_entity *idle_se = &rq->idle->se;
+	u64 idle;
+	s64 delta_exec;
+
+	if (!static_branch_likely(&proc_idle))
+		return 0ULL;
+
+	idle = idle_se->sum_exec_runtime;
+	if (rq->curr != rq->idle)
+		return idle;
+
+	/* Add the in-progress idle period unless the clock did not advance. */
+	delta_exec = sched_clock() - idle_se->exec_start;
+	return delta_exec > 0 ? idle + delta_exec : idle;
+}
+#endif /* CONFIG_PROC_IDLE */
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index 36b545f17206..15f076ab5823 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -10,6 +10,8 @@
#include <trace/events/power.h>
+DEFINE_STATIC_KEY_TRUE(proc_idle); + /* Linker adds these: start and end of __cpuidle functions */ extern char __cpuidle_text_start[], __cpuidle_text_end[];
@@ -424,6 +426,23 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
{
+#ifdef CONFIG_PROC_IDLE
+	/* Close the idle period opened in pick_next_task_idle(). */
+	struct sched_entity *idle_se = &rq->idle->se;
+	u64 delta_exec;
+
+	if (!static_branch_likely(&proc_idle))
+		return;
+
+	/* Ignore the sample if sched_clock() has not moved past exec_start. */
+	delta_exec = sched_clock() - idle_se->exec_start;
+	if (unlikely((s64)delta_exec <= 0))
+		return;
+
+	schedstat_set(idle_se->statistics.exec_max,
+		      max(delta_exec, idle_se->statistics.exec_max));
+	idle_se->sum_exec_runtime += delta_exec;
+#endif
}
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first) @@ -436,6 +455,15 @@ struct task_struct *pick_next_task_idle(struct rq *rq) { struct task_struct *next = rq->idle;
+#ifdef CONFIG_PROC_IDLE + if (static_branch_likely(&proc_idle)) { + struct sched_entity *idle_se = &rq->idle->se; + u64 now = sched_clock(); + + idle_se->exec_start = now; + } +#endif + set_next_task_idle(rq, next, true);
return next; -- 2.17.1