From: Hongyu Li <“543306408@qq.com”>
openEuler inclusion category: bugfix bugzilla: https://gitee.com/openeuler-competition/summer-2021/issues/I3EBT6 CVE: NA
----------------------------------------------------------------------
The /proc/idle can give higher precision in accounting the CPU idle time compared with the traditional /proc/stat ways.
Signed-off-by: Hongyu Li <“543306408@qq.com”> --- fs/proc/Kconfig | 7 ++++ fs/proc/Makefile | 1 + fs/proc/proc_idle.c | 82 ++++++++++++++++++++++++++++++++++++++++++ kernel/sched/cputime.c | 16 +++++++++ kernel/sched/idle.c | 21 +++++++++++ 5 files changed, 127 insertions(+) create mode 100644 fs/proc/proc_idle.c
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index c930001056f9..46620afe9cac 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -107,3 +107,10 @@ config PROC_PID_ARCH_STATUS config PROC_CPU_RESCTRL def_bool n depends on PROC_FS + +config PROC_IDLE + bool "include /proc/idle file" + depends on PROC_FS + default y + help + Provide the CPU idle time in the /proc/idle file. diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 8704d41dd67c..69dd2da3a080 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -34,5 +34,6 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o +proc-$(CONFIG_PROC_IDLE) += proc_idle.o obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o diff --git a/fs/proc/proc_idle.c b/fs/proc/proc_idle.c new file mode 100644 index 000000000000..bbb52e247448 --- /dev/null +++ b/fs/proc/proc_idle.c @@ -0,0 +1,82 @@ +#include <linux/cpumask.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/sched/stat.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/irqnr.h> +#include <linux/sched/cputime.h> +#include <linux/tick.h> + +#ifdef CONFIG_PROC_IDLE + +#define PROC_NAME "idle" + +extern u64 cpu_rq_get(int cpu); + +static u64 get_idle_sum_exec_runtime(int cpu) +{ + u64 idle = cpu_rq_get(cpu); + + return idle; +} + +static int show_idle(struct seq_file *p, void *v) +{ + int i; + u64 idle; + + idle = 0; + + for_each_possible_cpu(i) { + + idle += get_idle_sum_exec_runtime(i); + + } + + seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(idle)); + seq_putc(p, '\n'); + + for_each_online_cpu(i) { + + idle = get_idle_sum_exec_runtime(i); + + seq_printf(p, "cpu%d", i); + 
seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle)); + seq_putc(p, '\n'); + } + + return 0; +} + +static int idle_open(struct inode *inode, struct file *file) +{ + unsigned int size = 1024 + 128 * num_online_cpus(); + + return single_open_size(file, show_idle, NULL, size); +} + +static struct proc_ops idle_procs_ops = { + .proc_open = idle_open, + .proc_read_iter = seq_read_iter, + .proc_lseek = seq_lseek, + .proc_release = single_release, +}; + +static int __init kernel_module_init(void) +{ + proc_create(PROC_NAME, 0, NULL, &idle_procs_ops); + return 0; +} + +fs_initcall(kernel_module_init); + +#endif /*CONFIG_PROC_IDLE*/ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5a55d2300452..bb280852bb06 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -1078,3 +1078,19 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ + + +#ifdef CONFIG_PROC_IDLE + + +u64 cpu_rq_get(int cpu) +{ + struct rq *rq = cpu_rq(cpu); + struct sched_entity *idle_se = &rq->idle->se; + u64 idle = idle_se->sum_exec_runtime; + + return idle; +} +EXPORT_SYMBOL_GPL(cpu_rq_get); + +#endif /* CONFIG_PROC_IDLE */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 36b545f17206..e3fb940a61e8 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -424,6 +424,20 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { +#ifdef CONFIG_PROC_IDLE + struct sched_entity *idle_se = &rq->idle->se; + u64 now = sched_clock(); + u64 delta_exec; + + delta_exec = now - idle_se->exec_start; + if (unlikely((s64)delta_exec <= 0)) + return; + + schedstat_set(idle_se->statistics.exec_max, + max(delta_exec, idle_se->statistics.exec_max)); + + idle_se->sum_exec_runtime += delta_exec; +#endif }
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first) @@ -436,6 +450,13 @@ struct task_struct *pick_next_task_idle(struct rq *rq) { struct task_struct *next = rq->idle;
+#ifdef CONFIG_PROC_IDLE + struct sched_entity *idle_se = &rq->idle->se; + u64 now = sched_clock(); + + idle_se->exec_start = now; +#endif + set_next_task_idle(rq, next, true);
return next;
Hi,
Thanks for your patch.
邮件标题的前缀中,请始终写上对应的分支名。
On 2021/9/29 15:42, Hongyu Li wrote:
From: Hongyu Li <“543306408@qq.com”>
邮箱不用加双引号。
openEuler inclusion category: bugfix bugzilla: https://gitee.com/openeuler-competition/summer-2021/issues/I3EBT6
请在 https://gitee.com/openeuler/kernel/issues/ 下面新建一条需求,并把链接放到这里。不用直接贴 openeuler-competition 的 issue 链接,可以在 openeuler/kernel 的 issue 描述中附上这个链接。
CVE: NA
The /proc/idle can give higher precision in accounting the CPU idle time compared with the traditional /proc/stat ways.
Commit message 建议描述一下存在的问题和解决方法,以及怎么使用这个新的接口。
Signed-off-by: Hongyu Li <“543306408@qq.com”>
fs/proc/Kconfig | 7 ++++ fs/proc/Makefile | 1 + fs/proc/proc_idle.c | 82 ++++++++++++++++++++++++++++++++++++++++++ kernel/sched/cputime.c | 16 +++++++++ kernel/sched/idle.c | 21 +++++++++++ 5 files changed, 127 insertions(+) create mode 100644 fs/proc/proc_idle.c
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index c930001056f9..46620afe9cac 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig @@ -107,3 +107,10 @@ config PROC_PID_ARCH_STATUS config PROC_CPU_RESCTRL def_bool n depends on PROC_FS
+config PROC_IDLE
- bool "include /proc/idle file"
- depends on PROC_FS
- default y
- help
Provide the CPU idle time in the /proc/idle file.
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 8704d41dd67c..69dd2da3a080 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -34,5 +34,6 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PRINTK) += kmsg.o proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o proc-$(CONFIG_BOOT_CONFIG) += bootconfig.o +proc-$(CONFIG_PROC_IDLE) += proc_idle.o obj-$(CONFIG_ETMEM_SCAN) += etmem_scan.o obj-$(CONFIG_ETMEM_SWAP) += etmem_swap.o diff --git a/fs/proc/proc_idle.c b/fs/proc/proc_idle.c new file mode 100644 index 000000000000..bbb52e247448 --- /dev/null +++ b/fs/proc/proc_idle.c
我建议叫 fs/proc/stat2.c 比较好,方便将来扩展其他属性。叫 idle 的话,名字限制比较大。
另外,新增文件的文件头要有 license 声明(GPL v2),可以参考一下其他文档。
@@ -0,0 +1,82 @@ +#include <linux/cpumask.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/kernel_stat.h> +#include <linux/module.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/sched/stat.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <linux/irqnr.h> +#include <linux/sched/cputime.h> +#include <linux/tick.h>
+#ifdef CONFIG_PROC_IDLE
+#define PROC_NAME "idle"
+extern u64 cpu_rq_get(int cpu);
+static u64 get_idle_sum_exec_runtime(int cpu) +{
- u64 idle = cpu_rq_get(cpu);
- return idle;
+}
+static int show_idle(struct seq_file *p, void *v) +{
- int i;
- u64 idle;
- idle = 0;
- for_each_possible_cpu(i) {
idle += get_idle_sum_exec_runtime(i);
- }
- seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(idle));
- seq_putc(p, '\n');
- for_each_online_cpu(i) {
idle = get_idle_sum_exec_runtime(i);
这里直接返回已统计的时间,会存在一个不准确的窗口。比如,当前 CPU 处于 idle 状态并且已经持续了一段时间,这里返回的仍是上次更新的值,并没有把这段持续时间算上。
seq_printf(p, "cpu%d", i);
seq_put_decimal_ull(p, " ", nsec_to_clock_t(idle));
seq_putc(p, '\n');
- }
- return 0;
+}
+static int idle_open(struct inode *inode, struct file *file) +{
- unsigned int size = 1024 + 128 * num_online_cpus();
1024 和 128 是什么含义?
- return single_open_size(file, show_idle, NULL, size);
+}
+static struct proc_ops idle_procs_ops = {
- .proc_open = idle_open,
- .proc_read_iter = seq_read_iter,
- .proc_lseek = seq_lseek,
- .proc_release = single_release,
+};
+static int __init kernel_module_init(void) +{
- proc_create(PROC_NAME, 0, NULL, &idle_procs_ops);
- return 0;
+}
+fs_initcall(kernel_module_init);
+#endif /*CONFIG_PROC_IDLE*/ diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 5a55d2300452..bb280852bb06 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -1078,3 +1078,19 @@ void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu) EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
+#ifdef CONFIG_PROC_IDLE
+u64 cpu_rq_get(int cpu) +{
- struct rq *rq = cpu_rq(cpu);
- struct sched_entity *idle_se = &rq->idle->se;
- u64 idle = idle_se->sum_exec_runtime;
- return idle;
+} +EXPORT_SYMBOL_GPL(cpu_rq_get);
这个是不是不导出也可以?另外,这个函数名没有表达出它的含义。
+#endif /* CONFIG_PROC_IDLE */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 36b545f17206..e3fb940a61e8 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -424,6 +424,20 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl
static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) { +#ifdef CONFIG_PROC_IDLE
- struct sched_entity *idle_se = &rq->idle->se;
- u64 now = sched_clock();
- u64 delta_exec;
- delta_exec = now - idle_se->exec_start;
- if (unlikely((s64)delta_exec <= 0))
return;
- schedstat_set(idle_se->statistics.exec_max,
max(delta_exec, idle_se->statistics.exec_max));
- idle_se->sum_exec_runtime += delta_exec;
+#endif }
static void set_next_task_idle(struct rq *rq, struct task_struct *next, bool first) @@ -436,6 +450,13 @@ struct task_struct *pick_next_task_idle(struct rq *rq) { struct task_struct *next = rq->idle;
+#ifdef CONFIG_PROC_IDLE
- struct sched_entity *idle_se = &rq->idle->se;
- u64 now = sched_clock();
- idle_se->exec_start = now;
+#endif
建议增加一个动态开关(static_key),如果不使能这个特性的话,就不必引入这些开销。
set_next_task_idle(rq, next, true);
return next;