From 40fbbe466f421bc8534325e60b4369dcff7626ac Mon Sep 17 00:00:00 2001 From: zhoukang zhoukang7@huawei.com Date: Sat, 20 Mar 2021 07:07:58 +0000 Subject: [PATCH] cpu: add cpuload for debug cpu usage
cpuload calculates per-CPU usage and shows which tasks are using up the CPU.
It displays the top N tasks when the usage of a CPU exceeds P%, and recalculates every T ms.
This works by tracing the sched switch events using tracepoints.
Since this uses BPF, only the root user can use this tool.
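optional arguments: -h, --help show this help message and exit -t TIME, --time TIME interval for calculating the CPU usage, in milliseconds (0 - 60000), default 1000 -n NUMBER, --number NUMBER display top n tasks with high cpu usage, default 3 -p PERCENT_LIMIT, --percent_limit PERCENT_LIMIT display when the usage of a cpu exceeds percent_limit (0 - 100), default 90 -m MAX_ENTRY, --max_entry MAX_ENTRY size of the cyclic buffer for recording the scheduling track (1000 - 1000000), default 10000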
example: [root@localhost ~]# ./cpuload.py -p 50 -n 2 -t 100 Tracing task switch. Output when cpu is overload. Ctrl-C to end. DATE COMM PID CPU TIME(ms) %CPU 2021-01-27 10:40:39 stress-ng-cpu 33179 1 100.529 96.68% 2021-01-27 10:40:39 cpuload.py 395575 1 3.363 03.23% 2021-01-27 10:40:39 stress-ng-cpu 33175 3 107.704 99.73% 2021-01-27 10:40:39 sshd 2259 3 0.226 00.21% 2021-01-27 10:40:39 stress-ng-cpu 33176 0 131.978 99.99% 2021-01-27 10:40:39 kworker/0:0 388650 0 0.017 00.01% 2021-01-27 10:40:39 stress-ng-cpu 33178 2 183.987 99.99% 2021-01-27 10:40:39 kworker/2:0 391880 2 0.011 00.01%
Signed-off-by: Liu Chao liuchao173@huawei.com --- doc/cpuload.en.md | 43 ++++++++++ doc/cpuload.md | 13 +++ src/cpu/cpuload.py | 197 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 253 insertions(+) create mode 100644 doc/cpuload.en.md create mode 100644 doc/cpuload.md create mode 100755 src/cpu/cpuload.py
diff --git a/doc/cpuload.en.md b/doc/cpuload.en.md new file mode 100644 index 0000000..2c37567 --- /dev/null +++ b/doc/cpuload.en.md @@ -0,0 +1,43 @@ +# cpuload + +The CPU usage detection tool cpuload can be used to print processes with high CPU usage. +Implementation principle: Use the bcc tool to accurately trace scheduling tracks and collect statistics on processes with high CPU usage. +python /usr/share/bcc/tools/cpuload +-t Interval for calculating the CPU usage. The value ranges from 0 to 60000, in milliseconds. If the value is 0, thread information is printed each time a scheduling occurs. The default value is 1000. +-n Number of top CPU-consuming tasks to display. The default value is 3. +-p Sets the CPU usage threshold. When the CPU usage exceeds the threshold, the system displays information. The value ranges from 0 to 100. The default value is 90. +-m Sets the size of the circular buffer for recording scheduling tracks. The value ranges from 1000 to 1000000. The default value is 10000. + + +cpuload calculates the cpu usage, showing which tasks use up the cpu resource. + +It displays the top N tasks when the cpu usage exceeds P% and recalculates +every T ms. + +This works by tracing the sched switch events using tracepoints. + +Since this uses BPF, only the root user can use this tool. + +optional arguments: + -h, --help show this help message and exit + -t TIME, --time TIME interval to calculate, default 1000 + -n NUMBER, --number NUMBER + maximum tasks to print, default 3 + -p PERCENT_LIMIT, --percent_limit PERCENT_LIMIT + minimum percent to print, default 90 + +example: +[root@localhost ~]# ./cpuload.py -p 50 -n 2 -t 100 +Tracing task switch. Output when cpu is overload. Ctrl-C to end. 
+DATE COMM PID CPU TIME(ms) %CPU +2021-01-27 10:40:39 stress-ng-cpu 33179 1 100.529 96.68% +2021-01-27 10:40:39 cpuload.py 395575 1 3.363 03.23% +2021-01-27 10:40:39 stress-ng-cpu 33175 3 107.704 99.73% +2021-01-27 10:40:39 sshd 2259 3 0.226 00.21% +2021-01-27 10:40:39 stress-ng-cpu 33176 0 131.978 99.99% +2021-01-27 10:40:39 kworker/0:0 388650 0 0.017 00.01% +2021-01-27 10:40:39 stress-ng-cpu 33178 2 183.987 99.99% +2021-01-27 10:40:39 kworker/2:0 391880 2 0.011 00.01% + + + diff --git a/doc/cpuload.md b/doc/cpuload.md new file mode 100644 index 0000000..c56af1f --- /dev/null +++ b/doc/cpuload.md @@ -0,0 +1,13 @@ +# cpuload + +CPU冲高检测工具cpuload,使用该工具能够将CPU使用率高的进程打印出来。 +实现原理, 通过bcc工具精确trace调度轨迹, 统计分析CPU占用率高的进程; + +```shell +python /usr/share/bcc/tools/cpuload +-t 计算CPU使用率的周期。单位为毫秒,取值为0~60000,取0则每次发生调度都会打印出线程的信息,默认值为1000。 +-n 打印CPU使用率top。默认值为3。 +-p 设置CPU使用率的水线,超过时打印。0~100,默认值为90。 +-m 设置记录调度轨迹的循环缓冲区大小。1000~1000000,默认值为10000。 +``` + diff --git a/src/cpu/cpuload.py b/src/cpu/cpuload.py new file mode 100755 index 0000000..47062e9 --- /dev/null +++ b/src/cpu/cpuload.py @@ -0,0 +1,197 @@ +#!/usr/bin/python +# @lint-avoid-python-3-compatibility-imports +# +# cpuload Display top N tasks use more than U percent cpu resource when +# the cpu doesn't enter idle state for more than T ms. +# +# USAGE: cpuload [-h] [-t time] [-n number] [-p percent_limit] [-m max_entry] +# +# This uses in-kernel eBPF maps to cache task details (PID and comm) by +# sched_switch, as well as a running time for calculating cpu usage. 
+ +from __future__ import print_function +from bcc import BPF +from bcc.utils import printb +import argparse +from datetime import datetime + +# arguments +examples = """examples: + ./cpuload # display tasks when cpu overload + ./cpuload -t 100 # calculate cpu usage every 100 ms + ./cpuload -n 5 # display top 5 tasks details + ./cpuload -p 30 # display tasks when cpu usage exceeds 30% + ./cpuload -m 10000 # set the maximum number of entry to 10,000 +""" +parser = argparse.ArgumentParser( + description="display tasks when cpu overload", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=examples) +parser.add_argument("-t", "--time", default=1000, + help="interval for calculating the CPU usage, in milliseconds(0 - 60000), default 1000") +parser.add_argument("-n", "--number", default=3, + help="display top n tasks with high cpu usage, default 3") +parser.add_argument("-p", "--percent_limit", default=90, + help="display when the usage of a cpu exceeds percent_limit(0 - 100), default 90") +parser.add_argument("-m", "--max_entry", default=10000, + help="size of the cyclic buffer for recording the scheduling track(1000 - 1000000), default 10000") +parser.add_argument("--ebpf", action="store_true", + help=argparse.SUPPRESS) +args = parser.parse_args() +time_ms = int(args.time) +time_ns = time_ms * 1000000 +number = int(args.number) +percent_limit = int(args.percent_limit) +max_entry = int(args.max_entry) +debug = 0 + +if time_ms > 60000 or time_ms < 0: + print("time invalid") + exit(1) + +if percent_limit > 100 or percent_limit < 0: + print("percent_limit invalid") + exit(1) + +if max_entry > 1000000 or max_entry < 1000: + print("max_entry invalid") + exit(1) + +# define BPF program +bpf_text = """ +#include <linux/sched.h> + +#define MAX_TIME """ + str(time_ns) + """ +#define THRESHOLD """ + str(percent_limit) + """ +#define MAX_ENTRY """ + str(max_entry) + """ + +struct cpu_data_t { + u32 index; + u32 number; + u64 prev_time; + u64 busy_time; + u64 
total_time; +}; + +struct task_data_t { + u32 pid; + char comm[TASK_COMM_LEN]; + u64 delta; +}; + +struct data_t { + u32 index; + u32 number; + u64 total_time; +}; + +BPF_PERCPU_ARRAY(cpu_data, struct cpu_data_t, 1); + +BPF_PERCPU_ARRAY(task_data, struct task_data_t, MAX_ENTRY); + +BPF_PERF_OUTPUT(events); +TRACEPOINT_PROBE(sched, sched_switch) { + u32 index = 0; + u64 now = bpf_ktime_get_ns(), delta; + struct data_t data = {}; + struct cpu_data_t *cpu = cpu_data.lookup(&index); + struct task_data_t *task; + + if (cpu == NULL) + return 0; + + if (cpu->prev_time == 0) { + cpu->prev_time = now; + return 0; + } + + index = (cpu->index + cpu->number) % MAX_ENTRY; + task = task_data.lookup(&index); + if (task == NULL) + return 0; + + delta = now - cpu->prev_time; + if (args->prev_pid != 0) { + cpu->busy_time += delta; + task->pid = args->prev_pid; + __builtin_memcpy(&task->comm, &args->prev_comm, sizeof(task->comm)); + task->delta = now - cpu->prev_time; + cpu->number++; + } + + cpu->prev_time = now; + cpu->total_time += delta; + + if (cpu->total_time > MAX_TIME || cpu->number == MAX_ENTRY) { + if (cpu->busy_time * 100 > cpu->total_time * THRESHOLD) { + data.index = cpu->index; + data.number = cpu->number; + data.total_time = cpu->total_time; + events.perf_submit(args, &data, sizeof(data)); + cpu->index = (index + 1) % MAX_ENTRY; + } + cpu->number = 0; + cpu->busy_time = 0; + cpu->total_time = 0; + cpu->prev_time = now; + } + + return 0; +} +""" + +if debug or args.ebpf: + print(bpf_text) + if args.ebpf: + exit() + +# initialize BPF +b = BPF(text=bpf_text) + +print("Tracing task switch. Output when cpu is overload. 
Ctrl-C to end.") + +print("%-19s %-14s %-7s %-4s %-8s %-5s" % + ("DATE", "COMM", "PID", "CPU", "TIME(ms)", "%CPU")) + +# process event +def print_event(cpu, data, size): + date = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + data = b["events"].event(data) + dic = {} + tasks = b["task_data"] + if data.total_time < time_ns: + print("max_entry is too small, please set more than %d" % + (max_entry * time_ns / data.total_time)) + for i in range(data.index, data.number + data.index): + task = tasks[i % max_entry][cpu] + entry = dic.get(task.pid) + if entry is not None: + entry.delta += task.delta + else: + dic[task.pid] = task + + count = 0 + for item in sorted(dic.items(), key=lambda x: x[1].delta, reverse=True): + if count >= number: + break + task = item[1] + u = task.delta * 100 / data.total_time + print("%s %-14.14s %-7s %-4s %-8.3f %05.2f%%" % ( + date, + task.comm.decode("utf-8", "replace"), + task.pid, + cpu, + float(task.delta) / 1000000, + u)) + count += 1 + dic.clear() + print("---------------------------------------------------------------") + +# loop with callback to print_event +b["events"].open_perf_buffer(print_event) +while 1: + try: + b.perf_buffer_poll() + except KeyboardInterrupt: + exit() +