
From: Namhyung Kim <namhyung@kernel.org>

This patch adds a new software event to count context switches that
involve a cgroup switch, i.e. it is counted only when the cgroups of
the previous and next tasks differ.  Note that it only checks the
cgroups in the perf_event subsystem.  For cgroup v2, it shouldn't
matter anyway.

One can argue that this is already possible with the existing
sched_switch tracepoint and eBPF.  But some systems might not have
eBPF available, so I'd like to add this as a simple alternative.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20210210083327.22726-2-namhyung@kernel.org
Signed-off-by: Tengda Wu <wutengda2@huawei.com>
---
 include/linux/perf_event.h      | 7 +++++++
 include/uapi/linux/perf_event.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 789338a342c2..1038d32d3893 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1244,6 +1244,13 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
 	if (__perf_sw_enabled(PERF_COUNT_SW_CONTEXT_SWITCHES))
 		__perf_sw_event_sched(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 0);
 
+#ifdef CONFIG_CGROUP_PERF
+	if (__perf_sw_enabled(PERF_COUNT_SW_CGROUP_SWITCHES) &&
+	    perf_cgroup_from_task(prev, NULL) !=
+	    perf_cgroup_from_task(next, NULL))
+		__perf_sw_event_sched(PERF_COUNT_SW_CGROUP_SWITCHES, 1, 0);
+#endif
+
 	if (static_branch_unlikely(&perf_sched_events))
 		__perf_event_task_sched_out(prev, next);
 }
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 4b0893023877..e1b9861b028c 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -112,6 +112,7 @@ enum perf_sw_ids {
 	PERF_COUNT_SW_EMULATION_FAULTS		= 8,
 	PERF_COUNT_SW_DUMMY			= 9,
 	PERF_COUNT_SW_BPF_OUTPUT		= 10,
+	PERF_COUNT_SW_CGROUP_SWITCHES		= 11,
 
 	PERF_COUNT_SW_MAX,			/* non-ABI */
 };
-- 
2.34.1
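
For illustration (not part of the patch itself), here is a minimal
userspace sketch of reading the new counter through perf_event_open(2).
It assumes a kernel with this change applied; on older kernels
perf_event_open() rejects the unknown config value with EINVAL.  The
SW_CGROUP_SWITCHES macro is our own fallback name for installed uapi
headers that predate the patch.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <linux/perf_event.h>

/*
 * perf_sw_ids is an enum in the uapi header, so an #ifndef guard cannot
 * detect whether the new id exists; spell out the value for toolchains
 * whose headers predate this patch.
 */
#define SW_CGROUP_SWITCHES	11	/* PERF_COUNT_SW_CGROUP_SWITCHES */

/* glibc provides no wrapper for perf_event_open(2) */
static int perf_event_open(struct perf_event_attr *attr, pid_t pid,
			   int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = SW_CGROUP_SWITCHES;
	attr.disabled = 1;

	/* pid 0, cpu -1: measure the calling thread on any CPU */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");	/* EINVAL on pre-patch kernels */
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	sleep(1);			/* the workload of interest runs here */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cgroup switches: %lld\n", count);

	close(fd);
	return 0;
}

The sketch counts per-thread (pid 0, cpu -1).  A system-wide count
would instead open one event per CPU with pid -1, which needs a
permissive perf_event_paranoid setting or CAP_PERFMON.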