
From: Xu Kuohai <xukuohai@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICAOAT -------------------------------- Add SMT interference tracking support. On cores with exactly two SMT siblings, the time during which both siblings are simultaneously non-idle is accumulated per CPU at context switch and charged to the cgroup of the outgoing task as "smt" interference time. Signed-off-by: Xu Kuohai <xukuohai@huawei.com> Signed-off-by: Pu Lehui <pulehui@huawei.com> --- arch/x86/kernel/smpboot.c | 5 +- drivers/base/arch_topology.c | 4 ++ include/linux/cgroup.h | 12 +++- kernel/cgroup/ifs.c | 103 +++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 1 + 5 files changed, 123 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a5e84700b319..2ce5ddb7a043 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -61,6 +61,7 @@ #include <linux/cpuhotplug.h> #include <linux/mc146818rtc.h> #include <linux/acpi.h> +#include <linux/cgroup.h> #include <linux/cpuset.h> #include <asm/acpi.h> @@ -681,8 +682,10 @@ void set_cpu_sibling_map(int cpu) if (match_pkg(c, o) && !topology_same_node(c, o)) x86_has_numa_in_package = true; - if ((i == cpu) || (has_smt && match_smt(c, o))) + if ((i == cpu) || (has_smt && match_smt(c, o))) { link_mask(topology_sibling_cpumask, cpu, i); + cgroup_ifs_set_smt(topology_sibling_cpumask(cpu)); + } if ((i == cpu) || (has_mp && match_llc(c, o))) link_mask(cpu_llc_shared_mask, cpu, i); diff --git a/drivers/base/arch_topology.c b/drivers/base/arch_topology.c index 1525d3e2f9d5..a26169094712 100644 --- a/drivers/base/arch_topology.c +++ b/drivers/base/arch_topology.c @@ -807,6 +807,8 @@ void update_siblings_masks(unsigned int cpuid) cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); } + + cgroup_ifs_set_smt(&cpuid_topo->thread_sibling); } static void clear_cpu_topology(int cpu) @@ -823,6 +825,8 @@ static void clear_cpu_topology(int cpu) cpumask_set_cpu(cpu, &cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + + 
cgroup_ifs_set_smt(&cpu_topo->thread_sibling); } void __init reset_cpu_topology(void) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fd5617e7f0de..256d04d28ea2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -863,7 +863,7 @@ void cgroup_move_task_to_root(struct task_struct *tsk); #ifdef CONFIG_CGROUP_IFS enum ifs_types { - IFS_STUB, + IFS_SMT, NR_IFS_TYPES, }; @@ -913,6 +913,16 @@ static inline void cgroup_ifs_account_delta(struct cgroup_ifs_cpu *ifsc, ifsc->time[type] += delta; } +void cgroup_ifs_account_smttime(struct task_struct *prev, + struct task_struct *next, + struct task_struct *idle); +void cgroup_ifs_set_smt(cpumask_t *sibling); + +#else /* !CONFIG_CGROUP_IFS */ +static inline void cgroup_ifs_account_smttime(struct task_struct *prev, + struct task_struct *next, + struct task_struct *idle) {} +static inline void cgroup_ifs_set_smt(cpumask_t *sibling) {} #endif /* CONFIG_CGROUP_IFS */ #endif /* _LINUX_CGROUP_H */ diff --git a/kernel/cgroup/ifs.c b/kernel/cgroup/ifs.c index fc761cb02a6d..634ce18e0773 100644 --- a/kernel/cgroup/ifs.c +++ b/kernel/cgroup/ifs.c @@ -5,8 +5,20 @@ * Copyright (C) 2025-2025 Huawei Technologies Co., Ltd */ +#include <linux/sched/clock.h> #include "cgroup-internal.h" +/* smt information */ +struct smt_info { + u64 total_time; + u64 prev_read_time; + u64 noidle_enter_time; + bool is_noidle; +}; + +static DEFINE_PER_CPU(struct smt_info, smt_info); +static DEFINE_PER_CPU_READ_MOSTLY(int, smt_sibling) = -1; + static DEFINE_PER_CPU(struct cgroup_ifs_cpu, cgrp_root_ifs_cpu); struct cgroup_ifs cgroup_root_ifs = { .pcpu = &cgrp_root_ifs_cpu, @@ -26,6 +38,94 @@ static int __init setup_ifs(char *str) } __setup("cgroup_ifs=", setup_ifs); +void cgroup_ifs_set_smt(cpumask_t *sibling) +{ + int cpu; + int cpuid1 = -1; + int cpuid2 = -1; + bool off = false; + + for_each_cpu(cpu, sibling) { + if (cpuid1 == -1) { + cpuid1 = cpu; + } else if (cpuid2 == -1) { + cpuid2 = cpu; + } else { + 
*per_cpu_ptr(&smt_sibling, cpu) = -1; + off = true; + } + } + + if (cpuid1 != -1) + *per_cpu_ptr(&smt_sibling, cpuid1) = off ? -1 : cpuid2; + + if (cpuid2 != -1) + *per_cpu_ptr(&smt_sibling, cpuid2) = off ? -1 : cpuid1; +} + +static void account_smttime(struct task_struct *task) +{ + u64 delta; + struct cgroup_ifs *ifs; + struct smt_info *info; + + ifs = task_ifs(task); + if (!ifs) + return; + + info = this_cpu_ptr(&smt_info); + + delta = info->total_time - info->prev_read_time; + info->prev_read_time = info->total_time; + + cgroup_ifs_account_delta(this_cpu_ptr(ifs->pcpu), IFS_SMT, delta); +} + +void cgroup_ifs_account_smttime(struct task_struct *prev, + struct task_struct *next, + struct task_struct *idle) +{ + struct smt_info *ci, *si; + u64 now, delta; + int sibling; + + sibling = this_cpu_read(smt_sibling); + if (sibling == -1 || prev == next) + return; + + ci = this_cpu_ptr(&smt_info); + si = per_cpu_ptr(&smt_info, sibling); + + now = sched_clock_cpu(smp_processor_id()); + + /* leave noidle */ + if (prev != idle && next == idle) { + ci->is_noidle = false; + /* account interference time */ + if (ci->noidle_enter_time && si->is_noidle) { + delta = now - ci->noidle_enter_time; + + ci->total_time += delta; + si->total_time += delta; + + si->noidle_enter_time = 0; + ci->noidle_enter_time = 0; + + account_smttime(prev); + } + /* enter noidle */ + } else if (prev == idle && next != idle) { + /* if the sibling is also nonidle, there is smt interference */ + if (si->is_noidle) { + ci->noidle_enter_time = now; + si->noidle_enter_time = now; + } + ci->is_noidle = true; + /* cgroup changed */ + } else if (task_ifs(prev) != task_ifs(next)) + account_smttime(prev); +} + int cgroup_ifs_alloc(struct cgroup *cgrp) { cgrp->ifs = kzalloc(sizeof(struct cgroup_ifs), GFP_KERNEL); @@ -52,6 +152,9 @@ static const char *ifs_type_name(int type) char *name = NULL; switch (type) { + case IFS_SMT: + name = "smt"; + break; default: break; } diff --git a/kernel/sched/core.c 
b/kernel/sched/core.c index b41f3f30ef57..17ca1d72b03e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6750,6 +6750,7 @@ static void __sched notrace __schedule(unsigned int sched_mode) migrate_disable_switch(rq, prev); psi_sched_switch(prev, next, !task_on_rq_queued(prev)); + cgroup_ifs_account_smttime(prev, next, rq->idle); trace_sched_switch(sched_mode & SM_MASK_PREEMPT, prev, next, prev_state); -- 2.34.1