
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IC8X6H

--------------------------------

On Kunpeng servers, each LLC domain contains multiple clusters. When
multiple services are deployed within the same LLC domain, their tasks
become distributed across all clusters. This results in:
1. High cache synchronization overhead between different tasks of the
   same service.
2. Severe cache contention among tasks from different services.

The Soft Domain architecture partitions resources by cluster. Under
low-load conditions, each service operates exclusively within its
dedicated domain to prevent cross-service interference, thereby
enhancing both CPU isolation and cache locality.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 include/linux/sched/topology.h | 21 ++++++++++
 init/Kconfig                   | 11 +++++
 kernel/sched/build_policy.c    |  3 ++
 kernel/sched/core.c            |  1 +
 kernel/sched/sched.h           | 12 ++++++
 kernel/sched/soft_domain.c     | 89 +++++++++++++++++++++++++++++++++++
 6 files changed, 137 insertions(+)
 create mode 100644 kernel/sched/soft_domain.c

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 8e4d9bbdaa40..7f37b5caad42 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -77,6 +77,27 @@ extern int sched_domain_level_max;
 
 struct sched_group;
 
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+
+struct soft_subdomain {
+	/* number of task groups attached to this subdomain. */
+	int attached;
+	struct list_head node;
+	unsigned long span[];
+};
+
+/*
+ * Each LLC builds a soft domain:
+ * A soft scheduling domain is divided into multiple subdomains,
+ * typically based on the physical structure of CPU clusters.
+ */
+struct soft_domain {
+	struct list_head child_domain;
+	int nr_available_cpus;
+	unsigned long span[];
+};
+#endif
+
 struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
diff --git a/init/Kconfig b/init/Kconfig
index 5af21834fbff..367a7e76b6e0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1138,6 +1138,17 @@ config QOS_SCHED_DYNAMIC_AFFINITY
 	  of taskgroup is below threshold setted, otherwise make taskgroup to use
 	  cpus allowed.
 
+config SCHED_SOFT_DOMAIN
+	bool "Soft domain scheduler"
+	depends on FAIR_GROUP_SCHED
+	default n
+	help
+	  This feature builds a CPU soft domain for each task group. Tasks are
+	  prioritized and aggregated to execute within soft domains, which
+	  optimizes resource allocation and enhances cache locality.
+
+	  If in doubt, say N.
+
 config SCHED_MM_CID
 	def_bool n
 	depends on SMP && RSEQ
diff --git a/kernel/sched/build_policy.c b/kernel/sched/build_policy.c
index d9dc9ab3773f..6aff6482b1e7 100644
--- a/kernel/sched/build_policy.c
+++ b/kernel/sched/build_policy.c
@@ -52,3 +52,6 @@
 
 #include "cputime.c"
 #include "deadline.c"
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+#include "soft_domain.c"
+#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index de6dd807dd81..b3092685f267 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -9964,6 +9964,7 @@ void __init sched_init_smp(void)
 	sched_smp_initialized = true;
 
 	sched_grid_zone_init();
+	build_soft_domain();
 
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 	init_auto_affinity(&root_task_group);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 761870540a21..056a680ae9ed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3734,4 +3734,16 @@ extern int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se);
 bool bpf_sched_is_cpu_allowed(struct task_struct *p, int cpu);
 #endif
 
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+void build_soft_domain(void);
+static inline struct cpumask *soft_domain_span(unsigned long span[])
+{
+	return to_cpumask(span);
+}
+#else
+
+static inline void build_soft_domain(void) { }
+
+#endif
+
 #endif /* _KERNEL_SCHED_SCHED_H */
diff --git a/kernel/sched/soft_domain.c b/kernel/sched/soft_domain.c
new file mode 100644
index 000000000000..e69e0d4d7343
--- /dev/null
+++ b/kernel/sched/soft_domain.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Common code for Soft Domain Scheduling
+ *
+ * Copyright (C) 2025-2025 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+/* Per-CPU pointer to the soft domain of the LLC this CPU belongs to. */
+static DEFINE_PER_CPU(struct soft_domain *, g_sf_d);
+
+/* Free a soft domain together with all of its subdomains. */
+static void free_soft_domain(struct soft_domain *sf_d)
+{
+	struct soft_subdomain *sub_d, *tmp;
+
+	list_for_each_entry_safe(sub_d, tmp, &sf_d->child_domain, node) {
+		list_del(&sub_d->node);
+		kfree(sub_d);
+	}
+	kfree(sf_d);
+}
+
+static int build_soft_sub_domain(struct sched_domain *sd, struct cpumask *cpus)
+{
+	struct cpumask *span = sched_domain_span(sd);
+	int nid = cpu_to_node(cpumask_first(span));
+	struct soft_domain *sf_d = NULL;
+	int i;
+
+	sf_d = kzalloc_node(sizeof(struct soft_domain) + cpumask_size(),
+			    GFP_KERNEL, nid);
+	if (!sf_d)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&sf_d->child_domain);
+	sf_d->nr_available_cpus = cpumask_weight(span);
+	cpumask_copy(to_cpumask(sf_d->span), span);
+
+	/* Carve one subdomain out of each cluster in this LLC. */
+	for_each_cpu_and(i, sched_domain_span(sd), cpus) {
+		struct soft_subdomain *sub_d = NULL;
+
+		sub_d = kzalloc_node(sizeof(struct soft_subdomain) + cpumask_size(),
+				     GFP_KERNEL, nid);
+		if (!sub_d) {
+			free_soft_domain(sf_d);
+			return -ENOMEM;
+		}
+
+		list_add_tail(&sub_d->node, &sf_d->child_domain);
+		cpumask_copy(soft_domain_span(sub_d->span), cpu_clustergroup_mask(i));
+		cpumask_andnot(cpus, cpus, cpu_clustergroup_mask(i));
+	}
+
+	for_each_cpu(i, sched_domain_span(sd)) {
+		rcu_assign_pointer(per_cpu(g_sf_d, i), sf_d);
+	}
+
+	return 0;
+}
+
+void build_soft_domain(void)
+{
+	struct sched_domain *sd;
+	static struct cpumask cpus;
+	int i;
+
+	cpumask_copy(&cpus, cpu_active_mask);
+	rcu_read_lock();
+	for_each_cpu(i, &cpus) {
+		/* Build one soft domain per LLC domain. */
+		sd = rcu_dereference(per_cpu(sd_llc, i));
+		if (sd && build_soft_sub_domain(sd, &cpus))
+			goto out;
+	}
+
+out:
+	rcu_read_unlock();
+}
-- 
2.18.0.huawei.25
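
Not part of this patch, for illustration only: a minimal sketch of how a
follow-up could consume the structures introduced above when attaching a
task group within its LLC. pick_idlest_subdomain() is a hypothetical
helper (only g_sf_d, struct soft_domain and struct soft_subdomain exist
in this patch), and the "fewest attached task groups" policy is an
assumption, not necessarily the series' actual placement policy.

static struct soft_subdomain *pick_idlest_subdomain(int cpu)
{
	struct soft_domain *sf_d;
	struct soft_subdomain *sub_d, *idlest = NULL;

	rcu_read_lock();
	sf_d = rcu_dereference(per_cpu(g_sf_d, cpu));
	if (sf_d) {
		/* Prefer the subdomain with the fewest attached task groups. */
		list_for_each_entry(sub_d, &sf_d->child_domain, node) {
			if (!idlest || sub_d->attached < idlest->attached)
				idlest = sub_d;
		}
	}
	rcu_read_unlock();

	/*
	 * Soft domains built at boot are not freed afterwards, so the
	 * pointer stays valid after rcu_read_unlock() in this sketch.
	 */
	return idlest;
}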