
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/release-management/issues/IC8X6H

--------------------------------

On Kunpeng servers, each LLC domain contains multiple clusters. When
multiple services are deployed within the same LLC domain, their tasks
are spread across all clusters. This results in:
1. High cache synchronization overhead between tasks of the same service.
2. Severe cache contention among tasks from different services.

The Soft Domain architecture partitions resources by cluster. Under
low-load conditions, each service runs exclusively within its dedicated
domain, which prevents cross-service interference and thereby improves
both CPU isolation and cache locality.

Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 include/linux/sched/topology.h |  21 ++++++
 init/Kconfig                   |  12 ++++
 kernel/sched/Makefile          |   1 +
 kernel/sched/core.c            |   1 +
 kernel/sched/sched.h           |  11 ++++
 kernel/sched/soft_domain.c     | 114 +++++++++++++++++++++++++++++++++
 6 files changed, 160 insertions(+)
 create mode 100644 kernel/sched/soft_domain.c

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index c60eea1c805e..31b9ccf21d5e 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -78,6 +78,27 @@ extern int sched_domain_level_max;
 
 struct sched_group;
 
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+
+struct soft_subdomain {
+	/* the number of task groups attached to this sub domain. */
+	int			attached;
+	struct list_head	node;
+	unsigned long		span[];
+};
+
+/*
+ * Each LLC builds a soft domain:
+ * A soft scheduling domain is divided into multiple subdomains,
+ * typically based on the physical structure of CPU clusters.
+ */
+struct soft_domain {
+	struct list_head	child_domain;
+	int			nr_available_cpus;
+	unsigned long		span[];
+};
+#endif
+
 struct sched_domain_shared {
 	atomic_t	ref;
 	atomic_t	nr_busy_cpus;
diff --git a/init/Kconfig b/init/Kconfig
index 5f88cce193e8..7fcda138c76f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1115,6 +1115,18 @@ config QOS_SCHED_NUMA_ICON
 
 	  If in doubt, say N.
 
+config SCHED_SOFT_DOMAIN
+	bool "Soft domain scheduler"
+	depends on FAIR_GROUP_SCHED
+	depends on SCHED_CLUSTER
+	default n
+	help
+	  This feature builds a CPU soft domain for each task group. Tasks are
+	  prioritized and aggregated to execute within soft domains, which optimizes
+	  resource allocation and enhances cache locality.
+
+	  If in doubt, say N.
+
 config UCLAMP_TASK_GROUP
 	bool "Utilization clamping per group of tasks"
 	depends on CGROUP_SCHED
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index ff9ff2c17f79..cc5d2cc388bf 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -42,3 +42,4 @@ obj-$(CONFIG_BPF_SCHED) += bpf_topology.o
 obj-$(CONFIG_QOS_SCHED_SMART_GRID) += grid/
 obj-$(CONFIG_SCHED_TASK_RELATIONSHIP) += relationship.o relationship_ioctl.o
 obj-$(CONFIG_QOS_SCHED_NUMA_ICON) += numa_icon.o
+obj-$(CONFIG_SCHED_SOFT_DOMAIN) += soft_domain.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 457eeebc7b62..73ce0ce36c83 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8202,6 +8202,7 @@ void __init sched_init_smp(void)
 	sched_smp_initialized = true;
 
 	sched_grid_zone_init();
+	build_soft_domain();
 
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 	init_auto_affinity(&root_task_group);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fe6342305b0f..6232148c5099 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3221,3 +3221,14 @@ static __always_inline int task_has_qos_idle_policy(struct task_struct *p)
 
 void swake_up_all_locked(struct swait_queue_head *q);
 void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
+#ifdef CONFIG_SCHED_SOFT_DOMAIN
+void build_soft_domain(void);
+static inline struct cpumask *soft_domain_span(unsigned long span[])
+{
+	return to_cpumask(span);
+}
+#else
+
+static inline void build_soft_domain(void) { }
+
+#endif
diff --git a/kernel/sched/soft_domain.c b/kernel/sched/soft_domain.c
new file mode 100644
index 000000000000..ea4754a3ee65
--- /dev/null
+++ b/kernel/sched/soft_domain.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Common code for Soft Domain Scheduling
+ *
+ * Copyright (C) 2025-2025 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include "sched.h"
+
+static DEFINE_PER_CPU(struct soft_domain *, g_sf_d);
+
+static void free_sub_soft_domain(struct soft_domain *sf_d);
+
+static int build_soft_sub_domain(struct sched_domain *sd, struct cpumask *cpus)
+{
+	struct cpumask *span = sched_domain_span(sd);
+	int nid = cpu_to_node(cpumask_first(span));
+	struct soft_domain *sf_d = NULL;
+	int i;
+
+	sf_d = kzalloc_node(sizeof(struct soft_domain) + cpumask_size(),
+			    GFP_KERNEL, nid);
+	if (!sf_d)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&sf_d->child_domain);
+	sf_d->nr_available_cpus = cpumask_weight(span);
+	cpumask_copy(to_cpumask(sf_d->span), span);
+
+	for_each_cpu_and(i, sched_domain_span(sd), cpus) {
+		struct soft_subdomain *sub_d = NULL;
+
+		sub_d = kzalloc_node(sizeof(struct soft_subdomain) + cpumask_size(),
+				     GFP_KERNEL, nid);
+		if (!sub_d) {
+			free_sub_soft_domain(sf_d);
+			return -ENOMEM;
+		}
+
+		list_add_tail(&sub_d->node, &sf_d->child_domain);
+		cpumask_copy(soft_domain_span(sub_d->span), cpu_clustergroup_mask(i));
+		cpumask_andnot(cpus, cpus, cpu_clustergroup_mask(i));
+	}
+
+	for_each_cpu(i, sched_domain_span(sd)) {
+		rcu_assign_pointer(per_cpu(g_sf_d, i), sf_d);
+	}
+
+	return 0;
+}
+
+static void free_sub_soft_domain(struct soft_domain *sf_d)
+{
+	struct list_head *children = &sf_d->child_domain;
+	struct soft_subdomain *entry = NULL, *next = NULL;
+	int i;
+
+	list_for_each_entry_safe(entry, next, children, node) {
+		list_del(&entry->node);
+		kfree(entry);
+	}
+
+	for_each_cpu(i, to_cpumask(sf_d->span)) {
+		rcu_assign_pointer(per_cpu(g_sf_d, i), NULL);
+	}
+
+	kfree(sf_d);
+}
+
+static void free_soft_domain(void)
+{
+	struct soft_domain *sf_d = NULL;
+	int i;
+
+	for_each_cpu(i, cpu_active_mask) {
+		sf_d = rcu_dereference(per_cpu(g_sf_d, i));
+		if (sf_d)
+			free_sub_soft_domain(sf_d);
+	}
+}
+
+void build_soft_domain(void)
+{
+	struct sched_domain *sd;
+	static struct cpumask cpus;
+	int i, ret;
+
+	cpumask_copy(&cpus, cpu_active_mask);
+	rcu_read_lock();
+	for_each_cpu(i, &cpus) {
+		/* build soft domain for each llc domain. */
+		sd = rcu_dereference(per_cpu(sd_llc, i));
+		if (sd) {
+			ret = build_soft_sub_domain(sd, &cpus);
+			if (ret) {
+				free_soft_domain();
+				goto out;
+			}
+		}
+	}
+
+out:
+	rcu_read_unlock();
+}
-- 
2.18.0.huawei.25
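
For context, nothing in this patch consumes the soft domains yet; they are
only built once at boot from the LLC sched domains. The sketch below is a
rough illustration, not code from this series, of how a follow-up patch
might pick a free cluster-sized subdomain from a CPU's soft domain and
reserve it for one task group. The helper name pick_idle_subdomain() is
hypothetical, it would have to live in kernel/sched/soft_domain.c because
g_sf_d is static there, and the caller is assumed to hold rcu_read_lock()
plus whatever serialization later patches add for attached and
nr_available_cpus.

static struct soft_subdomain *pick_idle_subdomain(int cpu)
{
	struct soft_domain *sf_d;
	struct soft_subdomain *sub_d;

	/* g_sf_d is published with rcu_assign_pointer() in build_soft_sub_domain(). */
	sf_d = rcu_dereference(per_cpu(g_sf_d, cpu));
	if (!sf_d || !sf_d->nr_available_cpus)
		return NULL;

	/* Prefer a cluster that no task group has claimed yet. */
	list_for_each_entry(sub_d, &sf_d->child_domain, node) {
		if (!sub_d->attached) {
			sub_d->attached++;
			sf_d->nr_available_cpus -=
				cpumask_weight(soft_domain_span(sub_d->span));
			return sub_d;
		}
	}

	return NULL;
}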