hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7ZBSR
CVE: NA

----------------------------------------
Commit b869720191ec ("sched: smart grid: init sched_grid_qos structure on QOS purpose") introduced a smart_grid-based QOS partitioning mechanism. This commit extends that mechanism to implement smart_grid zones.

In the default smart_grid configuration, the entire system is divided into two partitions:
1. Hot zone (performance first)
2. Warm zone (energy consumption priority)
In addition, smart_grid dynamically adjusts the size of the hot zone based on the task load in the current partition, building on commit 65523f55989a ("sched: Introduce smart grid scheduling strategy for cfs").

 --------        --------        --------
| group0 |      | group1 |      | group2 |
 --------        --------        --------
     |               |               |
     v               v               v
 -------------------------       --------------
|                         |     |              |
|         hot zone        |     |  warm zone   |
|                         |     |              |
 -------------------------       --------------
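
The zone split itself is simple set arithmetic: the hot zone is the union of each participating task group's currently selected affinity domain, and the warm zone is the union of their largest (top-level) domains minus whatever is already hot. The short userspace C model below only illustrates that computation; the names (group_state, compute_zones, the example bitmask values) are made up for the illustration, and the real kernel implementation is sched_grid_zone_update() in the diff below.

#include <stdio.h>

/*
 * Illustrative model of the hot/warm zone split (not kernel code).
 * Each task group contributes:
 *   - its currently selected affinity domain  -> hot zone
 *   - its largest (top-level) affinity domain -> warm zone candidate
 * The warm zone then drops every CPU already claimed by the hot zone,
 * mirroring the cpumask_or()/cpumask_andnot() sequence in
 * sched_grid_zone_update().
 */
struct group_state {
	unsigned long curr_domain;	/* CPUs of the current (hot) level */
	unsigned long top_domain;	/* CPUs of the largest level */
};

static void compute_zones(const struct group_state *groups, int nr_groups,
			  unsigned long *hot, unsigned long *warm)
{
	int i;

	*hot = 0;
	*warm = 0;

	for (i = 0; i < nr_groups; i++) {
		*hot  |= groups[i].curr_domain;
		*warm |= groups[i].top_domain;
	}

	/* Warm zone excludes CPUs already in the hot zone. */
	*warm &= ~(*hot);
}

int main(void)
{
	/* Two hypothetical groups on an 8-CPU system (bit n = CPU n). */
	struct group_state groups[] = {
		{ .curr_domain = 0x03, .top_domain = 0x0f },	/* group0: hot 0-1, max 0-3 */
		{ .curr_domain = 0x0c, .top_domain = 0xff },	/* group1: hot 2-3, max 0-7 */
	};
	unsigned long hot, warm;

	compute_zones(groups, 2, &hot, &warm);
	printf("hot zone : 0x%lx\n", hot);	/* 0x0f -> CPUs 0-3 */
	printf("warm zone: 0x%lx\n", warm);	/* 0xf0 -> CPUs 4-7 */
	return 0;
}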
Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
---
 include/linux/sched/grid_qos.h | 21 ++++
 kernel/sched/core.c            |  7 +++
 kernel/sched/fair.c            |  7 +++
 kernel/sched/grid/qos.c        | 88 ++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h           |  1 +
 5 files changed, 124 insertions(+)
diff --git a/include/linux/sched/grid_qos.h b/include/linux/sched/grid_qos.h
index 23d08dbb6ae6..3bfb10d9f58a 100644
--- a/include/linux/sched/grid_qos.h
+++ b/include/linux/sched/grid_qos.h
@@ -84,7 +84,28 @@ void sched_grid_qos_free(struct task_struct *p);
 
 int sched_grid_preferred_interleave_nid(struct mempolicy *policy);
 int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask);
+
+enum sg_zone_type {
+	SMART_GRID_ZONE_HOT = 0,
+	SMART_GRID_ZONE_WARM,
+	SMART_GRID_ZONE_NR
+};
+
+struct auto_affinity;
+struct sched_grid_zone {
+	raw_spinlock_t lock;
+	struct cpumask cpus[SMART_GRID_ZONE_NR];
+	struct list_head af_list_head; /* struct auto_affinity list head */
+};
+
+int __init sched_grid_zone_init(void);
+int sched_grid_zone_update(bool is_locked);
+int sched_grid_zone_add_af(struct auto_affinity *af);
+int sched_grid_zone_del_af(struct auto_affinity *af);
+struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone);
 #else
+static inline int __init sched_grid_zone_init(void) { return 0; }
+
 static inline int
 sched_grid_preferred_interleave_nid(struct mempolicy *policy)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6a5600223413..657cf96bc6f3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -96,6 +96,8 @@
 #include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
 
+#include <linux/sched/grid_qos.h>
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
 
@@ -9889,6 +9891,7 @@ void __init sched_init_smp(void)
 
 	sched_smp_initialized = true;
 
+	sched_grid_zone_init();
 	init_auto_affinity(&root_task_group);
 }
 
@@ -11438,6 +11441,10 @@ static int cpu_affinity_stat_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "dcount %d\n", ad->dcount);
 	seq_printf(sf, "domain_mask 0x%x\n", ad->domain_mask);
 	seq_printf(sf, "curr_level %d\n", ad->curr_level);
+	seq_printf(sf, "zone hot %*pbl\n",
+		   cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_HOT)));
+	seq_printf(sf, "zone warm %*pbl\n",
+		   cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_WARM)));
 	for (i = 0; i < ad->dcount; i++)
 		seq_printf(sf, "sd_level %d, cpu list %*pbl, stay_cnt %llu\n",
 			   i, cpumask_pr_args(ad->domains[i]),
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 88eaaa2eb55f..d5247c055829 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6662,6 +6662,7 @@ static void affinity_domain_up(struct task_group *tg)
 		if (IS_DOMAIN_SET(level + 1, ad->domain_mask) &&
 		    cpumask_weight(ad->domains[level + 1]) > 0) {
 			ad->curr_level = level + 1;
+			sched_grid_zone_update(false);
 			return;
 		}
 		level++;
@@ -6682,6 +6683,7 @@ static void affinity_domain_down(struct task_group *tg)
 
 		if (IS_DOMAIN_SET(level - 1, ad->domain_mask)) {
 			ad->curr_level = level - 1;
+			sched_grid_zone_update(false);
 			return;
 		}
 		level--;
@@ -6753,6 +6755,7 @@ static int tg_update_affinity_domain_down(struct task_group *tg, void *data)
 		}
 	}
 
+	sched_grid_zone_update(false);
 	raw_spin_unlock_irqrestore(&auto_affi->lock, flags);
 
 	return 0;
@@ -6815,6 +6818,7 @@ void stop_auto_affinity(struct auto_affinity *auto_affi)
 	raw_spin_unlock_irq(&auto_affi->lock);
 
 	smart_grid_usage_dec();
+	sched_grid_zone_update(false);
 	mutex_unlock(&smart_grid_used_mutex);
 }
 
@@ -7020,6 +7024,8 @@ int init_auto_affinity(struct task_group *tg)
 
 	auto_affi->tg = tg;
 	tg->auto_affinity = auto_affi;
+	INIT_LIST_HEAD(&auto_affi->af_list);
+	sched_grid_zone_add_af(auto_affi);
 	return 0;
 }
 
@@ -7037,6 +7043,7 @@ static void destroy_auto_affinity(struct task_group *tg)
 		smart_grid_usage_dec();
 
 	hrtimer_cancel(&auto_affi->period_timer);
+	sched_grid_zone_del_af(auto_affi);
 	free_affinity_domains(&auto_affi->ad);
 
 	kfree(tg->auto_affinity);
diff --git a/kernel/sched/grid/qos.c b/kernel/sched/grid/qos.c
index 4d36c3640753..90d3c33e8f1d 100644
--- a/kernel/sched/grid/qos.c
+++ b/kernel/sched/grid/qos.c
@@ -24,6 +24,7 @@
 #include <linux/sched/cputime.h>
 #include <linux/sched/grid_qos.h>
 #include "internal.h"
+#include <../kernel/sched/sched.h>
 
 static inline int qos_affinity_set(struct task_struct *p)
 {
@@ -154,3 +155,90 @@ int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask)
 
 	return nd;
 }
+
+static struct sched_grid_zone sg_zone;
+
+int __init sched_grid_zone_init(void)
+{
+	int index;
+
+	for (index = 0; index < SMART_GRID_ZONE_NR; index++)
+		cpumask_clear(&sg_zone.cpus[index]);
+
+	raw_spin_lock_init(&sg_zone.lock);
+	INIT_LIST_HEAD(&sg_zone.af_list_head);
+	return 0;
+}
+
+int sched_grid_zone_update(bool is_locked)
+{
+	struct list_head *pos;
+	struct auto_affinity *af_pos;
+	unsigned long flags;
+
+	if (!is_locked)
+		raw_spin_lock_irqsave(&sg_zone.lock, flags);
+
+	cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_HOT]);
+	cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_WARM]);
+
+	list_for_each(pos, &sg_zone.af_list_head) {
+		af_pos = list_entry(pos, struct auto_affinity, af_list);
+
+		/* when smart_grid not used we need calculate all task_group */
+		/* when smart_grid used we only calculate enabled task_group */
+		if (smart_grid_used() && af_pos->mode == 0)
+			continue;
+
+		cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_HOT], &sg_zone.cpus[SMART_GRID_ZONE_HOT],
+			   af_pos->ad.domains[af_pos->ad.curr_level]);
+		/* Update warm zone CPUs to max level first */
+		cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_WARM], &sg_zone.cpus[SMART_GRID_ZONE_WARM],
+			   af_pos->ad.domains[af_pos->ad.dcount - 1]);
+	}
+
+	/* Then reset warm zone CPUs without hot zone CPUs */
+	cpumask_andnot(&sg_zone.cpus[SMART_GRID_ZONE_WARM], &sg_zone.cpus[SMART_GRID_ZONE_WARM],
+		       &sg_zone.cpus[SMART_GRID_ZONE_HOT]);
+
+	if (!is_locked)
+		raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
+
+	return 0;
+}
+
+int sched_grid_zone_add_af(struct auto_affinity *af)
+{
+	unsigned long flags;
+
+	if (af == NULL)
+		return -1;
+
+	raw_spin_lock_irqsave(&sg_zone.lock, flags);
+	list_add_tail(&af->af_list, &sg_zone.af_list_head);
+	sched_grid_zone_update(true);
+	raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
+	return 0;
+}
+
+int sched_grid_zone_del_af(struct auto_affinity *af)
+{
+	unsigned long flags;
+
+	if (af == NULL)
+		return -1;
+
+	raw_spin_lock_irqsave(&sg_zone.lock, flags);
+	list_del(&af->af_list);
+	sched_grid_zone_update(true);
+	raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
+	return 0;
+}
+
+struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone)
+{
+	if (zone >= SMART_GRID_ZONE_NR)
+		return NULL;
+
+	return &sg_zone.cpus[zone];
+}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 8d931717b201..ef614401573b 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -384,6 +384,7 @@ struct auto_affinity {
 	int period_active;
 	struct affinity_domain ad;
 	struct task_group *tg;
+	struct list_head af_list;
 #endif
 };
 
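
For reference, the two seq_printf() calls added to cpu_affinity_stat_show() append the zone cpumasks to the existing per-task-group affinity statistics output (exposed through the cpu cgroup's affinity_stat file in openEuler; the exact file name is an assumption here). On a hypothetical system whose hot zone is CPUs 0-3 and warm zone is CPUs 4-15, the new lines would read:

zone hot 0-3
zone warm 4-15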