[PATCH OLK-6.6 v4 4/8] sched: introduce smart grid qos zone

9 Jan 2024

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7ZBSR
CVE: NA

----------------------------------------

Commit b869720191ec ("sched: smart grid: init sched_grid_qos
structure on QOS purpose") introduced a smart_grid-based QOS
partitioning mechanism. This commit extends that partitioning
mechanism to implement smart_grid zones.

In the default smart_grid configuration, the entire system is divided
into two partitions:

1. Hot zone (performance first)
2. Warm zone (energy consumption priority)

In addition, smart_grid dynamically maintains the size of the hot
zone in the current system based on the task load status in the current
partition. This builds on commit 65523f55989a ("sched: Introduce smart
grid scheduling strategy for cfs").

 --------        --------        --------
| group0 |      | group1 |      | group2 |
 --------        --------        --------
    |                |              |
    v                v              v
 -------------------------    --------------
|                         |  |              |
|         hot zone        |  |   warm zone  |
|                         |  |              |
 -------------------------    --------------

Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
---
 include/linux/sched/grid_qos.h | 21 ++++++++
 kernel/sched/core.c            |  7 +++
 kernel/sched/fair.c            |  7 +++
 kernel/sched/grid/qos.c        | 88 ++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h           |  1 +
 5 files changed, 124 insertions(+)

diff --git a/include/linux/sched/grid_qos.h b/include/linux/sched/grid_qos.h
index 23d08dbb6ae6..3bfb10d9f58a 100644
--- a/include/linux/sched/grid_qos.h
+++ b/include/linux/sched/grid_qos.h
@@ -84,7 +84,28 @@ void sched_grid_qos_free(struct task_struct *p);
 
 int sched_grid_preferred_interleave_nid(struct mempolicy *policy);
 int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask);
+
+enum sg_zone_type {	/* smart grid zone ids; index into sched_grid_zone.cpus[] */
+	SMART_GRID_ZONE_HOT = 0,	/* performance first */
+	SMART_GRID_ZONE_WARM,		/* energy consumption priority */
+	SMART_GRID_ZONE_NR		/* number of zones; sizes sched_grid_zone.cpus[] */
+};
+
+struct auto_affinity;
+struct sched_grid_zone {	/* zone CPU masks plus the groups they are built from */
+	raw_spinlock_t lock;	/* serializes af_list_head changes and cpus[] rebuilds */
+	struct cpumask cpus[SMART_GRID_ZONE_NR];	/* one mask per enum sg_zone_type */
+	struct list_head af_list_head;	/* struct auto_affinity list head */
+};
+
+int __init sched_grid_zone_init(void);
+int sched_grid_zone_update(bool is_locked);
+int sched_grid_zone_add_af(struct auto_affinity *af);
+int sched_grid_zone_del_af(struct auto_affinity *af);
+struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone);
 #else
+static inline int __init sched_grid_zone_init(void) { return 0; }	/* no-op stub */
+
 static inline int
 sched_grid_preferred_interleave_nid(struct mempolicy *policy)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fc837f6992ab..9d708732ef20 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -96,6 +96,8 @@
 #include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
 
+#include <linux/sched/grid_qos.h>
+
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu);
 EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
 
@@ -9920,6 +9922,7 @@ void __init sched_init_smp(void)
 
 	sched_smp_initialized = true;
 
+	sched_grid_zone_init();
 	init_auto_affinity(&root_task_group);
 }
 
@@ -11472,6 +11475,10 @@ static int cpu_affinity_stat_show(struct seq_file *sf, void *v)
 	seq_printf(sf, "dcount %d\n", ad->dcount);
 	seq_printf(sf, "domain_mask 0x%x\n", ad->domain_mask);
 	seq_printf(sf, "curr_level %d\n", ad->curr_level);
+	seq_printf(sf, "zone hot %*pbl\n",
+			cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_HOT)));
+	seq_printf(sf, "zone warm %*pbl\n",
+			cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_WARM)));
 	for (i = 0; i < ad->dcount; i++)
 		seq_printf(sf, "sd_level %d, cpu list %*pbl, stay_cnt %llu\n",
 			i, cpumask_pr_args(ad->domains[i]),
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index dfe8be91dc20..0d815340317a 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6808,6 +6808,7 @@ static void affinity_domain_up(struct task_group *tg)
 		if (IS_DOMAIN_SET(level + 1, ad->domain_mask) &&
 		    cpumask_weight(ad->domains[level + 1]) > 0) {
 			ad->curr_level = level + 1;
+			sched_grid_zone_update(false);
 			return;
 		}
 		level++;
@@ -6825,6 +6826,7 @@ static void affinity_domain_down(struct task_group *tg)
 
 		if (IS_DOMAIN_SET(level - 1, ad->domain_mask)) {
 			ad->curr_level = level - 1;
+			sched_grid_zone_update(false);
 			return;
 		}
 		level--;
@@ -6896,6 +6898,7 @@ static int tg_update_affinity_domain_down(struct task_group *tg, void *data)
 		}
 
 	}
+	sched_grid_zone_update(false);
 	raw_spin_unlock_irqrestore(&auto_affi->lock, flags);
 
 	return 0;
@@ -6958,6 +6961,7 @@ void stop_auto_affinity(struct auto_affinity *auto_affi)
 	raw_spin_unlock_irq(&auto_affi->lock);
 
 	smart_grid_usage_dec();
+	sched_grid_zone_update(false);
 	mutex_unlock(&smart_grid_used_mutex);
 }
 
@@ -7165,6 +7169,8 @@ int init_auto_affinity(struct task_group *tg)
 
 	auto_affi->tg = tg;
 	tg->auto_affinity = auto_affi;
+	INIT_LIST_HEAD(&auto_affi->af_list);
+	sched_grid_zone_add_af(auto_affi);
 	return 0;
 }
 
@@ -7182,6 +7188,7 @@ static void destroy_auto_affinity(struct task_group *tg)
 		smart_grid_usage_dec();
 
 	hrtimer_cancel(&auto_affi->period_timer);
+	sched_grid_zone_del_af(auto_affi);
 	free_affinity_domains(&auto_affi->ad);
 
 	kfree(tg->auto_affinity);
diff --git a/kernel/sched/grid/qos.c b/kernel/sched/grid/qos.c
index 4d36c3640753..90d3c33e8f1d 100644
--- a/kernel/sched/grid/qos.c
+++ b/kernel/sched/grid/qos.c
@@ -24,6 +24,7 @@
 #include <linux/sched/cputime.h>
 #include <linux/sched/grid_qos.h>
 #include "internal.h"
+#include <../kernel/sched/sched.h>
 
 static inline int qos_affinity_set(struct task_struct *p)
 {
@@ -154,3 +155,90 @@ int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask)
 
 	return nd;
 }
+
+static struct sched_grid_zone sg_zone;
+
+/* Boot-time setup of sg_zone: lock, empty af list, empty zone masks. */
+int __init sched_grid_zone_init(void)
+{
+	int zone;
+
+	raw_spin_lock_init(&sg_zone.lock);
+	INIT_LIST_HEAD(&sg_zone.af_list_head);
+	for (zone = 0; zone < SMART_GRID_ZONE_NR; zone++)
+		cpumask_clear(&sg_zone.cpus[zone]);
+	return 0;
+}
+
+/*
+ * Rebuild both zone masks from every auto_affinity on sg_zone.af_list_head.
+ * Hot is the union of each group's current-level domain; warm is the union
+ * of each group's highest-level domain with the hot CPUs removed.
+ * @is_locked: true if the caller already holds sg_zone.lock. Returns 0.
+ */
+int sched_grid_zone_update(bool is_locked)
+{
+	struct cpumask *hot = &sg_zone.cpus[SMART_GRID_ZONE_HOT];
+	struct cpumask *warm = &sg_zone.cpus[SMART_GRID_ZONE_WARM];
+	struct auto_affinity *af;
+	unsigned long flags;
+
+	if (!is_locked)
+		raw_spin_lock_irqsave(&sg_zone.lock, flags);
+
+	cpumask_clear(hot);
+	cpumask_clear(warm);
+	list_for_each_entry(af, &sg_zone.af_list_head, af_list) {
+		/*
+		 * With smart_grid in use only enabled (mode != 0) groups count;
+		 * otherwise every group does. Defensively skip a group with no
+		 * domains so domains[dcount - 1] never indexes before the array.
+		 */
+		if ((smart_grid_used() && af->mode == 0) || af->ad.dcount < 1)
+			continue;
+
+		cpumask_or(hot, hot, af->ad.domains[af->ad.curr_level]);
+		cpumask_or(warm, warm, af->ad.domains[af->ad.dcount - 1]);
+	}
+	cpumask_andnot(warm, warm, hot);	/* warm = max-level CPUs not in hot */
+
+	if (!is_locked)
+		raw_spin_unlock_irqrestore(&sg_zone.lock, flags);
+	return 0;
+}
+
+/* Link @af into the zone list and recompute the zones; -1 if @af is NULL. */
+int sched_grid_zone_add_af(struct auto_affinity *af)
+{
+	unsigned long irq_flags;
+
+	if (!af)
+		return -1;
+	raw_spin_lock_irqsave(&sg_zone.lock, irq_flags);
+	list_add_tail(&af->af_list, &sg_zone.af_list_head);
+	sched_grid_zone_update(true);	/* sg_zone.lock is already held */
+	raw_spin_unlock_irqrestore(&sg_zone.lock, irq_flags);
+	return 0;
+}
+
+/* Unlink @af from the zone list and recompute the zones; -1 if @af is NULL. */
+int sched_grid_zone_del_af(struct auto_affinity *af)
+{
+	unsigned long irq_flags;
+
+	if (!af)
+		return -1;
+	raw_spin_lock_irqsave(&sg_zone.lock, irq_flags);
+	list_del(&af->af_list);
+	sched_grid_zone_update(true);	/* sg_zone.lock is already held */
+	raw_spin_unlock_irqrestore(&sg_zone.lock, irq_flags);
+	return 0;
+}
+
+/* Return a pointer to sg_zone's mask for @zone (no copy, no locking), or NULL. */
+struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone)
+{
+	if (zone < SMART_GRID_ZONE_NR)
+		return &sg_zone.cpus[zone];
+	return NULL;
+}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 205148dd4885..99629caeb8c9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -387,6 +387,7 @@ struct auto_affinity {
 	int			period_active;
 	struct affinity_domain	ad;
 	struct task_group	*tg;
+	struct list_head	af_list;
 };
 #endif
 
-- 
2.34.1