hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7ZBSR CVE: NA
----------------------------------------
Since commit b869720191ec ("sched: smart grid: init sched_grid_qos structure on QOS purpose") introduced a smart_grid-based QOS partitioningmechanism, this commit further expands the partitioning mechanism to implement smart_grid zone.
In the default configuration smart_grid the entire system is divided into two partitions:
1. Hot zone (performance first) 2. Warm zone (energy consumption priority)
In addition, the smart_grid will dynamically maintain the size of the hot zone in the current system based on the task load status in the current partition, which based on commit 65523f55989a ("sched: Introduce smart grid scheduling strategy for cfs").
-------- -------- -------- | group0 | | group1 | | group2 | -------- -------- -------- | | | v v v ------------------------- -------------- | | | | | hot zone | | warm zone | | | | | ------------------------- ---------------
Signed-off-by: Yipeng Zou zouyipeng@huawei.com --- include/linux/sched/grid_qos.h | 21 ++++++++ kernel/sched/core.c | 7 +++ kernel/sched/fair.c | 7 +++ kernel/sched/grid/qos.c | 88 ++++++++++++++++++++++++++++++++++ kernel/sched/sched.h | 1 + 5 files changed, 124 insertions(+)
diff --git a/include/linux/sched/grid_qos.h b/include/linux/sched/grid_qos.h index 23d08dbb6ae6..3bfb10d9f58a 100644 --- a/include/linux/sched/grid_qos.h +++ b/include/linux/sched/grid_qos.h @@ -84,7 +84,28 @@ void sched_grid_qos_free(struct task_struct *p);
int sched_grid_preferred_interleave_nid(struct mempolicy *policy); int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask); + +enum sg_zone_type { + SMART_GRID_ZONE_HOT = 0, + SMART_GRID_ZONE_WARM, + SMART_GRID_ZONE_NR +}; + +struct auto_affinity; +struct sched_grid_zone { + raw_spinlock_t lock; + struct cpumask cpus[SMART_GRID_ZONE_NR]; + struct list_head af_list_head; /* struct auto_affinity list head */ +}; + +int __init sched_grid_zone_init(void); +int sched_grid_zone_update(bool is_locked); +int sched_grid_zone_add_af(struct auto_affinity *af); +int sched_grid_zone_del_af(struct auto_affinity *af); +struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone); #else +static inline int __init sched_grid_zone_init(void) { return 0; } + static inline int sched_grid_preferred_interleave_nid(struct mempolicy *policy) { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index fc837f6992ab..9d708732ef20 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -96,6 +96,8 @@ #include "../../io_uring/io-wq.h" #include "../smpboot.h"
+#include <linux/sched/grid_qos.h> + EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpu); EXPORT_TRACEPOINT_SYMBOL_GPL(ipi_send_cpumask);
@@ -9920,6 +9922,7 @@ void __init sched_init_smp(void)
sched_smp_initialized = true;
+ sched_grid_zone_init(); init_auto_affinity(&root_task_group); }
@@ -11472,6 +11475,10 @@ static int cpu_affinity_stat_show(struct seq_file *sf, void *v) seq_printf(sf, "dcount %d\n", ad->dcount); seq_printf(sf, "domain_mask 0x%x\n", ad->domain_mask); seq_printf(sf, "curr_level %d\n", ad->curr_level); + seq_printf(sf, "zone hot %*pbl\n", + cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_HOT))); + seq_printf(sf, "zone warm %*pbl\n", + cpumask_pr_args(sched_grid_zone_cpumask(SMART_GRID_ZONE_WARM))); for (i = 0; i < ad->dcount; i++) seq_printf(sf, "sd_level %d, cpu list %*pbl, stay_cnt %llu\n", i, cpumask_pr_args(ad->domains[i]), diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dfe8be91dc20..0d815340317a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6808,6 +6808,7 @@ static void affinity_domain_up(struct task_group *tg) if (IS_DOMAIN_SET(level + 1, ad->domain_mask) && cpumask_weight(ad->domains[level + 1]) > 0) { ad->curr_level = level + 1; + sched_grid_zone_update(false); return; } level++; @@ -6825,6 +6826,7 @@ static void affinity_domain_down(struct task_group *tg)
if (IS_DOMAIN_SET(level - 1, ad->domain_mask)) { ad->curr_level = level - 1; + sched_grid_zone_update(false); return; } level--; @@ -6896,6 +6898,7 @@ static int tg_update_affinity_domain_down(struct task_group *tg, void *data) }
} + sched_grid_zone_update(false); raw_spin_unlock_irqrestore(&auto_affi->lock, flags);
return 0; @@ -6958,6 +6961,7 @@ void stop_auto_affinity(struct auto_affinity *auto_affi) raw_spin_unlock_irq(&auto_affi->lock);
smart_grid_usage_dec(); + sched_grid_zone_update(false); mutex_unlock(&smart_grid_used_mutex); }
@@ -7165,6 +7169,8 @@ int init_auto_affinity(struct task_group *tg)
auto_affi->tg = tg; tg->auto_affinity = auto_affi; + INIT_LIST_HEAD(&auto_affi->af_list); + sched_grid_zone_add_af(auto_affi); return 0; }
@@ -7182,6 +7188,7 @@ static void destroy_auto_affinity(struct task_group *tg) smart_grid_usage_dec();
hrtimer_cancel(&auto_affi->period_timer); + sched_grid_zone_del_af(auto_affi); free_affinity_domains(&auto_affi->ad);
kfree(tg->auto_affinity); diff --git a/kernel/sched/grid/qos.c b/kernel/sched/grid/qos.c index 4d36c3640753..90d3c33e8f1d 100644 --- a/kernel/sched/grid/qos.c +++ b/kernel/sched/grid/qos.c @@ -24,6 +24,7 @@ #include <linux/sched/cputime.h> #include <linux/sched/grid_qos.h> #include "internal.h" +#include <../kernel/sched/sched.h>
static inline int qos_affinity_set(struct task_struct *p) { @@ -154,3 +155,90 @@ int sched_grid_preferred_nid(int preferred_nid, nodemask_t *nodemask)
return nd; } + +static struct sched_grid_zone sg_zone; + +int __init sched_grid_zone_init(void) +{ + int index; + + for (index = 0; index < SMART_GRID_ZONE_NR; index++) + cpumask_clear(&sg_zone.cpus[index]); + + raw_spin_lock_init(&sg_zone.lock); + INIT_LIST_HEAD(&sg_zone.af_list_head); + return 0; +} + +int sched_grid_zone_update(bool is_locked) +{ + struct list_head *pos; + struct auto_affinity *af_pos; + unsigned long flags; + + if (!is_locked) + raw_spin_lock_irqsave(&sg_zone.lock, flags); + + cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_HOT]); + cpumask_clear(&sg_zone.cpus[SMART_GRID_ZONE_WARM]); + + list_for_each(pos, &sg_zone.af_list_head) { + af_pos = list_entry(pos, struct auto_affinity, af_list); + + /* when smart_grid not used we need calculate all task_group */ + /* when smart_grid used we only calculate enabled task_group */ + if (smart_grid_used() && af_pos->mode == 0) + continue; + + cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_HOT], &sg_zone.cpus[SMART_GRID_ZONE_HOT], + af_pos->ad.domains[af_pos->ad.curr_level]); + /* Update warm zone CPUs to max level first */ + cpumask_or(&sg_zone.cpus[SMART_GRID_ZONE_WARM], &sg_zone.cpus[SMART_GRID_ZONE_WARM], + af_pos->ad.domains[af_pos->ad.dcount - 1]); + } + + /* Then reset warm zone CPUs without hot zone CPUs */ + cpumask_andnot(&sg_zone.cpus[SMART_GRID_ZONE_WARM], &sg_zone.cpus[SMART_GRID_ZONE_WARM], + &sg_zone.cpus[SMART_GRID_ZONE_HOT]); + + if (!is_locked) + raw_spin_unlock_irqrestore(&sg_zone.lock, flags); + + return 0; +} + +int sched_grid_zone_add_af(struct auto_affinity *af) +{ + unsigned long flags; + + if (af == NULL) + return -1; + + raw_spin_lock_irqsave(&sg_zone.lock, flags); + list_add_tail(&af->af_list, &sg_zone.af_list_head); + sched_grid_zone_update(true); + raw_spin_unlock_irqrestore(&sg_zone.lock, flags); + return 0; +} + +int sched_grid_zone_del_af(struct auto_affinity *af) +{ + unsigned long flags; + + if (af == NULL) + return -1; + + raw_spin_lock_irqsave(&sg_zone.lock, flags); + list_del(&af->af_list); + sched_grid_zone_update(true); + raw_spin_unlock_irqrestore(&sg_zone.lock, flags); + return 0; +} + +struct cpumask *sched_grid_zone_cpumask(enum sg_zone_type zone) +{ + if (zone >= SMART_GRID_ZONE_NR) + return NULL; + + return &sg_zone.cpus[zone]; +} diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 205148dd4885..99629caeb8c9 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -387,6 +387,7 @@ struct auto_affinity { int period_active; struct affinity_domain ad; struct task_group *tg; + struct list_head af_list; }; #endif