hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAQWPQ

-----------------------------------------
We add a new per-cgroup cpu.steal_task interface in cgroup v1. The default
value is 0, which means task stealing is disabled for the cgroup; writing 1
enables it.

To enable the steal task feature for a cgroup, first add group_steal to the
kernel command line, then enable STEAL in sched_features, and finally set the
cgroup's cpu.steal_task to 1.
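
As a usage sketch (the cgroup v1 mount point and the debugfs path below are
assumptions and may differ on a given system):

  1) boot with group_steal on the kernel command line;
  2) echo STEAL > /sys/kernel/debug/sched_features
     (or /sys/kernel/debug/sched/features, depending on the kernel version);
  3) echo 1 > /sys/fs/cgroup/cpu/<group>/cpu.steal_task

Writing 0 to cpu.steal_task disables stealing for that cgroup again. The file
is not exposed on the root cgroup (CFTYPE_NOT_ON_ROOT), and if group_steal is
not in use the write is rejected with -EPERM by cpu_steal_task_write().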
Signed-off-by: Cheng Yu <serein.chengyu@huawei.com>
---
 include/linux/sched.h |   4 ++
 kernel/sched/core.c   | 108 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h  |  19 +++++++-
 3 files changed, 130 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 97c216bfb0fc..57de624f17a5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -522,7 +522,11 @@ struct sched_entity {
 #else
 	KABI_RESERVE(1)
 #endif
+#ifdef CONFIG_SCHED_STEAL
+	KABI_USE(2, int steal_task)
+#else
 	KABI_RESERVE(2)
+#endif
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
 };
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7595a3fef28f..900637a6ac09 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8278,6 +8278,9 @@ void __init sched_init(void)
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
 	root_task_group.smt_expell = TG_SMT_EXPELL;
 #endif
+#ifdef CONFIG_SCHED_STEAL
+	root_task_group.steal_task = TG_STEAL_NO;
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	root_task_group.rt_se = (struct sched_rt_entity **)ptr;
 	ptr += nr_cpu_ids * sizeof(void **);
@@ -8718,6 +8721,20 @@ static void sched_free_group(struct task_group *tg)
 	kmem_cache_free(task_group_cache, tg);
 }

+#ifdef CONFIG_SCHED_STEAL
+static void sched_change_steal_group(struct task_struct *tsk, struct task_group *tg)
+{
+	struct sched_entity *se = &tsk->se;
+
+	se->steal_task = tg->steal_task;
+}
+
+static inline void tg_init_steal(struct task_group *tg, struct task_group *ptg)
+{
+	tg->steal_task = ptg->steal_task;
+}
+#endif
+
 #ifdef CONFIG_BPF_SCHED
 static inline void tg_init_tag(struct task_group *tg, struct task_group *ptg)
 {
@@ -8745,6 +8762,10 @@ struct task_group *sched_create_group(struct task_group *parent)
 	if (!alloc_rt_sched_group(tg, parent))
 		goto err;

+#ifdef CONFIG_SCHED_STEAL
+	tg_init_steal(tg, parent);
+#endif
+
 #ifdef CONFIG_BPF_SCHED
 	tg_init_tag(tg, parent);
 #endif
@@ -8820,6 +8841,10 @@ static void sched_change_group(struct task_struct *tsk, int type)
 	sched_change_qos_group(tsk, tg);
 #endif

+#ifdef CONFIG_SCHED_STEAL
+	sched_change_steal_group(tsk, tg);
+#endif
+
 #ifdef CONFIG_BPF_SCHED
 	/*
 	 * This function has cleared and restored the task status,
@@ -9784,6 +9809,81 @@ static inline s64 cpu_qos_read(struct cgroup_subsys_state *css,
 }
 #endif

+#ifdef CONFIG_SCHED_STEAL
+static inline s64 cpu_steal_task_read(struct cgroup_subsys_state *css,
+				      struct cftype *cft)
+{
+	return css_tg(css)->steal_task;
+}
+
+void sched_setsteal(struct task_struct *tsk, s64 steal_task)
+{
+	struct sched_entity *se = &tsk->se;
+	int queued, running, queue_flags =
+		DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	if (se->steal_task == steal_task)
+		return;
+
+	rq = task_rq_lock(tsk, &rf);
+
+	running = task_current(rq, tsk);
+	queued = task_on_rq_queued(tsk);
+
+	update_rq_clock(rq);
+	if (queued)
+		dequeue_task(rq, tsk, queue_flags);
+	if (running)
+		put_prev_task(rq, tsk);
+
+	se->steal_task = steal_task;
+
+	if (queued)
+		enqueue_task(rq, tsk, queue_flags);
+	if (running)
+		set_next_task(rq, tsk);
+
+	task_rq_unlock(rq, tsk, &rf);
+}
+
+int tg_change_steal(struct task_group *tg, void *data)
+{
+	struct css_task_iter it;
+	struct task_struct *tsk;
+	s64 steal_task = *(s64 *)data;
+	struct cgroup_subsys_state *css = &tg->css;
+
+	tg->steal_task = steal_task;
+
+	css_task_iter_start(css, 0, &it);
+	while ((tsk = css_task_iter_next(&it)))
+		sched_setsteal(tsk, steal_task);
+	css_task_iter_end(&it);
+
+	return 0;
+}
+
+static int cpu_steal_task_write(struct cgroup_subsys_state *css,
+				struct cftype *cftype, s64 steal_task)
+{
+	struct task_group *tg = css_tg(css);
+
+	if (!group_steal_used())
+		return -EPERM;
+
+	if (steal_task < TG_STEAL_NO || steal_task > TG_STEAL)
+		return -EINVAL;
+
+	rcu_read_lock();
+	walk_tg_tree_from(tg, tg_change_steal, tg_nop, (void *)(&steal_task));
+	rcu_read_unlock();
+
+	return 0;
+}
+#endif
+
 #ifdef CONFIG_BPF_SCHED
 void sched_settag(struct task_struct *tsk, s64 tag)
 {
@@ -9950,6 +10050,14 @@ static struct cftype cpu_legacy_files[] = {
 		.write_s64 = cpu_smt_expell_write,
 	},
 #endif
+#ifdef CONFIG_SCHED_STEAL
+	{
+		.name = "steal_task",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_s64 = cpu_steal_task_read,
+		.write_s64 = cpu_steal_task_write,
+	},
+#endif
 #ifdef CONFIG_BPF_SCHED
 	{
 		.name = "tag",
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 87c63fa5625e..ceea107a1dc8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -402,7 +402,6 @@ struct cfs_bandwidth {
 #endif
 };

-
 #ifdef CONFIG_QOS_SCHED_SMART_GRID
 #define AD_LEVEL_MAX	8

@@ -497,7 +496,13 @@ struct task_group {
 #else
 	KABI_RESERVE(2)
 #endif
+
+#ifdef CONFIG_SCHED_STEAL
+	KABI_USE(3, int steal_task)
+#else
 	KABI_RESERVE(3)
+#endif
+
 #if defined(CONFIG_QOS_SCHED_SMART_GRID) && !defined(__GENKSYMS__)
 	KABI_USE(4, struct auto_affinity *auto_affinity)
 #else
@@ -505,6 +510,18 @@ struct task_group {
 #endif
 };

+#ifdef CONFIG_SCHED_STEAL
+enum tg_steal_task {
+	TG_STEAL_NO	= 0,
+	TG_STEAL	= 1,
+};
+
+static inline bool is_tg_steal(int steal_task)
+{
+	return steal_task == TG_STEAL;
+}
+#endif
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 #define ROOT_TASK_GROUP_LOAD	NICE_0_LOAD
