From: Peter Zijlstra <peterz@infradead.org> mainline inclusion from mainline-v6.12-rc1 commit aef6987d89544d63a47753cf3741cabff0b5574c category: feature bugzilla: https://atomgit.com/src-openeuler/kernel/issues/15498 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------- In the absence of an explicit cgroup slice configureation, make mixed slice length work with cgroups by propagating the min_slice up the hierarchy. This ensures the cgroup entity gets timely service to service its entities that have this timing constraint set on them. Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Tested-by: Valentin Schneider <vschneid@redhat.com> Link: https://lkml.kernel.org/r/20240727105030.948188417@infradead.org Conflicts: kernel/sched/fair.c include/linux/sched.h [a trival context conflict] Signed-off-by: Chen Jinghuang <chenjinghuang2@huawei.com> --- include/linux/sched.h | 1 + kernel/sched/fair.c | 58 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 66a3d77a29e7..b23143162c01 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -582,6 +582,7 @@ struct sched_entity { struct rb_node run_node; u64 deadline; u64 min_vruntime; + u64 min_slice; struct list_head group_node; unsigned int on_rq; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3381780841c8..34142bc8fa37 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1060,6 +1060,21 @@ int entity_eligible(struct cfs_rq *cfs_rq, struct sched_entity *se) return vruntime_eligible(cfs_rq, se->vruntime); } +static inline u64 cfs_rq_min_slice(struct cfs_rq *cfs_rq) +{ + struct sched_entity *root = __pick_root_entity(cfs_rq); + struct sched_entity *curr = cfs_rq->curr; + u64 min_slice = ~0ULL; + + if (curr && curr->on_rq) + min_slice = curr->slice; + + if (root) + min_slice = min(min_slice, root->min_slice); + + return min_slice; +} + static inline bool __entity_less(struct rb_node *a, const struct rb_node *b) { return entity_before(__node_2_se(a), __node_2_se(b)); @@ -1075,19 +1090,35 @@ static inline void __min_vruntime_update(struct sched_entity *se, struct rb_node } } +static inline void __min_slice_update(struct sched_entity *se, struct rb_node *node) +{ + if (node) { + struct sched_entity *rse = __node_2_se(node); + + if (rse->min_slice < se->min_slice) + se->min_slice = rse->min_slice; + } +} + /* * se->min_vruntime = min(se->vruntime, {left,right}->min_vruntime) */ static inline bool min_vruntime_update(struct sched_entity *se, bool exit) { u64 old_min_vruntime = se->min_vruntime; + u64 old_min_slice = se->min_slice; struct rb_node *node = &se->run_node; se->min_vruntime = se->vruntime; __min_vruntime_update(se, node->rb_right); __min_vruntime_update(se, node->rb_left); - return se->min_vruntime == old_min_vruntime; + se->min_slice = se->slice; + __min_slice_update(se, node->rb_right); + __min_slice_update(se, node->rb_left); + + return se->min_vruntime == old_min_vruntime && + se->min_slice == old_min_slice; } RB_DECLARE_CALLBACKS(static, min_vruntime_cb, struct sched_entity, @@ -1100,6 +1131,7 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) { sum_w_vruntime_add(cfs_rq, se); se->min_vruntime = se->vruntime; + se->min_slice = se->slice; rb_add_augmented_cached(&se->run_node, &cfs_rq->tasks_timeline, __entity_less, &min_vruntime_cb); } @@ -7646,6 +7678,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) #endif int task_new = !(flags & ENQUEUE_WAKEUP); unsigned int prev_nr = rq->cfs.h_nr_running; + u64 slice = 0; /* * The code below (indirectly) updates schedutil which looks at @@ -7667,7 +7700,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) if (se->on_rq) break; cfs_rq = cfs_rq_of(se); + + /* + * Basically set the slice of group entries to the min_slice of + * their respective cfs_rq. This ensures the group can service + * its entities in the desired time-frame. + */ + if (slice) { + se->slice = slice; + se->custom_slice = 1; + } enqueue_entity(cfs_rq, se, flags); + slice = cfs_rq_min_slice(cfs_rq); cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; @@ -7692,6 +7736,9 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) se_update_runnable(se); update_cfs_group(se); + se->slice = slice; + slice = cfs_rq_min_slice(cfs_rq); + cfs_rq->h_nr_running++; cfs_rq->idle_h_nr_running += idle_h_nr_running; #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER @@ -7756,11 +7803,15 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) int idle_h_nr_running = 0; int h_nr_running = 0; struct cfs_rq *cfs_rq; + u64 slice = 0; if (entity_is_task(se)) { p = task_of(se); h_nr_running = 1; idle_h_nr_running = task_has_idle_policy(p); + } else { + cfs_rq = group_cfs_rq(se); + slice = cfs_rq_min_slice(cfs_rq); } for_each_sched_entity(se) { @@ -7782,6 +7833,8 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) { + slice = cfs_rq_min_slice(cfs_rq); + /* Avoid re-evaluating load for this entity: */ se = parent_entity(se); /* @@ -7802,6 +7855,9 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags) se_update_runnable(se); update_cfs_group(se); + se->slice = slice; + slice = cfs_rq_min_slice(cfs_rq); + cfs_rq->h_nr_running -= h_nr_running; cfs_rq->idle_h_nr_running -= idle_h_nr_running; #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER -- 2.34.1