From: Peter Zijlstra <peterz@infradead.org>

mainline inclusion
from mainline-v6.12-rc1
commit 3b3dd89b8bb0f03657859c22c86c19224f778638
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IDC9YK
Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commi...

--------------------------------

Implement pick_next_task_fair() in terms of pick_task_fair() to
de-duplicate the pick loop. More importantly, this makes all the pick
loops use the state-invariant form, which is useful to introduce
further re-try conditions in later patches.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Link: https://lkml.kernel.org/r/20240727105028.725062368@infradead.org
Conflicts:
	kernel/sched/fair.c
[The conflict is with 926b9b0cd97e ("sched: Throttle qos cfs_rq when
current cpu is running online task"): some parts are moved from
pick_next_task_fair() to pick_task_fair(), but they contain an
additional hulk inclusion, so they are moved together with this
mainline patch.]
Signed-off-by: Zicheng Qu <quzicheng@huawei.com>
---
 kernel/sched/fair.c | 79 +++++++++++---------------------------------------
 1 file changed, 18 insertions(+), 61 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 53e9c568732b..508fbe8cbcf8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10177,7 +10177,6 @@ void qos_smt_check_need_resched(void)
 }
 #endif
 
-#ifdef CONFIG_SMP
 static struct task_struct *pick_task_fair(struct rq *rq)
 {
 	struct sched_entity *se;
@@ -10189,7 +10188,7 @@ static struct task_struct *pick_task_fair(struct rq *rq)
 		return NULL;
 
 	do {
-		/* When we pick for a remote RQ, we'll not have done put_prev_entity() */
+		/* Might not have done put_prev_entity() */
 		if (cfs_rq->curr && cfs_rq->curr->on_rq)
 			update_curr(cfs_rq);
 
@@ -10198,11 +10197,20 @@ static struct task_struct *pick_task_fair(struct rq *rq)
 
 		se = pick_next_entity(cfs_rq);
 		cfs_rq = group_cfs_rq(se);
+#ifdef CONFIG_QOS_SCHED
+		if (check_qos_cfs_rq(cfs_rq)) {
+			cfs_rq = &rq->cfs;
+			WARN(cfs_rq->nr_running == 0,
+			     "rq->nr_running=%u, cfs_rq->idle_h_nr_running=%u\n",
+			     rq->nr_running, cfs_rq->idle_h_nr_running);
+			if (unlikely(!cfs_rq->nr_running))
+				return NULL;
+		}
+#endif
 	} while (cfs_rq);
 
 	return task_of(se);
 }
-#endif
 
 struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
@@ -10230,8 +10238,10 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	}
 #endif
 
-	if (!sched_fair_runnable(rq))
+	p = pick_task_fair(rq);
+	if (!p)
 		goto idle;
+	se = &p->se;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (!prev || prev->sched_class != &fair_sched_class) {
@@ -10249,62 +10259,14 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	 *
 	 * Therefore attempt to avoid putting and setting the entire cgroup
 	 * hierarchy, only change the part that actually changes.
-	 */
-
-	do {
-		struct sched_entity *curr = cfs_rq->curr;
-
-		/*
-		 * Since we got here without doing put_prev_entity() we also
-		 * have to consider cfs_rq->curr. If it is still a runnable
-		 * entity, update_curr() will update its vruntime, otherwise
-		 * forget we've ever seen it.
-		 */
-		if (curr) {
-			if (curr->on_rq)
-				update_curr(cfs_rq);
-			else
-				curr = NULL;
-
-			/*
-			 * This call to check_cfs_rq_runtime() will do the
-			 * throttle and dequeue its entity in the parent(s).
-			 * Therefore the nr_running test will indeed
-			 * be correct.
-			 */
-			if (unlikely(check_cfs_rq_runtime(cfs_rq))) {
-				cfs_rq = &rq->cfs;
-
-				if (!cfs_rq->nr_running)
-					goto idle;
-
-				goto simple;
-			}
-		}
-
-		se = pick_next_entity(cfs_rq);
-		cfs_rq = group_cfs_rq(se);
-#ifdef CONFIG_QOS_SCHED
-		if (check_qos_cfs_rq(cfs_rq)) {
-			cfs_rq = &rq->cfs;
-			WARN(cfs_rq->nr_running == 0,
-			     "rq->nr_running=%u, cfs_rq->idle_h_nr_running=%u\n",
-			     rq->nr_running, cfs_rq->idle_h_nr_running);
-			if (unlikely(!cfs_rq->nr_running))
-				return NULL;
-		}
-#endif
-	} while (cfs_rq);
-
-	p = task_of(se);
-
-	/*
+	 *
 	 * Since we haven't yet done put_prev_entity and if the selected task
 	 * is a different task than we started out with, try and touch the
 	 * least amount of cfs_rqs.
 	 */
 	if (prev != p) {
 		struct sched_entity *pse = &prev->se;
+		struct cfs_rq *cfs_rq;
 
 		while (!(cfs_rq = is_same_group(se, pse))) {
 			int se_depth = se->depth;
@@ -10358,13 +10320,8 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	if (prev)
 		put_prev_task(rq, prev);
 
-	do {
-		se = pick_next_entity(cfs_rq);
-		set_next_entity(cfs_rq, se);
-		cfs_rq = group_cfs_rq(se);
-	} while (cfs_rq);
-
-	p = task_of(se);
+	for_each_sched_entity(se)
+		set_next_entity(cfs_rq_of(se), se);
 
 done: __maybe_unused;
 #ifdef CONFIG_SMP
--
2.34.1
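
Editor's note: for readers unfamiliar with the hierarchy walk, the following
stand-alone user-space model sketches the state-invariant pick loop that
pick_task_fair() implements. It is illustrative only and not part of the
patch; struct members, pick_task() and the two-level hierarchy in main() are
simplified stand-ins, not the kernel's actual definitions. The point it
demonstrates is that the walk mutates no scheduler state, so a caller can
simply re-run it after handling a retry condition (throttling, QoS), which
is what the later re-try patches build on.

/*
 * Simplified model of the state-invariant pick loop: descend the
 * cfs_rq hierarchy one level at a time until the picked entity is a
 * task, i.e. has no group run-queue below it.
 */
#include <stdio.h>
#include <stddef.h>

struct cfs_rq;

struct sched_entity {
	const char *name;
	struct cfs_rq *my_q;		/* non-NULL only for group entities */
};

struct cfs_rq {
	struct sched_entity *next;	/* entity the pick policy would choose */
	unsigned int nr_running;
};

/* Stand-in for pick_next_entity(): the chosen entity at this level. */
static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
{
	return cfs_rq->next;
}

/* Stand-in for group_cfs_rq(): NULL when se is a task entity. */
static struct cfs_rq *group_cfs_rq(struct sched_entity *se)
{
	return se->my_q;
}

/* Model of pick_task_fair(): loop until a task-level entity is found. */
static struct sched_entity *pick_task(struct cfs_rq *cfs_rq)
{
	struct sched_entity *se;

	if (!cfs_rq->nr_running)
		return NULL;		/* nothing runnable: go idle */

	do {
		se = pick_next_entity(cfs_rq);
		cfs_rq = group_cfs_rq(se);
	} while (cfs_rq);

	return se;
}

int main(void)
{
	/* Two-level hierarchy: root cfs_rq -> group entity -> task. */
	struct sched_entity task  = { .name = "task",  .my_q = NULL };
	struct cfs_rq child       = { .next = &task,   .nr_running = 1 };
	struct sched_entity group = { .name = "group", .my_q = &child };
	struct cfs_rq root        = { .next = &group,  .nr_running = 1 };

	struct sched_entity *se = pick_task(&root);

	printf("picked: %s\n", se ? se->name : "(idle)");
	return 0;
}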