From: Tejun Heo <tj@kernel.org> mainline inclusion from mainline-v6.12-rc1 commit 744d83601ffa11ebbca52c0ec0b039e269d05054 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDC9YK Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=744d83601ffa11ebbca52c0ec0b039e269d05054 -------------------------------- For flexibility, sched_ext allows the BPF scheduler to select the CPU to execute a task on at dispatch time so that e.g. a queue can be shared across multiple CPUs. To enable this, the dispatch path is executed from balance() so that a dispatched task can be hot-migrated to its target CPU. This means that sched_ext needs its balance() method invoked before every pick_next_task() even when the CPU is waking up from SCHED_IDLE. for_balance_class_range() defined in kernel/sched/ext.h implements this selective iteration promotion. However, the indirection obfuscates more than helps. Open code the iteration promotion in put_prev_task_balance() and remove for_balance_class_range(). No functional changes intended. 
Signed-off-by: Tejun Heo <tj@kernel.org> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Acked-by: David Vernet <void@manifault.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Zicheng Qu <quzicheng@huawei.com> --- kernel/sched/core.c | 14 +++++++++++++- kernel/sched/ext.h | 9 --------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 233053ffc7c1..d9584815c0c7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5890,7 +5890,19 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { #ifdef CONFIG_SMP + const struct sched_class *start_class = prev->sched_class; const struct sched_class *class; + +#ifdef CONFIG_SCHED_CLASS_EXT + /* + * SCX requires a balance() call before every pick_next_task() including + * when waking up from SCHED_IDLE. If @start_class is below SCX, start + * from SCX instead. + */ + if (sched_class_above(&ext_sched_class, start_class)) + start_class = &ext_sched_class; +#endif + /* * We must do the balancing pass before put_prev_task(), such * that when we release the rq->lock the task is in the same @@ -5899,7 +5911,7 @@ static void put_prev_task_balance(struct rq *rq, struct task_struct *prev, * We can terminate the balance pass as soon as we know there is * a runnable task of @class priority or higher. 
*/ - for_balance_class_range(class, prev->sched_class, &idle_sched_class) { + for_active_class_range(class, start_class, &idle_sched_class) { if (class->balance(rq, prev, rf)) break; } diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index 229007693504..1d7837bdfaba 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -68,14 +68,6 @@ static inline const struct sched_class *next_active_class(const struct sched_cla #define for_each_active_class(class) \ for_active_class_range(class, __sched_class_highest, __sched_class_lowest) -/* - * SCX requires a balance() call before every pick_next_task() call including - * when waking up from idle. - */ -#define for_balance_class_range(class, prev_class, end_class) \ - for_active_class_range(class, (prev_class) > &ext_sched_class ? \ - &ext_sched_class : (prev_class), (end_class)) - #ifdef CONFIG_SCHED_CORE bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, bool in_fi); @@ -100,7 +92,6 @@ static inline bool task_on_scx(const struct task_struct *p) { return false; } static inline void init_sched_ext_class(void) {} #define for_each_active_class for_each_class -#define for_balance_class_range for_class_range #endif /* CONFIG_SCHED_CLASS_EXT */ -- 2.34.1