From: Tejun Heo <tj@kernel.org> mainline inclusion from mainline-v6.12-rc1 commit b999e365c2982dbd50f01fec520215d3c61ea2aa category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IDC9YK Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=b999e365c2982dbd50f01fec520215d3c61ea2aa -------------------------------- scx_next_task_picked() is used by sched_ext to notify the BPF scheduler when a CPU is taken away by a task dispatched from a higher priority sched_class so that the BPF scheduler can, e.g., punt the task[s] which was running or were waiting for the CPU to other CPUs. Replace the sched_ext specific hook scx_next_task_picked() with a new sched_class operation switch_class(). The changes are straightforward and the code looks better afterwards. However, when !CONFIG_SCHED_CLASS_EXT, this ends up adding an unused hook which is unlikely to be useful to other sched_classes. For further discussion on this subject, please refer to the following: http://lkml.kernel.org/r/CAHk-=wjFPLqo7AXu8maAGEGnOy6reUg-F4zzFhVB0Kyu22h7pw@mail.gmail.com 
Signed-off-by: Tejun Heo <tj@kernel.org> Suggested-by: Linus Torvalds <torvalds@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Zicheng Qu <quzicheng@huawei.com> --- kernel/sched/core.c | 5 ++++- kernel/sched/ext.c | 20 ++++++++++---------- kernel/sched/ext.h | 4 ---- kernel/sched/sched.h | 2 ++ 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 68390280e43f..5794f32c738c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5954,7 +5954,10 @@ __pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) for_each_active_class(class) { p = class->pick_next_task(rq); if (p) { - scx_next_task_picked(rq, p, class); + const struct sched_class *prev_class = prev->sched_class; + + if (class != prev_class && prev_class->switch_class) + prev_class->switch_class(rq, p); return p; } } diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c index 15c6b36d66cf..c4ba8c605cfa 100644 --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2754,10 +2754,9 @@ preempt_reason_from_class(const struct sched_class *class) return SCX_CPU_PREEMPT_UNKNOWN; } -void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active) +static void switch_class_scx(struct rq *rq, struct task_struct *next) { - lockdep_assert_rq_held(rq); + const struct sched_class *next_class = next->sched_class; if (!scx_enabled()) return; @@ -2774,12 +2773,11 @@ void scx_next_task_picked(struct rq *rq, struct task_struct *p, /* * The callback is conceptually meant to convey that the CPU is no - * longer under the control of SCX. Therefore, don't invoke the - * callback if the CPU is is staying on SCX, or going idle (in which - * case the SCX scheduler has actively decided not to schedule any - * tasks on the CPU). + * longer under the control of SCX. 
Therefore, don't invoke the callback + * if the next class is below SCX (in which case the BPF scheduler has + * actively decided not to schedule any tasks on the CPU). */ - if (likely(active >= &ext_sched_class)) + if (sched_class_above(&ext_sched_class, next_class)) return; /* @@ -2794,8 +2792,8 @@ void scx_next_task_picked(struct rq *rq, struct task_struct *p, if (!rq->scx.cpu_released) { if (SCX_HAS_OP(cpu_release)) { struct scx_cpu_release_args args = { - .reason = preempt_reason_from_class(active), - .task = p, + .reason = preempt_reason_from_class(next_class), + .task = next, }; SCX_CALL_OP(SCX_KF_CPU_RELEASE, @@ -3501,6 +3499,8 @@ DEFINE_SCHED_CLASS(ext) = { .put_prev_task = put_prev_task_scx, .set_next_task = set_next_task_scx, + .switch_class = switch_class_scx, + #ifdef CONFIG_SMP .balance = balance_scx, .select_task_rq = select_task_rq_scx, diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h index c41d742b5d62..bf6f2cfa49d5 100644 --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -33,8 +33,6 @@ static inline bool task_on_scx(const struct task_struct *p) return scx_enabled() && p->sched_class == &ext_sched_class; } -void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active); void scx_tick(struct rq *rq); void init_scx_entity(struct sched_ext_entity *scx); void scx_pre_fork(struct task_struct *p); @@ -82,8 +80,6 @@ bool scx_prio_less(const struct task_struct *a, const struct task_struct *b, #define scx_enabled() false #define scx_switched_all() false -static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active) {} static inline void scx_tick(struct rq *rq) {} static inline void scx_pre_fork(struct task_struct *p) {} static inline int scx_fork(struct task_struct *p) { return 0; } diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 213e94c0dc91..f0e6faf20a2b 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2524,6 +2524,8 @@ struct 
sched_class { void (*put_prev_task)(struct rq *rq, struct task_struct *p); void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first); + void (*switch_class)(struct rq *rq, struct task_struct *next); + #ifdef CONFIG_SMP int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags); -- 2.34.1