From: Tejun Heo <tj@kernel.org>

mainline inclusion
from mainline-v6.12-rc1
commit 96fd6c65efc652e9054163e6d3cf254b9e5b93d2
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IDC9YK
Reference: https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commi...

--------------------------------

RT, DL, thermal and irq load and utilization metrics need to be decayed
and updated periodically and before consumption to keep the numbers
reasonable. This is currently done from __update_blocked_others() as a
part of the fair class load balance path. Let's factor it out to
update_other_load_avgs(). Pure refactor. No functional changes.

This will be used by the new BPF extensible scheduling class to ensure
that the above metrics are properly maintained.

v2: Refreshed on top of tip:sched/core.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>

Conflicts:
	kernel/sched/fair.c
	kernel/sched/sched.h
[Use the logic in __update_blocked_others() of this patch and fix the
compile error for 'update_other_load_avgs()' in kernel/sched/syscalls.c:
the mainline patch 96fd6c65efc6 ("sched: Factor out
update_other_load_avgs() from __update_blocked_others()") uses
'arch_scale_hw_pressure()' and 'update_hw_load_avg()' inside
'update_other_load_avgs()', but the current version does not have them,
so use other functions instead.]
Signed-off-by: Zicheng Qu <quzicheng@huawei.com>
---
 kernel/sched/fair.c     | 16 +++-------------
 kernel/sched/sched.h    |  7 ++++++-
 kernel/sched/syscalls.c | 19 +++++++++++++++++++
 3 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3db34740ad2d..3e316c5cdbd0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -11277,28 +11277,18 @@ static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {
 
 static bool __update_blocked_others(struct rq *rq, bool *done)
 {
-	const struct sched_class *curr_class;
-	u64 now = rq_clock_pelt(rq);
-	unsigned long thermal_pressure;
-	bool decayed;
+	bool updated;
 
 	/*
 	 * update_load_avg() can call cpufreq_update_util(). Make sure that RT,
 	 * DL and IRQ signals have been updated before updating CFS.
 	 */
-	curr_class = rq->curr->sched_class;
-
-	thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
-
-	decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
-		  update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
-		  update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) |
-		  update_irq_load_avg(rq, 0);
+	updated = update_other_load_avgs(rq);
 
 	if (others_have_blocked(rq))
 		*done = false;
 
-	return decayed;
+	return updated;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b3a0c5e2cb09..cba4b475be99 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3235,6 +3235,8 @@ static inline unsigned long capacity_orig_of(int cpu)
 	return cpu_rq(cpu)->cpu_capacity_orig;
 }
 
+bool update_other_load_avgs(struct rq *rq);
+
 unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
 				 unsigned long *min,
 				 unsigned long *max);
@@ -3277,7 +3279,10 @@ static inline unsigned long cpu_util_rt(struct rq *rq)
 {
 	return READ_ONCE(rq->avg_rt.util_avg);
 }
-#endif
+
+#else /* !CONFIG_SMP */
+static inline bool update_other_load_avgs(struct rq *rq) { return false; }
+#endif /* CONFIG_SMP */
 
 #ifdef CONFIG_UCLAMP_TASK
 unsigned long uclamp_eff_value(struct task_struct *p, enum uclamp_id clamp_id);
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index f3b3acec7a3d..ebf339797d8b 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -232,6 +232,25 @@ int sched_core_idle_cpu(int cpu)
 #endif
 
 #ifdef CONFIG_SMP
+/*
+ * Load avg and utilization metrics need to be updated periodically and before
+ * consumption. This function updates the metrics for all subsystems except for
+ * the fair class. @rq must be locked and have its clock updated.
+ */
+bool update_other_load_avgs(struct rq *rq)
+{
+	u64 now = rq_clock_pelt(rq);
+	const struct sched_class *curr_class = rq->curr->sched_class;
+	unsigned long hw_pressure = arch_scale_thermal_pressure(cpu_of(rq));
+
+	lockdep_assert_rq_held(rq);
+
+	return update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
+	       update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
+	       update_thermal_load_avg(now, rq, hw_pressure) |
+	       update_irq_load_avg(rq, 0);
+}
+
 /*
  * This function computes an effective utilization for the given CPU, to be
  * used for frequency selection given the linear relation: f = u * f_max.
-- 
2.34.1
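
For illustration only, not part of the patch above: a minimal sketch of how a
non-fair balance path, such as the BPF extensible scheduling class mentioned
in the changelog, could use the factored-out helper to keep the RT, DL,
thermal and IRQ averages fresh. example_balance_path() is an invented name;
update_other_load_avgs(), update_rq_clock() and lockdep_assert_rq_held() are
the interfaces assumed from this patch and the existing kernel.

/*
 * Hypothetical sketch, not part of this patch. A non-fair balance path
 * could refresh the non-CFS PELT signals before picking a task, now that
 * the update logic is callable from outside fair.c.
 */
static int example_balance_path(struct rq *rq)
{
	/* update_other_load_avgs() expects rq to be locked ... */
	lockdep_assert_rq_held(rq);

	/* ... and its clock to be current before the signals are decayed. */
	update_rq_clock(rq);

	/* Decay/refresh RT, DL, thermal and IRQ load/util averages. */
	update_other_load_avgs(rq);

	/* ... class-specific task selection would follow here ... */
	return 0;
}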