From: Valentin Schneider <vschneid@redhat.com>

mainline inclusion
from mainline-v6.18-rc1
commit 7fc2d14392475e368a2a7be458aba4eecdf2439b
category: feature
bugzilla: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

Implement throttle_cfs_rq_work() task work which gets executed on task's
ret2user path where the task is dequeued and marked as throttled.

Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Aaron Lu <ziqianlu@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Chengming Zhou <chengming.zhou@linux.dev>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Tested-by: Matteo Martelli <matteo.martelli@codethink.co.uk>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://lore.kernel.org/r/20250829081120.806-3-ziqianlu@bytedance.com
Conflicts:
	kernel/sched/fair.c
[Context differences.]
Signed-off-by: Wang Tao <wangtao554@huawei.com>
---
 kernel/sched/fair.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ad79472212d1..b17fee3cbd33 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5893,8 +5893,54 @@ static inline int throttled_lb_pair(struct task_group *tg,
 	       throttled_hierarchy(dest_cfs_rq);
 }
 
+static inline bool task_is_throttled(struct task_struct *p)
+{
+	return cfs_bandwidth_used() && p->throttled;
+}
+
+static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags);
 static void throttle_cfs_rq_work(struct callback_head *work)
 {
+	struct task_struct *p = container_of(work, struct task_struct, sched_throttle_work);
+	struct sched_entity *se;
+	struct cfs_rq *cfs_rq;
+	struct rq_flags rf;
+	struct rq *rq;
+
+	SCHED_WARN_ON(p != current);
+	p->sched_throttle_work.next = &p->sched_throttle_work;
+
+	/*
+	 * If task is exiting, then there won't be a return to userspace, so we
+	 * don't have to bother with any of this.
+	 */
+	if ((p->flags & PF_EXITING))
+		return;
+
+	rq = task_rq_lock(p, &rf);
+	se = &p->se;
+	cfs_rq = cfs_rq_of(se);
+
+	/* Raced, forget */
+	if (p->sched_class != &fair_sched_class)
+		goto out;
+
+	/*
+	 * If not in limbo, then either replenish has happened or this
+	 * task got migrated out of the throttled cfs_rq, move along.
+	 */
+	if (!cfs_rq->throttle_count)
+		goto out;
+
+	update_rq_clock(rq);
+	SCHED_WARN_ON(p->throttled || !list_empty(&p->throttle_node));
+	dequeue_task_fair(rq, p, DEQUEUE_SLEEP);
+	list_add(&p->throttle_node, &cfs_rq->throttled_limbo_list);
+	p->throttled = true;
+	resched_curr(rq);
+
+out:
+	task_rq_unlock(rq, p, &rf);
 }
 
 void init_cfs_throttle_work(struct task_struct *p)
@@ -5934,6 +5980,26 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
 	return 0;
 }
 
+static inline bool task_has_throttle_work(struct task_struct *p)
+{
+	return p->sched_throttle_work.next != &p->sched_throttle_work;
+}
+
+static inline void task_throttle_setup_work(struct task_struct *p)
+{
+	if (task_has_throttle_work(p))
+		return;
+
+	/*
+	 * Kthreads and exiting tasks don't return to userspace, so adding the
+	 * work is pointless
+	 */
+	if ((p->flags & (PF_EXITING | PF_KTHREAD)))
+		return;
+
+	task_work_add(p, &p->sched_throttle_work, TWA_RESUME);
+}
+
 static int tg_throttle_down(struct task_group *tg, void *data)
 {
 	struct rq *rq = data;
@@ -6880,6 +6946,8 @@ static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
 static inline void sync_throttle(struct task_group *tg, int cpu) {}
 static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static void task_throttle_setup_work(struct task_struct *p) {}
+static bool task_is_throttled(struct task_struct *p) { return false; }
 
 static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
 {
-- 
2.18.0
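
A note for readers unfamiliar with the sentinel trick in the hunks above:
throttle_cfs_rq_work() resets the work's ->next pointer to point back at the
work itself, and task_has_throttle_work() treats exactly that self-reference
as "no work queued", so task_throttle_setup_work() only calls task_work_add()
once per throttle cycle. The stand-alone user-space sketch below mimics that
pattern. It is illustration only, not kernel code: every name in it
(work_init(), work_add(), run_pending_work(), the single-slot "pending"
variable) is invented here and simplified compared to the kernel's real
task_work machinery.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct callback_head {
            struct callback_head *next;
            void (*func)(struct callback_head *);
    };

    /* "not queued" is encoded as work->next pointing at the work itself */
    static void work_init(struct callback_head *work,
                          void (*fn)(struct callback_head *))
    {
            work->next = work;
            work->func = fn;
    }

    static bool work_pending(struct callback_head *work)
    {
            return work->next != work;
    }

    /* single-slot stand-in for the per-task pending work list */
    static struct callback_head *pending;

    static void work_add(struct callback_head *work)
    {
            if (work_pending(work))
                    return;                 /* already queued */
            work->next = NULL;              /* anything != work means "queued" */
            pending = work;
    }

    /* stand-in for the return-to-userspace point that runs queued work */
    static void run_pending_work(void)
    {
            struct callback_head *work = pending;

            if (!work)
                    return;
            pending = NULL;
            work->func(work);
    }

    static void throttle_work_fn(struct callback_head *work)
    {
            /* like throttle_cfs_rq_work(): restore the sentinel first */
            work->next = work;
            printf("work ran; pending again? %d\n", work_pending(work));
    }

    int main(void)
    {
            struct callback_head w;

            work_init(&w, throttle_work_fn);
            printf("pending after init: %d\n", work_pending(&w));  /* 0 */
            work_add(&w);
            printf("pending after add:  %d\n", work_pending(&w));  /* 1 */
            run_pending_work();             /* simulated ret2user */
            return 0;
    }

The sentinel avoids a separate "queued" flag in task_struct: resetting it at
the top of the callback, as the patch does, lets a later throttle re-arm the
work even while the current invocation is still running.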