From: Chengming Zhou <zhouchengming@bytedance.com>
mainline inclusion
from mainline-v6.1-rc1
commit c530a3c716b963625e43aa915e0de6b4d1ce8ad9
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8BCV4
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
We don't want to wake periodic aggregation work back up if the task change is the aggregation worker itself going to sleep, or we'll ping-pong forever.
Previously, we would call psi_task_change() from psi_dequeue() when a task was going to sleep, so this check was put in psi_task_change().
But commit 4117cebf1a9f ("psi: Optimize task switch inside shared cgroups") deferred task sleep handling to psi_task_switch(), so the sleep path no longer goes through psi_task_change() at all.
So this patch moves the check to psi_task_switch().
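To make the failure mode concrete, here is a minimal userspace sketch of the ping-pong described above. It is not kernel code: record_task_change(), aggregation_worker() and the task_is_aggregation_worker flag are invented stand-ins for psi_task_switch(), psi_avgs_work() and the wq_worker_last_func() check; the actual fix is only the diff below.

/*
 * Minimal userspace sketch of the ping-pong described above; NOT kernel
 * code.  record_task_change(), aggregation_worker() and the
 * task_is_aggregation_worker flag are stand-ins for psi_task_switch(),
 * psi_avgs_work() and the wq_worker_last_func() check in the real patch.
 */
#include <stdbool.h>
#include <stdio.h>

static bool clock_running;	/* stands in for the periodic aggregation clock */

/* Called whenever a task changes state, e.g. goes to sleep. */
static void record_task_change(bool task_is_aggregation_worker)
{
	/*
	 * Task activity normally re-arms the periodic aggregation, but
	 * not when the activity is the aggregation worker itself going
	 * to sleep, or its own sleep would wake the clock right back up
	 * and the two would ping-pong forever.
	 */
	bool wake_clock = !task_is_aggregation_worker;

	if (wake_clock && !clock_running) {
		clock_running = true;
		printf("aggregation clock re-armed\n");
	}
}

/* One period of the aggregation worker: aggregate, then go idle. */
static void aggregation_worker(void)
{
	printf("aggregating...\n");
	clock_running = false;		/* no recent activity: shut the clock off */
	record_task_change(true);	/* the worker itself now goes to sleep */
}

int main(void)
{
	clock_running = true;
	aggregation_worker();		/* clock must stay off here */
	record_task_change(false);	/* an ordinary task change re-arms it */
	printf("clock_running = %d\n", clock_running);
	return 0;
}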
Fixes: 4117cebf1a9f ("psi: Optimize task switch inside shared cgroups")
Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Link: https://lore.kernel.org/r/20220825164111.29534-2-zhouchengming@bytedance.com
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 kernel/sched/psi.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index fd4c9847219c..0fa8c3fdec33 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -818,7 +818,6 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 {
 	int cpu = task_cpu(task);
 	struct psi_group *group;
-	bool wake_clock = true;
 	void *iter = NULL;
 	u64 now;
 
@@ -828,19 +827,9 @@ void psi_task_change(struct task_struct *task, int clear, int set)
 	psi_flags_change(task, clear, set);
 
 	now = cpu_clock(cpu);
-	/*
-	 * Periodic aggregation shuts off if there is a period of no
-	 * task changes, so we wake it back up if necessary. However,
-	 * don't do this if the task change is the aggregation worker
-	 * itself going to sleep, or we'll ping-pong forever.
-	 */
-	if (unlikely((clear & TSK_RUNNING) &&
-		     (task->flags & PF_WQ_WORKER) &&
-		     wq_worker_last_func(task) == psi_avgs_work))
-		wake_clock = false;
 
 	while ((group = iterate_groups(task, &iter)))
-		psi_group_change(group, cpu, clear, set, now, wake_clock);
+		psi_group_change(group, cpu, clear, set, now, true);
 }
 
 void psi_task_switch(struct task_struct *prev, struct task_struct *next,
@@ -877,6 +866,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 
 	if (prev->pid) {
 		int clear = TSK_ONCPU, set = 0;
+		bool wake_clock = true;
 
 		/*
 		 * When we're going to sleep, psi_dequeue() lets us
@@ -890,13 +880,23 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 				clear |= TSK_MEMSTALL_RUNNING;
 			if (prev->in_iowait)
 				set |= TSK_IOWAIT;
+
+			/*
+			 * Periodic aggregation shuts off if there is a period of no
+			 * task changes, so we wake it back up if necessary. However,
+			 * don't do this if the task change is the aggregation worker
+			 * itself going to sleep, or we'll ping-pong forever.
+			 */
+			if (unlikely((prev->flags & PF_WQ_WORKER) &&
+				     wq_worker_last_func(prev) == psi_avgs_work))
+				wake_clock = false;
 		}
 
 		psi_flags_change(prev, clear, set);
 
 		iter = NULL;
 		while ((group = iterate_groups(prev, &iter)) && group != common)
-			psi_group_change(group, cpu, clear, set, now, true);
+			psi_group_change(group, cpu, clear, set, now, wake_clock);
 
 		/*
 		 * TSK_ONCPU is handled up to the common ancestor. If we're tasked
@@ -905,7 +905,7 @@ void psi_task_switch(struct task_struct *prev, struct task_struct *next,
 		if (sleep) {
 			clear &= ~TSK_ONCPU;
 			for (; group; group = iterate_groups(prev, &iter))
-				psi_group_change(group, cpu, clear, set, now, true);
+				psi_group_change(group, cpu, clear, set, now, wake_clock);
 		}
 	}
 }