From: Steve Sistare <steven.sistare@oracle.com>
hulk inclusion
category: feature
bugzilla: 38261
CVE: NA
---------------------------
An overloaded CPU has more than 1 runnable task. When a CFS task wakes on a CPU, if h_nr_running transitions from 1 to more, then set the CPU in the cfs_overload_cpus bitmap. When a CFS task sleeps, if h_nr_running transitions from 2 to less, then clear the CPU in cfs_overload_cpus.
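To make the intended transitions concrete, here is a minimal, self-contained user-space sketch of the rule; the plain unsigned long mask and the enqueue()/dequeue() harness below are illustrative stand-ins, not kernel code, for the rq->cfs_overload_cpus sparsemask that this series attaches to the runqueue:

/*
 * Illustrative sketch only: a plain bitmask plays the role of the
 * rq->cfs_overload_cpus sparsemask used by the actual patch.
 */
#include <stdio.h>

static unsigned long cfs_overload_cpus;	/* bit n set => CPU n overloaded */

static void overload_set(int cpu)   { cfs_overload_cpus |=  (1UL << cpu); }
static void overload_clear(int cpu) { cfs_overload_cpus &= ~(1UL << cpu); }

/* One task becomes runnable: a 1 -> 2 transition marks the CPU overloaded. */
static void enqueue(int cpu, unsigned int *h_nr_running)
{
	unsigned int prev_nr = (*h_nr_running)++;

	if (prev_nr == 1)
		overload_set(cpu);
}

/* One task sleeps: a 2 -> 1 transition clears the overload bit. */
static void dequeue(int cpu, unsigned int *h_nr_running)
{
	unsigned int prev_nr = (*h_nr_running)--;

	if (prev_nr == 2)
		overload_clear(cpu);
}

int main(void)
{
	unsigned int nr = 0;

	enqueue(3, &nr);	/* 0 -> 1: not yet overloaded */
	enqueue(3, &nr);	/* 1 -> 2: CPU 3 set in the mask */
	printf("mask=%#lx\n", cfs_overload_cpus);	/* prints mask=0x8 */
	dequeue(3, &nr);	/* 2 -> 1: CPU 3 cleared */
	printf("mask=%#lx\n", cfs_overload_cpus);	/* prints mask=0 */
	return 0;
}

The throttle and unthrottle paths in the diff apply the same check in batch form, comparing prev_nr and prev_nr +/- task_delta against the threshold of 2.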
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 kernel/sched/fair.c | 51 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 4 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 9d0db0c89c3e..8d88f8ee5625 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -21,6 +21,7 @@
  * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 #include "sched.h"
+#include "sparsemask.h"
 
 #include <trace/events/sched.h>
@@ -3810,6 +3811,28 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
 	WRITE_ONCE(p->se.avg.util_est, ue);
 }
 
+static void overload_clear(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_clear_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
+static void overload_set(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_set_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
 #else /* CONFIG_SMP */
 
 #define UPDATE_TG	0x0
@@ -3833,6 +3856,9 @@ static inline int idle_balance(struct rq *rq, struct rq_flags *rf)
 	return 0;
 }
 
+static inline void overload_clear(struct rq *rq) {}
+static inline void overload_set(struct rq *rq) {}
+
 static inline void
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
@@ -4482,6 +4508,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
@@ -4511,8 +4538,12 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 		dequeue = 0;
 	}
 
-	if (!se)
+	if (!se) {
 		sub_nr_running(rq, task_delta);
+		if (prev_nr >= 2 && prev_nr - task_delta < 2)
+			overload_clear(rq);
+
+	}
 
 	cfs_rq->throttled = 1;
 	cfs_rq->throttled_clock = rq_clock(rq);
@@ -4542,6 +4573,7 @@ static void throttle_cfs_rq(struct cfs_rq *cfs_rq)
 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	int enqueue = 1;
@@ -4582,8 +4614,11 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	assert_list_leaf_cfs_rq(rq);
 
-	if (!se)
+	if (!se) {
 		add_nr_running(rq, task_delta);
+		if (prev_nr < 2 && prev_nr + task_delta >= 2)
+			overload_set(rq);
+	}
 
 	/* Determine whether we need to wake up potentially idle CPU: */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)
 		resched_curr(rq);
@@ -5161,6 +5196,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
@@ -5210,8 +5246,11 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		update_cfs_group(se);
 	}
 
-	if (!se)
+	if (!se) {
 		add_nr_running(rq, 1);
+		if (prev_nr == 1)
+			overload_set(rq);
+	}
 
 	if (cfs_bandwidth_used()) {
 		/*
@@ -5246,6 +5285,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
@@ -5289,8 +5329,11 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		update_cfs_group(se);
 	}
 
-	if (!se)
+	if (!se) {
 		sub_nr_running(rq, 1);
+		if (prev_nr == 2)
+			overload_clear(rq);
+	}
 
 	util_est_dequeue(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);