From: Steve Sistare <steven.sistare@oracle.com>
maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8PIYZ
CVE: NA
Reference: https://lore.kernel.org/lkml/1541767840-93588-5-git-send-email-steven.sistar...
---------------------------
An overloaded CPU has more than 1 runnable task. When a CFS task wakes on a CPU, if h_nr_running transitions from 1 to more, then set the CPU in the cfs_overload_cpus bitmap. When a CFS task sleeps, if h_nr_running transitions from 2 to less, then clear the CPU in cfs_overload_cpus.
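As a minimal illustration of that transition rule only (not part of the patch): the sketch below models the bookkeeping with a plain bitmap and a hypothetical per-CPU counter array, whereas the patch itself tracks rq->cfs.h_nr_running and updates the RCU-protected sparsemask attached to the runqueue.

    /*
     * Illustrative sketch of the 1->2 / 2->1 overload transitions.
     * NR_CPUS, h_nr_running[] and cfs_overload_cpus here are stand-ins,
     * not the kernel's data structures.
     */
    #include <stdio.h>

    #define NR_CPUS 8

    static unsigned long cfs_overload_cpus;      /* one bit per CPU */
    static unsigned int h_nr_running[NR_CPUS];   /* runnable CFS tasks per CPU */

    static void enqueue(int cpu)
    {
            unsigned int prev_nr = h_nr_running[cpu]++;

            /* 1 -> 2 transition: CPU becomes overloaded */
            if (prev_nr == 1)
                    cfs_overload_cpus |= 1UL << cpu;
    }

    static void dequeue(int cpu)
    {
            unsigned int prev_nr = h_nr_running[cpu]--;

            /* 2 -> 1 transition: CPU is no longer overloaded */
            if (prev_nr == 2)
                    cfs_overload_cpus &= ~(1UL << cpu);
    }

    int main(void)
    {
            enqueue(3);                                             /* 0 -> 1 */
            enqueue(3);                                             /* 1 -> 2 */
            printf("overload mask: %#lx\n", cfs_overload_cpus);     /* 0x8 */
            dequeue(3);                                             /* 2 -> 1 */
            printf("overload mask: %#lx\n", cfs_overload_cpus);     /* 0x0 */
            return 0;
    }

The throttle/unthrottle paths in the diff apply the same rule, except that task_delta tasks move at once, so the checks compare prev_nr and prev_nr +/- task_delta against 2.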
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Cheng Yu <serein.chengyu@huawei.com>
---
 kernel/sched/fair.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 640c0a73e73a..2c106a223b73 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -62,6 +62,7 @@
 #include <linux/resume_user_mode.h>
 #endif
 
+#include "sparsemask.h"
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -5080,6 +5081,28 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 	rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
 }
 
+static void overload_clear(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_clear_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
+static void overload_set(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_set_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
 #else /* CONFIG_SMP */
 
 static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
@@ -5109,6 +5132,9 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 	return 0;
 }
 
+static inline void overload_clear(struct rq *rq) {}
+static inline void overload_set(struct rq *rq) {}
+
 static inline void
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 
@@ -5712,6 +5738,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
@@ -5785,6 +5812,8 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, task_delta);
+	if (prev_nr >= 2 && prev_nr - task_delta < 2)
+		overload_clear(rq);
 
 done:
 	/*
@@ -5801,6 +5830,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
@@ -5883,6 +5913,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, task_delta);
+	if (prev_nr < 2 && prev_nr + task_delta >= 2)
+		overload_set(rq);
 
 unthrottle_throttle:
 	assert_list_leaf_cfs_rq(rq);
@@ -6695,6 +6727,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
 	int task_new = !(flags & ENQUEUE_WAKEUP);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
@@ -6751,6 +6784,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, 1);
+	if (prev_nr == 1)
+		overload_set(rq);
 
 	/*
 	 * Since new tasks are assigned an initial util_avg equal to
@@ -6788,6 +6823,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	bool was_sched_idle = sched_idle_rq(rq);
 
 	util_est_dequeue(&rq->cfs, p);
@@ -6842,6 +6878,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, 1);
+	if (prev_nr == 2)
+		overload_clear(rq);
 
 	/* balance early to pull high priority tasks */
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
@@ -8475,6 +8513,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
+	unsigned int prev_nr = cfs_rq->h_nr_running;
 
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
@@ -8521,6 +8560,8 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, task_delta);
+	if (prev_nr >= 2 && prev_nr - task_delta < 2)
+		overload_clear(rq);
 
 done:
 	if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
@@ -8536,6 +8577,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
 	struct sched_entity *se;
+	unsigned int prev_nr = cfs_rq->h_nr_running;
 	long task_delta, idle_task_delta;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -8598,6 +8640,8 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	add_nr_running(rq, task_delta);
+	if (prev_nr < 2 && prev_nr + task_delta >= 2)
+		overload_set(rq);
 
 unthrottle_throttle: