From: Steve Sistare <steven.sistare@oracle.com>
maillist inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8PIYZ
CVE: NA
Reference: https://lore.kernel.org/lkml/1541767840-93588-5-git-send-email-steven.sistar...
---------------------------
An overloaded CPU has more than one runnable CFS task. When a CFS task wakes on a CPU and h_nr_running transitions from 1 to 2 or more, set the CPU in the cfs_overload_cpus bitmap. When a CFS task sleeps and h_nr_running drops from 2 to 1 or fewer, clear the CPU in cfs_overload_cpus.
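For readers following the diff below, here is a minimal userspace sketch of that transition rule. The plain uint64_t bitmap, the helper names, and the prev_nr bookkeeping are illustrative stand-ins for the kernel's per-LLC cfs_overload_cpus sparsemask, not the actual scheduler API.

/*
 * Sketch of the overload-tracking rule, under the assumptions above.
 * Bit N set in overload_mask means "CPU N has more than one runnable task".
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t overload_mask;	/* stand-in for cfs_overload_cpus */

static void account_enqueue(int cpu, unsigned int prev_nr)
{
	/* prev_nr is h_nr_running before the enqueue; 1 -> 2 marks overload */
	if (prev_nr == 1)
		overload_mask |= 1ULL << cpu;
}

static void account_dequeue(int cpu, unsigned int prev_nr)
{
	/* prev_nr is h_nr_running before the dequeue; 2 -> 1 clears overload */
	if (prev_nr == 2)
		overload_mask &= ~(1ULL << cpu);
}

int main(void)
{
	account_enqueue(3, 1);	/* a second task wakes on CPU 3 */
	printf("cpu3 overloaded: %d\n", !!(overload_mask & (1ULL << 3)));
	account_dequeue(3, 2);	/* back to a single runnable task */
	printf("cpu3 overloaded: %d\n", !!(overload_mask & (1ULL << 3)));
	return 0;
}

The patch applies this rule at every point where rq->cfs.h_nr_running can cross the 1/2 boundary: the enqueue and dequeue paths (single task) and the throttle/unthrottle paths, where task_delta tasks can enter or leave at once.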
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Cheng Yu <serein.chengyu@huawei.com>
---
 kernel/sched/fair.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 0de55884f9da..ab5e37a99ba3 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -62,6 +62,7 @@
 #include <linux/resume_user_mode.h>
 #endif
 
+#include "sparsemask.h"
 /*
  * The initial- and re-scaling of tunables is configurable
  *
@@ -5060,6 +5061,28 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 	rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
 }
 
+static void overload_clear(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_clear_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
+static void overload_set(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_set_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
 #else /* CONFIG_SMP */
 
 static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
@@ -5089,6 +5112,9 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 	return 0;
 }
 
+static inline void overload_clear(struct rq *rq) {}
+static inline void overload_set(struct rq *rq) {}
+
 static inline void
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 
@@ -5692,6 +5718,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
@@ -5765,6 +5792,8 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, task_delta);
+	if (prev_nr >= 2 && prev_nr - task_delta < 2)
+		overload_clear(rq);
 
 done:
 	/*
@@ -5781,6 +5810,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
@@ -5863,6 +5893,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, task_delta);
+	if (prev_nr < 2 && prev_nr + task_delta >= 2)
+		overload_set(rq);
 
 unthrottle_throttle:
 	assert_list_leaf_cfs_rq(rq);
 
@@ -6675,6 +6707,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
 	int task_new = !(flags & ENQUEUE_WAKEUP);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
@@ -6731,6 +6764,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, 1);
+	if (prev_nr == 1)
+		overload_set(rq);
 
 	/*
 	 * Since new tasks are assigned an initial util_avg equal to
@@ -6768,6 +6803,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	bool was_sched_idle = sched_idle_rq(rq);
 
 	util_est_dequeue(&rq->cfs, p);
@@ -6822,6 +6858,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, 1);
+	if (prev_nr == 2)
+		overload_clear(rq);
 
 	/* balance early to pull high priority tasks */
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
@@ -8306,6 +8344,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
+	unsigned int prev_nr = cfs_rq->h_nr_running;
 
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
 
@@ -8352,6 +8391,8 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, task_delta);
+	if (prev_nr >= 2 && prev_nr - task_delta < 2)
+		overload_clear(rq);
 
 done:
 	if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq))))
@@ -8367,6 +8408,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
 	struct sched_entity *se;
+	unsigned int prev_nr = cfs_rq->h_nr_running;
 	long task_delta, idle_task_delta;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -8429,6 +8471,8 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	}
 
 	add_nr_running(rq, task_delta);
+	if (prev_nr < 2 && prev_nr + task_delta >= 2)
+		overload_set(rq);
 
 unthrottle_throttle: