From: Steve Sistare <steven.sistare@oracle.com>
hulk inclusion
category: feature
bugzilla: 38261, https://gitee.com/openeuler/kernel/issues/I49XPZ
CVE: NA
---------------------------
An overloaded CPU has more than 1 runnable task. When a CFS task wakes on a
CPU and h_nr_running transitions from 1 to 2 or more, set that CPU in the
cfs_overload_cpus bitmap. When a CFS task sleeps and h_nr_running drops from
2 to 1 or fewer, clear the CPU in cfs_overload_cpus.
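To make the bookkeeping concrete, here is a small userspace sketch
(illustrative only, not part of the patch): a plain uint64_t stands in for
the per-LLC sparsemask, and the names toy_enqueue(), toy_dequeue(),
overload_mask and h_nr_running[] are local to the example.

  #include <stdint.h>
  #include <stdio.h>

  static uint64_t overload_mask;          /* bit N set => CPU N has > 1 runnable CFS task */
  static unsigned int h_nr_running[64];   /* runnable CFS tasks per CPU */

  static void toy_enqueue(int cpu)
  {
          unsigned int prev_nr = h_nr_running[cpu]++;

          if (prev_nr == 1)               /* 1 -> 2: CPU becomes overloaded */
                  overload_mask |= 1ULL << cpu;
  }

  static void toy_dequeue(int cpu)
  {
          unsigned int prev_nr = h_nr_running[cpu]--;

          if (prev_nr == 2)               /* 2 -> 1: CPU is no longer overloaded */
                  overload_mask &= ~(1ULL << cpu);
  }

  int main(void)
  {
          toy_enqueue(3);                 /* 0 -> 1: not overloaded   */
          toy_enqueue(3);                 /* 1 -> 2: bit 3 is set     */
          printf("mask=%#llx\n", (unsigned long long)overload_mask);
          toy_dequeue(3);                 /* 2 -> 1: bit 3 is cleared */
          printf("mask=%#llx\n", (unsigned long long)overload_mask);
          return 0;
  }

In the patch itself the same transitions are detected against
rq->cfs.h_nr_running (or cfs_rq->h_nr_running on the qos paths) and recorded
with overload_set()/overload_clear(), which update rq->cfs_overload_cpus
under rcu_read_lock().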
Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
Signed-off-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Hanjun Guo <guohanjun@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 kernel/sched/fair.c | 54 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)
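(Illustrative aside, not part of the patch: the throttle/unthrottle hunks
below add or remove task_delta tasks at once, so instead of checking a single
1 <-> 2 step they test whether the bulk change crosses the overload boundary
of two runnable tasks. A standalone model of those two predicates; the helper
names crossed_below_two() and crossed_to_two_or_more() are illustrative only.)

  #include <assert.h>
  #include <stdbool.h>

  /* Bulk dequeue of 'delta' tasks: did h_nr_running drop from >= 2 to < 2? */
  static bool crossed_below_two(unsigned int prev_nr, unsigned int delta)
  {
          return prev_nr >= 2 && prev_nr - delta < 2;
  }

  /* Bulk enqueue of 'delta' tasks: did h_nr_running rise from < 2 to >= 2? */
  static bool crossed_to_two_or_more(unsigned int prev_nr, unsigned int delta)
  {
          return prev_nr < 2 && prev_nr + delta >= 2;
  }

  int main(void)
  {
          assert(crossed_below_two(3, 2));        /* 3 -> 1: clear the overload bit */
          assert(!crossed_below_two(3, 1));       /* 3 -> 2: still overloaded       */
          assert(crossed_to_two_or_more(1, 1));   /* 1 -> 2: set the overload bit   */
          assert(!crossed_to_two_or_more(0, 1));  /* 0 -> 1: not overloaded         */
          return 0;
  }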
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 27641d5c6db2..450d16e5651b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -21,6 +21,7 @@
  * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
  */
 #include "sched.h"
+#include "sparsemask.h"
 
 /*
  * Targeted preemption latency for CPU-bound tasks:
@@ -4152,6 +4153,28 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
 	rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
 }
 
+static void overload_clear(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_clear_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
+static void overload_set(struct rq *rq)
+{
+	struct sparsemask *overload_cpus;
+
+	rcu_read_lock();
+	overload_cpus = rcu_dereference(rq->cfs_overload_cpus);
+	if (overload_cpus)
+		sparsemask_set_elem(overload_cpus, rq->cpu);
+	rcu_read_unlock();
+}
+
 #else /* CONFIG_SMP */
 
 #define UPDATE_TG	0x0
@@ -4175,6 +4198,9 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 	return 0;
 }
 
+static inline void overload_clear(struct rq *rq) {}
+static inline void overload_set(struct rq *rq) {}
+
 static inline void
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 
@@ -4877,6 +4903,7 @@ static int tg_throttle_down(struct task_group *tg, void *data)
 static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta, dequeue = 1;
@@ -4931,8 +4958,11 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 		dequeue = 0;
 	}
 
-	if (!se)
+	if (!se) {
 		sub_nr_running(rq, task_delta);
+		if (prev_nr >= 2 && prev_nr - task_delta < 2)
+			overload_clear(rq);
+	}
 
 	/*
 	 * Note: distribution will already see us throttled via the
@@ -4946,6 +4976,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	long task_delta, idle_task_delta;
@@ -5007,6 +5038,8 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, task_delta);
+	if (prev_nr < 2 && prev_nr + task_delta >= 2)
+		overload_set(rq);
 
 unthrottle_throttle:
 	/*
@@ -5618,6 +5651,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int idle_h_nr_running = task_has_idle_policy(p);
 	int task_new = !(flags & ENQUEUE_WAKEUP);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 
 	/*
 	 * The code below (indirectly) updates schedutil which looks at
@@ -5675,6 +5709,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	/* At this point se is NULL and we are at root level*/
 	add_nr_running(rq, 1);
+	if (prev_nr == 1)
+		overload_set(rq);
 
 	/*
 	 * Since new tasks are assigned an initial util_avg equal to
@@ -5727,6 +5763,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct sched_entity *se = &p->se;
 	int task_sleep = flags & DEQUEUE_SLEEP;
 	int idle_h_nr_running = task_has_idle_policy(p);
+	unsigned int prev_nr = rq->cfs.h_nr_running;
 	bool was_sched_idle = sched_idle_rq(rq);
 
 	util_est_dequeue(&rq->cfs, p);
@@ -5775,6 +5812,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 	/* At this point se is NULL and we are at root level*/
 	sub_nr_running(rq, 1);
+	if (prev_nr == 2)
+		overload_clear(rq);
 
 	/* balance early to pull high priority tasks */
 	if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
@@ -7133,6 +7172,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
 	struct rq *rq = rq_of(cfs_rq);
 	struct sched_entity *se;
+	unsigned int prev_nr = cfs_rq->h_nr_running;
 	long task_delta, idle_task_delta, dequeue = 1;
 
 	se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
@@ -7159,8 +7199,12 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 		dequeue = 0;
 	}
 
-	if (!se)
+	if (!se) {
 		sub_nr_running(rq, task_delta);
+		if (prev_nr >= 2 && prev_nr - task_delta < 2)
+			overload_clear(rq);
+
+	}
 
 	cfs_rq->throttled = 1;
 	cfs_rq->throttled_clock = rq_clock(rq);
@@ -7174,6 +7218,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
 	int enqueue = 1;
+	unsigned int prev_nr = cfs_rq->h_nr_running;
 	long task_delta, idle_task_delta;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -7209,8 +7254,11 @@
 
 	assert_list_leaf_cfs_rq(rq);
 
-	if (!se)
+	if (!se) {
 		add_nr_running(rq, task_delta);
+		if (prev_nr < 2 && prev_nr + task_delta >= 2)
+			overload_set(rq);
+	}
 
 	/* Determine whether we need to wake up potentially idle CPU: */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)