From: Guan Jing guanjing6@huawei.com
Offering: HULK hulk inclusion category: feature bugzilla: 188161
-------------------------------
Here is the typical case that priority inversion will caused occasionally by SMT expelling: Assuming that there are two SMT cores-cA and cB, online tasks are running on cA while offline tasks on cB. With SMT expelling, online task will drives off offline tasks to occupy all SMT cores exclusively, which, in turn, will starve the offline task to release the related resources other tasks with higher priority need.
Hence, this patch will introduce another mechanism to alleviate this situation. For all offline tasks, one metric of profiling the maximum task expelling duration is set up and the default value is 5 seconds, if such offline task exsits, all offline tasks will be allowed to run into one small sleep(msleep) loop in kernel before they goes into usermode; and further, if the two SMT cores(such as cA and cB) are idle or don't get any online tasks to run, for these offline tasks, they will continue to run in usermode for the next schedule.
Signed-off-by: Guan Jing guanjing6@huawei.com
Signed-off-by: Guan Jing guanjing6@huawei.com --- kernel/sched/fair.c | 46 +++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 769508559ee8..1503c290aa52 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -137,6 +137,7 @@ static DEFINE_PER_CPU(int, qos_cpu_overload); unsigned int sysctl_overload_detect_period = 5000; /* in ms */ unsigned int sysctl_offline_wait_interval = 100; /* in ms */ static int unthrottle_qos_cfs_rqs(int cpu); +static bool qos_smt_expelled(int this_cpu); #endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER @@ -7324,6 +7325,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ }
#ifdef CONFIG_QOS_SCHED +static inline bool qos_timer_is_activated(int cpu) +{ + return hrtimer_active(per_cpu_ptr(&qos_overload_timer, cpu)); +} + +static inline void cancel_qos_timer(int cpu) +{ + hrtimer_cancel(per_cpu_ptr(&qos_overload_timer, cpu)); +}
static inline bool is_offline_task(struct task_struct *p) { @@ -7383,7 +7393,7 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
}
- if (list_empty(&per_cpu(qos_throttled_cfs_rq, cpu_of(rq)))) + if (!qos_timer_is_activated(cpu_of(rq))) start_qos_hrtimer(cpu_of(rq));
cfs_rq->throttled = QOS_THROTTLED; @@ -7483,10 +7493,6 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq) }
assert_list_leaf_cfs_rq(rq); - - /* Determine whether we need to wake up potentially idle CPU: */ - if (rq->curr == rq->idle && rq->cfs.nr_running) - resched_curr(rq); }
static int __unthrottle_qos_cfs_rqs(int cpu) @@ -7508,10 +7514,10 @@ static int __unthrottle_qos_cfs_rqs(int cpu) static int unthrottle_qos_cfs_rqs(int cpu) { int res; - res = __unthrottle_qos_cfs_rqs(cpu); - if (res) - hrtimer_cancel(&(per_cpu(qos_overload_timer, cpu))); + + if (qos_timer_is_activated(cpu) && !qos_smt_expelled(cpu)) + cancel_qos_timer(cpu);
return res; } @@ -7564,8 +7570,13 @@ static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer) struct rq *rq = this_rq();
rq_lock_irqsave(rq, &rf); - if (__unthrottle_qos_cfs_rqs(smp_processor_id())) - __this_cpu_write(qos_cpu_overload, 1); + __unthrottle_qos_cfs_rqs(smp_processor_id()); + __this_cpu_write(qos_cpu_overload, 1); + + /* Determine whether we need to wake up potentially idle CPU. */ + if (rq->curr == rq->idle && rq->cfs.nr_running) + resched_curr(rq); + rq_unlock_irqrestore(rq, &rf);
return HRTIMER_NORESTART; @@ -7605,6 +7616,12 @@ static void qos_schedule_throttle(struct task_struct *p) } }
+#ifndef CONFIG_QOS_SCHED_SMT_EXPELLER +static bool qos_smt_expelled(int this_cpu) +{ + return false; +} +#endif #endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER @@ -7792,8 +7809,12 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
again: #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER - if (qos_smt_expelled(this_cpu)) { + if (qos_smt_expelled(this_cpu) && !__this_cpu_read(qos_cpu_overload)) { __this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE); + + if (!qos_timer_is_activated(this_cpu)) + start_qos_hrtimer(this_cpu); + schedstat_inc(rq->curr->se.statistics.nr_qos_smt_expelled); trace_sched_qos_smt_expelled(rq->curr, per_cpu(qos_smt_status, this_cpu)); return NULL; @@ -8001,7 +8022,8 @@ done: __maybe_unused; goto again; }
- __this_cpu_write(qos_cpu_overload, 0); + if (!qos_smt_expelled(cpu_of(rq))) + __this_cpu_write(qos_cpu_overload, 0); #endif /* * rq is about to be idle, check if we need to update the