From: Guan Jing <guanjing6@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I52611
CVE: NA
--------------------------------
We implement the QoS SMT expeller through the following two points:
a) when online tasks and offline tasks are running on the same physical
   cpu, the online task sends an IPI to expel offline tasks from the SMT
   sibling cpus;
b) while an online task is running, its SMT sibling cpus do not allow
   offline tasks to be selected.
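The policy can be pictured with a small user-space model (an illustration
only: the two-element status array and the sibling()/run_task() helpers
below are hypothetical and not part of this patch; only the QOS_LEVEL_*
values come from it):

  #include <stdio.h>

  enum { QOS_LEVEL_OFFLINE = -1, QOS_LEVEL_ONLINE = 0 };

  /* One SMT pair; zero-initialised, i.e. QOS_LEVEL_ONLINE, just like
   * the zero-initialised per-cpu variable in the patch. */
  static int qos_smt_status[2];

  static int sibling(int cpu) { return cpu ^ 1; }

  /* Point b): an offline task may not be picked while the sibling
   * cpu is running online work. */
  static int may_pick_offline(int cpu)
  {
          return qos_smt_status[sibling(cpu)] != QOS_LEVEL_ONLINE;
  }

  /* Point a): publishing an online task kicks a sibling cpu that is
   * currently running offline work (the printf stands in for the IPI). */
  static void run_task(int cpu, int qos_level)
  {
          qos_smt_status[cpu] = qos_level;
          if (qos_level == QOS_LEVEL_ONLINE &&
              qos_smt_status[sibling(cpu)] == QOS_LEVEL_OFFLINE)
                  printf("cpu%d: IPI -> cpu%d (expel offline task)\n",
                         cpu, sibling(cpu));
  }

  int main(void)
  {
          run_task(1, QOS_LEVEL_OFFLINE); /* offline task starts on cpu1 */
          run_task(0, QOS_LEVEL_ONLINE);  /* online task on cpu0 expels it */
          printf("cpu1 may pick offline: %d\n", may_pick_offline(1));
          return 0;
  }

Running the model prints the IPI line for cpu1 and then
"cpu1 may pick offline: 0", mirroring points a) and b) above.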
Signed-off-by: Guan Jing <guanjing6@huawei.com>
Reviewed-by: Chen Hui <judy.chenhui@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Change-Id: Id0912268295277486aa6042c714dc07ae603b503
Signed-off-by: Zhengyuan Liu <liuzhengyuan@kylinos.cn>
---
 include/linux/sched.h |   4 +
 kernel/sched/core.c   |   4 +
 kernel/sched/fair.c   | 180 +++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h  |   6 +-
 4 files changed, 192 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca020a991b33..00cba1ebc89a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1669,6 +1669,10 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
 	__get_task_comm(buf, sizeof(buf), tsk);				\
 })
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+void qos_smt_check_need_resched(void);
+#endif
+
 #ifdef CONFIG_SMP
 void scheduler_ipi(void);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 36d7422da0ac..219c5e6fa554 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1767,6 +1767,10 @@ void sched_ttwu_pending(void)
 void scheduler_ipi(void)
 {
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_smt_check_need_resched();
+#endif
+
 	/*
 	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
 	 * TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7d553a4c5120..1c4a12c06008 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -111,6 +111,10 @@ unsigned int sysctl_offline_wait_interval = 100;	/* in ms */
 static int unthrottle_qos_cfs_rqs(int cpu);
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static DEFINE_PER_CPU(int, qos_smt_status);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -7137,6 +7141,131 @@ static void qos_schedule_throttle(struct task_struct *p)
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static bool qos_smt_check_siblings_status(int this_cpu)
+{
+	int cpu;
+
+	if (!sched_smt_active())
+		return false;
+
+	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE)
+			return true;
+	}
+
+	return false;
+}
+
+static bool qos_smt_expelled(int this_cpu)
+{
+	/*
+	 * The qos_smt_status of a sibling cpu is online while this cpu
+	 * only has SCHED_IDLE tasks enqueued, so there is no suitable
+	 * task and pick_next_task_fair() returns NULL.
+	 */
+	if (qos_smt_check_siblings_status(this_cpu) && sched_idle_cpu(this_cpu))
+		return true;
+
+	return false;
+}
+
+static bool qos_smt_update_status(struct task_struct *p)
+{
+	int status = QOS_LEVEL_OFFLINE;
+
+	if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE)
+		status = QOS_LEVEL_ONLINE;
+
+	if (__this_cpu_read(qos_smt_status) == status)
+		return false;
+
+	__this_cpu_write(qos_smt_status, status);
+
+	return true;
+}
+
+static void qos_smt_send_ipi(int this_cpu)
+{
+	int cpu;
+	struct rq *rq = NULL;
+
+	if (!sched_smt_active())
+		return;
+
+	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		rq = cpu_rq(cpu);
+
+		/*
+		 * There are two cases where the sibling cpu need not be
+		 * sent a scheduler IPI:
+		 * a) The qos_smt_status of the sibling cpu is online;
+		 * b) The cfs.h_nr_running of the sibling cpu is 0.
+		 */
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE ||
+		    rq->cfs.h_nr_running == 0)
+			continue;
+
+		smp_send_reschedule(cpu);
+	}
+}
+
+static void qos_smt_expel(int this_cpu, struct task_struct *p)
+{
+	if (qos_smt_update_status(p))
+		qos_smt_send_ipi(this_cpu);
+}
+
+static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
+{
+	int cpu;
+
+	if (!sched_smt_active())
+		return false;
+
+	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		/*
+		 * Two cases rely on setting need_resched to drive the
+		 * offline task away:
+		 * a) The qos_smt_status of the sibling cpu is online and
+		 *    the current task on this cpu is offline;
+		 * b) The qos_smt_status of the sibling cpu is offline, the
+		 *    current task on this cpu is the idle task, and this
+		 *    cpu only has SCHED_IDLE tasks enqueued.
+		 */
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE &&
+		    task_group(current)->qos_level < QOS_LEVEL_ONLINE)
+			return true;
+
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE &&
+		    rq->curr == rq->idle && sched_idle_cpu(this_cpu))
+			return true;
+	}
+
+	return false;
+}
+
+void qos_smt_check_need_resched(void)
+{
+	struct rq *rq = this_rq();
+	int this_cpu = rq->cpu;
+
+	if (test_tsk_need_resched(current))
+		return;
+
+	if (_qos_smt_check_need_resched(this_cpu, rq)) {
+		set_tsk_need_resched(current);
+		set_preempt_need_resched();
+	}
+}
+#endif
+
 static struct task_struct *
 pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 {
@@ -7145,13 +7274,28 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	struct task_struct *p;
 	int new_tasks;
 	unsigned long time;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	int this_cpu = rq->cpu;
+#endif
 again:
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	if (qos_smt_expelled(this_cpu)) {
+		__this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE);
+		return NULL;
+	}
+#endif
+
 	if (!cfs_rq->nr_running)
 		goto idle;
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	if (prev->sched_class != &fair_sched_class)
+	if (!prev || prev->sched_class != &fair_sched_class) {
+#ifdef CONFIG_QOS_SCHED
+		if (cfs_rq->idle_h_nr_running != 0 && rq->online)
+			goto qos_simple;
+		else
+#endif
 		goto simple;
+	}
 	/*
@@ -7236,6 +7380,34 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
 	}
 	goto done;
+
+#ifdef CONFIG_QOS_SCHED
+qos_simple:
+	if (prev)
+		put_prev_task(rq, prev);
+
+	do {
+		se = pick_next_entity(cfs_rq, NULL);
+		if (check_qos_cfs_rq(group_cfs_rq(se))) {
+			cfs_rq = &rq->cfs;
+			if (!cfs_rq->nr_running)
+				goto idle;
+			continue;
+		}
+
+		cfs_rq = group_cfs_rq(se);
+	} while (cfs_rq);
+
+	p = task_of(se);
+
+	while (se) {
+		set_next_entity(cfs_rq_of(se), se);
+		se = parent_entity(se);
+	}
+
+	goto done;
+#endif
+
 simple:
 #endif
@@ -7266,6 +7438,9 @@ done: __maybe_unused;
 	qos_schedule_throttle(p);
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_smt_expel(this_cpu, p);
+#endif
 	return p;
 idle:
@@ -7307,6 +7482,9 @@ done: __maybe_unused;
 	__this_cpu_write(qos_cpu_overload, 0);
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_smt_expel(this_cpu, NULL);
+#endif
 	return NULL;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ae3068153093..fc5fd528001a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1000,6 +1000,11 @@ static inline int cpu_of(struct rq *rq)
 }
 #ifdef CONFIG_QOS_SCHED
+enum task_qos_level {
+	QOS_LEVEL_OFFLINE = -1,
+	QOS_LEVEL_ONLINE = 0,
+	QOS_LEVEL_MAX
+};
 void init_qos_hrtimer(int cpu);
 #endif
@@ -1669,7 +1674,6 @@ extern const struct sched_class rt_sched_class;
 extern const struct sched_class fair_sched_class;
 extern const struct sched_class idle_sched_class;
-
 #ifdef CONFIG_SMP
 extern void update_group_capacity(struct sched_domain *sd, int cpu);