From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I52611 CVE: NA
--------------------------------
We introduce the QoS SMT expeller, which lets online tasks expel offline tasks from the SMT sibling CPUs and exclusively occupy CPU resources. In this way we are able to improve the QoS of online tasks in co-location scenarios.
Change-Id: I1860d20d5e78467773e67cc47b4fa2d1f0110783 Signed-off-by: Guan Jing guanjing6@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Zhengyuan Liu liuzhengyuan@kylinos.cn --- init/Kconfig | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/init/Kconfig b/init/Kconfig index ac1c864524ac..dd81d19e2fcb 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -794,6 +794,15 @@ config QOS_SCHED
default n
+config QOS_SCHED_SMT_EXPELLER + bool "Qos smt expeller" + depends on SCHED_SMT + depends on QOS_SCHED + default n + help + This feature enables online tasks to expel offline tasks + on the smt sibling cpus, and exclusively occupy CPU resources. + config FAIR_GROUP_SCHED bool "Group scheduling for SCHED_OTHER" depends on CGROUP_SCHED
From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I52611 CVE: NA
--------------------------------
We implement the QoS SMT expeller through the following two mechanisms: a) when online tasks and offline tasks are running on the same physical CPU, the online tasks send an IPI to expel the offline tasks on the SMT sibling CPUs; b) while online tasks are running, offline tasks are not allowed to be selected on the SMT sibling CPUs.
Signed-off-by: Guan Jing guanjing6@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Change-Id: Id0912268295277486aa6042c714dc07ae603b503 Signed-off-by: Zhengyuan Liu liuzhengyuan@kylinos.cn --- include/linux/sched.h | 4 + kernel/sched/core.c | 4 + kernel/sched/fair.c | 180 +++++++++++++++++++++++++++++++++++++++++- kernel/sched/sched.h | 6 +- 4 files changed, 192 insertions(+), 2 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index ca020a991b33..00cba1ebc89a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1669,6 +1669,10 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); __get_task_comm(buf, sizeof(buf), tsk); \ })
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER +void qos_smt_check_need_resched(void); +#endif + #ifdef CONFIG_SMP void scheduler_ipi(void); extern unsigned long wait_task_inactive(struct task_struct *, long match_state); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 36d7422da0ac..219c5e6fa554 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1767,6 +1767,10 @@ void sched_ttwu_pending(void)
void scheduler_ipi(void) { +#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER + qos_smt_check_need_resched(); +#endif + /* * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting * TIF_NEED_RESCHED remotely (for the first time) will also send diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 7d553a4c5120..1c4a12c06008 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -111,6 +111,10 @@ unsigned int sysctl_offline_wait_interval = 100; /* in ms */ static int unthrottle_qos_cfs_rqs(int cpu); #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER +static DEFINE_PER_CPU(int, qos_smt_status); +#endif + #ifdef CONFIG_CFS_BANDWIDTH /* * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool @@ -7137,6 +7141,131 @@ static void qos_schedule_throttle(struct task_struct *p)
#endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER +static bool qos_smt_check_siblings_status(int this_cpu) +{ + int cpu; + + if (!sched_smt_active()) + return false; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE) + return true; + } + + return false; +} + +static bool qos_smt_expelled(int this_cpu) +{ + /* + * The qos_smt_status of siblings cpu is online, and current cpu only has + * offline tasks enqueued, there is not suitable task, + * so pick_next_task_fair return null. + */ + if (qos_smt_check_siblings_status(this_cpu) && sched_idle_cpu(this_cpu)) + return true; + + return false; +} + +static bool qos_smt_update_status(struct task_struct *p) +{ + int status = QOS_LEVEL_OFFLINE; + + if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE) + status = QOS_LEVEL_ONLINE; + + if (__this_cpu_read(qos_smt_status) == status) + return false; + + __this_cpu_write(qos_smt_status, status); + + return true; +} + +static void qos_smt_send_ipi(int this_cpu) +{ + int cpu; + struct rq *rq = NULL; + + if (!sched_smt_active()) + return; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + rq = cpu_rq(cpu); + + /* + * There are two cases where current don't need to send scheduler_ipi: + * a) The qos_smt_status of siblings cpu is online; + * b) The cfs.h_nr_running of siblings cpu is 0. 
+ */ + if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE || + rq->cfs.h_nr_running == 0) + continue; + + smp_send_reschedule(cpu); + } +} + +static void qos_smt_expel(int this_cpu, struct task_struct *p) +{ + if (qos_smt_update_status(p)) + qos_smt_send_ipi(this_cpu); +} + +static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq) +{ + int cpu; + + if (!sched_smt_active()) + return false; + + for_each_cpu(cpu, cpu_smt_mask(this_cpu)) { + if (cpu == this_cpu) + continue; + + /* + * There are two cases rely on the set need_resched to drive away + * offline task: + * a) The qos_smt_status of siblings cpu is online, the task of current cpu is offline; + * b) The qos_smt_status of siblings cpu is offline, the task of current cpu is idle, + * and current cpu only has SCHED_IDLE tasks enqueued. + */ + if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE && + task_group(current)->qos_level < QOS_LEVEL_ONLINE) + return true; + + if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE && + rq->curr == rq->idle && sched_idle_cpu(this_cpu)) + return true; + } + + return false; +} + +void qos_smt_check_need_resched(void) +{ + struct rq *rq = this_rq(); + int this_cpu = rq->cpu; + + if (test_tsk_need_resched(current)) + return; + + if (_qos_smt_check_need_resched(this_cpu, rq)) { + set_tsk_need_resched(current); + set_preempt_need_resched(); + } +} +#endif + static struct task_struct * pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf) { @@ -7145,13 +7274,28 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf struct task_struct *p; int new_tasks; unsigned long time; +#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER + int this_cpu = rq->cpu; +#endif
again: +#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER + if (qos_smt_expelled(this_cpu)) { + __this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE); + return NULL; + } +#endif + if (!cfs_rq->nr_running) goto idle;
#ifdef CONFIG_FAIR_GROUP_SCHED - if (prev->sched_class != &fair_sched_class) + if (!prev || prev->sched_class != &fair_sched_class) { +#ifdef CONFIG_QOS_SCHED + if (cfs_rq->idle_h_nr_running != 0 && rq->online) + goto qos_simple; + else +#endif goto simple;
/* @@ -7236,6 +7380,34 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf }
goto done; + +#ifdef CONFIG_QOS_SCHED +qos_simple: + if (prev) + put_prev_task(rq, prev); + + do { + se = pick_next_entity(cfs_rq, NULL); + if (check_qos_cfs_rq(group_cfs_rq(se))) { + cfs_rq = &rq->cfs; + if (!cfs_rq->nr_running) + goto idle; + continue; + } + + cfs_rq = group_cfs_rq(se); + } while (cfs_rq); + + p = task_of(se); + + while (se) { + set_next_entity(cfs_rq_of(se), se); + se = parent_entity(se); + } + + goto done; +#endif + simple: #endif
@@ -7266,6 +7438,9 @@ done: __maybe_unused; qos_schedule_throttle(p); #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER + qos_smt_expel(this_cpu, p); +#endif return p;
idle: @@ -7307,6 +7482,9 @@ done: __maybe_unused; __this_cpu_write(qos_cpu_overload, 0); #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER + qos_smt_expel(this_cpu, NULL); +#endif return NULL; }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index ae3068153093..fc5fd528001a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1000,6 +1000,11 @@ static inline int cpu_of(struct rq *rq) }
#ifdef CONFIG_QOS_SCHED +enum task_qos_level { + QOS_LEVEL_OFFLINE = -1, + QOS_LEVEL_ONLINE = 0, + QOS_LEVEL_MAX +}; void init_qos_hrtimer(int cpu); #endif
@@ -1669,7 +1674,6 @@ extern const struct sched_class rt_sched_class; extern const struct sched_class fair_sched_class; extern const struct sched_class idle_sched_class;
- #ifdef CONFIG_SMP
extern void update_group_capacity(struct sched_domain *sd, int cpu);
From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I52611 CVE: NA
--------------------------------
We have added two statistics for the QoS SMT expeller: a) nr_qos_smt_send_ipi: the number of IPIs sent by online tasks to expel offline tasks; b) nr_qos_smt_expelled: the number of times offline tasks were not picked because they were expelled.
Signed-off-by: Guan Jing guanjing6@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Zhengyuan Liu liuzhengyuan@kylinos.cn Change-Id: Ifc7820509d3e72f4c7ffb2b0f615af17e0d6d7e5 --- include/linux/sched.h | 5 ++++- kernel/sched/debug.c | 4 ++++ kernel/sched/fair.c | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h index 00cba1ebc89a..ada8cd8ce8bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -441,6 +441,10 @@ struct sched_statistics { u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; +#if defined(CONFIG_QOS_SCHED_SMT_EXPELLER) + u64 nr_qos_smt_send_ipi; + u64 nr_qos_smt_expelled; +#endif #endif };
@@ -1967,5 +1971,4 @@ static inline int sched_qos_cpu_overload(void) return 0; } #endif - #endif diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index fcf2a07ece05..a240144e92f9 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -983,6 +983,10 @@ void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns, P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts); P_SCHEDSTAT(se.statistics.nr_wakeups_passive); P_SCHEDSTAT(se.statistics.nr_wakeups_idle); +#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER + P_SCHEDSTAT(se.statistics.nr_qos_smt_send_ipi); + P_SCHEDSTAT(se.statistics.nr_qos_smt_expelled); +#endif
avg_atom = p->se.sum_exec_runtime; if (nr_switches) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 1c4a12c06008..dc9157bb1a2f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7211,6 +7211,7 @@ static void qos_smt_send_ipi(int this_cpu) rq->cfs.h_nr_running == 0) continue;
+ schedstat_inc(current->se.statistics.nr_qos_smt_send_ipi); smp_send_reschedule(cpu); } } @@ -7282,6 +7283,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER if (qos_smt_expelled(this_cpu)) { __this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE); + schedstat_inc(rq->curr->se.statistics.nr_qos_smt_expelled); return NULL; } #endif
From: Guan Jing guanjing6@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I52611 CVE: NA
--------------------------------
We add tracepoints for two cases: a) while an online task is running on a sibling CPU, the offline task running on the local CPU has TIF_NEED_RESCHED set; b) while an online task is running on a sibling CPU, the next offline task picked on the local CPU is expelled.
Signed-off-by: Guan Jing guanjing6@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Signed-off-by: Zhengyuan Liu liuzhengyuan@kylinos.cn --- include/trace/events/sched.h | 55 ++++++++++++++++++++++++++++++++++++ kernel/sched/fair.c | 9 ++++-- 2 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 9a4bdfadab07..5cc17855a590 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -183,6 +183,61 @@ TRACE_EVENT(sched_switch, __entry->next_comm, __entry->next_pid, __entry->next_prio) );
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER +/* + * Tracepoint for a offline task being resched: + */ +TRACE_EVENT(sched_qos_smt_expel, + + TP_PROTO(struct task_struct *sibling_p, int qos_smt_status), + + TP_ARGS(sibling_p, qos_smt_status), + + TP_STRUCT__entry( + __array(char, sibling_comm, TASK_COMM_LEN) + __field(pid_t, sibling_pid) + __field(int, sibling_qos_status) + __field(int, sibling_cpu) + ), + + TP_fast_assign( + memcpy(__entry->sibling_comm, sibling_p->comm, TASK_COMM_LEN); + __entry->sibling_pid = sibling_p->pid; + __entry->sibling_qos_status = qos_smt_status; + __entry->sibling_cpu = task_cpu(sibling_p); + ), + + TP_printk("sibling_comm=%s sibling_pid=%d sibling_qos_status=%d sibling_cpu=%d", + __entry->sibling_comm, __entry->sibling_pid, __entry->sibling_qos_status, + __entry->sibling_cpu) +); + +/* + * Tracepoint for a offline task being expelled: + */ +TRACE_EVENT(sched_qos_smt_expelled, + + TP_PROTO(struct task_struct *p, int qos_smt_status), + + TP_ARGS(p, qos_smt_status), + + TP_STRUCT__entry( + __array(char, comm, TASK_COMM_LEN) + __field(pid_t, pid) + __field(int, qos_status) + ), + + TP_fast_assign( + memcpy(__entry->comm, p->comm, TASK_COMM_LEN); + __entry->pid = p->pid; + __entry->qos_status = qos_smt_status; + ), + + TP_printk("comm=%s pid=%d qos_status=%d", + __entry->comm, __entry->pid, __entry->qos_status) +); +#endif + /* * Tracepoint for a task being migrated: */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index dc9157bb1a2f..ee243a3370fe 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7241,12 +7241,16 @@ static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq) * and current cpu only has SCHED_IDLE tasks enqueued. */ if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE && - task_group(current)->qos_level < QOS_LEVEL_ONLINE) + task_group(current)->qos_level < QOS_LEVEL_ONLINE) { + trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu)); return true; + }
if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE && - rq->curr == rq->idle && sched_idle_cpu(this_cpu)) + rq->curr == rq->idle && sched_idle_cpu(this_cpu)) { + trace_sched_qos_smt_expel(cpu_curr(cpu), per_cpu(qos_smt_status, cpu)); return true; + } }
return false; @@ -7284,6 +7288,7 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf if (qos_smt_expelled(this_cpu)) { __this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE); schedstat_inc(rq->curr->se.statistics.nr_qos_smt_expelled); + trace_sched_qos_smt_expelled(rq->curr, per_cpu(qos_smt_status, this_cpu)); return NULL; } #endif