[PATCH v3 openEuler-23.09 2/8] sched: Implement the function of qos smt expeller

5 Sep 2023

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YRZR
CVE: NA
--------------------------------
We implement the qos smt expeller through the following two
mechanisms:
a) When online tasks and offline tasks are running on the same
physical cpu, the online tasks send an IPI to expel the offline
tasks running on the smt sibling cpus.
b) While online tasks are running, offline tasks are not allowed
to be selected on the smt sibling cpus.
Adapted to openEuler-6.4.
Signed-off-by: Guan Jing guanjing6@huawei.com
Reviewed-by: Chen Hui judy.chenhui@huawei.com
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com
Signed-off-by: Xia Fukun xiafukun@huawei.com
---
 include/linux/sched.h |   7 ++
 kernel/sched/fair.c   | 189 +++++++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h  |   5 ++
 3 files changed, 199 insertions(+), 2 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6db0879089df..ddf9d10bdc37 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2007,9 +2007,16 @@ extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk);
    __get_task_comm(buf, sizeof(buf), tsk);		\
 })
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+void qos_smt_check_need_resched(void);
+#endif
+
 #ifdef CONFIG_SMP
 static __always_inline void scheduler_ipi(void)
 {
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_smt_check_need_resched();
+#endif
    /*
     * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
     * TIF_NEED_RESCHED remotely (for the first time) will also send
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index bc58182b201f..9d1b546daa8f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -61,6 +61,10 @@
 #include <linux/resume_user_mode.h>
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+#include <trace/events/ipi.h>
+#endif
+
 /*
  * Targeted preemption latency for CPU-bound tasks:
  *
@@ -185,6 +189,10 @@ unsigned int sysctl_offline_wait_interval = 100;  /* in ms */
 static int unthrottle_qos_cfs_rqs(int cpu);
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static DEFINE_PER_CPU(int, qos_smt_status);	/* QOS_LEVEL_ONLINE or QOS_LEVEL_OFFLINE, per cpu */
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -8402,6 +8410,131 @@ static void qos_schedule_throttle(struct task_struct *p)
#endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+static bool qos_smt_check_siblings_status(int this_cpu)
+{
+	int cpu;
+
+	if (!sched_smt_active())	/* SMT not active: report no online sibling */
+		return false;
+
+	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+		if (cpu == this_cpu)	/* only the other cpus in the mask matter */
+			continue;
+
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE)
+			return true;	/* one online sibling is enough */
+	}
+
+	return false;	/* no sibling has qos_smt_status == QOS_LEVEL_ONLINE */
+}
+
+static bool qos_smt_expelled(int this_cpu)
+{
+	/*
+	 * A sibling cpu's qos_smt_status is online and sched_idle_cpu() holds
+	 * for this cpu (it only has offline tasks enqueued), so there is no
+	 * suitable task and pick_next_task_fair() returns NULL.
+	 */
+	if (qos_smt_check_siblings_status(this_cpu) && sched_idle_cpu(this_cpu))
+		return true;
+
+	return false;
+}
+
+static bool qos_smt_update_status(struct task_struct *p)
+{
+	int status = QOS_LEVEL_OFFLINE;	/* default, also used when p is NULL */
+
+	if (p != NULL && task_group(p)->qos_level >= QOS_LEVEL_ONLINE)
+		status = QOS_LEVEL_ONLINE;
+
+	if (__this_cpu_read(qos_smt_status) == status)
+		return false;	/* unchanged: qos_smt_expel() sends no IPI */
+
+	__this_cpu_write(qos_smt_status, status);
+
+	return true;	/* this cpu's qos_smt_status changed */
+}
+
+static void qos_smt_send_ipi(int this_cpu)
+{
+	int cpu;
+	struct rq *rq = NULL;
+
+	if (!sched_smt_active())
+		return;
+
+	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		rq = cpu_rq(cpu);
+
+		/*
+		 * There are two cases where no reschedule IPI needs to be sent:
+		 * a) The qos_smt_status of the sibling cpu is online;
+		 * b) The cfs.h_nr_running of the sibling cpu is 0.
+		 */
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE ||
+		    rq->cfs.h_nr_running == 0)
+			continue;
+
+		smp_send_reschedule(cpu);
+	}
+}
+
+static void qos_smt_expel(int this_cpu, struct task_struct *p)
+{
+	if (qos_smt_update_status(p))	/* notify siblings only when the status changed */
+		qos_smt_send_ipi(this_cpu);
+}
+
+static bool _qos_smt_check_need_resched(int this_cpu, struct rq *rq)
+{
+	int cpu;
+
+	if (!sched_smt_active())
+		return false;
+
+	for_each_cpu(cpu, cpu_smt_mask(this_cpu)) {
+		if (cpu == this_cpu)
+			continue;
+
+		/*
+		 * Two cases rely on need_resched being set to drive away the
+		 * offline task:
+		 * a) a sibling cpu's qos_smt_status is online and the current task is offline;
+		 * b) a sibling cpu's qos_smt_status is offline, this cpu is running its idle
+		 *    task, and this cpu only has SCHED_IDLE tasks enqueued.
+		 */
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_ONLINE &&
+		    task_group(current)->qos_level < QOS_LEVEL_ONLINE)
+			return true;
+
+		if (per_cpu(qos_smt_status, cpu) == QOS_LEVEL_OFFLINE &&
+		    rq->curr == rq->idle && sched_idle_cpu(this_cpu))
+			return true;
+	}
+
+	return false;
+}
+
+void qos_smt_check_need_resched(void)
+{
+	struct rq *rq = this_rq();	/* invoked from scheduler_ipi() */
+	int this_cpu = rq->cpu;
+
+	if (test_tsk_need_resched(current))	/* already flagged: nothing to add */
+		return;
+
+	if (_qos_smt_check_need_resched(this_cpu, rq)) {
+		set_tsk_need_resched(current);
+		set_preempt_need_resched();
+	}
+}
+#endif
+
 #ifdef CONFIG_SMP
 static struct task_struct *pick_task_fair(struct rq *rq)
 {
@@ -8442,14 +8575,30 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
    struct sched_entity *se;
    struct task_struct *p;
    int new_tasks;
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	int this_cpu = rq->cpu;
+#endif
again:
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	if (qos_smt_expelled(this_cpu)) {
+		__this_cpu_write(qos_smt_status, QOS_LEVEL_OFFLINE);
+		return NULL;
+	}
+#endif
+
    if (!sched_fair_runnable(rq))
    	goto idle;
#ifdef CONFIG_FAIR_GROUP_SCHED
-	if (!prev || prev->sched_class != &fair_sched_class)
-		goto simple;
+	if (!prev || prev->sched_class != &fair_sched_class) {
+#ifdef CONFIG_QOS_SCHED
+		if (cfs_rq->idle_h_nr_running != 0 && rq->online)
+			goto qos_simple;
+		else
+#endif
+			goto simple;
+	}
/*
     * Because of the set_next_buddy() in dequeue_task_fair() it is rather
@@ -8533,6 +8682,34 @@ pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf
    }
goto done;
+
+#ifdef CONFIG_QOS_SCHED
+qos_simple:
+	if (prev)
+		put_prev_task(rq, prev);
+
+	do {
+		se = pick_next_entity(cfs_rq, NULL);
+		if (check_qos_cfs_rq(group_cfs_rq(se))) {
+			cfs_rq = &rq->cfs;
+			if (!cfs_rq->nr_running)
+				goto idle;
+			continue;
+		}
+
+		cfs_rq = group_cfs_rq(se);
+	} while (cfs_rq);
+
+	p = task_of(se);
+
+	while (se) {
+		set_next_entity(cfs_rq_of(se), se);
+		se = parent_entity(se);
+	}
+
+	goto done;
+#endif
+
 simple:
 #endif
    if (prev)
@@ -8565,6 +8742,10 @@ done: __maybe_unused;
    qos_schedule_throttle(p);
 #endif
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_smt_expel(this_cpu, p);
+#endif
+
    return p;
idle:
@@ -8598,6 +8779,10 @@ done: __maybe_unused;
     */
    update_idle_rq_clock_pelt(rq);
+#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
+	qos_smt_expel(this_cpu, NULL);
+#endif
+
    return NULL;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7393c1a62513..701a165639e9 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1406,6 +1406,11 @@ do {						\
 } while (0)
#ifdef CONFIG_QOS_SCHED
+enum task_qos_level {
+	QOS_LEVEL_OFFLINE = -1,	/* levels below QOS_LEVEL_ONLINE are treated as offline */
+	QOS_LEVEL_ONLINE = 0,	/* levels >= this are treated as online */
+	QOS_LEVEL_MAX	/* implicit value 1 */
+};
 void init_qos_hrtimer(int cpu);
 #endif
-- 
2.34.1


    

2024

2023

2022

2021

2020

2019

[PATCH v3 openEuler-23.09 2/8] sched: Implement the function of qos smt expeller