hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I7YS6M
-------------------------------
Expand qos_level from {-1, 0} to [-2, 2] to distinguish tasks that are
expected to run at extremely high or low priority. Use qos_level_weight
to reweight the shares when calculating a group's weight. Meanwhile,
set offline tasks' scheduling policy to SCHED_IDLE so that they can be
preempted at check_preempt_wakeup().
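For illustration only (editorial note, not part of the patch): the reweighting
is plain integer arithmetic, shares * weight / 100, with the per-level weight
taken from the qos_level_weights table. A minimal userspace sketch of that
arithmetic, using the default weight table introduced by this patch; the
helper names below are made up for the example:

#include <stdio.h>

/* Default per-level weights from this patch, indexed by qos_level + 2. */
static const long default_weights[5] = { 1, 10, 100, 1000, 10000 };

/* Scale a group's shares by its qos_level weight (100 == online level). */
static long reweight_shares(long shares, int qos_level)
{
	return shares * default_weights[qos_level + 2] / 100;
}

int main(void)
{
	/* A group with cpu.shares == 1024 at each qos_level. */
	for (int level = -2; level <= 2; level++)
		printf("qos_level %2d -> effective shares %ld\n",
		       level, reweight_shares(1024, level));
	return 0;
}

So, for example, a group with 1024 shares at qos_level 2 contributes
1024 * 10000 / 100 = 102400 effective shares, while the kernel-side
qos_reweight() below additionally clamps the result to [MIN_SHARES, MAX_SHARES].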
Signed-off-by: Zhao Wenhui <zhaowenhui8@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 include/linux/sched/sysctl.h           |  4 ++
 init/Kconfig                           |  9 ++++
 kernel/sched/core.c                    | 24 ++++++----
 kernel/sched/fair.c                    | 64 ++++++++++++++++++++++++--
 kernel/sched/sched.h                   | 26 ++++++++++-
 kernel/sysctl.c                        |  9 ++++
 8 files changed, 125 insertions(+), 13 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index a64923c8f1c9..1b591206471b 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -181,6 +181,7 @@ CONFIG_CGROUP_PERF=y
 CONFIG_CGROUP_BPF=y
 # CONFIG_CGROUP_MISC is not set
 CONFIG_QOS_SCHED=y
+CONFIG_QOS_SCHED_MULTILEVEL=y
 # CONFIG_CGROUP_DEBUG is not set
 CONFIG_SOCK_CGROUP_DATA=y
 CONFIG_CGROUP_FILES=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index a0669731cef4..73d87040d650 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -190,6 +190,7 @@ CONFIG_FAIR_GROUP_SCHED=y
 CONFIG_QOS_SCHED_SMT_EXPELLER=y
 CONFIG_CFS_BANDWIDTH=y
 CONFIG_QOS_SCHED=y
+CONFIG_QOS_SCHED_MULTILEVEL=y
 CONFIG_RT_GROUP_SCHED=y
 CONFIG_SCHED_MM_CID=y
 CONFIG_QOS_SCHED_DYNAMIC_AFFINITY=y
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index 28d9be8e4614..3a02a76b08ca 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -37,4 +37,8 @@ extern unsigned int sysctl_overload_detect_period;
 extern unsigned int sysctl_offline_wait_interval;
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+extern unsigned int sysctl_qos_level_weights[];
+#endif
+
 #endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/init/Kconfig b/init/Kconfig
index a12109fe4385..12a5ffbb5252 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1015,6 +1015,15 @@ config QOS_SCHED_SMT_EXPELLER
	  This feature enable online tasks to expel offline tasks
	  on the smt sibling cpus, and exclusively occupy CPU resources.
 
+config QOS_SCHED_MULTILEVEL
+	bool "Multiple qos level task scheduling"
+	depends on QOS_SCHED
+	default n
+	help
+	  This feature enable multiple qos level on task scheduling.
+	  Expand the qos_level to [-2,2] to distinguish the tasks expected
+	  to be with extremely high or low priority level.
+
 config FAIR_GROUP_SCHED
	bool "Group scheduling for SCHED_OTHER"
	depends on CGROUP_SCHED
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 652c06bd546d..238b5b55c38a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7689,7 +7689,7 @@ static int __sched_setscheduler(struct task_struct *p,
	 * other than SCHED_IDLE, the online task preemption and cpu resource
	 * isolation will be invalid, so return -EINVAL in this case.
	 */
-	if (unlikely(task_group(p)->qos_level == -1 && !idle_policy(policy))) {
+	if (unlikely(is_offline_level(task_group(p)->qos_level) && !idle_policy(policy))) {
		retval = -EINVAL;
		goto unlock;
	}
@@ -10356,7 +10356,7 @@ static void sched_change_qos_group(struct task_struct *tsk, struct task_group *t
	 */
	if (!(tsk->flags & PF_EXITING) &&
	    !task_group_is_autogroup(tg) &&
-	    (tg->qos_level == -1)) {
+	    (is_offline_level(tg->qos_level))) {
		attr.sched_priority = 0;
		attr.sched_policy = SCHED_IDLE;
		attr.sched_nice = PRIO_TO_NICE(tsk->static_prio);
@@ -10385,7 +10385,7 @@ void sched_move_offline_task(struct task_struct *p)
 {
	struct offline_args *args;
 
-	if (unlikely(task_group(p)->qos_level != -1))
+	if (unlikely(!is_offline_level(task_group(p)->qos_level)))
		return;
 
	args = kmalloc(sizeof(struct offline_args), GFP_ATOMIC);
@@ -11275,7 +11275,7 @@ static int tg_change_scheduler(struct task_group *tg, void *data)
	struct cgroup_subsys_state *css = &tg->css;
 
	tg->qos_level = qos_level;
-	if (qos_level == -1)
+	if (is_offline_level(qos_level))
		policy = SCHED_IDLE;
	else
		policy = SCHED_NORMAL;
@@ -11297,19 +11297,27 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
	if (!tg->se[0])
		return -EINVAL;
 
-	if (qos_level != -1 && qos_level != 0)
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	if (qos_level > QOS_LEVEL_HIGH_EX || qos_level < QOS_LEVEL_OFFLINE_EX)
+#else
+	if (qos_level != QOS_LEVEL_OFFLINE && qos_level != QOS_LEVEL_ONLINE)
+#endif
		return -EINVAL;
 
	if (tg->qos_level == qos_level)
		goto done;
 
-	if (tg->qos_level == -1 && qos_level == 0)
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	if (!is_normal_level(tg->qos_level))
+#else
+	if (tg->qos_level == QOS_LEVEL_OFFLINE && qos_level == QOS_LEVEL_ONLINE)
+#endif
		return -EINVAL;
 
	cpus_read_lock();
-	if (qos_level == -1)
+	if (is_offline_level(qos_level))
		cfs_bandwidth_usage_inc();
-	else
+	else if (is_offline_level(tg->qos_level) && !is_offline_level(qos_level))
		cfs_bandwidth_usage_dec();
	cpus_read_unlock();
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ec2be284d185..bd833504f741 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -199,6 +199,23 @@ static bool qos_smt_expelled(int this_cpu);
 static DEFINE_PER_CPU(int, qos_smt_status);
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+#define QOS_LEVEL_WEIGHT_OFFLINE_EX 1
+#define QOS_LEVEL_WEIGHT_OFFLINE 10
+#define QOS_LEVEL_WEIGHT_ONLINE 100
+#define QOS_LEVEL_WEIGHT_HIGH 1000
+#define QOS_LEVEL_WEIGHT_HIGH_EX 10000
+
+unsigned int sysctl_qos_level_weights[5] = {
+	QOS_LEVEL_WEIGHT_OFFLINE_EX,
+	QOS_LEVEL_WEIGHT_OFFLINE,
+	QOS_LEVEL_WEIGHT_ONLINE,
+	QOS_LEVEL_WEIGHT_HIGH,
+	QOS_LEVEL_WEIGHT_HIGH_EX,
+};
+static long qos_reweight(long shares, struct task_group *tg);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -3537,6 +3554,9 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
	struct task_group *tg = cfs_rq->tg;
 
	tg_shares = READ_ONCE(tg->shares);
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	tg_shares = qos_reweight(tg_shares, tg);
+#endif
 
	load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
 
@@ -3583,6 +3603,9 @@ static void update_cfs_group(struct sched_entity *se)
 
 #ifndef CONFIG_SMP
	shares = READ_ONCE(gcfs_rq->tg->shares);
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	shares = qos_reweight(shares, gcfs_rq->tg);
+#endif
 
	if (likely(se->load.weight == shares))
		return;
@@ -8317,7 +8340,7 @@ static inline void cancel_qos_timer(int cpu)
 
 static inline bool is_offline_task(struct task_struct *p)
 {
-	return task_group(p)->qos_level == -1;
+	return task_group(p)->qos_level < QOS_LEVEL_ONLINE;
 }
 
 static void start_qos_hrtimer(int cpu);
@@ -8510,7 +8533,7 @@ static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
	if (unlikely(__this_cpu_read(qos_cpu_overload)))
		return false;
 
-	if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
+	if (unlikely(cfs_rq && is_offline_level(cfs_rq->tg->qos_level) &&
		!sched_idle_cpu(smp_processor_id()) &&
		cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
		throttle_qos_cfs_rq(cfs_rq);
@@ -8526,7 +8549,7 @@ static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
	struct rq_flags rf;
 
	rq_lock_irqsave(rq, &rf);
-	if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
+	if (is_offline_level(cfs_rq->tg->qos_level) && cfs_rq_throttled(cfs_rq))
		unthrottle_qos_cfs_rq(cfs_rq);
	rq_unlock_irqrestore(rq, &rf);
 }
@@ -8539,7 +8562,7 @@ void sched_qos_offline_wait(void)
		rcu_read_lock();
		qos_level = task_group(current)->qos_level;
		rcu_read_unlock();
-		if (qos_level != -1 || fatal_signal_pending(current))
+		if (!is_offline_level(qos_level) || fatal_signal_pending(current))
			break;
 
		schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
@@ -8569,6 +8592,39 @@ static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
	return HRTIMER_NORESTART;
 }
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+static long qos_reweight(long shares, struct task_group *tg)
+{
+	long qos_weight = 100;
+	long div = 100;
+	long scale_shares;
+
+	switch (tg->qos_level) {
+	case QOS_LEVEL_OFFLINE_EX:
+		qos_weight = sysctl_qos_level_weights[0];
+		break;
+	case QOS_LEVEL_OFFLINE:
+		qos_weight = sysctl_qos_level_weights[1];
+		break;
+	case QOS_LEVEL_ONLINE:
+		qos_weight = sysctl_qos_level_weights[2];
+		break;
+	case QOS_LEVEL_HIGH:
+		qos_weight = sysctl_qos_level_weights[3];
+		break;
+	case QOS_LEVEL_HIGH_EX:
+		qos_weight = sysctl_qos_level_weights[4];
+		break;
+	}
+	if (qos_weight > LONG_MAX / shares)
+		scale_shares = LONG_MAX / div;
+	else
+		scale_shares = shares * qos_weight / div;
+	scale_shares = clamp_t(long, scale_shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
+	return scale_shares;
+}
+#endif
+
 static void start_qos_hrtimer(int cpu)
 {
	ktime_t time;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0d981063bf48..5782b770e120 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1420,11 +1420,20 @@ do { \
 } while (0)
 
 #ifdef CONFIG_QOS_SCHED
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
 enum task_qos_level {
+	QOS_LEVEL_OFFLINE_EX = -2,
	QOS_LEVEL_OFFLINE = -1,
	QOS_LEVEL_ONLINE = 0,
-	QOS_LEVEL_MAX
+	QOS_LEVEL_HIGH = 1,
+	QOS_LEVEL_HIGH_EX = 2
 };
+#else
+enum task_qos_level {
+	QOS_LEVEL_OFFLINE = -1,
+	QOS_LEVEL_ONLINE = 0,
+};
+#endif
 void init_qos_hrtimer(int cpu);
 #endif
 
@@ -3269,6 +3278,21 @@ static inline int qos_idle_policy(int policy)
 {
	return policy == QOS_LEVEL_OFFLINE;
 }
+
+static inline int is_high_level(long qos_level)
+{
+	return qos_level > QOS_LEVEL_ONLINE;
+}
+
+static inline int is_normal_level(long qos_level)
+{
+	return qos_level == QOS_LEVEL_ONLINE;
+}
+
+static inline int is_offline_level(long qos_level)
+{
+	return qos_level < QOS_LEVEL_ONLINE;
+}
 #endif
 
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e9af234bf882..1714abd73f23 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2068,6 +2068,15 @@ static struct ctl_table kern_table[] = {
		.extra1		= SYSCTL_ONE_HUNDRED,
		.extra2		= &one_thousand,
	},
+#endif
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	{
+		.procname	= "qos_level_weights",
+		.data		= &sysctl_qos_level_weights,
+		.maxlen		= 5*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif
	{
		.procname	= "max_rcu_stall_to_panic",
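
Usage note (editorial, not part of the patch): with CONFIG_QOS_SCHED_MULTILEVEL
enabled, the level is written through the cpu cgroup's cpu.qos_level file and
the five per-level weights through the kernel.qos_level_weights sysctl exposed
by the kern_table entry above. A minimal sketch, assuming a cgroup-v1 cpu
hierarchy mounted at /sys/fs/cgroup/cpu and an existing group named "batch"
(both paths and the weight values are example assumptions):

#include <stdio.h>
#include <stdlib.h>

/* Write a string to a kernel control file, aborting on error. */
static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) == EOF) {
		perror(path);
		exit(1);
	}
	fclose(f);
}

int main(void)
{
	/* Mark the "batch" group as extremely low priority (offline_ex). */
	write_str("/sys/fs/cgroup/cpu/batch/cpu.qos_level", "-2");

	/* Retune the five weights (offline_ex, offline, online, high, high_ex). */
	write_str("/proc/sys/kernel/qos_level_weights", "1 10 100 2000 20000");

	return 0;
}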