hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8S4JH

-------------------------------
Expand qos_level from {-1, 0} to [-2, 2] to distinguish tasks that are
expected to run at extremely high or low priority. Use qos_level_weight
to reweight the shares when calculating a group's weight. Meanwhile, set
an offline task's scheduling policy to SCHED_IDLE so that it can be
preempted in check_preempt_wakeup().
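
As a worked example (ignoring scale_load(), with the default weight
table {1, 10, 100, 1000, 10000} and the default cpu.shares of 1024):
an offline group (qos_level == -1) is reweighted to 1024 * 10 / 100 =
102 effective shares, while an extremely-high group (qos_level == 2)
gets 1024 * 10000 / 100 = 102400, with the result clamped to
[MIN_SHARES, MAX_SHARES]. A standalone userspace sketch of this
arithmetic is appended after the diff.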
Signed-off-by: Zhao Wenhui <zhaowenhui8@huawei.com>
---
 init/Kconfig         |  9 ++++++
 kernel/sched/core.c  | 24 +++++++++-----
 kernel/sched/fair.c  | 74 +++++++++++++++++++++++++++++++++++++++++---
 kernel/sched/sched.h | 26 +++++++++++++++-
 4 files changed, 120 insertions(+), 13 deletions(-)

diff --git a/init/Kconfig b/init/Kconfig
index 869eea4108d0..f347e737205b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1049,6 +1049,15 @@ config QOS_SCHED_PRIO_LB
 	  for Qos scheduler, which prefer migrating online tasks and
 	  migrating offline tasks secondly between CPUs.
 
+config QOS_SCHED_MULTILEVEL
+	bool "Multiple qos level task scheduling"
+	depends on QOS_SCHED
+	default n
+	help
+	  This feature enables multiple qos levels for task scheduling.
+	  It expands qos_level to [-2,2] to distinguish tasks expected
+	  to run at extremely high or low priority.
+
 config FAIR_GROUP_SCHED
 	bool "Group scheduling for SCHED_OTHER"
 	depends on CGROUP_SCHED

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 7a0997e7e136..81cf1e396710 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -7764,7 +7764,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	 * other than SCHED_IDLE, the online task preemption and cpu resource
 	 * isolation will be invalid, so return -EINVAL in this case.
 	 */
-	if (unlikely(task_group(p)->qos_level == -1 && !idle_policy(policy))) {
+	if (unlikely(is_offline_level(task_group(p)->qos_level) && !idle_policy(policy))) {
 		retval = -EINVAL;
 		goto unlock;
 	}
@@ -10413,7 +10413,7 @@ static void sched_change_qos_group(struct task_struct *tsk, struct task_group *tg)
 	 */
 	if (!(tsk->flags & PF_EXITING) &&
 	    !task_group_is_autogroup(tg) &&
-	    (tg->qos_level == -1)) {
+	    (is_offline_level(tg->qos_level))) {
 		attr.sched_priority = 0;
 		attr.sched_policy = SCHED_IDLE;
 		attr.sched_nice = PRIO_TO_NICE(tsk->static_prio);
@@ -10442,7 +10442,7 @@ void sched_move_offline_task(struct task_struct *p)
 {
 	struct offline_args *args;
 
-	if (unlikely(task_group(p)->qos_level != -1))
+	if (unlikely(!is_offline_level(task_group(p)->qos_level)))
 		return;
 
 	args = kmalloc(sizeof(struct offline_args), GFP_ATOMIC);
@@ -11339,7 +11339,7 @@ static int tg_change_scheduler(struct task_group *tg, void *data)
 	struct cgroup_subsys_state *css = &tg->css;
 
 	tg->qos_level = qos_level;
-	if (qos_level == -1)
+	if (is_offline_level(qos_level))
 		policy = SCHED_IDLE;
 	else
 		policy = SCHED_NORMAL;
@@ -11361,19 +11361,27 @@ static int cpu_qos_write(struct cgroup_subsys_state *css,
 	if (!tg->se[0])
 		return -EINVAL;
 
-	if (qos_level != -1 && qos_level != 0)
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	if (qos_level > QOS_LEVEL_HIGH_EX || qos_level < QOS_LEVEL_OFFLINE_EX)
+#else
+	if (qos_level != QOS_LEVEL_OFFLINE && qos_level != QOS_LEVEL_ONLINE)
+#endif
 		return -EINVAL;
 
 	if (tg->qos_level == qos_level)
 		goto done;
 
-	if (tg->qos_level == -1 && qos_level == 0)
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	if (!is_normal_level(tg->qos_level))
+#else
+	if (tg->qos_level == QOS_LEVEL_OFFLINE && qos_level == QOS_LEVEL_ONLINE)
+#endif
 		return -EINVAL;
 
 	cpus_read_lock();
-	if (qos_level == -1)
+	if (is_offline_level(qos_level))
 		cfs_bandwidth_usage_inc();
-	else
+	else if (is_offline_level(tg->qos_level) && !is_offline_level(qos_level))
 		cfs_bandwidth_usage_dec();
 	cpus_read_unlock();
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 318258ea011e..72b905138d30 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -164,6 +164,23 @@ static DEFINE_PER_CPU(int, qos_smt_status);
 unsigned int sysctl_sched_prio_load_balance_enabled;
 #endif
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+#define QOS_LEVEL_WEIGHT_OFFLINE_EX	1
+#define QOS_LEVEL_WEIGHT_OFFLINE	10
+#define QOS_LEVEL_WEIGHT_ONLINE		100
+#define QOS_LEVEL_WEIGHT_HIGH		1000
+#define QOS_LEVEL_WEIGHT_HIGH_EX	10000
+
+unsigned int sysctl_qos_level_weights[5] = {
+	QOS_LEVEL_WEIGHT_OFFLINE_EX,
+	QOS_LEVEL_WEIGHT_OFFLINE,
+	QOS_LEVEL_WEIGHT_ONLINE,
+	QOS_LEVEL_WEIGHT_HIGH,
+	QOS_LEVEL_WEIGHT_HIGH_EX,
+};
+static long qos_reweight(long shares, struct task_group *tg);
+#endif
+
 #ifdef CONFIG_CFS_BANDWIDTH
 /*
  * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
@@ -262,6 +279,15 @@ static struct ctl_table sched_fair_sysctls[] = {
 		.extra1		= SYSCTL_ZERO,
 		.extra2		= SYSCTL_ONE,
 	},
+#endif
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	{
+		.procname	= "qos_level_weights",
+		.data		= &sysctl_qos_level_weights,
+		.maxlen		= 5*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
 #endif
 	{}
 };
@@ -3989,6 +4015,9 @@ static long calc_group_shares(struct cfs_rq *cfs_rq)
 	struct task_group *tg = cfs_rq->tg;
 
 	tg_shares = READ_ONCE(tg->shares);
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	tg_shares = qos_reweight(tg_shares, tg);
+#endif
 
 	load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
 
@@ -4035,6 +4064,10 @@ static void update_cfs_group(struct sched_entity *se)
 
 #ifndef CONFIG_SMP
 	shares = READ_ONCE(gcfs_rq->tg->shares);
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+	shares = qos_reweight(shares, gcfs_rq->tg);
+#endif
+
 #else
 	shares = calc_group_shares(gcfs_rq);
 #endif
@@ -8678,7 +8711,7 @@ static inline void cancel_qos_timer(int cpu)
 static inline bool is_offline_task(struct task_struct *p)
 {
-	return task_group(p)->qos_level == -1;
+	return task_group(p)->qos_level < QOS_LEVEL_ONLINE;
 }
 
 static void start_qos_hrtimer(int cpu);
@@ -8878,7 +8911,7 @@ static bool check_qos_cfs_rq(struct cfs_rq *cfs_rq)
 	if (unlikely(__this_cpu_read(qos_cpu_overload)))
 		return false;
 
-	if (unlikely(cfs_rq && cfs_rq->tg->qos_level < 0 &&
+	if (unlikely(cfs_rq && is_offline_level(cfs_rq->tg->qos_level) &&
 		     !sched_idle_cpu(smp_processor_id()) &&
 		     cfs_rq->h_nr_running == cfs_rq->idle_h_nr_running)) {
 
@@ -8898,7 +8931,7 @@ static inline void unthrottle_qos_sched_group(struct cfs_rq *cfs_rq)
 	struct rq_flags rf;
 
 	rq_lock_irqsave(rq, &rf);
-	if (cfs_rq->tg->qos_level == -1 && cfs_rq_throttled(cfs_rq))
+	if (is_offline_level(cfs_rq->tg->qos_level) && cfs_rq_throttled(cfs_rq))
 		unthrottle_qos_cfs_rq(cfs_rq);
 	rq_unlock_irqrestore(rq, &rf);
 }
@@ -8911,7 +8944,7 @@ void sched_qos_offline_wait(void)
 		rcu_read_lock();
 		qos_level = task_group(current)->qos_level;
 		rcu_read_unlock();
-		if (qos_level != -1 || fatal_signal_pending(current))
+		if (!is_offline_level(qos_level) || fatal_signal_pending(current))
 			break;
 
 		schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
@@ -8941,6 +8974,39 @@ static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
+static long qos_reweight(long shares, struct task_group *tg)
+{
+	long qos_weight = 100;
+	long div = 100;
+	long scale_shares;
+
+	switch (tg->qos_level) {
+	case QOS_LEVEL_OFFLINE_EX:
+		qos_weight = sysctl_qos_level_weights[0];
+		break;
+	case QOS_LEVEL_OFFLINE:
+		qos_weight = sysctl_qos_level_weights[1];
+		break;
+	case QOS_LEVEL_ONLINE:
+		qos_weight = sysctl_qos_level_weights[2];
+		break;
+	case QOS_LEVEL_HIGH:
+		qos_weight = sysctl_qos_level_weights[3];
+		break;
+	case QOS_LEVEL_HIGH_EX:
+		qos_weight = sysctl_qos_level_weights[4];
+		break;
+	}
+	if (qos_weight > LONG_MAX / shares)
+		scale_shares = LONG_MAX / div;
+	else
+		scale_shares = shares * qos_weight / div;
+	scale_shares = clamp_t(long, scale_shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
+	return scale_shares;
+}
+#endif
+
 static void start_qos_hrtimer(int cpu)
 {
 	ktime_t time;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4b679122d26f..19fe3c72e3fa 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1440,11 +1440,20 @@ do {						\
 } while (0)
 
 #ifdef CONFIG_QOS_SCHED
+#ifdef CONFIG_QOS_SCHED_MULTILEVEL
 enum task_qos_level {
+	QOS_LEVEL_OFFLINE_EX = -2,
 	QOS_LEVEL_OFFLINE = -1,
 	QOS_LEVEL_ONLINE = 0,
-	QOS_LEVEL_MAX
+	QOS_LEVEL_HIGH = 1,
+	QOS_LEVEL_HIGH_EX = 2
 };
+#else
+enum task_qos_level {
+	QOS_LEVEL_OFFLINE = -1,
+	QOS_LEVEL_ONLINE = 0,
+};
+#endif
 void init_qos_hrtimer(int cpu);
 #endif
 
@@ -3323,6 +3332,21 @@ static inline int qos_idle_policy(int policy)
 {
 	return policy == QOS_LEVEL_OFFLINE;
 }
+
+static inline int is_high_level(long qos_level)
+{
+	return qos_level > QOS_LEVEL_ONLINE;
+}
+
+static inline int is_normal_level(long qos_level)
+{
+	return qos_level == QOS_LEVEL_ONLINE;
+}
+
+static inline int is_offline_level(long qos_level)
+{
+	return qos_level < QOS_LEVEL_ONLINE;
+}
 #endif
 
 #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER
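
A minimal userspace sketch (editorial, not part of the patch) that
mirrors the qos_reweight() arithmetic above, so the per-level scaling
and the LONG_MAX overflow guard can be checked in isolation. The weight
table matches the sysctl_qos_level_weights defaults; MIN_SHARES and
MAX_SHARES use the kernel's values (2 and 1 << 18); scale_load() is
treated as a no-op, as it is on 32-bit kernels; clamp_long() is a
stand-in for the kernel's clamp_t().

#include <limits.h>
#include <stdio.h>

#define MIN_SHARES	2L
#define MAX_SHARES	(1L << 18)

/* Default weight table from the patch: index = qos_level + 2. */
static const long weights[5] = { 1, 10, 100, 1000, 10000 };

static long clamp_long(long val, long lo, long hi)
{
	return val < lo ? lo : (val > hi ? hi : val);
}

static long qos_reweight(long shares, int qos_level)
{
	long qos_weight = weights[qos_level + 2];
	long div = 100;
	long scale_shares;

	/* Same overflow guard as the kernel function. */
	if (qos_weight > LONG_MAX / shares)
		scale_shares = LONG_MAX / div;
	else
		scale_shares = shares * qos_weight / div;

	return clamp_long(scale_shares, MIN_SHARES, MAX_SHARES);
}

int main(void)
{
	int level;

	/* Effective shares of a default 1024-share group at each level. */
	for (level = -2; level <= 2; level++)
		printf("qos_level %2d: 1024 -> %6ld\n",
		       level, qos_reweight(1024, level));
	return 0;
}

Built with a plain gcc invocation, this prints 10, 102, 1024, 10240 and
102400 for qos_level -2 through 2, i.e. the 1:10:100:1000:10000 weight
ratios up to integer truncation.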