From: Zhang Qiao zhangqiao22@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4VZJT CVE: NA
--------------------------------
Previously, when the CPU was detected to be overloaded, offline tasks were throttled in exit_to_user_mode_loop() before returning to user mode. Some architectures (e.g., arm64) could not support the QOS scheduler, because on those platforms a task does not return to userspace via exit_to_user_mode_loop(). To solve this problem and support the QOS scheduler on all architectures, when offline tasks need to be throttled we now set the TIF_NOTIFY_RESUME flag on an offline task when it is picked, and throttle it in tracehook_notify_resume().
Signed-off-by: Zhang Qiao zhangqiao22@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/tracehook.h | 4 ++++ kernel/entry/common.c | 7 +------ kernel/sched/fair.c | 34 +++++++++++++++++++++++++++------- 3 files changed, 32 insertions(+), 13 deletions(-)
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index b480e1a07ed8..5913deb26219 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -196,6 +196,10 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
mem_cgroup_handle_over_high(); blkcg_maybe_throttle_current(); +#ifdef CONFIG_QOS_SCHED + sched_qos_offline_wait(); +#endif + }
#endif /* <linux/tracehook.h> */ diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 18a29ca01bfe..cea3957ebdbc 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -160,10 +160,6 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, if (ti_work & _TIF_SIGPENDING) arch_do_signal(regs);
-#ifdef CONFIG_QOS_SCHED - sched_qos_offline_wait(); -#endif - if (ti_work & _TIF_NOTIFY_RESUME) { tracehook_notify_resume(regs); rseq_handle_notify_resume(NULL, regs); @@ -198,8 +194,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs) /* Flush pending rcuog wakeup before the last need_resched() check */ rcu_nocb_flush_deferred_wakeup();
- if (unlikely((ti_work & EXIT_TO_USER_MODE_WORK) || - sched_qos_cpu_overload())) + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) ti_work = exit_to_user_mode_loop(regs, ti_work);
arch_exit_to_user_mode_prepare(regs, ti_work); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f577d581166b..e85ae008e9da 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -26,6 +26,7 @@ #endif #ifdef CONFIG_QOS_SCHED #include <linux/delay.h> +#include <linux/tracehook.h> #endif
/* @@ -7178,6 +7179,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ }
#ifdef CONFIG_QOS_SCHED + static void start_qos_hrtimer(int cpu); static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq) { @@ -7342,15 +7344,11 @@ void sched_qos_offline_wait(void) rcu_read_lock(); qos_level = task_group(current)->qos_level; rcu_read_unlock(); - if (qos_level != -1 || signal_pending(current)) + if (qos_level != -1 || fatal_signal_pending(current)) break; - msleep_interruptible(sysctl_offline_wait_interval); - } -}
-int sched_qos_cpu_overload(void) -{ - return __this_cpu_read(qos_cpu_overload); + schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval)); + } }
static enum hrtimer_restart qos_overload_timer_handler(struct hrtimer *timer) @@ -7383,6 +7381,23 @@ void init_qos_hrtimer(int cpu) hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); hrtimer->function = qos_overload_timer_handler; } + +/* + * To avoid Priority inversion issues, when this cpu is qos_cpu_overload, + * we should schedule offline tasks to run so that they can leave kernel + * critical sections, and throttle them before returning to user mode. + */ +static void qos_schedule_throttle(struct task_struct *p) +{ + if (unlikely(current->flags & PF_KTHREAD)) + return; + + if (unlikely(this_cpu_read(qos_cpu_overload))) { + if (task_group(p)->qos_level < 0) + set_notify_resume(p); + } +} + #endif
#ifdef CONFIG_QOS_SCHED_SMT_EXPELLER @@ -7690,9 +7705,14 @@ done: __maybe_unused;
update_misfit_status(p, rq);
+#ifdef CONFIG_QOS_SCHED + qos_schedule_throttle(p); +#endif + #ifdef CONFIG_QOS_SCHED_SMT_EXPELLER qos_smt_expel(this_cpu, p); #endif + return p;
idle: