From: Zhang Qiao <zhangqiao22@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I7WWMX
CVE: NA
--------------------------------
Previously, when the cpu was detected to be overloaded, offline tasks were throttled in exit_to_user_mode_loop() before returning to user mode. Some architectures (e.g. arm64) could not support the QoS scheduler, because on those platforms a task does not return to userspace via exit_to_user_mode_loop(). To solve this problem and support the QoS scheduler on all architectures, when offline tasks need to be throttled, set the TIF_NOTIFY_RESUME flag on an offline task when it is picked, and throttle it in resume_user_mode_work().
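In outline, the new flow looks like this (a condensed sketch of the hunks below, not the verbatim patched code):

	/* Pick path: pick_next_task_fair() calls qos_schedule_throttle(). */
	static void qos_schedule_throttle(struct task_struct *p)
	{
		/* On an overloaded cpu, mark a picked offline task so that
		 * it runs the notify-resume work on its way back out.
		 */
		if (this_cpu_read(qos_cpu_overload) && is_offline_task(p))
			set_notify_resume(p);	/* sets TIF_NOTIFY_RESUME */
	}

	/* Resume path: resume_user_mode_work() calls
	 * sched_qos_offline_wait(), which sleeps in
	 * schedule_timeout_killable() intervals until the task's group is
	 * no longer offline or a fatal signal is pending.
	 */

Since every architecture dispatches its TIF_NOTIFY_RESUME work through resume_user_mode_work(), the throttling point no longer depends on the generic entry code.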
Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
---
 include/linux/resume_user_mode.h |  5 +++++
 kernel/entry/common.c            |  7 +------
 kernel/sched/fair.c              | 33 ++++++++++++++++++++++++++++++--
 3 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/include/linux/resume_user_mode.h b/include/linux/resume_user_mode.h
index f8f3e958e9cf..255372856812 100644
--- a/include/linux/resume_user_mode.h
+++ b/include/linux/resume_user_mode.h
@@ -59,6 +59,11 @@ static inline void resume_user_mode_work(struct pt_regs *regs)
 	blkcg_maybe_throttle_current();
 
 	rseq_handle_notify_resume(NULL, regs);
+
+#ifdef CONFIG_QOS_SCHED
+	sched_qos_offline_wait();
+#endif
+
 }
 
 #endif /* LINUX_RESUME_USER_MODE_H */
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 43698b7dd2ec..d7ee4bc3f2ba 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -170,10 +170,6 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs,
 		if (ti_work & _TIF_NOTIFY_RESUME)
 			resume_user_mode_work(regs);
 
-#ifdef CONFIG_QOS_SCHED
-		sched_qos_offline_wait();
-#endif
-
 		/* Architecture specific TIF work */
 		arch_exit_to_user_mode_work(regs, ti_work);
 
@@ -204,8 +200,7 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 		tick_nohz_user_enter_prepare();
 
 	ti_work = read_thread_flags();
-	if (unlikely((ti_work & EXIT_TO_USER_MODE_WORK) ||
-	    sched_qos_cpu_overload()))
+	if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
 		ti_work = exit_to_user_mode_loop(regs, ti_work);
 
 	arch_exit_to_user_mode_prepare(regs, ti_work);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20478a3285af..59bfa1c59c82 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -59,6 +59,7 @@
 
 #ifdef CONFIG_QOS_SCHED
 #include <linux/delay.h>
+#include <linux/resume_user_mode.h>
 #endif
 
 /*
@@ -8135,6 +8136,12 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 }
 
 #ifdef CONFIG_QOS_SCHED
+
+static inline bool is_offline_task(struct task_struct *p)
+{
+	return task_group(p)->qos_level == QOS_LEVEL_OFFLINE;
+}
+
 static void start_qos_hrtimer(int cpu);
 static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq)
 {
@@ -8289,9 +8296,10 @@ void sched_qos_offline_wait(void)
 		rcu_read_lock();
 		qos_level = task_group(current)->qos_level;
 		rcu_read_unlock();
-		if (qos_level != -1 || signal_pending(current))
+		if (qos_level != -1 || fatal_signal_pending(current))
 			break;
-		msleep_interruptible(sysctl_offline_wait_interval);
+
+		schedule_timeout_killable(msecs_to_jiffies(sysctl_offline_wait_interval));
 	}
 }
 
@@ -8330,6 +8338,23 @@ void init_qos_hrtimer(int cpu)
 	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
 	hrtimer->function = qos_overload_timer_handler;
 }
+
+/*
+ * To avoid priority inversion issues, when this cpu is qos_cpu_overload,
+ * we should schedule offline tasks to run so that they can leave kernel
+ * critical sections, and throttle them before returning to user mode.
+ */
+static void qos_schedule_throttle(struct task_struct *p)
+{
+	if (unlikely(current->flags & PF_KTHREAD))
+		return;
+
+	if (unlikely(this_cpu_read(qos_cpu_overload))) {
+		if (is_offline_task(p))
+			set_notify_resume(p);
+	}
+}
+
 #endif
 
 #ifdef CONFIG_SMP
@@ -8492,6 +8517,10 @@ done: __maybe_unused;
 	update_misfit_status(p, rq);
 	sched_fair_update_stop_tick(rq, p);
 
+#ifdef CONFIG_QOS_SCHED
+	qos_schedule_throttle(p);
+#endif
+
 	return p;
 
 idle: