From: Zhang Qiao zhangqiao22@huawei.com
hulk inclusion category: bugfix bugzilla: 177205, https://gitee.com/openeuler/kernel/issues/I484Y1 CVE: NA
--------------------------------
There is a small race between copy_process() and sched_fork() in which child->sched_task_group can point to an already freed task group.
    parent doing fork()          | someone moving the parent
                                 | to another cgroup
  -------------------------------+-------------------------------
  copy_process()
    + dup_task_struct()<1>
                                   parent move to another cgroup,
                                   and free the old cgroup. <2>
    + sched_fork()
      + __set_task_cpu()<3>
      + task_fork_fair()
        + sched_slice()<4>
In the worst case, this bug can lead to a use-after-free and cause a panic, as in the sequence shown above: (1) the parent copies its sched_task_group to the child at <1>; (2) someone moves the parent to another cgroup and frees the old cgroup at <2>; (3) the sched_task_group and cfs_rq that belong to the old cgroup are accessed at <3> and <4>, which causes a panic:
[89249.732198] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[89249.732701] PGD 8000001fa0a86067 P4D 8000001fa0a86067 PUD 2029955067 PMD 0
[89249.733005] Oops: 0000 [#1] SMP PTI
[89249.733288] CPU: 7 PID: 648398 Comm: ebizzy Kdump: loaded Tainted: G OE --------- - - 4.18.0.x86_64+ #1
[89249.734318] RIP: 0010:sched_slice+0x84/0xc0
....
[89249.737910] Call Trace:
[89249.738181] task_fork_fair+0x81/0x120
[89249.738457] sched_fork+0x132/0x240
[89249.738732] copy_process.part.5+0x675/0x20e0
[89249.739010] ? __handle_mm_fault+0x63f/0x690
[89249.739286] _do_fork+0xcd/0x3b0
[89249.739558] do_syscall_64+0x5d/0x1d0
[89249.739830] entry_SYSCALL_64_after_hwframe+0x65/0xca
[89249.740107] RIP: 0033:0x7f04418cd7e1
When a new process is forked, cgroup_post_fork() associates it with the cgroup of its parent. Therefore, this commit moves the __set_task_cpu() and task_fork() calls, which access cgroup-related fields (sched_task_group and cfs_rq), into a new sched_post_fork() and calls sched_post_fork() after cgroup_post_fork().
Fixes: 8323f26ce342 ("sched: Fix race in task_group") Signed-off-by: Zhang Qiao zhangqiao22@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/sched/task.h | 1 + kernel/fork.c | 1 + kernel/sched/core.c | 36 ++++++++++++++++++++---------------- 3 files changed, 22 insertions(+), 16 deletions(-)
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 44c6f15800ff5..5046980ecb0f8 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -33,6 +33,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu);
extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p);
void __noreturn do_task_dead(void); diff --git a/kernel/fork.c b/kernel/fork.c index e306f8925008b..403b8a3315979 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2158,6 +2158,7 @@ static __latent_entropy struct task_struct *copy_process(
proc_fork_connector(p); cgroup_post_fork(p); + sched_post_fork(p); cgroup_threadgroup_change_end(current); perf_event_fork(p);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e362fe5c84b66..d0d6153df7357 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2348,8 +2348,6 @@ static inline void init_schedstats(void) {} */ int sched_fork(unsigned long clone_flags, struct task_struct *p) { - unsigned long flags; - __sched_fork(clone_flags, p); /* * We mark the process as NEW here. This guarantees that @@ -2393,6 +2391,26 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
init_entity_runnable_average(&p->se);
+#ifdef CONFIG_SCHED_INFO + if (likely(sched_info_on())) + memset(&p->sched_info, 0, sizeof(p->sched_info)); +#endif +#if defined(CONFIG_SMP) + p->on_cpu = 0; +#endif + init_task_preempt_count(p); +#ifdef CONFIG_SMP + plist_node_init(&p->pushable_tasks, MAX_PRIO); + RB_CLEAR_NODE(&p->pushable_dl_tasks); +#endif + + return 0; +} + +void sched_post_fork(struct task_struct *p) +{ + unsigned long flags; + /* * The child is not yet in the pid-hash so no cgroup attach races, * and the cgroup is pinned to this child due to cgroup_fork() @@ -2410,20 +2428,6 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) if (p->sched_class->task_fork) p->sched_class->task_fork(p); raw_spin_unlock_irqrestore(&p->pi_lock, flags); - -#ifdef CONFIG_SCHED_INFO - if (likely(sched_info_on())) - memset(&p->sched_info, 0, sizeof(p->sched_info)); -#endif -#if defined(CONFIG_SMP) - p->on_cpu = 0; -#endif - init_task_preempt_count(p); -#ifdef CONFIG_SMP - plist_node_init(&p->pushable_tasks, MAX_PRIO); - RB_CLEAR_NODE(&p->pushable_dl_tasks); -#endif - return 0; }
unsigned long to_ratio(u64 period, u64 runtime)