From: "Eric W. Biederman" ebiederm@xmission.com
mainline inclusion
from mainline-5.4-rc1
commit 154abafc68bfb7c2ef2ad5308a3b2de8968c3f61
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I3UKOW
CVE: NA
-------------------------------------------------
Remove workarounds that were written before there was a grace period after tasks left the runqueue in finish_task_switch().
In particular, now that tasks leaving the runqueue experience an RCU grace period, none of the work performed by task_rcu_dereference() except the rcu_dereference() is necessary, so replace task_rcu_dereference() with rcu_dereference().
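As an illustration, the caller-side change reduces to the following pattern (a sketch mirroring the task_numa_compare() hunk below; nothing beyond the APIs touched by this patch is assumed):

	rcu_read_lock();
	/* Previously: cur = task_rcu_dereference(&dst_rq->curr); */
	cur = rcu_dereference(dst_rq->curr);
	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
		cur = NULL;
	/* cur, if non-NULL, cannot be freed before rcu_read_unlock(). */
	rcu_read_unlock();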
Remove the code in rcuwait_wait_event() that checks to ensure the current task has not exited. It is no longer necessary, as it is now guaranteed that any running task will experience an RCU grace period after it leaves the runqueue.
Remove the comment in rcuwait_wake_up() as it is no longer relevant.
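To make the simplified pairing concrete, here is a minimal sketch of an rcuwait user (my_wait and my_cond are hypothetical names; the API is the one declared in include/linux/rcuwait.h):

	static struct rcuwait my_wait;	/* hypothetical wait point */
	static int my_cond;		/* hypothetical condition */

	/* Waiter: publishes current in w->task and blocks until the
	 * condition holds; after this patch an exiting task may use it. */
	rcuwait_wait_event(&my_wait, READ_ONCE(my_cond));

	/* Waker: rcu_dereference()s w->task and wakes it if set. */
	WRITE_ONCE(my_cond, 1);
	rcuwait_wake_up(&my_wait);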
[Li Hua: change task_rcu_dereference() to rcu_dereference() in sync_runqueues_membarrier_state()]
Ref: 8f95c90ceb54 ("sched/wait, RCU: Introduce rcuwait machinery")
Ref: 150593bf8693 ("sched/api: Introduce task_rcu_dereference() and try_get_task_struct()")
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Chris Metcalf <cmetcalf@ezchip.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Davidlohr Bueso <dave@stgolabs.net>
Cc: Kirill Tkhai <tkhai@yandex.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul E. McKenney <paulmck@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Russell King - ARM Linux admin <linux@armlinux.org.uk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: https://lkml.kernel.org/r/87lfurdpk9.fsf_-_@x220.int.ebiederm.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Li Hua <hucool.lihua@huawei.com>
Signed-off-by: Zheng Zucheng <zhengzucheng@huawei.com>
Conflicts:
	kernel/sched/membarrier.c
Reviewed-by: Cheng Jian <cj.chengjian@huawei.com>
Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/rcuwait.h    | 20 +++---------
 include/linux/sched/task.h |  1 -
 kernel/exit.c              | 67 --------------------------------------
 kernel/sched/fair.c        |  2 +-
 kernel/sched/membarrier.c  |  6 ++--
 5 files changed, 8 insertions(+), 88 deletions(-)
diff --git a/include/linux/rcuwait.h b/include/linux/rcuwait.h
index 90bfa3279a01c..810e1710a69c3 100644
--- a/include/linux/rcuwait.h
+++ b/include/linux/rcuwait.h
@@ -6,16 +6,11 @@
 /*
  * rcuwait provides a way of blocking and waking up a single
- * task in an rcu-safe manner; where it is forbidden to use
- * after exit_notify(). task_struct is not properly rcu protected,
- * unless dealing with rcu-aware lists, ie: find_task_by_*().
+ * task in an rcu-safe manner.
  *
- * Alternatively we have task_rcu_dereference(), but the return
- * semantics have different implications which would break the
- * wakeup side. The only time @task is non-nil is when a user is
- * blocked (or checking if it needs to) on a condition, and reset
- * as soon as we know that the condition has succeeded and are
- * awoken.
+ * The only time @task is non-nil is when a user is blocked (or
+ * checking if it needs to) on a condition, and reset as soon as we
+ * know that the condition has succeeded and are awoken.
  */
 struct rcuwait {
 	struct task_struct *task;
@@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
  */
 #define rcuwait_wait_event(w, condition)				\
 ({									\
-	/*								\
-	 * Complain if we are called after do_exit()/exit_notify(),	\
-	 * as we cannot rely on the rcu critical region for the	\
-	 * wakeup side.							\
-	 */								\
-	WARN_ON(current->exit_state);					\
-									\
 	rcu_assign_pointer((w)->task, current);				\
 	for (;;) {							\
 		/*							\
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 440a25d2b3482..1bf1f51db372f 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -105,7 +105,6 @@ static inline void put_task_struct_many(struct task_struct *t, int nr)
 	__put_task_struct(t);
 }
-struct task_struct *task_rcu_dereference(struct task_struct **ptask);
 void put_task_struct_rcu_user(struct task_struct *task);
 #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
diff --git a/kernel/exit.c b/kernel/exit.c
index 0d9cd7602ac3e..c739d83cba988 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -224,69 +224,6 @@ void release_task(struct task_struct *p)
 		goto repeat;
 }
-/*
- * Note that if this function returns a valid task_struct pointer (!NULL)
- * task->usage must remain >0 for the duration of the RCU critical section.
- */
-struct task_struct *task_rcu_dereference(struct task_struct **ptask)
-{
-	struct sighand_struct *sighand;
-	struct task_struct *task;
-
-	/*
-	 * We need to verify that release_task() was not called and thus
-	 * delayed_put_task_struct() can't run and drop the last reference
-	 * before rcu_read_unlock(). We check task->sighand != NULL,
-	 * but we can read the already freed and reused memory.
-	 */
-retry:
-	task = rcu_dereference(*ptask);
-	if (!task)
-		return NULL;
-
-	probe_kernel_address(&task->sighand, sighand);
-
-	/*
-	 * Pairs with atomic_dec_and_test() in put_task_struct(). If this task
-	 * was already freed we can not miss the preceding update of this
-	 * pointer.
-	 */
-	smp_rmb();
-	if (unlikely(task != READ_ONCE(*ptask)))
-		goto retry;
-
-	/*
-	 * We've re-checked that "task == *ptask", now we have two different
-	 * cases:
-	 *
-	 * 1. This is actually the same task/task_struct. In this case
-	 *    sighand != NULL tells us it is still alive.
-	 *
-	 * 2. This is another task which got the same memory for task_struct.
-	 *    We can't know this of course, and we can not trust
-	 *    sighand != NULL.
-	 *
-	 *    In this case we actually return a random value, but this is
-	 *    correct.
-	 *
-	 *    If we return NULL - we can pretend that we actually noticed that
-	 *    *ptask was updated when the previous task has exited. Or pretend
-	 *    that probe_slab_address(&sighand) reads NULL.
-	 *
-	 *    If we return the new task (because sighand is not NULL for any
-	 *    reason) - this is fine too. This (new) task can't go away before
-	 *    another gp pass.
-	 *
-	 *    And note: We could even eliminate the false positive if re-read
-	 *    task->sighand once again to avoid the falsely NULL. But this case
-	 *    is very unlikely so we don't care.
-	 */
-	if (!sighand)
-		return NULL;
-
-	return task;
-}
-
 void rcuwait_wake_up(struct rcuwait *w)
 {
 	struct task_struct *task;
@@ -306,10 +243,6 @@ void rcuwait_wake_up(struct rcuwait *w)
 	 */
 	smp_mb(); /* (B) */
-	/*
-	 * Avoid using task_rcu_dereference() magic as long as we are careful,
-	 * see comment in rcuwait_wait_event() regarding ->exit_state.
-	 */
 	task = rcu_dereference(w->task);
 	if (task)
 		wake_up_process(task);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ff86c37f0d292..1ebe39020a7b2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1668,7 +1668,7 @@ static void task_numa_compare(struct task_numa_env *env,
 		return;
 	rcu_read_lock();
-	cur = task_rcu_dereference(&dst_rq->curr);
+	cur = rcu_dereference(dst_rq->curr);
 	if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
 		cur = NULL;
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index c4ea07e857985..8c4e14e6544a8 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -114,7 +114,7 @@ static int membarrier_global_expedited(void)
 		 * leaves the prior task mm in place as an optimization when
 		 * scheduling a kthread.
 		 */
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p->flags & PF_KTHREAD)
 			continue;
@@ -183,7 +183,7 @@ static int membarrier_private_expedited(int flags)
 		 */
 		if (cpu == raw_smp_processor_id())
 			continue;
-		p = task_rcu_dereference(&cpu_rq(cpu)->curr);
+		p = rcu_dereference(cpu_rq(cpu)->curr);
 		if (p && p->mm == mm)
 			__cpumask_set_cpu(cpu, tmpmask);
 	}
@@ -249,7 +249,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
 		struct rq *rq = cpu_rq(cpu);
 		struct task_struct *p;
-		p = task_rcu_dereference(&rq->curr);
+		p = rcu_dereference(rq->curr);
 		if (p && p->mm == mm)
 			__cpumask_set_cpu(cpu, tmpmask);
 	}