From: Sasha Levin <sashal@kernel.org>
stable inclusion
from stable-v5.10.82
commit d35250ec5a23771187c85a46e6812d5943b5c13e
bugzilla: 185877 https://gitee.com/openeuler/kernel/issues/I4QU6V
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
This reverts commit 94902ee2996a7f71471138093495df452dab87b6 which is upstream commit ef54c1a476aef7eef26fe13ea10dc090952c00f8.
Reverting for now due to issues that need to get fixed upstream.
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 include/linux/perf_event.h |   1 -
 kernel/events/core.c       | 142 ++++++++++++++++---------------------
 2 files changed, 63 insertions(+), 80 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5e50bd589394..a08a02c77559 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -615,7 +615,6 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK_DATA	0x08
 #define PERF_ATTACH_ITRACE	0x10
 #define PERF_ATTACH_SCHED_CB	0x20
-#define PERF_ATTACH_CHILD	0x40
 
 struct perf_cgroup;
 struct perf_buffer;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 908417736f4e..639b99a318db 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2209,26 +2209,6 @@ static void perf_group_detach(struct perf_event *event)
 		perf_event__header_size(leader);
 }
 
-static void sync_child_event(struct perf_event *child_event);
-
-static void perf_child_detach(struct perf_event *event)
-{
-	struct perf_event *parent_event = event->parent;
-
-	if (!(event->attach_state & PERF_ATTACH_CHILD))
-		return;
-
-	event->attach_state &= ~PERF_ATTACH_CHILD;
-
-	if (WARN_ON_ONCE(!parent_event))
-		return;
-
-	lockdep_assert_held(&parent_event->child_mutex);
-
-	sync_child_event(event);
-	list_del_init(&event->child_list);
-}
-
 static bool is_orphaned_event(struct perf_event *event)
 {
 	return event->state == PERF_EVENT_STATE_DEAD;
@@ -2336,7 +2316,6 @@ group_sched_out(struct perf_event *group_event,
 }
 
 #define DETACH_GROUP	0x01UL
-#define DETACH_CHILD	0x02UL
 
 /*
  * Cross CPU call to remove a performance event
@@ -2360,8 +2339,6 @@ __perf_remove_from_context(struct perf_event *event,
 	event_sched_out(event, cpuctx, ctx);
 	if (flags & DETACH_GROUP)
 		perf_group_detach(event);
-	if (flags & DETACH_CHILD)
-		perf_child_detach(event);
 	list_del_event(event, ctx);
 
 	if (!ctx->nr_events && ctx->is_active) {
@@ -2390,21 +2367,25 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla
 	lockdep_assert_held(&ctx->mutex);
 
+	event_function_call(event, __perf_remove_from_context, (void *)flags);
+
 	/*
-	 * Because of perf_event_exit_task(), perf_remove_from_context() ought
-	 * to work in the face of TASK_TOMBSTONE, unlike every other
-	 * event_function_call() user.
+	 * The above event_function_call() can NO-OP when it hits
+	 * TASK_TOMBSTONE. In that case we must already have been detached
+	 * from the context (by perf_event_exit_event()) but the grouping
+	 * might still be in-tact.
 	 */
-	raw_spin_lock_irq(&ctx->lock);
-	if (!ctx->is_active) {
-		__perf_remove_from_context(event, __get_cpu_context(ctx),
-					   ctx, (void *)flags);
+	WARN_ON_ONCE(event->attach_state & PERF_ATTACH_CONTEXT);
+	if ((flags & DETACH_GROUP) &&
+	    (event->attach_state & PERF_ATTACH_GROUP)) {
+		/*
+		 * Since in that case we cannot possibly be scheduled, simply
+		 * detach now.
+		 */
+		raw_spin_lock_irq(&ctx->lock);
+		perf_group_detach(event);
 		raw_spin_unlock_irq(&ctx->lock);
-		return;
 	}
-	raw_spin_unlock_irq(&ctx->lock);
-
-	event_function_call(event, __perf_remove_from_context, (void *)flags);
 }
 
 /*
 * Cross CPU call to disable a performance event
@@ -12296,17 +12277,14 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
-static void sync_child_event(struct perf_event *child_event)
+static void sync_child_event(struct perf_event *child_event,
+			      struct task_struct *child)
 {
 	struct perf_event *parent_event = child_event->parent;
 	u64 child_val;
 
-	if (child_event->attr.inherit_stat) {
-		struct task_struct *task = child_event->ctx->task;
-
-		if (task && task != TASK_TOMBSTONE)
-			perf_event_read_event(child_event, task);
-	}
+	if (child_event->attr.inherit_stat)
+		perf_event_read_event(child_event, child);
 
 	child_val = perf_event_count(child_event);
 
@@ -12321,53 +12299,60 @@ static void sync_child_event(struct perf_event *child_event)
 }
 
 static void
-perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
+perf_event_exit_event(struct perf_event *child_event,
+		      struct perf_event_context *child_ctx,
+		      struct task_struct *child)
 {
-	struct perf_event *parent_event = event->parent;
-	unsigned long detach_flags = 0;
-
-	if (parent_event) {
-		/*
-		 * Do not destroy the 'original' grouping; because of the
-		 * context switch optimization the original events could've
-		 * ended up in a random child task.
-		 *
-		 * If we were to destroy the original group, all group related
-		 * operations would cease to function properly after this
-		 * random child dies.
-		 *
-		 * Do destroy all inherited groups, we don't care about those
-		 * and being thorough is better.
-		 */
-		detach_flags = DETACH_GROUP | DETACH_CHILD;
-		mutex_lock(&parent_event->child_mutex);
-	}
+	struct perf_event *parent_event = child_event->parent;
 
-	perf_remove_from_context(event, detach_flags);
+	/*
+	 * Do not destroy the 'original' grouping; because of the context
+	 * switch optimization the original events could've ended up in a
+	 * random child task.
+	 *
+	 * If we were to destroy the original group, all group related
+	 * operations would cease to function properly after this random
+	 * child dies.
+	 *
+	 * Do destroy all inherited groups, we don't care about those
+	 * and being thorough is better.
+	 */
+	raw_spin_lock_irq(&child_ctx->lock);
+	WARN_ON_ONCE(child_ctx->is_active);
 
-	raw_spin_lock_irq(&ctx->lock);
-	if (event->state > PERF_EVENT_STATE_EXIT)
-		perf_event_set_state(event, PERF_EVENT_STATE_EXIT);
-	raw_spin_unlock_irq(&ctx->lock);
+	if (parent_event)
+		perf_group_detach(child_event);
+	list_del_event(child_event, child_ctx);
+	perf_event_set_state(child_event, PERF_EVENT_STATE_EXIT); /* is_event_hup() */
+	raw_spin_unlock_irq(&child_ctx->lock);
 
 	/*
-	 * Child events can be freed.
+	 * Parent events are governed by their filedesc, retain them.
 	 */
-	if (parent_event) {
-		mutex_unlock(&parent_event->child_mutex);
-		/*
-		 * Kick perf_poll() for is_event_hup();
-		 */
-		perf_event_wakeup(parent_event);
-		free_event(event);
-		put_event(parent_event);
+	if (!parent_event) {
+		perf_event_wakeup(child_event);
 		return;
 	}
+	/*
+	 * Child events can be cleaned up.
+	 */
+
+	sync_child_event(child_event, child);
 
 	/*
-	 * Parent events are governed by their filedesc, retain them.
+	 * Remove this event from the parent's list
+	 */
+	WARN_ON_ONCE(parent_event->ctx->parent_ctx);
+	mutex_lock(&parent_event->child_mutex);
+	list_del_init(&child_event->child_list);
+	mutex_unlock(&parent_event->child_mutex);
+
+	/*
+	 * Kick perf_poll() for is_event_hup().
 	 */
-	perf_event_wakeup(event);
+	perf_event_wakeup(parent_event);
+	free_event(child_event);
+	put_event(parent_event);
 }
 
 static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
@@ -12424,7 +12409,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	perf_event_task(child, child_ctx, 0);
 
 	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx);
+		perf_event_exit_event(child_event, child_ctx, child);
 
 	mutex_unlock(&child_ctx->mutex);
 
@@ -12684,7 +12669,6 @@ inherit_event(struct perf_event *parent_event,
 	 */
 	raw_spin_lock_irqsave(&child_ctx->lock, flags);
 	add_event_to_ctx(child_event, child_ctx);
-	child_event->attach_state |= PERF_ATTACH_CHILD;
 	raw_spin_unlock_irqrestore(&child_ctx->lock, flags);
 
 	/*