From: Peter Zijlstra <peterz@infradead.org>
stable inclusion
from stable-v5.10.199
commit 487a8e24643a0effb2ba19cad3227fc75dc3c4b7
category: bugfix
bugzilla: https://gitee.com/src-openeuler/kernel/issues/I8B1X3
CVE: CVE-2023-5717
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 32671e3799ca2e4590773fd0e63aaa4229e50c06 upstream.
Because group consistency is non-atomic between parent (filedesc) and children (inherited) events, it is possible for a PERF_FORMAT_GROUP read() to try to sum non-matching counter groups -- with nonsensical results.
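
For reference, read() on a PERF_FORMAT_GROUP leader fills one buffer for the whole group, and the inherited children's counts are summed slot by slot into it. The sketch below follows the layout described in perf_event_open(2); the struct name is illustrative only, and it assumes PERF_FORMAT_ID plus both time fields were requested alongside PERF_FORMAT_GROUP:

  #include <stdint.h>

  /* Buffer returned by read() on a group leader opened with
   * PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED |
   * PERF_FORMAT_TOTAL_TIME_RUNNING (see perf_event_open(2)).  Slot i must
   * refer to the same event in the parent group and in every inherited
   * copy for the per-slot sums to be meaningful. */
  struct group_read_format {
          uint64_t nr;            /* number of events in the group */
          uint64_t time_enabled;
          uint64_t time_running;
          struct {
                  uint64_t value; /* summed count for group member i */
                  uint64_t id;
          } values[];             /* nr entries */
  };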
Add group_generation to distinguish the case where a parent group removes and adds an event, and thus ends up with the same number of events as, but a different configuration than, the inherited groups.
This became a problem when commit fa8c269353d5 ("perf/core: Invert perf_read_group() loops") flipped the order of child_list and sibling_list iteration. Previously the code iterated the group (sibling_list) first and, for each sibling, traversed its child_list. In that order, only the group composition of the parent is relevant. With the order flipped, the group composition of the child (inherited) events becomes an issue, and the mismatch in group composition becomes evident.
That said, even prior to that commit, reading a group that is not equally inherited was not broken, but it still made no sense.
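
To make the failure mode concrete, here is a purely illustrative toy model (none of the types, names or numbers below are kernel code): an equal event count cannot tell two groups apart, but a generation counter bumped on every attach/detach can:

  #include <stdint.h>
  #include <stdio.h>

  /* Toy model of a counter group -- invented for illustration only. */
  struct toy_group {
          unsigned int generation;   /* bumped on every add/remove */
          unsigned int nr;           /* number of events */
          const char *name[2];
          uint64_t value[2];
  };

  int main(void)
  {
          /* Parent (filedesc) group after it removed "instructions" and
           * added "branches": two modifications, so generation 2. */
          struct toy_group parent = {
                  .generation = 2, .nr = 2,
                  .name = { "cycles", "branches" }, .value = { 1000, 50 },
          };
          /* Inherited child group still has the original composition. */
          struct toy_group child = {
                  .generation = 0, .nr = 2,
                  .name = { "cycles", "instructions" }, .value = { 800, 4000 },
          };

          /* Summing slot by slot, as a PERF_FORMAT_GROUP read does, adds
           * "instructions" into the "branches" slot -- nonsense. */
          for (unsigned int i = 0; i < parent.nr; i++)
                  printf("slot %u (%s): %llu\n", i, parent.name[i],
                         (unsigned long long)(parent.value[i] + child.value[i]));

          /* nr alone cannot tell the groups apart; the generation can. */
          if (parent.nr != child.nr || parent.generation != child.generation)
                  printf("composition mismatch detected\n");
          return 0;
  }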
(Ab)use ECHILD as error return to indicate issues with child process group composition.
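
From user space this surfaces as read() failing with errno set to ECHILD. A minimal, hypothetical consumer (the group fd is assumed to have been opened elsewhere with PERF_FORMAT_GROUP set in attr.read_format) could handle it like so:

  #include <errno.h>
  #include <stdint.h>
  #include <stdio.h>
  #include <unistd.h>

  /* Hypothetical helper: 'group_fd' is a perf event group leader opened
   * with PERF_FORMAT_GROUP.  After this fix, a read over parent/child
   * groups whose composition has diverged fails with ECHILD instead of
   * silently summing mismatched counters. */
  static ssize_t read_group(int group_fd, uint64_t *buf, size_t size)
  {
          ssize_t n = read(group_fd, buf, size);

          if (n < 0 && errno == ECHILD)
                  fprintf(stderr, "parent/child group composition diverged\n");
          return n;
  }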
Fixes: fa8c269353d5 ("perf/core: Invert perf_read_group() loops")
Reported-by: Budimir Markovic <markovicbudimir@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20231018115654.GK33217@noisy.programming.kicks-ass...
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
---
 include/linux/perf_event.h |  1 +
 kernel/events/core.c       | 40 ++++++++++++++++++++++++++++++------
 2 files changed, 35 insertions(+), 6 deletions(-)
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 39b391d6469a..4cd9a6fcbf37 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -667,6 +667,7 @@ struct perf_event {
         /* The cumulative AND of all event_caps for events in this group. */
         int                             group_caps;
 
+        unsigned int                    group_generation;
         struct perf_event               *group_leader;
         struct pmu                      *pmu;
         void                            *pmu_private;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 28eb8152564a..316611b87fe3 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1967,6 +1967,7 @@ static void perf_group_attach(struct perf_event *event)
 
         list_add_tail(&event->sibling_list, &group_leader->sibling_list);
         group_leader->nr_siblings++;
+        group_leader->group_generation++;
 
         perf_event__header_size(group_leader);
 
@@ -2159,6 +2160,7 @@ static void perf_group_detach(struct perf_event *event)
         if (leader != event) {
                 list_del_init(&event->sibling_list);
                 event->group_leader->nr_siblings--;
+                event->group_leader->group_generation++;
                 goto out;
         }
 
@@ -5144,7 +5146,7 @@ static int __perf_read_group_add(struct perf_event *leader,
                                         u64 read_format, u64 *values)
 {
         struct perf_event_context *ctx = leader->ctx;
-        struct perf_event *sub;
+        struct perf_event *sub, *parent;
         unsigned long flags;
         int n = 1; /* skip @nr */
         int ret;
@@ -5154,6 +5156,33 @@ static int __perf_read_group_add(struct perf_event *leader,
                 return ret;
 
         raw_spin_lock_irqsave(&ctx->lock, flags);
+        /*
+         * Verify the grouping between the parent and child (inherited)
+         * events is still in tact.
+         *
+         * Specifically:
+         *  - leader->ctx->lock pins leader->sibling_list
+         *  - parent->child_mutex pins parent->child_list
+         *  - parent->ctx->mutex pins parent->sibling_list
+         *
+         * Because parent->ctx != leader->ctx (and child_list nests inside
+         * ctx->mutex), group destruction is not atomic between children, also
+         * see perf_event_release_kernel(). Additionally, parent can grow the
+         * group.
+         *
+         * Therefore it is possible to have parent and child groups in a
+         * different configuration and summing over such a beast makes no sense
+         * what so ever.
+         *
+         * Reject this.
+         */
+        parent = leader->parent;
+        if (parent &&
+            (parent->group_generation != leader->group_generation ||
+             parent->nr_siblings != leader->nr_siblings)) {
+                ret = -ECHILD;
+                goto unlock;
+        }
 
         /*
          * Since we co-schedule groups, {enabled,running} times of siblings
@@ -5183,8 +5212,9 @@ static int __perf_read_group_add(struct perf_event *leader,
                         values[n++] = primary_event_id(sub);
         }
 
+unlock:
         raw_spin_unlock_irqrestore(&ctx->lock, flags);
-        return 0;
+        return ret;
 }
 
 static int perf_read_group(struct perf_event *event,
@@ -5203,10 +5233,6 @@ static int perf_read_group(struct perf_event *event,
 
         values[0] = 1 + leader->nr_siblings;
 
-        /*
-         * By locking the child_mutex of the leader we effectively
-         * lock the child list of all siblings.. XXX explain how.
-         */
         mutex_lock(&leader->child_mutex);
 
         ret = __perf_read_group_add(leader, read_format, values);
@@ -12778,6 +12804,8 @@ static int inherit_group(struct perf_event *parent_event,
                     !perf_get_aux_event(child_ctr, leader))
                         return -EINVAL;
         }
+        if (leader)
+                leader->group_generation = parent_event->group_generation;
         return 0;
 }