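This series fixes CVE-2024-46713 in the perf AUX buffer code. The first patch backports the upstream fix that serializes the AUX buffer with a new per-ring-buffer mutex (aux_mutex) instead of event->mmap_mutex. The second patch avoids the resulting kABI breakage by moving aux_mutex out of struct perf_buffer and into a wrapper structure, struct perf_buffer_ext, which places the mutex in front of the embedded perf_buffer.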
Luo Gengkun (1): perf: Fix kabi problem by put mutex in front of perf_buffer
Peter Zijlstra (1): perf/aux: Fix AUX buffer serialization
kernel/events/core.c | 21 +++++++++++++++------ kernel/events/internal.h | 5 +++++ kernel/events/ring_buffer.c | 30 ++++++++++++++++++++---------- 3 files changed, 40 insertions(+), 16 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/13360 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/13360 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/U...
From: Peter Zijlstra peterz@infradead.org
stable inclusion from stable-v6.6.51 commit c4b69bee3f4ef76809288fe6827bc14d4ae788ef category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAR9C1 CVE: CVE-2024-46713
Reference: https://git.kernel.org/stable/c/c4b69bee3f4ef76809288fe6827bc14d4ae788ef
--------------------------------
commit 2ab9d830262c132ab5db2f571003d80850d56b2a upstream.
Ole reported that event->mmap_mutex is strictly insufficient to serialize the AUX buffer, add a per RB mutex to fully serialize it.
Note that the position of perf_event::mmap_mutex in the lock order comment was already wrong; that is, its nesting under mmap_lock is not new with this patch.
Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Reported-by: Ole ole@binarygecko.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Luo Gengkun luogengkun2@huawei.com --- kernel/events/core.c | 18 ++++++++++++------ kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 2 ++ 3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c index 8ff42177710f..264e3bcda783 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1259,8 +1259,9 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::mutex * perf_event::child_mutex; * perf_event_context::lock - * perf_event::mmap_mutex * mmap_lock + * perf_event::mmap_mutex + * perf_buffer::aux_mutex * perf_addr_filters_head::lock * * cpu_hotplug_lock @@ -6360,12 +6361,11 @@ static void perf_mmap_close(struct vm_area_struct *vma) event->pmu->event_unmapped(event, vma->vm_mm);
/* - * rb->aux_mmap_count will always drop before rb->mmap_count and - * event->mmap_count, so it is ok to use event->mmap_mutex to - * serialize with perf_mmap here. + * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex + * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6382,7 +6382,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
- mutex_unlock(&event->mmap_mutex); + mutex_unlock(&rb->aux_mutex); }
if (atomic_dec_and_test(&rb->mmap_count)) @@ -6470,6 +6470,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; unsigned long vma_size; @@ -6518,6 +6519,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock;
+ aux_mutex = &rb->aux_mutex; + mutex_lock(aux_mutex); + aux_offset = READ_ONCE(rb->user_page->aux_offset); aux_size = READ_ONCE(rb->user_page->aux_size);
@@ -6668,6 +6672,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&rb->mmap_count); } aux_unlock: + if (aux_mutex) + mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex);
/* diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 386d21c7edfa..f376b057320c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,6 +40,7 @@ struct perf_buffer { struct user_struct *mmap_user;
/* AUX area */ + struct mutex aux_mutex; long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index f1f4a627f93d..b0930b418552 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -333,6 +333,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) */ if (!rb->nr_pages) rb->paused = 1; + + mutex_init(&rb->aux_mutex); }
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
HULK inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAR9C1 CVE: CVE-2024-46713
--------------------------------
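To fix the kABI breakage, put the mutex in front of perf_buffer: remove aux_mutex from struct perf_buffer and introduce a wrapper, struct perf_buffer_ext, that holds aux_mutex followed by the embedded perf_buffer. The ring buffer is now allocated and freed as a perf_buffer_ext, and users of the mutex reach it from the perf_buffer pointer via container_of().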
Signed-off-by: Luo Gengkun luogengkun2@huawei.com --- kernel/events/core.c | 9 ++++++--- kernel/events/internal.h | 6 +++++- kernel/events/ring_buffer.c | 30 +++++++++++++++++++----------- 3 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c index 264e3bcda783..f53d7c6ad513 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6356,6 +6356,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); bool detach_rest = false; + struct perf_buffer_ext *rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer);
if (event->pmu->event_unmapped) event->pmu->event_unmapped(event, vma->vm_mm); @@ -6365,7 +6366,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb_ext->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6382,7 +6383,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
- mutex_unlock(&rb->aux_mutex); + mutex_unlock(&rb_ext->aux_mutex); }
if (atomic_dec_and_test(&rb->mmap_count)) @@ -6472,6 +6473,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct user_struct *user = current_user(); struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; + struct perf_buffer_ext *rb_ext = NULL; unsigned long locked, lock_limit; unsigned long vma_size; unsigned long nr_pages; @@ -6519,7 +6521,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock;
- aux_mutex = &rb->aux_mutex; + rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer); + aux_mutex = &rb_ext->aux_mutex; mutex_lock(aux_mutex);
aux_offset = READ_ONCE(rb->user_page->aux_offset); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index f376b057320c..d2e6e6144c54 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,7 +40,6 @@ struct perf_buffer { struct user_struct *mmap_user;
/* AUX area */ - struct mutex aux_mutex; long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ @@ -59,6 +58,11 @@ struct perf_buffer { void *data_pages[]; };
+struct perf_buffer_ext { + struct mutex aux_mutex; + struct perf_buffer perf_buffer; +}; + extern void rb_free(struct perf_buffer *rb);
static inline void rb_free_rcu(struct rcu_head *rcu_head) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index b0930b418552..cfd9448ce28f 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -310,6 +310,7 @@ static void ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) { long max_size = perf_data_size(rb); + struct perf_buffer_ext *rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer);
if (watermark) rb->watermark = min(max_size, watermark); @@ -334,7 +335,7 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) if (!rb->nr_pages) rb->paused = 1;
- mutex_init(&rb->aux_mutex); + mutex_init(&rb_ext->aux_mutex); }
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) @@ -818,21 +819,23 @@ static void perf_mmap_free_page(void *addr)
struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { + struct perf_buffer_ext *rb_ext; struct perf_buffer *rb; unsigned long size; int i, node;
- size = sizeof(struct perf_buffer); + size = sizeof(struct perf_buffer_ext); size += nr_pages * sizeof(void *);
if (order_base_2(size) > PAGE_SHIFT+MAX_ORDER) goto fail;
node = (cpu == -1) ? cpu : cpu_to_node(cpu); - rb = kzalloc_node(size, GFP_KERNEL, node); - if (!rb) + rb_ext = kzalloc_node(size, GFP_KERNEL, node); + if (!rb_ext) goto fail;
+ rb = &rb_ext->perf_buffer; rb->user_page = perf_mmap_alloc_page(cpu); if (!rb->user_page) goto fail_user_page; @@ -856,7 +859,7 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) perf_mmap_free_page(rb->user_page);
fail_user_page: - kfree(rb); + kfree(rb_ext);
fail: return NULL; @@ -865,11 +868,12 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) void rb_free(struct perf_buffer *rb) { int i; + struct perf_buffer_ext *rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer);
perf_mmap_free_page(rb->user_page); for (i = 0; i < rb->nr_pages; i++) perf_mmap_free_page(rb->data_pages[i]); - kfree(rb); + kfree(rb_ext); }
#else @@ -892,6 +896,7 @@ static void perf_mmap_unmark_page(void *addr)
static void rb_free_work(struct work_struct *work) { + struct perf_buffer_ext *rb_ext; struct perf_buffer *rb; void *base; int i, nr; @@ -905,7 +910,8 @@ static void rb_free_work(struct work_struct *work) perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base); - kfree(rb); + rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer); + kfree(rb_ext); }
void rb_free(struct perf_buffer *rb) @@ -915,19 +921,21 @@ void rb_free(struct perf_buffer *rb)
struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { + struct perf_buffer_ext *rb_ext; struct perf_buffer *rb; unsigned long size; void *all_buf; int node;
- size = sizeof(struct perf_buffer); + size = sizeof(struct perf_buffer_ext); size += sizeof(void *);
node = (cpu == -1) ? cpu : cpu_to_node(cpu); - rb = kzalloc_node(size, GFP_KERNEL, node); - if (!rb) + rb_ext = kzalloc_node(size, GFP_KERNEL, node); + if (!rb_ext) goto fail;
+ rb = &rb_ext->perf_buffer; INIT_WORK(&rb->work, rb_free_work);
all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); @@ -946,7 +954,7 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) return rb;
fail_all_buf: - kfree(rb); + kfree(rb_ext);
fail: return NULL;