Luo Gengkun (1): perf: Fix kabi problem by putting mutex in front of perf_buffer
Peter Zijlstra (1): perf/aux: Fix AUX buffer serialization
kernel/events/core.c | 21 +++++++++++++++------ kernel/events/internal.h | 5 +++++ kernel/events/ring_buffer.c | 30 ++++++++++++++++++++---------- 3 files changed, 40 insertions(+), 16 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/13369 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I...
Feedback: The patch(es) that you sent to the kernel@openeuler.org mailing list have been successfully converted to a pull request! Pull request link: https://gitee.com/openeuler/kernel/pulls/13369 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I...
From: Peter Zijlstra peterz@infradead.org
stable inclusion from stable-v5.10.226 commit 7882923f1cb88dc1a17f2bf0c81b1fc80d44db82 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAR9C1 CVE: CVE-2024-46713
Reference: https://git.kernel.org/stable/c/7882923f1cb88dc1a17f2bf0c81b1fc80d44db82
--------------------------------
commit 2ab9d830262c132ab5db2f571003d80850d56b2a upstream.
Ole reported that event->mmap_mutex is strictly insufficient to serialize the AUX buffer; add a per-RB (ring buffer) mutex to fully serialize it.
Note that in the lock order comment the perf_event::mmap_mutex order was already wrong; that is, its nesting under mmap_lock is not new with this patch.
Fixes: 45bfb2e50471 ("perf: Add AUX area to ring buffer for raw data streams") Reported-by: Ole ole@binarygecko.com Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Greg Kroah-Hartman gregkh@linuxfoundation.org Signed-off-by: Luo Gengkun luogengkun2@huawei.com --- kernel/events/core.c | 18 ++++++++++++------ kernel/events/internal.h | 1 + kernel/events/ring_buffer.c | 2 ++ 3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c index d2f6ce13fc08..e4e4b523a090 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1332,8 +1332,9 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::mutex * perf_event::child_mutex; * perf_event_context::lock - * perf_event::mmap_mutex * mmap_lock + * perf_event::mmap_mutex + * perf_buffer::aux_mutex * perf_addr_filters_head::lock * * cpu_hotplug_lock @@ -6003,12 +6004,11 @@ static void perf_mmap_close(struct vm_area_struct *vma) event->pmu->event_unmapped(event, vma->vm_mm);
/* - * rb->aux_mmap_count will always drop before rb->mmap_count and - * event->mmap_count, so it is ok to use event->mmap_mutex to - * serialize with perf_mmap here. + * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex + * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6025,7 +6025,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
- mutex_unlock(&event->mmap_mutex); + mutex_unlock(&rb->aux_mutex); }
if (atomic_dec_and_test(&rb->mmap_count)) @@ -6113,6 +6113,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; unsigned long vma_size; @@ -6159,6 +6160,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock;
+ aux_mutex = &rb->aux_mutex; + mutex_lock(aux_mutex); + aux_offset = READ_ONCE(rb->user_page->aux_offset); aux_size = READ_ONCE(rb->user_page->aux_size);
@@ -6310,6 +6314,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) atomic_dec(&rb->mmap_count); } aux_unlock: + if (aux_mutex) + mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex);
/* diff --git a/kernel/events/internal.h b/kernel/events/internal.h index aa23ffdaf819..82082dad39d7 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,6 +40,7 @@ struct perf_buffer { struct user_struct *mmap_user;
/* AUX area */ + struct mutex aux_mutex; long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 01351e7e2543..cc6218e2abb9 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -329,6 +329,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) */ if (!rb->nr_pages) rb->paused = 1; + + mutex_init(&rb->aux_mutex); }
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
HULK inclusion category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/IAR9C1 CVE: CVE-2024-46713
--------------------------------
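To fix the kABI breakage, move aux_mutex out of struct perf_buffer and place it in front of the buffer, in a new wrapper struct perf_buffer_ext that is reached from a perf_buffer pointer via container_of().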
Fixes: 7882923f1cb8 ("perf/aux: Fix AUX buffer serialization") Signed-off-by: Luo Gengkun luogengkun2@huawei.com --- kernel/events/core.c | 9 ++++++--- kernel/events/internal.h | 6 +++++- kernel/events/ring_buffer.c | 30 +++++++++++++++++++----------- 3 files changed, 30 insertions(+), 15 deletions(-)
diff --git a/kernel/events/core.c b/kernel/events/core.c index e4e4b523a090..7ba359f4cf5f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5999,6 +5999,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) int mmap_locked = rb->mmap_locked; unsigned long size = perf_data_size(rb); bool detach_rest = false; + struct perf_buffer_ext *rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer);
if (event->pmu->event_unmapped) event->pmu->event_unmapped(event, vma->vm_mm); @@ -6008,7 +6009,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb_ext->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6025,7 +6026,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount));
- mutex_unlock(&rb->aux_mutex); + mutex_unlock(&rb_ext->aux_mutex); }
if (atomic_dec_and_test(&rb->mmap_count)) @@ -6115,6 +6116,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct user_struct *user = current_user(); struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; + struct perf_buffer_ext *rb_ext = NULL; unsigned long locked, lock_limit; unsigned long vma_size; unsigned long nr_pages; @@ -6160,7 +6162,8 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock;
- aux_mutex = &rb->aux_mutex; + rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer); + aux_mutex = &rb_ext->aux_mutex; mutex_lock(aux_mutex);
aux_offset = READ_ONCE(rb->user_page->aux_offset); diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 82082dad39d7..95e32e83c7c8 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,7 +40,6 @@ struct perf_buffer { struct user_struct *mmap_user;
/* AUX area */ - struct mutex aux_mutex; long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ @@ -59,6 +58,11 @@ struct perf_buffer { void *data_pages[]; };
+struct perf_buffer_ext { + struct mutex aux_mutex; + struct perf_buffer perf_buffer; +}; + extern void rb_free(struct perf_buffer *rb);
static inline void rb_free_rcu(struct rcu_head *rcu_head) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index cc6218e2abb9..11c194e9b8d9 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -306,6 +306,7 @@ static void ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) { long max_size = perf_data_size(rb); + struct perf_buffer_ext *rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer);
if (watermark) rb->watermark = min(max_size, watermark); @@ -330,7 +331,7 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) if (!rb->nr_pages) rb->paused = 1;
- mutex_init(&rb->aux_mutex); + mutex_init(&rb_ext->aux_mutex); }
void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) @@ -810,20 +811,22 @@ static void perf_mmap_free_page(void *addr)
struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { + struct perf_buffer_ext *rb_ext; struct perf_buffer *rb; unsigned long size; int i;
- size = sizeof(struct perf_buffer); + size = sizeof(struct perf_buffer_ext); size += nr_pages * sizeof(void *);
if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER) goto fail;
- rb = kzalloc(size, GFP_KERNEL); - if (!rb) + rb_ext = kzalloc(size, GFP_KERNEL); + if (!rb_ext) goto fail;
+ rb = &rb_ext->perf_buffer; rb->user_page = perf_mmap_alloc_page(cpu); if (!rb->user_page) goto fail_user_page; @@ -847,7 +850,7 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) perf_mmap_free_page(rb->user_page);
fail_user_page: - kfree(rb); + kfree(rb_ext);
fail: return NULL; @@ -856,11 +859,12 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) void rb_free(struct perf_buffer *rb) { int i; + struct perf_buffer_ext *rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer);
perf_mmap_free_page(rb->user_page); for (i = 0; i < rb->nr_pages; i++) perf_mmap_free_page(rb->data_pages[i]); - kfree(rb); + kfree(rb_ext); }
#else @@ -883,6 +887,7 @@ static void perf_mmap_unmark_page(void *addr)
static void rb_free_work(struct work_struct *work) { + struct perf_buffer_ext *rb_ext; struct perf_buffer *rb; void *base; int i, nr; @@ -896,7 +901,8 @@ static void rb_free_work(struct work_struct *work) perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base); - kfree(rb); + rb_ext = container_of(rb, struct perf_buffer_ext, perf_buffer); + kfree(rb_ext); }
void rb_free(struct perf_buffer *rb) @@ -906,17 +912,19 @@ void rb_free(struct perf_buffer *rb)
struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) { + struct perf_buffer_ext *rb_ext; struct perf_buffer *rb; unsigned long size; void *all_buf;
- size = sizeof(struct perf_buffer); + size = sizeof(struct perf_buffer_ext); size += sizeof(void *);
- rb = kzalloc(size, GFP_KERNEL); - if (!rb) + rb_ext = kzalloc(size, GFP_KERNEL); + if (!rb_ext) goto fail;
+ rb = &rb_ext->perf_buffer; INIT_WORK(&rb->work, rb_free_work);
all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); @@ -935,7 +943,7 @@ struct perf_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags) return rb;
fail_all_buf: - kfree(rb); + kfree(rb_ext);
fail: return NULL;