[PATCH OLK-5.10 0/4] Fix CVE-2026-43262
Fix CVE-2026-43262. Andreas Gruenbacher (2): iov_iter: Introduce fault_in_iov_iter_writeable gfs2: fiemap page fault fix Baoquan He (1): mm/gup: fix wrongly calculated returned value in fault_in_safe_writeable() Linus Torvalds (1): mm: gup: make fault_in_safe_writeable() use fixup_user_fault() fs/gfs2/inode.c | 16 +++++++++++++++ include/linux/pagemap.h | 2 ++ include/linux/uio.h | 1 + lib/iov_iter.c | 39 ++++++++++++++++++++++++++++++++++++ mm/gup.c | 44 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) -- 2.34.1
From: Andreas Gruenbacher <agruenba@redhat.com> mainline inclusion from mainline-v5.16-rc1 commit cdd591fc86e38ad3899196066219fbbd845f3162 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/14806 CVE: CVE-2026-43262 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Introduce a new fault_in_iov_iter_writeable helper for safely faulting in an iterator for writing. Uses get_user_pages() to fault in the pages without actually writing to them, which would be destructive. We'll use fault_in_iov_iter_writeable in gfs2 once we've determined that the iterator passed to .read_iter isn't in memory. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Conflicts: include/linux/pagemap.h include/linux/uio.h lib/iov_iter.c mm/gup.c [lhb: context conflicts] Signed-off-by: Hongbo Li <lihongbo22@huawei.com> --- include/linux/pagemap.h | 2 ++ include/linux/uio.h | 1 + lib/iov_iter.c | 39 +++++++++++++++++++++++++ mm/gup.c | 63 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0bfa9cce6589..ac396936bd3f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -756,6 +756,8 @@ static inline int fault_in_pages_readable(const char __user *uaddr, int size) return 0; } +size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); + int add_to_page_cache_locked(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, diff --git a/include/linux/uio.h b/include/linux/uio.h index 1bf0a426ef02..9f16fd58a2f1 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -130,6 +130,7 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 15b373f2e508..0f3f714932da 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -446,6 +446,45 @@ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) } EXPORT_SYMBOL(iov_iter_fault_in_readable); +/* + * fault_in_iov_iter_writeable - fault in iov iterator for writing + * @i: iterator + * @size: maximum length + * + * Faults in the iterator using get_user_pages(), i.e., without triggering + * hardware page faults. This is primarily useful when we already know that + * some or all of the pages in @i aren't in memory. + * + * Returns the number of bytes not faulted in, like copy_to_user() and + * copy_from_user(). + * + * Always returns 0 for non-user-space iterators. + */ +size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size) +{ + if (iter_is_iovec(i)) { + size_t count = min(size, iov_iter_count(i)); + const struct iovec *p; + size_t skip; + + size -= count; + for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) { + size_t len = min(count, p->iov_len - skip); + size_t ret; + + if (unlikely(!len)) + continue; + ret = fault_in_safe_writeable(p->iov_base + skip, len); + count -= len - ret; + if (ret) + break; + } + return count + size; + } + return 0; +} +EXPORT_SYMBOL(fault_in_iov_iter_writeable); + void iov_iter_init(struct iov_iter *i, unsigned int direction, const struct iovec *iov, unsigned long nr_segs, size_t count) diff --git a/mm/gup.c b/mm/gup.c index 328e38bb1d1f..bc5085866992 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1632,6 +1632,69 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, } #endif /* !CONFIG_MMU */ +/* + * fault_in_safe_writeable - fault in an address range for writing + * @uaddr: start of address range + * @size: length of address range + * + * Faults in an address range using get_user_pages, i.e., without triggering + * hardware page faults. This is primarily useful when we already know that + * some or all of the pages in the address range aren't in memory. + * + * Other than fault_in_writeable(), this function is non-destructive. + * + * Note that we don't pin or otherwise hold the pages referenced that we fault + * in. There's no guarantee that they'll stay in memory for any duration of + * time. + * + * Returns the number of bytes not faulted in, like copy_to_user() and + * copy_from_user(). + */ +size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) +{ + unsigned long start = (unsigned long)untagged_addr(uaddr); + unsigned long end, nstart, nend; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = NULL; + int locked = 0; + + nstart = start & PAGE_MASK; + end = PAGE_ALIGN(start + size); + if (end < nstart) + end = 0; + for (; nstart != end; nstart = nend) { + unsigned long nr_pages; + long ret; + + if (!locked) { + locked = 1; + mmap_read_lock(mm); + vma = find_vma(mm, nstart); + } else if (nstart >= vma->vm_end) + vma = vma->vm_next; + if (!vma || vma->vm_start >= end) + break; + nend = end ? min(end, vma->vm_end) : vma->vm_end; + if (vma->vm_flags & (VM_IO | VM_PFNMAP)) + continue; + if (nstart < vma->vm_start) + nstart = vma->vm_start; + nr_pages = (nend - nstart) / PAGE_SIZE; + ret = __get_user_pages_locked(mm, nstart, nr_pages, + NULL, NULL, &locked, + FOLL_TOUCH | FOLL_WRITE); + if (ret <= 0) + break; + nend = nstart + ret * PAGE_SIZE; + } + if (locked) + mmap_read_unlock(mm); + if (nstart == end) + return 0; + return size - min_t(size_t, nstart - start, size); +} +EXPORT_SYMBOL(fault_in_safe_writeable); + /** * get_dump_page() - pin user page in memory while writing it to core dump * @addr: user address -- 2.34.1
From: Linus Torvalds <torvalds@linux-foundation.org> mainline inclusion from mainline-v5.17-rc8 commit fe673d3f5bf1fc50cdc4b754831db91a2ec10126 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/14806 CVE: CVE-2026-43262 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Instead of using GUP, make fault_in_safe_writeable() actually force a 'handle_mm_fault()' using the same fixup_user_fault() machinery that futexes already use. Using the GUP machinery meant that fault_in_safe_writeable() did not do everything that a real fault would do, ranging from not auto-expanding the stack segment, to not updating accessed or dirty flags in the page tables (GUP sets those flags on the pages themselves). The latter causes problems on architectures (like s390) that do accessed bit handling in software, which meant that fault_in_safe_writeable() didn't actually do all the fault handling it needed to, and trying to access the user address afterwards would still cause faults. Reported-and-tested-by: Andreas Gruenbacher <agruenba@redhat.com> Fixes: cdd591fc86e3 ("iov_iter: Introduce fault_in_iov_iter_writeable") Link: https://lore.kernel.org/all/CAHc6FU5nP+nziNGG0JAF1FUx-GV7kKFvM7aZuU_XD2_1v4v... Acked-by: David Hildenbrand <david@redhat.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Conflicts: mm/gup.c [lhb: context conflicts] Signed-off-by: Hongbo Li <lihongbo22@huawei.com> --- mm/gup.c | 57 +++++++++++++++++++------------------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index bc5085866992..5f9b56a768c8 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1637,11 +1637,11 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, * @uaddr: start of address range * @size: length of address range * - * Faults in an address range using get_user_pages, i.e., without triggering - * hardware page faults. This is primarily useful when we already know that - * some or all of the pages in the address range aren't in memory. + * Faults in an address range for writing. This is primarily useful when we + * already know that some or all of the pages in the address range aren't in + * memory. * - * Other than fault_in_writeable(), this function is non-destructive. + * Unlike fault_in_writeable(), this function is non-destructive. * * Note that we don't pin or otherwise hold the pages referenced that we fault * in. There's no guarantee that they'll stay in memory for any duration of @@ -1652,46 +1652,27 @@ static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start, */ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) { - unsigned long start = (unsigned long)untagged_addr(uaddr); - unsigned long end, nstart, nend; + unsigned long start = (unsigned long)uaddr, end; struct mm_struct *mm = current->mm; - struct vm_area_struct *vma = NULL; - int locked = 0; + bool unlocked = false; - nstart = start & PAGE_MASK; + if (unlikely(size == 0)) + return 0; end = PAGE_ALIGN(start + size); - if (end < nstart) + if (end < start) end = 0; - for (; nstart != end; nstart = nend) { - unsigned long nr_pages; - long ret; - if (!locked) { - locked = 1; - mmap_read_lock(mm); - vma = find_vma(mm, nstart); - } else if (nstart >= vma->vm_end) - vma = vma->vm_next; - if (!vma || vma->vm_start >= end) - break; - nend = end ? min(end, vma->vm_end) : vma->vm_end; - if (vma->vm_flags & (VM_IO | VM_PFNMAP)) - continue; - if (nstart < vma->vm_start) - nstart = vma->vm_start; - nr_pages = (nend - nstart) / PAGE_SIZE; - ret = __get_user_pages_locked(mm, nstart, nr_pages, - NULL, NULL, &locked, - FOLL_TOUCH | FOLL_WRITE); - if (ret <= 0) + mmap_read_lock(mm); + do { + if (fixup_user_fault(mm, start, FAULT_FLAG_WRITE, &unlocked)) break; - nend = nstart + ret * PAGE_SIZE; - } - if (locked) - mmap_read_unlock(mm); - if (nstart == end) - return 0; - return size - min_t(size_t, nstart - start, size); + start = (start + PAGE_SIZE) & PAGE_MASK; + } while (start != end); + mmap_read_unlock(mm); + + if (size > (unsigned long)uaddr - start) + return size - ((unsigned long)uaddr - start); + return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); -- 2.34.1
From: Baoquan He <bhe@redhat.com> stable inclusion from stable-v6.6.88 commit 006b67ac61312fe2be05f33acb3f9473df6a7416 category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/14806 CVE: CVE-2026-43262 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- commit 8c03ebd7cdc06bd0d2fecb4d1a609ef1dbb7d0aa upstream. Not like fault_in_readable() or fault_in_writeable(), in fault_in_safe_writeable() local variable 'start' is increased page by page to loop till the whole address range is handled. However, it mistakenly calculates the size of the handled range with 'uaddr - start'. Fix it here. Andreas said: : In gfs2, fault_in_iov_iter_writeable() is used in : gfs2_file_direct_read() and gfs2_file_read_iter(), so this potentially : affects buffered as well as direct reads. This bug could cause those : gfs2 functions to spin in a loop. Link: https://lkml.kernel.org/r/20250410035717.473207-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20250410035717.473207-2-bhe@redhat.com Signed-off-by: Baoquan He <bhe@redhat.com> Fixes: fe673d3f5bf1 ("mm: gup: make fault_in_safe_writeable() use fixup_user_fault()") Reviewed-by: Oscar Salvador <osalvador@suse.de> Acked-by: David Hildenbrand <david@redhat.com> Cc: Andreas Gruenbacher <agruenba@redhat.com> Cc: Yanjun.Zhu <yanjun.zhu@linux.dev> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Hongbo Li <lihongbo22@huawei.com> --- mm/gup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 5f9b56a768c8..2bdb357f44ff 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1670,8 +1670,8 @@ size_t fault_in_safe_writeable(const char __user *uaddr, size_t size) } while (start != end); mmap_read_unlock(mm); - if (size > (unsigned long)uaddr - start) - return size - ((unsigned long)uaddr - start); + if (size > start - (unsigned long)uaddr) + return size - (start - (unsigned long)uaddr); return 0; } EXPORT_SYMBOL(fault_in_safe_writeable); -- 2.34.1
From: Andreas Gruenbacher <agruenba@redhat.com> stable inclusion from stable-v6.6.128 commit e428670cfb2993d8c224effd076242ca6b0950de category: bugfix bugzilla: https://atomgit.com/src-openeuler/kernel/issues/14806 CVE: CVE-2026-43262 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- [ Upstream commit e411d74cc5ba290f85d0dd5e4d1df8f1d6d975d2 ] In gfs2_fiemap(), we are calling iomap_fiemap() while holding the inode glock. This can lead to recursive glock taking if the fiemap buffer is memory mapped to the same inode and accessing it triggers a page fault. Fix by disabling page faults for iomap_fiemap() and faulting in the buffer by hand if necessary. Fixes xfstest generic/742. Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com> Signed-off-by: Sasha Levin <sashal@kernel.org> Conflicts: fs/gfs2/inode.c [lhb: Context conflicts] Signed-off-by: Hongbo Li <lihongbo22@huawei.com> --- fs/gfs2/inode.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d75d56d9ea0c..726a1bde6919 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -2065,6 +2065,14 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat, return 0; } +static bool fault_in_fiemap(struct fiemap_extent_info *fi) +{ + struct fiemap_extent __user *dest = fi->fi_extents_start; + size_t size = sizeof(*dest) * fi->fi_extents_max; + + return fault_in_safe_writeable((char __user *)dest, size) == 0; +} + static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -2074,14 +2082,22 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock_shared(inode); +retry: ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) goto out; + pagefault_disable(); ret = iomap_fiemap(inode, fieinfo, start, len, &gfs2_iomap_ops); + pagefault_enable(); gfs2_glock_dq_uninit(&gh); + if (ret == -EFAULT && fault_in_fiemap(fieinfo)) { + fieinfo->fi_extents_mapped = 0; + goto retry; + } + out: inode_unlock_shared(inode); return ret; -- 2.34.1
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/22281 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/MHC... FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/22281 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/MHC...
participants (2)
-
Hongbo Li -
patchwork bot