From: Liu Shixin <liushixin2@huawei.com>
mainline inclusion
from stable-v6.4-rc1
commit 1cb9dc4b475c7418f925ab0c97b6750007d9f52e
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IB0OV7
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
copy-on-write of hugetlb user pages with uncorrectable errors will result in a kernel crash. This is because the copy is performed in kernel mode and in general we can not handle accessing memory with such errors while in kernel mode.

Commit a873dfe1032a ("mm, hwpoison: try to recover from copy-on write faults") introduced the routine copy_user_highpage_mc() to gracefully handle copying of user pages with uncorrectable errors. However, the separate hugetlb copy-on-write code paths were not modified as part of commit a873dfe1032a.
Modify hugetlb copy-on-write code paths to use copy_mc_user_highpage() so that they can also gracefully handle uncorrectable errors in user pages. This involves changing the hugetlb specific routine copy_user_large_folio() from type void to int so that it can return an error. Modify the hugetlb userfaultfd code in the same way so that it can return -EHWPOISON if it encounters an uncorrectable error.
Link: https://lkml.kernel.org/r/20230413131349.2524210-1-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Tony Luck <tony.luck@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Conflicts:
	include/linux/mm.h
	mm/hugetlb.c
	mm/memory.c
[Ma Wupeng: current branch doesn't need folio & memory_failure_queue]
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
---
 include/linux/mm.h |  8 +++----
 mm/hugetlb.c       |  7 ++++--
 mm/memory.c        | 53 +++++++++++++++++++++++++++++-----------------
 3 files changed, 42 insertions(+), 26 deletions(-)
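As a quick illustration of the new contract before the hunks themselves: the per-subpage copy callback now returns an int, a poisoned subpage yields -EHWPOISON, and the page walker stops early so the fault path can report VM_FAULT_HWPOISON_LARGE instead of touching the bad memory again. The userspace sketch below only mirrors that control flow; it is not kernel code, and fake_copy_mc(), walk_subpages() and NR_SUBPAGES are invented names for the example.

/* Minimal userspace sketch of the error-propagation pattern; builds with
 * any C compiler.  Only the control flow mirrors the patched
 * process_huge_page()/copy_subpage(); all names here are made up. */
#include <errno.h>
#include <stdio.h>

#ifndef EHWPOISON
#define EHWPOISON 133	/* "memory page has hardware error"; not in every libc */
#endif

#define NR_SUBPAGES 8	/* stand-in for pages_per_huge_page */

/* Pretend subpage 5 hits an uncorrectable error, the way the
 * machine-check-safe copy would fail mid-copy. */
static int fake_copy_mc(char *dst, const char *src, int idx)
{
	if (idx == 5)
		return -EHWPOISON;
	dst[0] = src[0];
	return 0;
}

/* Per-subpage callback returns int; stop the walk at the first failure
 * and hand the error back to the caller. */
static int walk_subpages(char *dst, const char *src, int nr,
			 int (*process_subpage)(char *, const char *, int))
{
	int i, ret;

	for (i = 0; i < nr; i++) {
		ret = process_subpage(dst + i, src + i, i);
		if (ret)
			return ret;
	}
	return 0;
}

int main(void)
{
	char src[NR_SUBPAGES] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' };
	char dst[NR_SUBPAGES] = { 0 };

	if (walk_subpages(dst, src, NR_SUBPAGES, fake_copy_mc) == -EHWPOISON)
		printf("copy aborted; the COW path would return VM_FAULT_HWPOISON_LARGE\n");
	return 0;
}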
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 627f997bc547..00bc6978391b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3222,10 +3222,10 @@ enum mf_action_page_type {
 extern void clear_huge_page(struct page *page,
                             unsigned long addr_hint,
                             unsigned int pages_per_huge_page);
-extern void copy_user_huge_page(struct page *dst, struct page *src,
-                                unsigned long addr_hint,
-                                struct vm_area_struct *vma,
-                                unsigned int pages_per_huge_page);
+extern int copy_user_huge_page(struct page *dst, struct page *src,
+                               unsigned long addr_hint,
+                               struct vm_area_struct *vma,
+                               unsigned int pages_per_huge_page);
 extern long copy_huge_page_from_user(struct page *dst_page,
                                 const void __user *usr_src,
                                 unsigned int pages_per_huge_page,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5f04adac38bb..4f4773bd5393 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4728,8 +4728,11 @@ static vm_fault_t hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
                goto out_release_all;
        }
 
-       copy_user_huge_page(new_page, old_page, address, vma,
-                           pages_per_huge_page(h));
+       if (copy_user_huge_page(new_page, old_page, address, vma,
+                               pages_per_huge_page(h))) {
+               ret = VM_FAULT_HWPOISON_LARGE;
+               goto out_release_all;
+       }
        __SetPageUptodate(new_page);
 
        mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, haddr,
diff --git a/mm/memory.c b/mm/memory.c
index 0b71917f87c0..a4b8b1d47a3b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5287,12 +5287,12 @@ EXPORT_SYMBOL(__might_fault);
  * operation.  The target subpage will be processed last to keep its
  * cache lines hot.
  */
-static inline void process_huge_page(
+static inline int process_huge_page(
        unsigned long addr_hint, unsigned int pages_per_huge_page,
-       void (*process_subpage)(unsigned long addr, int idx, void *arg),
+       int (*process_subpage)(unsigned long addr, int idx, void *arg),
        void *arg)
 {
-       int i, n, base, l;
+       int i, n, base, l, ret;
        unsigned long addr = addr_hint &
                ~(((unsigned long)pages_per_huge_page << PAGE_SHIFT) - 1);
 
@@ -5306,7 +5306,9 @@ static inline void process_huge_page(
                /* Process subpages at the end of huge page */
                for (i = pages_per_huge_page - 1; i >= 2 * n; i--) {
                        cond_resched();
-                       process_subpage(addr + i * PAGE_SIZE, i, arg);
+                       ret = process_subpage(addr + i * PAGE_SIZE, i, arg);
+                       if (ret)
+                               return ret;
                }
        } else {
                /* If target subpage in second half of huge page */
@@ -5315,7 +5317,9 @@ static inline void process_huge_page(
                /* Process subpages at the begin of huge page */
                for (i = 0; i < base; i++) {
                        cond_resched();
-                       process_subpage(addr + i * PAGE_SIZE, i, arg);
+                       ret = process_subpage(addr + i * PAGE_SIZE, i, arg);
+                       if (ret)
+                               return ret;
                }
        }
        /*
@@ -5327,10 +5331,15 @@ static inline void process_huge_page(
                int right_idx = base + 2 * l - 1 - i;
 
                cond_resched();
-               process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg);
+               ret = process_subpage(addr + left_idx * PAGE_SIZE, left_idx, arg);
+               if (ret)
+                       return ret;
                cond_resched();
-               process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg);
+               ret = process_subpage(addr + right_idx * PAGE_SIZE, right_idx, arg);
+               if (ret)
+                       return ret;
        }
+       return 0;
 }
 
 static void clear_gigantic_page(struct page *page,
@@ -5348,11 +5357,12 @@ static void clear_gigantic_page(struct page *page,
        }
 }
 
-static void clear_subpage(unsigned long addr, int idx, void *arg)
+static int clear_subpage(unsigned long addr, int idx, void *arg)
 {
        struct page *page = arg;
 
        clear_user_highpage(page + idx, addr);
+       return 0;
 }
 
 void clear_huge_page(struct page *page,
@@ -5369,7 +5379,7 @@ void clear_huge_page(struct page *page,
        process_huge_page(addr_hint, pages_per_huge_page, clear_subpage, page);
 }
 
-static void copy_user_gigantic_page(struct page *dst, struct page *src,
+static int copy_user_gigantic_page(struct page *dst, struct page *src,
                                unsigned long addr,
                                struct vm_area_struct *vma,
                                unsigned int pages_per_huge_page)
@@ -5380,12 +5390,14 @@ static void copy_user_gigantic_page(struct page *dst, struct page *src,
 
        for (i = 0; i < pages_per_huge_page; ) {
                cond_resched();
-               copy_user_highpage(dst, src, addr + i*PAGE_SIZE, vma);
+               if (copy_user_highpage_mc(dst, src, addr + i*PAGE_SIZE, vma))
+                       return -EHWPOISON;
 
                i++;
                dst = mem_map_next(dst, dst_base, i);
                src = mem_map_next(src, src_base, i);
        }
+       return 0;
 }
 
 struct copy_subpage_arg {
@@ -5394,15 +5406,18 @@ struct copy_subpage_arg {
        struct vm_area_struct *vma;
 };
 
-static void copy_subpage(unsigned long addr, int idx, void *arg)
+static int copy_subpage(unsigned long addr, int idx, void *arg)
 {
        struct copy_subpage_arg *copy_arg = arg;
 
-       copy_user_highpage(copy_arg->dst + idx, copy_arg->src + idx,
-                          addr, copy_arg->vma);
+       if (copy_user_highpage_mc(copy_arg->dst + idx, copy_arg->src + idx,
+                                 addr, copy_arg->vma))
+               return -EHWPOISON;
+
+       return 0;
 }
 
-void copy_user_huge_page(struct page *dst, struct page *src,
+int copy_user_huge_page(struct page *dst, struct page *src,
                         unsigned long addr_hint, struct vm_area_struct *vma,
                         unsigned int pages_per_huge_page)
 {
@@ -5414,13 +5429,11 @@ void copy_user_huge_page(struct page *dst, struct page *src,
                .vma = vma,
        };
 
-       if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES)) {
-               copy_user_gigantic_page(dst, src, addr, vma,
-                                       pages_per_huge_page);
-               return;
-       }
+       if (unlikely(pages_per_huge_page > MAX_ORDER_NR_PAGES))
+               return copy_user_gigantic_page(dst, src, addr, vma,
+                                              pages_per_huge_page);
 
-       process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
+       return process_huge_page(addr_hint, pages_per_huge_page, copy_subpage, &arg);
 }
 
 long copy_huge_page_from_user(struct page *dst_page,