From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit 4746f5ce0fa52e21b5fe432970fe9516d1a45ebc
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Patch series "khugepaged folio conversions".
We've been kind of hacking piecemeal at converting khugepaged to use folios instead of compound pages, and so this patchset is a little larger than it should be as I undo some of our wrong moves in the past. In particular, collapse_file() now consistently uses 'new_folio' for the freshly allocated folio and 'folio' for the one that's currently in use.
This patch (of 7):
This function has one caller, and the combined function is simpler to read, reason about and modify.
Link: https://lkml.kernel.org/r/20240403171838.1445826-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20240403171838.1445826-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Reviewed-by: Vishal Moola (Oracle) vishal.moola@gmail.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [Context difference only]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index c6379a2d55bc..4c1d69c04975 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -919,20 +919,6 @@ static int hpage_collapse_find_target_node(struct collapse_control *cc) } #endif
-static bool hpage_collapse_alloc_folio(struct folio **folio, gfp_t gfp, int node, - nodemask_t *nmask) -{ - *folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, nmask); - - if (unlikely(!*folio)) { - count_vm_event(THP_COLLAPSE_ALLOC_FAILED); - return false; - } - - count_vm_event(THP_COLLAPSE_ALLOC); - return true; -} - /* * If mmap_lock temporarily dropped, revalidate vma * before taking mmap_lock. @@ -1098,11 +1084,14 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, if (cc->reliable) gfp |= GFP_RELIABLE;
- if (!hpage_collapse_alloc_folio(&folio, gfp, node, &cc->alloc_nmask)) { + folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask); + if (!folio) { *hpage = NULL; + count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return SCAN_ALLOC_HUGE_PAGE_FAIL; }
+ count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); *hpage = NULL;
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit d5ab50b9412c0bba750eef5a34fd2937de1aee55
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Both callers want to deal with a folio, so return a folio from this function.
Link: https://lkml.kernel.org/r/20240403171838.1445826-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 4c1d69c04975..3df4bf24dbc7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1073,7 +1073,7 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, return result; }
-static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, +static int alloc_charge_folio(struct folio **foliop, struct mm_struct *mm, struct collapse_control *cc) { gfp_t gfp = (cc->is_khugepaged ? alloc_hugepage_khugepaged_gfpmask() : @@ -1086,7 +1086,7 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
folio = __folio_alloc(gfp, HPAGE_PMD_ORDER, node, &cc->alloc_nmask); if (!folio) { - *hpage = NULL; + *foliop = NULL; count_vm_event(THP_COLLAPSE_ALLOC_FAILED); return SCAN_ALLOC_HUGE_PAGE_FAIL; } @@ -1094,13 +1094,13 @@ static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm, count_vm_event(THP_COLLAPSE_ALLOC); if (unlikely(mem_cgroup_charge(folio, mm, gfp))) { folio_put(folio); - *hpage = NULL; + *foliop = NULL; return SCAN_CGROUP_CHARGE_FAIL; }
count_memcg_folio_events(folio, THP_COLLAPSE_ALLOC, 1);
- *hpage = folio_page(folio, 0); + *foliop = folio; return SCAN_SUCCEED; }
@@ -1129,7 +1129,8 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, */ mmap_read_unlock(mm);
- result = alloc_charge_hpage(&hpage, mm, cc); + result = alloc_charge_folio(&folio, mm, cc); + hpage = &folio->page; if (result != SCAN_SUCCEED) goto out_nolock;
@@ -1232,7 +1233,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (unlikely(result != SCAN_SUCCEED)) goto out_up_write;
- folio = page_folio(hpage); /* * The smp_wmb() inside __folio_mark_uptodate() ensures the * copy_huge_page writes become visible before the set_pmd_at() @@ -1827,7 +1827,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, struct page *hpage; struct page *page; struct page *tmp; - struct folio *folio; + struct folio *folio, *new_folio; pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); @@ -1838,7 +1838,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
- result = alloc_charge_hpage(&hpage, mm, cc); + result = alloc_charge_folio(&new_folio, mm, cc); + hpage = &new_folio->page; if (result != SCAN_SUCCEED) goto out;
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit 0234779276e56fb17677f3cf64d7cd501f8abe69
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Work purely in terms of the folio. Removes a call to compound_head() in put_page().
Link: https://lkml.kernel.org/r/20240403171838.1445826-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Reviewed-by: Vishal Moola (Oracle) vishal.moola@gmail.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [use folio version of add_reliable_[page/folio]_counter]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3df4bf24dbc7..f2855694de53 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1113,7 +1113,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, pte_t *pte; pgtable_t pgtable; struct folio *folio; - struct page *hpage; spinlock_t *pmd_ptl, *pte_ptl; int result = SCAN_FAIL; struct vm_area_struct *vma; @@ -1130,7 +1129,6 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, mmap_read_unlock(mm);
result = alloc_charge_folio(&folio, mm, cc); - hpage = &folio->page; if (result != SCAN_SUCCEED) goto out_nolock;
@@ -1226,7 +1224,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, */ anon_vma_unlock_write(vma->anon_vma);
- result = __collapse_huge_page_copy(pte, hpage, pmd, _pmd, + result = __collapse_huge_page_copy(pte, &folio->page, pmd, _pmd, vma, address, pte_ptl, &compound_pagelist); pte_unmap(pte); @@ -1241,12 +1239,12 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, __folio_mark_uptodate(folio); pgtable = pmd_pgtable(_pmd);
- _pmd = mk_huge_pmd(hpage, vma->vm_page_prot); + _pmd = mk_huge_pmd(&folio->page, vma->vm_page_prot); _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
spin_lock(pmd_ptl); BUG_ON(!pmd_none(*pmd)); - add_reliable_page_counter(hpage, vma->vm_mm, HPAGE_PMD_NR); + add_reliable_folio_counter(folio, vma->vm_mm, HPAGE_PMD_NR); folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE); folio_add_lru_vma(folio, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); @@ -1254,14 +1252,14 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, update_mmu_cache_pmd(vma, address, pmd); spin_unlock(pmd_ptl);
- hpage = NULL; + folio = NULL;
result = SCAN_SUCCEED; out_up_write: mmap_write_unlock(mm); out_nolock: - if (hpage) - put_page(hpage); + if (folio) + folio_put(folio); trace_mm_collapse_huge_page(mm, result == SCAN_SUCCEED, result); return result; }
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit 8eca68e2cfdf863e98dc3c2cc8b2be9cac46b9d6
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Simplify the body of __collapse_huge_page_copy() while I'm looking at it.
Link: https://lkml.kernel.org/r/20240403171838.1445826-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Reviewed-by: Vishal Moola (Oracle) vishal.moola@gmail.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [return val conflict for copy_mc_user_highpage, which is introduced by commit 681be0689526 ("mm/hwpoison: return -EFAULT when copy fail in copy_mc_[user]_highpage()")]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 36 ++++++++++++++++--------------------
 1 file changed, 16 insertions(+), 20 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f2855694de53..00423d9c0b4f 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -794,7 +794,7 @@ static void __collapse_huge_page_copy_failed(pte_t *pte, * Returns SCAN_SUCCEED if copying succeeds, otherwise returns SCAN_COPY_MC. * * @pte: starting of the PTEs to copy from - * @page: the new hugepage to copy contents to + * @folio: the new hugepage to copy contents to * @pmd: pointer to the new hugepage's PMD * @orig_pmd: the original raw pages' PMD * @vma: the original raw pages' virtual memory area @@ -802,33 +802,29 @@ static void __collapse_huge_page_copy_failed(pte_t *pte, * @ptl: lock on raw pages' PTEs * @compound_pagelist: list that stores compound pages */ -static int __collapse_huge_page_copy(pte_t *pte, - struct page *page, - pmd_t *pmd, - pmd_t orig_pmd, - struct vm_area_struct *vma, - unsigned long address, - spinlock_t *ptl, - struct list_head *compound_pagelist) -{ - struct page *src_page; - pte_t *_pte; - pte_t pteval; - unsigned long _address; +static int __collapse_huge_page_copy(pte_t *pte, struct folio *folio, + pmd_t *pmd, pmd_t orig_pmd, struct vm_area_struct *vma, + unsigned long address, spinlock_t *ptl, + struct list_head *compound_pagelist) +{ + unsigned int i; int result = SCAN_SUCCEED;
/* * Copying pages' contents is subject to memory poison at any iteration. */ - for (_pte = pte, _address = address; _pte < pte + HPAGE_PMD_NR; - _pte++, page++, _address += PAGE_SIZE) { - pteval = ptep_get(_pte); + for (i = 0; i < HPAGE_PMD_NR; i++) { + pte_t pteval = ptep_get(pte + i); + struct page *page = folio_page(folio, i); + unsigned long src_addr = address + i * PAGE_SIZE; + struct page *src_page; + if (pte_none(pteval) || is_zero_pfn(pte_pfn(pteval))) { - clear_user_highpage(page, _address); + clear_user_highpage(page, src_addr); continue; } src_page = pte_page(pteval); - if (copy_mc_user_highpage(page, src_page, _address, vma)) { + if (copy_mc_user_highpage(page, src_page, src_addr, vma)) { result = SCAN_COPY_MC; break; } @@ -1224,7 +1220,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, */ anon_vma_unlock_write(vma->anon_vma);
- result = __collapse_huge_page_copy(pte, &folio->page, pmd, _pmd, + result = __collapse_huge_page_copy(pte, folio, pmd, _pmd, vma, address, pte_ptl, &compound_pagelist); pte_unmap(pte);
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit 610ff817b981921213ae51e5c5f38c76c6f0405e
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Use new_folio throughout where we had been using hpage.
Link: https://lkml.kernel.org/r/20240403171838.1445826-6-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Reviewed-by: Vishal Moola (Oracle) vishal.moola@gmail.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [return val conflict for copy_mc_user_highpage, which is introduced by commit 681be0689526 ("mm/hwpoison: return -EFAULT when copy fail in copy_mc_[user]_highpage()")]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 include/trace/events/huge_memory.h | 6 +--
 mm/khugepaged.c | 77 +++++++++++++++---------------
 2 files changed, 42 insertions(+), 41 deletions(-)
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index 6e2ef1d4b002..dc6eeef2d3da 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -207,10 +207,10 @@ TRACE_EVENT(mm_khugepaged_scan_file, );
TRACE_EVENT(mm_khugepaged_collapse_file, - TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index, + TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, bool is_shmem, unsigned long addr, struct file *file, int nr, int result), - TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result), + TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( __field(struct mm_struct *, mm) __field(unsigned long, hpfn) @@ -224,7 +224,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file,
TP_fast_assign( __entry->mm = mm; - __entry->hpfn = hpage ? page_to_pfn(hpage) : -1; + __entry->hpfn = new_folio ? folio_pfn(new_folio) : -1; __entry->index = index; __entry->addr = addr; __entry->is_shmem = is_shmem; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 00423d9c0b4f..6a03e7f5ad99 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1818,30 +1818,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, struct collapse_control *cc) { struct address_space *mapping = file->f_mapping; - struct page *hpage; struct page *page; - struct page *tmp; + struct page *tmp, *dst; struct folio *folio, *new_folio; pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); int nr_none = 0, result = SCAN_SUCCEED; bool is_shmem = shmem_file(file); - int nr = 0;
VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem); VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
result = alloc_charge_folio(&new_folio, mm, cc); - hpage = &new_folio->page; if (result != SCAN_SUCCEED) goto out;
- __SetPageLocked(hpage); + __folio_set_locked(new_folio); if (is_shmem) - __SetPageSwapBacked(hpage); - hpage->index = start; - hpage->mapping = mapping; + __folio_set_swapbacked(new_folio); + new_folio->index = start; + new_folio->mapping = mapping;
/* * Ensure we have slots for all the pages in the range. This is @@ -2074,20 +2071,24 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, * The old pages are locked, so they won't change anymore. */ index = start; + dst = folio_page(new_folio, 0); list_for_each_entry(page, &pagelist, lru) { while (index < page->index) { - clear_highpage(hpage + (index % HPAGE_PMD_NR)); + clear_highpage(dst); index++; + dst++; } - if (copy_mc_highpage(hpage + (page->index % HPAGE_PMD_NR), page)) { + if (copy_mc_highpage(dst, page)) { result = SCAN_COPY_MC; goto rollback; } index++; + dst++; } while (index < end) { - clear_highpage(hpage + (index % HPAGE_PMD_NR)); + clear_highpage(dst); index++; + dst++; }
if (nr_none) { @@ -2115,16 +2116,17 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, }
/* - * If userspace observed a missing page in a VMA with a MODE_MISSING - * userfaultfd, then it might expect a UFFD_EVENT_PAGEFAULT for that - * page. If so, we need to roll back to avoid suppressing such an - * event. Since wp/minor userfaultfds don't give userspace any - * guarantees that the kernel doesn't fill a missing page with a zero - * page, so they don't matter here. + * If userspace observed a missing page in a VMA with + * a MODE_MISSING userfaultfd, then it might expect a + * UFFD_EVENT_PAGEFAULT for that page. If so, we need to + * roll back to avoid suppressing such an event. Since + * wp/minor userfaultfds don't give userspace any + * guarantees that the kernel doesn't fill a missing + * page with a zero page, so they don't matter here. * - * Any userfaultfds registered after this point will not be able to - * observe any missing pages due to the previously inserted retry - * entries. + * Any userfaultfds registered after this point will + * not be able to observe any missing pages due to the + * previously inserted retry entries. */ vma_interval_tree_foreach(vma, &mapping->i_mmap, start, end) { if (userfaultfd_missing(vma)) { @@ -2149,33 +2151,32 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, xas_lock_irq(&xas); }
- folio = page_folio(hpage); - nr = folio_nr_pages(folio); if (is_shmem) - __lruvec_stat_mod_folio(folio, NR_SHMEM_THPS, nr); + __lruvec_stat_mod_folio(new_folio, NR_SHMEM_THPS, HPAGE_PMD_NR); else - __lruvec_stat_mod_folio(folio, NR_FILE_THPS, nr); + __lruvec_stat_mod_folio(new_folio, NR_FILE_THPS, HPAGE_PMD_NR);
if (nr_none) { - __lruvec_stat_mod_folio(folio, NR_FILE_PAGES, nr_none); + __lruvec_stat_mod_folio(new_folio, NR_FILE_PAGES, nr_none); /* nr_none is always 0 for non-shmem. */ - __lruvec_stat_mod_folio(folio, NR_SHMEM, nr_none); + __lruvec_stat_mod_folio(new_folio, NR_SHMEM, nr_none); }
/* - * Mark hpage as uptodate before inserting it into the page cache so - * that it isn't mistaken for an fallocated but unwritten page. + * Mark new_folio as uptodate before inserting it into the + * page cache so that it isn't mistaken for an fallocated but + * unwritten page. */ - folio_mark_uptodate(folio); - folio_ref_add(folio, HPAGE_PMD_NR - 1); + folio_mark_uptodate(new_folio); + folio_ref_add(new_folio, HPAGE_PMD_NR - 1);
if (is_shmem) - folio_mark_dirty(folio); - folio_add_lru(folio); + folio_mark_dirty(new_folio); + folio_add_lru(new_folio);
/* Join all the small entries into a single multi-index entry. */ xas_set_order(&xas, start, HPAGE_PMD_ORDER); - xas_store(&xas, folio); + xas_store(&xas, new_folio); WARN_ON_ONCE(xas_error(&xas)); xas_unlock_irq(&xas);
@@ -2186,7 +2187,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, retract_page_tables(mapping, start); if (cc && !cc->is_khugepaged) result = SCAN_PTE_MAPPED_HUGEPAGE; - folio_unlock(folio); + folio_unlock(new_folio);
/* * The collapse has succeeded, so free the old pages. @@ -2231,13 +2232,13 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, smp_mb(); }
- hpage->mapping = NULL; + new_folio->mapping = NULL;
- unlock_page(hpage); - put_page(hpage); + folio_unlock(new_folio); + folio_put(new_folio); out: VM_BUG_ON(!list_empty(&pagelist)); - trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result); + trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result); return result; }
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit 8d1e24c0b82d9730d05ee85eb7f4195df8cdf6a6
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Pull folios from the page cache instead of pages. Half of this work had been done already, but we were still operating on pages for a large chunk of this function. There is no attempt in this patch to handle large folios that are smaller than a THP; that will have to wait for a future patch.
[willy@infradead.org: the unlikely() is embedded in IS_ERR()]
Link: https://lkml.kernel.org/r/ZhIWX8K0E2tSyMSr@casper.infradead.org
Link: https://lkml.kernel.org/r/20240403171838.1445826-7-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [return val conflict for copy_mc_highpage, which is introduced by commit 681be0689526 ("mm/hwpoison: return -EFAULT when copy fail in copy_mc_[user]_highpage()")]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 113 +++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 59 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 6a03e7f5ad99..3cab0e7f9a64 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1818,9 +1818,8 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, struct collapse_control *cc) { struct address_space *mapping = file->f_mapping; - struct page *page; - struct page *tmp, *dst; - struct folio *folio, *new_folio; + struct page *dst; + struct folio *folio, *tmp, *new_folio; pgoff_t index = 0, end = start + HPAGE_PMD_NR; LIST_HEAD(pagelist); XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER); @@ -1858,11 +1857,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
for (index = start; index < end; index++) { xas_set(&xas, index); - page = xas_load(&xas); + folio = xas_load(&xas);
VM_BUG_ON(index != xas.xa_index); if (is_shmem) { - if (!page) { + if (!folio) { /* * Stop if extent has been truncated or * hole-punched, and is now completely @@ -1878,7 +1877,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, continue; }
- if (xa_is_value(page) || !PageUptodate(page)) { + if (xa_is_value(folio) || !folio_test_uptodate(folio)) { xas_unlock_irq(&xas); /* swap in or instantiate fallocated page */ if (shmem_get_folio(mapping->host, index, 0, @@ -1888,28 +1887,27 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } /* drain lru cache to help isolate_lru_page() */ lru_add_drain(); - page = folio_file_page(folio, index); - } else if (trylock_page(page)) { - get_page(page); + } else if (folio_trylock(folio)) { + folio_get(folio); xas_unlock_irq(&xas); } else { result = SCAN_PAGE_LOCK; goto xa_locked; } } else { /* !is_shmem */ - if (!page || xa_is_value(page)) { + if (!folio || xa_is_value(folio)) { xas_unlock_irq(&xas); page_cache_sync_readahead(mapping, &file->f_ra, file, index, end - index); /* drain lru cache to help isolate_lru_page() */ lru_add_drain(); - page = find_lock_page(mapping, index); - if (unlikely(page == NULL)) { + folio = filemap_lock_folio(mapping, index); + if (IS_ERR(folio)) { result = SCAN_FAIL; goto xa_unlocked; } - } else if (PageDirty(page)) { + } else if (folio_test_dirty(folio)) { /* * khugepaged only works on read-only fd, * so this page is dirty because it hasn't @@ -1927,12 +1925,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, filemap_flush(mapping); result = SCAN_FAIL; goto xa_unlocked; - } else if (PageWriteback(page)) { + } else if (folio_test_writeback(folio)) { xas_unlock_irq(&xas); result = SCAN_FAIL; goto xa_unlocked; - } else if (trylock_page(page)) { - get_page(page); + } else if (folio_trylock(folio)) { + folio_get(folio); xas_unlock_irq(&xas); } else { result = SCAN_PAGE_LOCK; @@ -1941,35 +1939,31 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, }
/* - * The page must be locked, so we can drop the i_pages lock + * The folio must be locked, so we can drop the i_pages lock * without racing with truncate. */ - VM_BUG_ON_PAGE(!PageLocked(page), page); + VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- /* make sure the page is up to date */ - if (unlikely(!PageUptodate(page))) { + /* make sure the folio is up to date */ + if (unlikely(!folio_test_uptodate(folio))) { result = SCAN_FAIL; goto out_unlock; }
/* * If file was truncated then extended, or hole-punched, before - * we locked the first page, then a THP might be there already. + * we locked the first folio, then a THP might be there already. * This will be discovered on the first iteration. */ - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - result = compound_order(head) == HPAGE_PMD_ORDER && - head->index == start + if (folio_test_large(folio)) { + result = folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start /* Maybe PMD-mapped */ ? SCAN_PTE_MAPPED_HUGEPAGE : SCAN_PAGE_COMPOUND; goto out_unlock; }
- folio = page_folio(page); - if (folio_mapping(folio) != mapping) { result = SCAN_TRUNCATED; goto out_unlock; @@ -1979,7 +1973,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, folio_test_writeback(folio))) { /* * khugepaged only works on read-only fd, so this - * page is dirty because it hasn't been flushed + * folio is dirty because it hasn't been flushed * since first write. */ result = SCAN_FAIL; @@ -2003,33 +1997,34 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
xas_lock_irq(&xas);
- VM_BUG_ON_PAGE(page != xa_load(xas.xa, index), page); + VM_BUG_ON_FOLIO(folio != xa_load(xas.xa, index), folio);
/* - * We control three references to the page: + * We control three references to the folio: * - we hold a pin on it; * - one reference from page cache; - * - one from isolate_lru_page; - * If those are the only references, then any new usage of the - * page will have to fetch it from the page cache. That requires - * locking the page to handle truncate, so any new usage will be - * blocked until we unlock page after collapse/during rollback. + * - one from lru_isolate_folio; + * If those are the only references, then any new usage + * of the folio will have to fetch it from the page + * cache. That requires locking the folio to handle + * truncate, so any new usage will be blocked until we + * unlock folio after collapse/during rollback. */ - if (page_count(page) != 3) { + if (folio_ref_count(folio) != 3) { result = SCAN_PAGE_COUNT; xas_unlock_irq(&xas); - putback_lru_page(page); + folio_putback_lru(folio); goto out_unlock; }
/* - * Accumulate the pages that are being collapsed. + * Accumulate the folios that are being collapsed. */ - list_add_tail(&page->lru, &pagelist); + list_add_tail(&folio->lru, &pagelist); continue; out_unlock: - unlock_page(page); - put_page(page); + folio_unlock(folio); + folio_put(folio); goto xa_unlocked; }
@@ -2068,17 +2063,17 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, }
/* - * The old pages are locked, so they won't change anymore. + * The old folios are locked, so they won't change anymore. */ index = start; dst = folio_page(new_folio, 0); - list_for_each_entry(page, &pagelist, lru) { - while (index < page->index) { + list_for_each_entry(folio, &pagelist, lru) { + while (index < folio->index) { clear_highpage(dst); index++; dst++; } - if (copy_mc_highpage(dst, page)) { + if (copy_mc_highpage(dst, folio_page(folio, 0))) { result = SCAN_COPY_MC; goto rollback; } @@ -2190,15 +2185,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, folio_unlock(new_folio);
/* - * The collapse has succeeded, so free the old pages. + * The collapse has succeeded, so free the old folios. */ - list_for_each_entry_safe(page, tmp, &pagelist, lru) { - list_del(&page->lru); - page->mapping = NULL; - ClearPageActive(page); - ClearPageUnevictable(page); - unlock_page(page); - folio_put_refs(page_folio(page), 3); + list_for_each_entry_safe(folio, tmp, &pagelist, lru) { + list_del(&folio->lru); + folio->mapping = NULL; + folio_clear_active(folio); + folio_clear_unevictable(folio); + folio_unlock(folio); + folio_put_refs(folio, 3); }
goto out; @@ -2212,11 +2207,11 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, shmem_uncharge(mapping->host, nr_none); }
- list_for_each_entry_safe(page, tmp, &pagelist, lru) { - list_del(&page->lru); - unlock_page(page); - putback_lru_page(page); - put_page(page); + list_for_each_entry_safe(folio, tmp, &pagelist, lru) { + list_del(&folio->lru); + folio_unlock(folio); + folio_putback_lru(folio); + folio_put(folio); } /* * Undo the updates of filemap_nr_thps_inc for non-SHMEM
From: "Matthew Wilcox (Oracle)" willy@infradead.org
mainline inclusion
from mainline-v6.10-rc1
commit 43849758fdc976a6d6108ed6dfccdb136fdeec39
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Replace the use of pages with folios. Saves a few calls to compound_head() and removes some uses of obsolete functions.
Link: https://lkml.kernel.org/r/20240403171838.1445826-8-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) willy@infradead.org
Reviewed-by: David Hildenbrand david@redhat.com
Reviewed-by: Vishal Moola (Oracle) vishal.moola@gmail.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [adjust page_from_dynamic_pool and page_reliable to use folio]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 include/trace/events/huge_memory.h | 6 ++---
 mm/khugepaged.c | 37 +++++++++++++++---------------
 2 files changed, 21 insertions(+), 22 deletions(-)
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index dc6eeef2d3da..ab576898a126 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -174,10 +174,10 @@ TRACE_EVENT(mm_collapse_huge_page_swapin,
TRACE_EVENT(mm_khugepaged_scan_file,
- TP_PROTO(struct mm_struct *mm, struct page *page, struct file *file, + TP_PROTO(struct mm_struct *mm, struct folio *folio, struct file *file, int present, int swap, int result),
- TP_ARGS(mm, page, file, present, swap, result), + TP_ARGS(mm, folio, file, present, swap, result),
TP_STRUCT__entry( __field(struct mm_struct *, mm) @@ -190,7 +190,7 @@ TRACE_EVENT(mm_khugepaged_scan_file,
TP_fast_assign( __entry->mm = mm; - __entry->pfn = page ? page_to_pfn(page) : -1; + __entry->pfn = folio ? folio_pfn(folio) : -1; __assign_str(filename, file->f_path.dentry->d_iname); __entry->present = present; __entry->swap = swap; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3cab0e7f9a64..d585ee8357da 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2241,7 +2241,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, struct file *file, pgoff_t start, struct collapse_control *cc) { - struct page *page = NULL; + struct folio *folio = NULL; struct address_space *mapping = file->f_mapping; XA_STATE(xas, &mapping->i_pages, start); int present, swap; @@ -2254,11 +2254,11 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, nodes_clear(cc->alloc_nmask); cc->reliable = false; rcu_read_lock(); - xas_for_each(&xas, page, start + HPAGE_PMD_NR - 1) { - if (xas_retry(&xas, page)) + xas_for_each(&xas, folio, start + HPAGE_PMD_NR - 1) { + if (xas_retry(&xas, folio)) continue;
- if (xa_is_value(page)) { + if (xa_is_value(folio)) { ++swap; if (cc->is_khugepaged && swap > khugepaged_max_ptes_swap) { @@ -2273,11 +2273,9 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, * TODO: khugepaged should compact smaller compound pages * into a PMD sized page */ - if (PageTransCompound(page)) { - struct page *head = compound_head(page); - - result = compound_order(head) == HPAGE_PMD_ORDER && - head->index == start + if (folio_test_large(folio)) { + result = folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start /* Maybe PMD-mapped */ ? SCAN_PTE_MAPPED_HUGEPAGE : SCAN_PAGE_COMPOUND; @@ -2290,33 +2288,34 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, break; }
- node = page_to_nid(page); + node = folio_nid(folio); if (hpage_collapse_scan_abort(node, cc)) { result = SCAN_SCAN_ABORT; break; } cc->node_load[node]++;
- if (!PageLRU(page)) { + if (!folio_test_lru(folio)) { result = SCAN_PAGE_LRU; break; }
- if (page_count(page) != - 1 + page_mapcount(page) + page_has_private(page)) { + if (folio_ref_count(folio) != + 1 + folio_mapcount(folio) + folio_test_private(folio)) { result = SCAN_PAGE_COUNT; break; }
- if (page_from_dynamic_pool(page)) { + if (page_from_dynamic_pool(&folio->page)) { result = SCAN_FAIL; break; }
/* - * We probably should check if the page is referenced here, but - * nobody would transfer pte_young() to PageReferenced() for us. - * And rmap walk here is just too costly... + * We probably should check if the folio is referenced + * here, but nobody would transfer pte_young() to + * folio_test_referenced() for us. And rmap walk here + * is just too costly... */
present++; @@ -2326,7 +2325,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, cond_resched_rcu(); }
- if (page_reliable(page)) + if (folio_reliable(folio)) cc->reliable = true; } rcu_read_unlock(); @@ -2341,7 +2340,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, } }
- trace_mm_khugepaged_scan_file(mm, page, file, present, swap, result); + trace_mm_khugepaged_scan_file(mm, folio, file, present, swap, result); return result; } #else
From: Baolin Wang baolin.wang@linux.alibaba.com
mainline inclusion
from mainline-v6.12-rc1
commit fda6d4de064a9d37414df36d45836898fff5e165
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Patch series "support shmem mTHP collapse", v2.
Shmem already supports mTHP allocation[1], and this patchset adds support for shmem mTHP collapse, as well as adding relevant test cases.
This patch (of 5):
Expand is_refcount_suitable() to support reference checks for file folios, in preparation for supporting shmem mTHP collapse.
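A worked example may help here (the folio state below is hypothetical, chosen only to make the arithmetic concrete):

  /*
   * A 16-page shmem folio that is currently unmapped, not in the
   * swap cache, and with no private data attached:
   *
   *   expected_refcount = folio_mapcount()  =  0
   *                     + folio_nr_pages()  = 16  (page-cache references,
   *                                                since it is not anon)
   *
   * so is_refcount_suitable() accepts it only if
   * folio_ref_count(folio) == 16.  Anonymous folios keep the old
   * behaviour: mapcount, plus nr_pages while they sit in the swap
   * cache, plus one if private data is attached.
   */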
Link: https://lkml.kernel.org/r/cover.1724140601.git.baolin.wang@linux.alibaba.com
Link: https://lkml.kernel.org/r/eae4cb3195ebbb654bfb7967cb7261d4e4e7c7fa.172414060...
Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com
Acked-by: David Hildenbrand david@redhat.com
Cc: Barry Song 21cnbao@gmail.com
Cc: Hugh Dickins hughd@google.com
Cc: Matthew Wilcox willy@infradead.org
Cc: Ryan Roberts ryan.roberts@arm.com
Cc: Yang Shi shy828301@gmail.com
Cc: Zi Yan ziy@nvidia.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index d585ee8357da..007684e8b888 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -555,12 +555,14 @@ static void release_pte_pages(pte_t *pte, pte_t *_pte,
static bool is_refcount_suitable(struct folio *folio) { - int expected_refcount; + int expected_refcount = folio_mapcount(folio);
- expected_refcount = folio_mapcount(folio); - if (folio_test_swapcache(folio)) + if (!folio_test_anon(folio) || folio_test_swapcache(folio)) expected_refcount += folio_nr_pages(folio);
+ if (folio_test_private(folio)) + expected_refcount++; + return folio_ref_count(folio) == expected_refcount; }
@@ -2300,8 +2302,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, break; }
- if (folio_ref_count(folio) != - 1 + folio_mapcount(folio) + folio_test_private(folio)) { + if (!is_refcount_suitable(folio)) { result = SCAN_PAGE_COUNT; break; }
From: Baolin Wang baolin.wang@linux.alibaba.com
mainline inclusion
from mainline-v6.12-rc1
commit d6b8f296e8d74d685627fd746558745c13b8bd32
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Use the number of pages in the folio to check the reference count, in preparation for supporting shmem mTHP collapse.
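A brief worked example (hypothetical 16-page folio, for illustration only) of the count collapse_file() now expects for a folio sitting on the pagelist:

  /*
   *   1                       - the pin taken when the folio was locked
   *   folio_nr_pages() == 16  - references held by the page cache
   *   1                       - reference taken by the LRU isolation
   *
   * so the check becomes folio_ref_count(folio) == 2 + folio_nr_pages()
   * (18 here), and the path that frees the old folios after a successful
   * collapse drops the same 2 + folio_nr_pages() references in a single
   * folio_put_refs() call.
   */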
Link: https://lkml.kernel.org/r/9ea49262308de28957596cc6e8edc2d3a4f54659.172414060...
Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com
Acked-by: David Hildenbrand david@redhat.com
Cc: Barry Song 21cnbao@gmail.com
Cc: Hugh Dickins hughd@google.com
Cc: Matthew Wilcox willy@infradead.org
Cc: Ryan Roberts ryan.roberts@arm.com
Cc: Yang Shi shy828301@gmail.com
Cc: Zi Yan ziy@nvidia.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 007684e8b888..b757299dfdc8 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2002,9 +2002,9 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, VM_BUG_ON_FOLIO(folio != xa_load(xas.xa, index), folio);
/* - * We control three references to the folio: + * We control 2 + nr_pages references to the folio: * - we hold a pin on it; - * - one reference from page cache; + * - nr_pages reference from page cache; * - one from lru_isolate_folio; * If those are the only references, then any new usage * of the folio will have to fetch it from the page @@ -2012,7 +2012,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, * truncate, so any new usage will be blocked until we * unlock folio after collapse/during rollback. */ - if (folio_ref_count(folio) != 3) { + if (folio_ref_count(folio) != 2 + folio_nr_pages(folio)) { result = SCAN_PAGE_COUNT; xas_unlock_irq(&xas); folio_putback_lru(folio); @@ -2195,7 +2195,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, folio_clear_active(folio); folio_clear_unevictable(folio); folio_unlock(folio); - folio_put_refs(folio, 3); + folio_put_refs(folio, 2 + folio_nr_pages(folio)); }
goto out;
From: Baolin Wang baolin.wang@linux.alibaba.com
mainline inclusion
from mainline-v6.12-rc1
commit dfa98f56d932fca3eaadaed8c17393fdfa00574d
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Iterate over each subpage in the large folio when copying, in preparation for supporting shmem mTHP collapse.
Link: https://lkml.kernel.org/r/222d615b7c837eabb47a238126c5fdeff8aa5283.172414060...
Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com
Cc: Barry Song 21cnbao@gmail.com
Cc: David Hildenbrand david@redhat.com
Cc: Hugh Dickins hughd@google.com
Cc: Matthew Wilcox willy@infradead.org
Cc: Ryan Roberts ryan.roberts@arm.com
Cc: Yang Shi shy828301@gmail.com
Cc: Zi Yan ziy@nvidia.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Conflicts: mm/khugepaged.c [return val conflict for copy_mc_highpage, which is introduced by commit 681be0689526 ("mm/hwpoison: return -EFAULT when copy fail in copy_mc_[user]_highpage()")]
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b757299dfdc8..3337b79f51ae 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2070,17 +2070,22 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, index = start; dst = folio_page(new_folio, 0); list_for_each_entry(folio, &pagelist, lru) { + int i, nr_pages = folio_nr_pages(folio); + while (index < folio->index) { clear_highpage(dst); index++; dst++; } - if (copy_mc_highpage(dst, folio_page(folio, 0))) { - result = SCAN_COPY_MC; - goto rollback; + + for (i = 0; i < nr_pages; i++) { + if (copy_mc_highpage(dst, folio_page(folio, i))) { + result = SCAN_COPY_MC; + goto rollback; + } + index++; + dst++; } - index++; - dst++; } while (index < end) { clear_highpage(dst);
From: Baolin Wang baolin.wang@linux.alibaba.com
mainline inclusion
from mainline-v6.12-rc1
commit 7de856ffd007f132fd4a0474b289a622a3f88cd7
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Shmem already supports the allocation of mTHP, but khugepaged does not yet support collapsing mTHP folios. Now that khugepaged is ready to support mTHP collapse, this patch enables the collapse of shmem mTHP.
Link: https://lkml.kernel.org/r/b9da76aab4276eb6e5d12c479af2b5eea5b4575d.172414060...
Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com
Cc: Barry Song 21cnbao@gmail.com
Cc: David Hildenbrand david@redhat.com
Cc: Hugh Dickins hughd@google.com
Cc: Matthew Wilcox willy@infradead.org
Cc: Ryan Roberts ryan.roberts@arm.com
Cc: Yang Shi shy828301@gmail.com
Cc: Zi Yan ziy@nvidia.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 3337b79f51ae..b5922ed5a745 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1857,7 +1857,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } } while (1);
- for (index = start; index < end; index++) { + for (index = start; index < end;) { xas_set(&xas, index); folio = xas_load(&xas);
@@ -1876,6 +1876,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, } } nr_none++; + index++; continue; }
@@ -1957,12 +1958,10 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, * we locked the first folio, then a THP might be there already. * This will be discovered on the first iteration. */ - if (folio_test_large(folio)) { - result = folio_order(folio) == HPAGE_PMD_ORDER && - folio->index == start - /* Maybe PMD-mapped */ - ? SCAN_PTE_MAPPED_HUGEPAGE - : SCAN_PAGE_COMPOUND; + if (folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start) { + /* Maybe PMD-mapped */ + result = SCAN_PTE_MAPPED_HUGEPAGE; goto out_unlock; }
@@ -2023,6 +2022,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, * Accumulate the folios that are being collapsed. */ list_add_tail(&folio->lru, &pagelist); + index += folio_nr_pages(folio); continue; out_unlock: folio_unlock(folio); @@ -2276,16 +2276,10 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, continue; }
- /* - * TODO: khugepaged should compact smaller compound pages - * into a PMD sized page - */ - if (folio_test_large(folio)) { - result = folio_order(folio) == HPAGE_PMD_ORDER && - folio->index == start - /* Maybe PMD-mapped */ - ? SCAN_PTE_MAPPED_HUGEPAGE - : SCAN_PAGE_COMPOUND; + if (folio_order(folio) == HPAGE_PMD_ORDER && + folio->index == start) { + /* Maybe PMD-mapped */ + result = SCAN_PTE_MAPPED_HUGEPAGE; /* * For SCAN_PTE_MAPPED_HUGEPAGE, further processing * by the caller won't touch the page cache, and so
FeedBack: The patch(es) you sent to kernel@openeuler.org could not be converted to a PR!
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/I...
Failed Reason: the cover letter of the patch series is missing
Suggested Solution: please include the patch series' cover letter and resend the whole series to the mailing list
From: Baolin Wang baolin.wang@linux.alibaba.com
mainline inclusion
from mainline-v6.12-rc1
commit 2e6d88e9d455fae9c4ac95c893362258f9540dfa
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Add shmem mTHP collapse testing. As with anonymous pages, users can use the '-s' parameter to specify the shmem mTHP size for testing.
Link: https://lkml.kernel.org/r/fa44bfa20ca5b9fd6f9163a048f3d3c1e53cd0a8.172414060...
Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com
Cc: Barry Song 21cnbao@gmail.com
Cc: David Hildenbrand david@redhat.com
Cc: Hugh Dickins hughd@google.com
Cc: Matthew Wilcox willy@infradead.org
Cc: Ryan Roberts ryan.roberts@arm.com
Cc: Yang Shi shy828301@gmail.com
Cc: Zi Yan ziy@nvidia.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 tools/testing/selftests/mm/khugepaged.c | 4 +-
 tools/testing/selftests/mm/thp_settings.c | 46 ++++++++++++++++++++---
 tools/testing/selftests/mm/thp_settings.h | 9 ++++-
 3 files changed, 51 insertions(+), 8 deletions(-)
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 829320a519e7..56d4480e8d3c 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1095,7 +1095,7 @@ static void usage(void) fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); - fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n"); + fprintf(stderr, "\t\t Defaults to 0. Use this size for anon or shmem allocations.\n"); exit(1); }
@@ -1209,6 +1209,8 @@ int main(int argc, char **argv) default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8; default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT; default_settings.hugepages[anon_order].enabled = THP_ALWAYS; + default_settings.shmem_hugepages[hpage_pmd_order].enabled = SHMEM_INHERIT; + default_settings.shmem_hugepages[anon_order].enabled = SHMEM_ALWAYS;
save_settings(); thp_push_settings(&default_settings); diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c index a4163438108e..577eaab6266f 100644 --- a/tools/testing/selftests/mm/thp_settings.c +++ b/tools/testing/selftests/mm/thp_settings.c @@ -33,10 +33,11 @@ static const char * const thp_defrag_strings[] = { };
static const char * const shmem_enabled_strings[] = { + "never", "always", "within_size", "advise", - "never", + "inherit", "deny", "force", NULL @@ -200,6 +201,7 @@ void thp_write_num(const char *name, unsigned long num) void thp_read_settings(struct thp_settings *settings) { unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int i;
@@ -234,12 +236,24 @@ void thp_read_settings(struct thp_settings *settings) settings->hugepages[i].enabled = thp_read_string(path, thp_enabled_strings); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) { + settings->shmem_hugepages[i].enabled = SHMEM_NEVER; + continue; + } + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + settings->shmem_hugepages[i].enabled = + thp_read_string(path, shmem_enabled_strings); + } }
void thp_write_settings(struct thp_settings *settings) { struct khugepaged_settings *khugepaged = &settings->khugepaged; unsigned long orders = thp_supported_orders(); + unsigned long shmem_orders = thp_shmem_supported_orders(); char path[PATH_MAX]; int enabled; int i; @@ -271,6 +285,15 @@ void thp_write_settings(struct thp_settings *settings) enabled = settings->hugepages[i].enabled; thp_write_string(path, thp_enabled_strings[enabled]); } + + for (i = 0; i < NR_ORDERS; i++) { + if (!((1 << i) & shmem_orders)) + continue; + snprintf(path, PATH_MAX, "hugepages-%ukB/shmem_enabled", + (getpagesize() >> 10) << i); + enabled = settings->shmem_hugepages[i].enabled; + thp_write_string(path, shmem_enabled_strings[enabled]); + } }
struct thp_settings *thp_current_settings(void) @@ -324,17 +347,18 @@ void thp_set_read_ahead_path(char *path) dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0'; }
-unsigned long thp_supported_orders(void) +static unsigned long __thp_supported_orders(bool is_shmem) { unsigned long orders = 0; char path[PATH_MAX]; char buf[256]; - int ret; - int i; + int ret, i; + char anon_dir[] = "enabled"; + char shmem_dir[] = "shmem_enabled";
for (i = 0; i < NR_ORDERS; i++) { - ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled", - (getpagesize() >> 10) << i); + ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/%s", + (getpagesize() >> 10) << i, is_shmem ? shmem_dir : anon_dir); if (ret >= PATH_MAX) { printf("%s: Pathname is too long\n", __func__); exit(EXIT_FAILURE); @@ -347,3 +371,13 @@ unsigned long thp_supported_orders(void)
return orders; } + +unsigned long thp_supported_orders(void) +{ + return __thp_supported_orders(false); +} + +unsigned long thp_shmem_supported_orders(void) +{ + return __thp_supported_orders(true); +} diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h index 71cbff05f4c7..876235a23460 100644 --- a/tools/testing/selftests/mm/thp_settings.h +++ b/tools/testing/selftests/mm/thp_settings.h @@ -22,10 +22,11 @@ enum thp_defrag { };
enum shmem_enabled { + SHMEM_NEVER, SHMEM_ALWAYS, SHMEM_WITHIN_SIZE, SHMEM_ADVISE, - SHMEM_NEVER, + SHMEM_INHERIT, SHMEM_DENY, SHMEM_FORCE, }; @@ -46,6 +47,10 @@ struct khugepaged_settings { unsigned long pages_to_scan; };
+struct shmem_hugepages_settings { + enum shmem_enabled enabled; +}; + struct thp_settings { enum thp_enabled thp_enabled; enum thp_defrag thp_defrag; @@ -54,6 +59,7 @@ struct thp_settings { struct khugepaged_settings khugepaged; unsigned long read_ahead_kb; struct hugepages_settings hugepages[NR_ORDERS]; + struct shmem_hugepages_settings shmem_hugepages[NR_ORDERS]; };
int read_file(const char *path, char *buf, size_t buflen); @@ -76,5 +82,6 @@ void thp_save_settings(void);
void thp_set_read_ahead_path(char *path); unsigned long thp_supported_orders(void); +unsigned long thp_shmem_supported_orders(void);
#endif /* __THP_SETTINGS_H__ */
From: Yang Shi yang@os.amperecomputing.com
mainline inclusion
from mainline-v6.12-rc4
commit 37f0b47c5143c2957909ced44fc09ffb118c99f7
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
The "addr" and "is_shmem" arguments have different order in TP_PROTO and TP_ARGS. This resulted in the incorrect trace result:
text-hugepage-644429 [276] 392092.878683: mm_khugepaged_collapse_file: mm=0xffff20025d52c440, hpage_pfn=0x200678c00, index=512, addr=1, is_shmem=0, filename=text-hugepage, nr=512, result=failed
The value of "addr" is wrong because it was treated as bool value, the type of is_shmem.
Fix the order in TP_PROTO to keep "addr" is before "is_shmem" since the original patch review suggested this order to achieve best packing.
And use "lx" for "addr" instead of "ld" in TP_printk because address is typically shown in hex.
After the fix, the trace result looks correct:
text-hugepage-7291 [004] 128.627251: mm_khugepaged_collapse_file: mm=0xffff0001328f9500, hpage_pfn=0x20016ea00, index=512, addr=0x400000, is_shmem=0, filename=text-hugepage, nr=512, result=failed
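To make the failure mode easier to see, here is a minimal user-space sketch of the same class of bug; it is not the TRACE_EVENT machinery itself, and the exact garbled values in the kernel case also depend on the architecture's calling convention:

  #include <stdbool.h>
  #include <stdio.h>

  /* The consumer believes the first argument is the address. */
  static void record(unsigned long addr, bool is_shmem)
  {
          printf("addr=0x%lx, is_shmem=%d\n", addr, is_shmem);
  }

  int main(void)
  {
          unsigned long addr = 0x400000;
          bool is_shmem = false;

          record(addr, is_shmem); /* matching order: addr=0x400000, is_shmem=0 */
          record(is_shmem, addr); /* swapped order: the address is squeezed
                                   * through bool and the flag widened to
                                   * unsigned long, so only coerced 0/1-style
                                   * values get recorded instead of the real
                                   * address and flag
                                   */
          return 0;
  }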
Link: https://lkml.kernel.org/r/20241012011702.1084846-1-yang@os.amperecomputing.c...
Fixes: 4c9473e87e75 ("mm/khugepaged: add tracepoint to collapse_file()")
Signed-off-by: Yang Shi yang@os.amperecomputing.com
Cc: Gautam Menghani gautammenghani201@gmail.com
Cc: Steven Rostedt (Google) rostedt@goodmis.org
Cc: stable@vger.kernel.org [6.2+]
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 include/trace/events/huge_memory.h | 4 ++--
 mm/khugepaged.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h index ab576898a126..9277524e84eb 100644 --- a/include/trace/events/huge_memory.h +++ b/include/trace/events/huge_memory.h @@ -208,7 +208,7 @@ TRACE_EVENT(mm_khugepaged_scan_file,
TRACE_EVENT(mm_khugepaged_collapse_file, TP_PROTO(struct mm_struct *mm, struct folio *new_folio, pgoff_t index, - bool is_shmem, unsigned long addr, struct file *file, + unsigned long addr, bool is_shmem, struct file *file, int nr, int result), TP_ARGS(mm, new_folio, index, addr, is_shmem, file, nr, result), TP_STRUCT__entry( @@ -233,7 +233,7 @@ TRACE_EVENT(mm_khugepaged_collapse_file, __entry->result = result; ),
- TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s", + TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%lx, is_shmem=%d, filename=%s, nr=%d, result=%s", __entry->mm, __entry->hpfn, __entry->index, diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b5922ed5a745..574aacbca7e7 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2240,7 +2240,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr, folio_put(new_folio); out: VM_BUG_ON(!list_empty(&pagelist)); - trace_mm_khugepaged_collapse_file(mm, new_folio, index, is_shmem, addr, file, HPAGE_PMD_NR, result); + trace_mm_khugepaged_collapse_file(mm, new_folio, index, addr, is_shmem, file, HPAGE_PMD_NR, result); return result; }
From: Baolin Wang baolin.wang@linux.alibaba.com
mainline inclusion
from mainline-v6.12-rc4
commit d60fcaf00d752c52fdf566d4184e6d04d9d08879
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAXCD2
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Khugepaged already supports collapsing file large folios (including shmem mTHP) since commit 7de856ffd007 ("mm: khugepaged: support shmem mTHP collapse"), and the control parameters 'khugepaged_max_ptes_swap' and 'khugepaged_max_ptes_none' still compare at PTE granularity to determine whether a file collapse is needed. However, the 'present' and 'swap' statistics in hpage_collapse_scan_file() do not take large folios into account, which can lead to incorrect judgments against the khugepaged_max_ptes_swap/none thresholds and result in unnecessary file collapses.
To fix this issue, account for large folios when updating the 'present' and 'swap' statistics in hpage_collapse_scan_file().
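A worked example of the swap side (the folio order and counts are hypothetical, picked only to show how the threshold is affected):

  /*
   * An order-4 shmem mTHP (16 pages) that has been swapped out is
   * stored as a single multi-index swap entry in the XArray:
   *
   *   old: swap += 1;                         counts one "swapped PTE"
   *   new: swap += 1 << xas_get_order(&xas);  counts all 16
   *
   * With khugepaged_max_ptes_swap at its default of HPAGE_PMD_NR / 8
   * (64 with 4 KiB base pages), five such entries used to be counted
   * as swap == 5 and the collapse went ahead, swapping 80 pages back
   * in; with the fixed accounting swap == 80 exceeds the limit and
   * the scan returns SCAN_EXCEED_SWAP_PTE instead.
   */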
Link: https://lkml.kernel.org/r/c76305d96d12d030a1a346b50503d148364246d2.172890139...
Fixes: 7de856ffd007 ("mm: khugepaged: support shmem mTHP collapse")
Signed-off-by: Baolin Wang baolin.wang@linux.alibaba.com
Acked-by: David Hildenbrand david@redhat.com
Reviewed-by: Barry Song baohua@kernel.org
Reviewed-by: Zi Yan ziy@nvidia.com
Reviewed-by: Yang Shi shy828301@gmail.com
Cc: Hugh Dickins hughd@google.com
Cc: Matthew Wilcox willy@infradead.org
Cc: Ryan Roberts ryan.roberts@arm.com
Signed-off-by: Andrew Morton akpm@linux-foundation.org
Signed-off-by: Nanyong Sun sunnanyong@huawei.com
---
 mm/khugepaged.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 574aacbca7e7..7f87636c530e 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -2266,7 +2266,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, continue;
if (xa_is_value(folio)) { - ++swap; + swap += 1 << xas_get_order(&xas); if (cc->is_khugepaged && swap > khugepaged_max_ptes_swap) { result = SCAN_EXCEED_SWAP_PTE; @@ -2318,7 +2318,7 @@ static int hpage_collapse_scan_file(struct mm_struct *mm, unsigned long addr, * is just too costly... */
- present++; + present += folio_nr_pages(folio);
if (need_resched()) { xas_pause(&xas);