From: Lance Yang <ioworker0@gmail.com>
mainline inclusion
from mainline-v6.11-rc1
commit 26d21b18d971c8ba3343240ca22cfd03daad4926
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/IAIHQO
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
Patch series "Reclaim lazyfree THP without splitting", v8.
This series adds support for reclaiming PMD-mapped THP marked as lazyfree without needing to first split the large folio via split_huge_pmd_address().
When users no longer require the pages, they mark them as lazy-free with madvise(MADV_FREE) and typically do not write to that memory again afterwards.
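For reference, the userspace side looks roughly like this (a minimal sketch; the 2MiB size and the MADV_HUGEPAGE hint are illustrative, not part of this series):

	#define _GNU_SOURCE
	#include <string.h>
	#include <sys/mman.h>

	int main(void)
	{
		size_t len = 2UL << 20;	/* one PMD-sized THP on x86-64 */
		char *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
				 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

		if (buf == MAP_FAILED)
			return 1;

		madvise(buf, len, MADV_HUGEPAGE);	/* hint: back with a THP */
		memset(buf, 0x5a, len);			/* populate the mapping */

		/* Done with the data: mark it lazy-free rather than unmapping. */
		madvise(buf, len, MADV_FREE);
		return 0;
	}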
During memory reclaim, if we detect that the large folio and its PMD are both still marked as clean and there are no unexpected references (such as GUP pins), we can just discard the memory lazily, improving the efficiency of memory reclamation in this case.
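Conceptually, the discard decision boils down to something like the following (a simplified sketch only; the helper name and placement are illustrative and do not match the actual code added later in this series):

	/*
	 * Illustrative only: a lazyfree (anon, non-swapbacked) folio may be
	 * discarded if neither the folio nor its PMD was re-dirtied, and the
	 * only references are the page-table mapping(s) plus the reference
	 * held by the caller.
	 */
	static bool can_discard_lazyfree_thp(struct folio *folio, pmd_t pmdval)
	{
		if (folio_test_dirty(folio) || pmd_dirty(pmdval))
			return false;	/* data was rewritten; must not discard */

		return folio_ref_count(folio) == folio_mapcount(folio) + 1;
	}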
Performance Testing
===================
On an Intel i5 CPU, reclaiming 1GiB of lazyfree THPs using mem_cgroup_force_empty() results in the following runtimes in seconds (shorter is better):
--------------------------------------------
|     Old       |      New      |  Change   |
--------------------------------------------
|   0.683426    |   0.049197    |  -92.80%  |
--------------------------------------------
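(The reclaim pass was driven through the cgroup v1 memory.force_empty interface, which is what invokes mem_cgroup_force_empty(); roughly, assuming a memcg directory path:)

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	/* Trigger mem_cgroup_force_empty() for a cgroup v1 memcg (path assumed). */
	static int force_empty(const char *memcg_dir)
	{
		char path[256];
		ssize_t n;
		int fd;

		snprintf(path, sizeof(path), "%s/memory.force_empty", memcg_dir);
		fd = open(path, O_WRONLY);
		if (fd < 0)
			return -1;
		n = write(fd, "0", 1);
		close(fd);
		return n == 1 ? 0 : -1;
	}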
This patch (of 8):
Introduce the labels walk_done and walk_abort as exit points to eliminate duplicated exit code in the pagewalk loop.
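In miniature, the resulting control flow is (some_failure/done_early are placeholders standing in for the various bail-out conditions in try_to_unmap_one()):

	while (page_vma_mapped_walk(&pvmw)) {
		if (some_failure)
			goto walk_abort;	/* stop walking, report failure */
		if (done_early)
			goto walk_done;		/* stop walking, ret unchanged */
		/* ... per-PTE unmap work ... */
		continue;
	walk_abort:
		ret = false;
	walk_done:
		page_vma_mapped_walk_done(&pvmw);
		break;
	}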
Link: https://lkml.kernel.org/r/20240614015138.31461-1-ioworker0@gmail.com
Link: https://lkml.kernel.org/r/20240614015138.31461-2-ioworker0@gmail.com
Signed-off-by: Lance Yang <ioworker0@gmail.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Cc: Bang Li <libang.li@antgroup.com>
Cc: Fangrui Song <maskray@google.com>
Cc: Jeff Xie <xiehuan09@gmail.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: SeongJae Park <sj@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 mm/rmap.c | 40 +++++++++++++++-------------------------
 1 file changed, 15 insertions(+), 25 deletions(-)
diff --git a/mm/rmap.c b/mm/rmap.c
index 2802d183f331..f427c34dbd13 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1656,9 +1656,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			/* Restore the mlock which got missed */
 			if (!folio_test_large(folio))
 				mlock_vma_folio(folio, vma);
-			page_vma_mapped_walk_done(&pvmw);
-			ret = false;
-			break;
+			goto walk_abort;
 		}
 
 		pfn = pte_pfn(ptep_get(pvmw.pte));
@@ -1696,11 +1694,8 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			 */
 			if (!anon) {
 				VM_BUG_ON(!(flags & TTU_RMAP_LOCKED));
-				if (!hugetlb_vma_trylock_write(vma)) {
-					page_vma_mapped_walk_done(&pvmw);
-					ret = false;
-					break;
-				}
+				if (!hugetlb_vma_trylock_write(vma))
+					goto walk_abort;
 				if (huge_pmd_unshare(mm, vma, address, pvmw.pte)) {
 					hugetlb_vma_unlock_write(vma);
 					flush_tlb_range(vma,
@@ -1715,8 +1710,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 					 * actual page and drop map count
 					 * to zero.
 					 */
-					page_vma_mapped_walk_done(&pvmw);
-					break;
+					goto walk_done;
 				}
 				hugetlb_vma_unlock_write(vma);
 			}
@@ -1790,9 +1784,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			if (unlikely(folio_test_swapbacked(folio) !=
 					folio_test_swapcache(folio))) {
 				WARN_ON_ONCE(1);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 
 			/* MADV_FREE page check */
@@ -1832,23 +1824,17 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				 */
 				set_pte_at(mm, address, pvmw.pte, pteval);
 				folio_set_swapbacked(folio);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 
 			if (swap_duplicate(entry) < 0) {
 				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 			if (arch_unmap_one(mm, vma, address, pteval) < 0) {
 				swap_free(entry);
 				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 
 			/* See folio_try_share_anon_rmap(): clear PTE first. */
@@ -1856,9 +1842,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 			    folio_try_share_anon_rmap_pte(folio, subpage)) {
 				swap_free(entry);
 				set_pte_at(mm, address, pvmw.pte, pteval);
-				ret = false;
-				page_vma_mapped_walk_done(&pvmw);
-				break;
+				goto walk_abort;
 			}
 			if (list_empty(&mm->mmlist)) {
 				spin_lock(&mmlist_lock);
@@ -1900,6 +1884,12 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 		if (vma->vm_flags & VM_LOCKED)
 			mlock_drain_local();
 		folio_put(folio);
+		continue;
+walk_abort:
+		ret = false;
+walk_done:
+		page_vma_mapped_walk_done(&pvmw);
+		break;
 	}
 
 	mmu_notifier_invalidate_range_end(&range);