From: David Hildenbrand <david@redhat.com>
mainline inclusion
from mainline-v6.8-rc1
commit e3b4b1374f87c71e9309efc6149f113cdd17af72
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9CESE
CVE: NA
-------------------------------------------------
Let's convert it like we converted all the other rmap functions. Don't introduce folio_try_share_anon_rmap_ptes() for now, as we don't have a user that wants rmap batching in sight. Pretty easy to add later.
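At a glance, the interface change (summarized from the include/linux/rmap.h hunk below) is:

	/* Removed: */
	static inline int page_try_share_anon_rmap(struct page *page);

	/* Added instead (PTE- and PMD-mapped variants; no *_ptes() batching variant yet): */
	static inline int folio_try_share_anon_rmap_pte(struct folio *folio,
			struct page *page);
	static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
			struct page *page);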
All users are easy to convert -- only ksm.c doesn't use folios yet but that is left for future work -- so let's just do it in a single shot.
While at it, turn the BUG_ON into a WARN_ON_ONCE.
Note that page_try_share_anon_rmap() so far didn't care about pte/pmd mappings (no compound parameter). We're changing that so we can perform better sanity checks and make the code actually more readable/consistent. For example, __folio_rmap_sanity_checks() will make sure that a PMD range actually falls completely into the folio.
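As an illustration of the caller-side pattern for the PTE variant (modeled on the mm/migrate_device.c hunk below; the helper name and the addr/ptep parameters are placeholders, not code from this patch):

	/* Illustrative only: PT lock held, PTE cleared/invalidated first. */
	static bool try_freeze_exclusive_anon_pte(struct vm_area_struct *vma,
			unsigned long addr, pte_t *ptep,
			struct folio *folio, struct page *page)
	{
		pte_t pteval = ptep_clear_flush(vma, addr, ptep);

		/* Previously: page_try_share_anon_rmap(page). */
		if (folio_try_share_anon_rmap_pte(folio, page)) {
			/* The folio may be DMA-pinned: restore the PTE and back off. */
			set_pte_at(vma->vm_mm, addr, ptep, pteval);
			return false;
		}
		return true;
	}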
Link: https://lkml.kernel.org/r/20231220224504.646757-39-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yin Fengwei <fengwei.yin@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
(cherry picked from commit e3b4b1374f87c71e9309efc6149f113cdd17af72)
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 include/linux/rmap.h | 96 ++++++++++++++++++++++++++++++++------------
 mm/gup.c             |  2 +-
 mm/huge_memory.c     |  9 +++--
 mm/internal.h        |  4 +-
 mm/ksm.c             |  5 ++-
 mm/migrate_device.c  |  2 +-
 mm/rmap.c            | 11 ++---
 7 files changed, 89 insertions(+), 40 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 7f7eda68a7cf..7cf3b6a37766 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -266,7 +266,7 @@ static inline int hugetlb_try_dup_anon_rmap(struct folio *folio,
 	return 0;
 }
 
-/* See page_try_share_anon_rmap() */
+/* See folio_try_share_anon_rmap_*() */
 static inline int hugetlb_try_share_anon_rmap(struct folio *folio)
 {
 	VM_WARN_ON_FOLIO(!folio_test_hugetlb(folio), folio);
@@ -475,31 +475,15 @@ static inline int folio_try_dup_anon_rmap_pmd(struct folio *folio,
 #endif
 }
 
-/**
- * page_try_share_anon_rmap - try marking an exclusive anonymous page possibly
- *			      shared to prepare for KSM or temporary unmapping
- * @page: the exclusive anonymous page to try marking possibly shared
- *
- * The caller needs to hold the PT lock and has to have the page table entry
- * cleared/invalidated.
- *
- * This is similar to folio_try_dup_anon_rmap_*(), however, not used during
- * fork() to duplicate a mapping, but instead to prepare for KSM or temporarily
- * unmapping a page (swap, migration) via folio_remove_rmap_*().
- *
- * Marking the page shared can only fail if the page may be pinned; device
- * private pages cannot get pinned and consequently this function cannot fail.
- *
- * Returns 0 if marking the page possibly shared succeeded. Returns -EBUSY
- * otherwise.
- */
-static inline int page_try_share_anon_rmap(struct page *page)
+static __always_inline int __folio_try_share_anon_rmap(struct folio *folio,
+		struct page *page, int nr_pages, enum rmap_level level)
 {
-	VM_WARN_ON(folio_test_hugetlb(page_folio(page)));
-	VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page);
+	VM_WARN_ON_FOLIO(!folio_test_anon(folio), folio);
+	VM_WARN_ON_FOLIO(!PageAnonExclusive(page), folio);
+	__folio_rmap_sanity_checks(folio, page, nr_pages, level);
 
-	/* device private pages cannot get pinned via GUP. */
-	if (unlikely(is_device_private_page(page))) {
+	/* device private folios cannot get pinned via GUP. */
+	if (unlikely(folio_is_device_private(folio))) {
 		ClearPageAnonExclusive(page);
 		return 0;
 	}
@@ -550,7 +534,7 @@ static inline int page_try_share_anon_rmap(struct page *page)
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
 		smp_mb();
 
-	if (unlikely(page_maybe_dma_pinned(page)))
+	if (unlikely(folio_maybe_dma_pinned(folio)))
 		return -EBUSY;
 	ClearPageAnonExclusive(page);
 
@@ -563,6 +547,68 @@ static inline int page_try_share_anon_rmap(struct page *page)
 	return 0;
 }
 
+/**
+ * folio_try_share_anon_rmap_pte - try marking an exclusive anonymous page
+ *				   mapped by a PTE possibly shared to prepare
+ *				   for KSM or temporary unmapping
+ * @folio:	The folio to share a mapping of
+ * @page:	The mapped exclusive page
+ *
+ * The caller needs to hold the page table lock and has to have the page table
+ * entries cleared/invalidated.
+ *
+ * This is similar to folio_try_dup_anon_rmap_pte(), however, not used during
+ * fork() to duplicate mappings, but instead to prepare for KSM or temporarily
+ * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pte().
+ *
+ * Marking the mapped page shared can only fail if the folio maybe pinned;
+ * device private folios cannot get pinned and consequently this function cannot
+ * fail.
+ *
+ * Returns 0 if marking the mapped page possibly shared succeeded. Returns
+ * -EBUSY otherwise.
+ */
+static inline int folio_try_share_anon_rmap_pte(struct folio *folio,
+		struct page *page)
+{
+	return __folio_try_share_anon_rmap(folio, page, 1, RMAP_LEVEL_PTE);
+}
+
+/**
+ * folio_try_share_anon_rmap_pmd - try marking an exclusive anonymous page
+ *				   range mapped by a PMD possibly shared to
+ *				   prepare for temporary unmapping
+ * @folio:	The folio to share the mapping of
+ * @page:	The first page to share the mapping of
+ *
+ * The page range of the folio is defined by [page, page + HPAGE_PMD_NR)
+ *
+ * The caller needs to hold the page table lock and has to have the page table
+ * entries cleared/invalidated.
+ *
+ * This is similar to folio_try_dup_anon_rmap_pmd(), however, not used during
+ * fork() to duplicate a mapping, but instead to prepare for temporarily
+ * unmapping parts of a folio (swap, migration) via folio_remove_rmap_pmd().
+ *
+ * Marking the mapped pages shared can only fail if the folio maybe pinned;
+ * device private folios cannot get pinned and consequently this function cannot
+ * fail.
+ *
+ * Returns 0 if marking the mapped pages possibly shared succeeded. Returns
+ * -EBUSY otherwise.
+ */
+static inline int folio_try_share_anon_rmap_pmd(struct folio *folio,
+		struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	return __folio_try_share_anon_rmap(folio, page, HPAGE_PMD_NR,
+					   RMAP_LEVEL_PMD);
+#else
+	WARN_ON_ONCE(true);
+	return -EBUSY;
+#endif
+}
+
 /*
  * Called from mm/vmscan.c to handle paging out
  */
diff --git a/mm/gup.c b/mm/gup.c
index 813add0d3a74..5332f37a79ee 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -177,7 +177,7 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
 	/*
 	 * Adjust the pincount before re-checking the PTE for changes.
 	 * This is essentially a smp_mb() and is paired with a memory
-	 * barrier in page_try_share_anon_rmap().
+	 * barrier in folio_try_share_anon_rmap_*().
 	 */
 	smp_mb__after_atomic();
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 2938998d4933..152243e30561 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2379,10 +2379,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		 * In case we cannot clear PageAnonExclusive(), split the PMD
 		 * only and let try_to_migrate_one() fail later.
 		 *
-		 * See page_try_share_anon_rmap(): invalidate PMD first.
+		 * See folio_try_share_anon_rmap_pmd(): invalidate PMD first.
 		 */
 		anon_exclusive = PageAnonExclusive(page);
-		if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
+		if (freeze && anon_exclusive &&
+		    folio_try_share_anon_rmap_pmd(folio, page))
 			freeze = false;
 		if (!freeze) {
 			rmap_t rmap_flags = RMAP_NONE;
@@ -3416,9 +3417,9 @@ int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw,
 	flush_cache_range(vma, address, address + HPAGE_PMD_SIZE);
 	pmdval = pmdp_invalidate(vma, address, pvmw->pmd);
 
-	/* See page_try_share_anon_rmap(): invalidate PMD first. */
+	/* See folio_try_share_anon_rmap_pmd(): invalidate PMD first. */
 	anon_exclusive = folio_test_anon(folio) && PageAnonExclusive(page);
-	if (anon_exclusive && page_try_share_anon_rmap(page)) {
+	if (anon_exclusive && folio_try_share_anon_rmap_pmd(folio, page)) {
 		set_pmd_at(mm, address, pvmw->pmd, pmdval);
 		return -EBUSY;
 	}
diff --git a/mm/internal.h b/mm/internal.h
index 83cdb6ea72e3..2af059499f60 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1033,7 +1033,7 @@ enum {
  *	  * Ordinary GUP: Using the PT lock
  *	  * GUP-fast and fork(): mm->write_protect_seq
  *	  * GUP-fast and KSM or temporary unmapping (swap, migration): see
- *	    page_try_share_anon_rmap()
+ *	    folio_try_share_anon_rmap_*()
  *
  * Must be called with the (sub)page that's actually referenced via the
  * page table entry, which might not necessarily be the head page for a
@@ -1076,7 +1076,7 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
 		return is_cow_mapping(vma->vm_flags);
 	}
 
-	/* Paired with a memory barrier in page_try_share_anon_rmap(). */
+	/* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
 		smp_rmb();
diff --git a/mm/ksm.c b/mm/ksm.c
index 3d3db4111262..94207293e6fc 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1148,8 +1148,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 			goto out_unlock;
 		}
 
-		/* See page_try_share_anon_rmap(): clear PTE first. */
-		if (anon_exclusive && page_try_share_anon_rmap(page)) {
+		/* See folio_try_share_anon_rmap_pte(): clear PTE first. */
+		if (anon_exclusive &&
+		    folio_try_share_anon_rmap_pte(page_folio(page), page)) {
 			set_pte_at(mm, pvmw.address, pvmw.pte, entry);
 			goto out_unlock;
 		}
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index cee7957cecb8..40032b85ab4b 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -202,7 +202,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
 			if (anon_exclusive) {
 				pte = ptep_clear_flush(vma, addr, ptep);
 
-				if (page_try_share_anon_rmap(page)) {
+				if (folio_try_share_anon_rmap_pte(folio, page)) {
 					set_pte_at(mm, addr, ptep, pte);
 					folio_unlock(folio);
 					folio_put(folio);
diff --git a/mm/rmap.c b/mm/rmap.c
index efbe0708adeb..6bb33498e9e3 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1839,9 +1839,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,
 				break;
 			}
 
-			/* See page_try_share_anon_rmap(): clear PTE first. */
+			/* See folio_try_share_anon_rmap(): clear PTE first. */
 			if (anon_exclusive &&
-			    page_try_share_anon_rmap(subpage)) {
+			    folio_try_share_anon_rmap_pte(folio, subpage)) {
 				swap_free(entry);
 				set_pte_at(mm, address, pvmw.pte, pteval);
 				ret = false;
@@ -2117,7 +2117,8 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			pte_t swp_pte;
 
 			if (anon_exclusive)
-				BUG_ON(page_try_share_anon_rmap(subpage));
+				WARN_ON_ONCE(folio_try_share_anon_rmap_pte(folio,
+							subpage));
 
 			/*
 			 * Store the pfn of the page in a special migration
@@ -2190,7 +2191,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 			VM_BUG_ON_PAGE(pte_write(pteval) && folio_test_anon(folio) &&
 				       !anon_exclusive, subpage);
 
-			/* See page_try_share_anon_rmap(): clear PTE first. */
+			/* See folio_try_share_anon_rmap_pte(): clear PTE first. */
 			if (folio_test_hugetlb(folio)) {
 				if (anon_exclusive &&
 				    hugetlb_try_share_anon_rmap(folio)) {
@@ -2201,7 +2202,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,
 					break;
 				}
 			} else if (anon_exclusive &&
-				   page_try_share_anon_rmap(subpage)) {
+				   folio_try_share_anon_rmap_pte(folio, subpage)) {
 				set_pte_at(mm, address, pvmw.pte, pteval);
 				ret = false;
 				page_vma_mapped_walk_done(&pvmw);