hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I9S4Z4
CVE: NA
--------------------------------
This reverts commit bb6adb65697a4870e33281e1dfb36147a8c953bc.
An issue was found on arm64, so revert it for now.
Fixes: bb6adb65697a ("mm: support multi-size THP numa balancing")
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 mm/memory.c   | 62 ++++++++++----------------------------------------
 mm/mprotect.c |  3 +--
 2 files changed, 13 insertions(+), 52 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index f52e52426da5..a8f0df59aca1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5082,51 +5082,17 @@ int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
 }
 
 static void numa_rebuild_single_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
-					unsigned long fault_addr, pte_t *fault_pte,
 					bool writable)
 {
 	pte_t pte, old_pte;
 
-	old_pte = ptep_modify_prot_start(vma, fault_addr, fault_pte);
+	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
 	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
 	if (writable)
 		pte = pte_mkwrite(pte, vma);
-	ptep_modify_prot_commit(vma, fault_addr, fault_pte, old_pte, pte);
-	update_mmu_cache_range(vmf, vma, fault_addr, fault_pte, 1);
-}
-
-static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_struct *vma,
-				       struct folio *folio, pte_t fault_pte,
-				       bool ignore_writable, bool pte_write_upgrade)
-{
-	int nr = pte_pfn(fault_pte) - folio_pfn(folio);
-	unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start);
-	unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end);
-	pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE;
-	unsigned long addr;
-
-	/* Restore all PTEs' mapping of the large folio */
-	for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) {
-		pte_t ptent = ptep_get(start_ptep);
-		bool writable = false;
-
-		if (!pte_present(ptent) || !pte_protnone(ptent))
-			continue;
-
-		if (pfn_folio(pte_pfn(ptent)) != folio)
-			continue;
-
-		if (!ignore_writable) {
-			ptent = pte_modify(ptent, vma->vm_page_prot);
-			writable = pte_write(ptent);
-			if (!writable && pte_write_upgrade &&
-			    can_change_pte_writable(vma, addr, ptent))
-				writable = true;
-		}
-
-		numa_rebuild_single_mapping(vmf, vma, addr, start_ptep, writable);
-	}
+	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
+	update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1);
 }
 
 static vm_fault_t do_numa_page(struct vm_fault *vmf)
@@ -5134,12 +5100,11 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct folio *folio = NULL;
 	int nid = NUMA_NO_NODE;
-	bool writable = false, ignore_writable = false;
-	bool pte_write_upgrade = vma_wants_manual_pte_write_upgrade(vma);
+	bool writable = false;
 	int last_cpupid;
 	int target_nid;
 	pte_t pte, old_pte;
-	int flags = 0, nr_pages;
+	int flags = 0;
 
 	/*
 	 * The pte cannot be used safely until we verify, while holding the page
@@ -5161,7 +5126,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * is only valid while holding the PT lock.
 	 */
 	writable = pte_write(pte);
-	if (!writable && pte_write_upgrade &&
+	if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
 	    can_change_pte_writable(vma, vmf->address, pte))
 		writable = true;
 
@@ -5169,6 +5134,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	if (!folio || folio_is_zone_device(folio))
 		goto out_map;
 
+	/* TODO: handle PTE-mapped THP */
+	if (folio_test_large(folio))
+		goto out_map;
+
 	/*
 	 * Avoid grouping on RO pages in general. RO pages shouldn't hurt as
 	 * much anyway since they can be in shared cache state. This misses
@@ -5188,7 +5157,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		flags |= TNF_SHARED;
 
 	nid = folio_nid(folio);
-	nr_pages = folio_nr_pages(folio);
 	/*
 	 * For memory tiering mode, cpupid of slow memory page is used
 	 * to record page access time. So use default value.
@@ -5205,7 +5173,6 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	}
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	writable = false;
-	ignore_writable = true;
 
 	/* Migrate to the requested node */
 	if (migrate_misplaced_folio(folio, vma, target_nid)) {
@@ -5226,19 +5193,14 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 
 out:
 	if (nid != NUMA_NO_NODE)
-		task_numa_fault(last_cpupid, nid, nr_pages, flags);
+		task_numa_fault(last_cpupid, nid, 1, flags);
 	return 0;
 out_map:
 	/*
 	 * Make it present again, depending on how arch implements
 	 * non-accessible ptes, some can allow access by kernel mode.
	 */
-	if (folio && folio_test_large(folio))
-		numa_rebuild_large_mapping(vmf, vma, folio, pte, ignore_writable,
-					   pte_write_upgrade);
-	else
-		numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte,
-					    writable);
+	numa_rebuild_single_mapping(vmf, vma, writable);
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 	goto out;
 }
diff --git a/mm/mprotect.c b/mm/mprotect.c
index b360577be4f8..f121c46f6e4c 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -129,8 +129,7 @@ static long change_pte_range(struct mmu_gather *tlb,
 
 				/* Also skip shared copy-on-write pages */
 				if (is_cow_mapping(vma->vm_flags) &&
-				    (folio_maybe_dma_pinned(folio) ||
-				     folio_likely_mapped_shared(folio)))
+				    folio_ref_count(folio) != 1)
 					continue;
 
 				/*