From: Mike Kravetz <mike.kravetz@oracle.com>
stable inclusion
from stable-v6.0.13
commit bb8f66f6afbbc822f61d8bfc01ecefe2a437256c
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9GVYW
CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=...
--------------------------------
commit 04ada095dcfc4ae359418053c0be94453bdf1e84 upstream.
madvise(MADV_DONTNEED) ends up calling zap_page_range() to clear page tables associated with the address range. For hugetlb vmas, zap_page_range will call __unmap_hugepage_range_final. However, __unmap_hugepage_range_final assumes the passed vma is about to be removed and deletes the vma_lock to prevent pmd sharing as the vma is on the way out. In the case of madvise(MADV_DONTNEED) the vma remains, but the missing vma_lock prevents pmd sharing and could potentially lead to issues with truncation/fault races.
This issue was originally reported here [1] as a BUG triggered in page_try_dup_anon_rmap. Prior to the introduction of the hugetlb vma_lock, __unmap_hugepage_range_final cleared the VM_MAYSHARE flag to prevent pmd sharing. Subsequent faults on this vma were then confused: VM_MAYSHARE indicates a sharable vma, but it was no longer set, so page_mapping was not set in new pages added to the page table. This resulted in pages that appeared anonymous in a VM_SHARED vma and triggered the BUG.
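For illustration only, the reported sequence corresponds roughly to the following userspace sketch (a minimal approximation, not the original syzkaller reproducer; it assumes a 2MB default hugepage size and hugetlb pages preallocated via /proc/sys/vm/nr_hugepages):

	#define _GNU_SOURCE
	#include <sys/mman.h>
	#include <unistd.h>

	#define LEN (2UL * 1024 * 1024)		/* one default-size hugepage */

	int main(void)
	{
		/* Shared hugetlb mapping, i.e. a VM_MAYSHARE hugetlb vma. */
		char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
			       MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

		if (p == MAP_FAILED)
			return 1;

		p[0] = 1;			/* populate the page table */
		madvise(p, LEN, MADV_DONTNEED);	/* zap; the vma itself remains */
		p[0] = 2;			/* refault into the surviving vma */

		/*
		 * fork() copies hugetlb page tables via copy_hugetlb_page_range();
		 * on affected kernels the refaulted page could appear anonymous
		 * in this VM_SHARED vma and trigger the BUG described above.
		 */
		if (fork() == 0)
			_exit(0);

		munmap(p, LEN);
		return 0;
	}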
Address the issue by adding a new zap flag, ZAP_FLAG_UNMAP, to indicate an unmap call from unmap_vmas(). This is used to indicate the 'final' unmapping of a hugetlb vma. When called via MADV_DONTNEED, this flag is not set and the vma_lock is not deleted.
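As context for the new type used below: zap_flags_t is a sparse __bitwise typedef, which is why ZAP_FLAG_UNMAP is defined with a __force cast. The pattern can be approximated in userspace as follows (an illustrative sketch only; in the kernel the __bitwise/__force annotations come from <linux/types.h>):

	#include <stdio.h>

	/*
	 * Under sparse (__CHECKER__), __bitwise makes zap_flags_t a distinct
	 * type so mixing it with plain integers is flagged, and __force marks
	 * the one deliberate cast when defining a flag value. Outside sparse,
	 * both annotations compile away.
	 */
	#ifdef __CHECKER__
	#define __bitwise	__attribute__((bitwise))
	#define __force		__attribute__((force))
	#else
	#define __bitwise
	#define __force
	#endif

	#define BIT(nr)		(1U << (nr))

	typedef unsigned int __bitwise zap_flags_t;

	#define ZAP_FLAG_UNMAP	((__force zap_flags_t) BIT(1))

	int main(void)
	{
		zap_flags_t zap_flags = ZAP_FLAG_UNMAP;

		if (zap_flags & ZAP_FLAG_UNMAP)	/* callers test bits as usual */
			printf("final unmap\n");
		return 0;
	}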
NOTE - Prior to the introduction of the hugetlb vma_lock in v6.1, this issue is addressed by not clearing the VM_MAYSHARE flag when __unmap_hugepage_range_final is called in the MADV_DONTNEED case.
[1] https://lore.kernel.org/lkml/CAO4mrfdLMXsao9RF4fUE8-Wfde8xmjsKrTNMNC9wjUb6Ju...
Link: https://lkml.kernel.org/r/20221114235507.294320-3-mike.kravetz@oracle.com
Fixes: 90e7e7f5ef3f ("mm: enable MADV_DONTNEED for hugetlb mappings")
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: Wei Chen <harperchen1110@gmail.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Conflicts:
	include/linux/mm.h
	mm/memory.c
Signed-off-by: Ze Zuo <zuoze1@huawei.com>
---
 include/linux/hugetlb.h  |  5 +++--
 include/linux/mm.h       |  4 ++++
 include/linux/mm_types.h |  2 ++
 mm/hugetlb.c             | 28 ++++++++++++++++------------
 mm/memory.c              |  9 +++++++--
 5 files changed, 32 insertions(+), 16 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ce7c1f9d7961..b25c809af612 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -142,7 +142,7 @@ void unmap_hugepage_range(struct vm_area_struct *,
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
-			  struct page *ref_page);
+			  struct page *ref_page, zap_flags_t zap_flags);
 void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			  unsigned long start, unsigned long end,
 			  struct page *ref_page);
@@ -371,7 +371,8 @@ static inline unsigned long hugetlb_change_protection(
 
 static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			struct vm_area_struct *vma, unsigned long start,
-			unsigned long end, struct page *ref_page)
+			unsigned long end, struct page *ref_page,
+			zap_flags_t zap_flags)
 {
 	BUG();
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c43f59ac87a9..e0d269b83c8f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1706,8 +1706,12 @@ struct zap_details {
 	pgoff_t first_index;			/* Lowest page->index to unmap */
 	pgoff_t last_index;			/* Highest page->index to unmap */
 	struct page *single_page;		/* Locked page to be unmapped */
+	zap_flags_t zap_flags;			/* Extra flags for zapping */
 };
 
+/* Set in unmap_vmas() to indicate a final unmap call. Only used by hugetlb */
+#define ZAP_FLAG_UNMAP	((__force zap_flags_t) BIT(1))
+
 struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 			     pte_t pte);
 struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 940b19d4a531..d1c5946ad402 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -898,4 +898,6 @@ typedef struct {
 	unsigned long val;
 } swp_entry_t;
 
+typedef unsigned int __bitwise zap_flags_t;
+
 #endif /* _LINUX_MM_TYPES_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4a6f683cd7e4..a333caf66d15 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4512,21 +4512,25 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 
 void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 			  struct vm_area_struct *vma, unsigned long start,
-			  unsigned long end, struct page *ref_page)
+			  unsigned long end, struct page *ref_page,
+			  zap_flags_t zap_flags)
 {
 	__unmap_hugepage_range(tlb, vma, start, end, ref_page);
 
-	/*
-	 * Clear this flag so that x86's huge_pmd_share page_table_shareable
-	 * test will fail on a vma being torn down, and not grab a page table
-	 * on its way out. We're lucky that the flag has such an appropriate
-	 * name, and can in fact be safely cleared here. We could clear it
-	 * before the __unmap_hugepage_range above, but all that's necessary
-	 * is to clear it before releasing the i_mmap_rwsem. This works
-	 * because in the context this is called, the VMA is about to be
-	 * destroyed and the i_mmap_rwsem is held.
-	 */
-	vma->vm_flags &= ~VM_MAYSHARE;
+	if (zap_flags & ZAP_FLAG_UNMAP) {	/* final unmap */
+		/*
+		 * Clear this flag so that x86's huge_pmd_share
+		 * page_table_shareable test will fail on a vma being torn
+		 * down, and not grab a page table on its way out. We're lucky
+		 * that the flag has such an appropriate name, and can in fact
+		 * be safely cleared here. We could clear it before the
+		 * __unmap_hugepage_range above, but all that's necessary
+		 * is to clear it before releasing the i_mmap_rwsem. This works
+		 * because in the context this is called, the VMA is about to
+		 * be destroyed and the i_mmap_rwsem is held.
+		 */
+		vma->vm_flags &= ~VM_MAYSHARE;
+	}
 }
 
 void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
diff --git a/mm/memory.c b/mm/memory.c
index bc3f6408236b..494f40362174 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1518,8 +1518,10 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 				 * safe to do nothing in this case.
 				 */
 				if (vma->vm_file) {
+					zap_flags_t zap_flags = details ?
+							details->zap_flags : 0;
 					i_mmap_lock_write(vma->vm_file->f_mapping);
-					__unmap_hugepage_range_final(tlb, vma, start, end, NULL);
+					__unmap_hugepage_range_final(tlb, vma, start, end, NULL, zap_flags);
 					i_mmap_unlock_write(vma->vm_file->f_mapping);
 				}
 			} else
@@ -1550,12 +1552,15 @@ void unmap_vmas(struct mmu_gather *tlb,
 		unsigned long end_addr)
 {
 	struct mmu_notifier_range range;
+	struct zap_details details = {
+		.zap_flags = ZAP_FLAG_UNMAP,
+	};
 
 	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, vma->vm_mm,
 				start_addr, end_addr);
 	mmu_notifier_invalidate_range_start(&range);
 	for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next)
-		unmap_single_vma(tlb, vma, start_addr, end_addr, NULL);
+		unmap_single_vma(tlb, vma, start_addr, end_addr, &details);
 	mmu_notifier_invalidate_range_end(&range);
 }