From: Daniel Axtens dja@axtens.net
mainline inclusion from mainline-v5.5-rc3 commit be1db4753ee6a0db80a900df9dbbf6ad2acc4bd1 category: feature bugzilla: NA CVE: NA
---------------------------
apply_to_page_range() takes an address range, and if any parts of it are not covered by the existing page table hierarchy, it allocates memory to fill them in.
In some use cases, this is not what we want - we want to be able to operate exclusively on PTEs that are already in the tables.
Add apply_to_existing_page_range() for this. Adjust the walker functions for apply_to_page_range to take 'create', which switches them between the old and new modes.
This will be used in KASAN vmalloc.
[akpm@linux-foundation.org: reduce code duplication] [akpm@linux-foundation.org: s/apply_to_existing_pages/apply_to_existing_page_range/] [akpm@linux-foundation.org: initialize __apply_to_page_range::err] Link: http://lkml.kernel.org/r/20191205140407.1874-1-dja@axtens.net Signed-off-by: Daniel Axtens dja@axtens.net Cc: Dmitry Vyukov dvyukov@google.com Cc: Uladzislau Rezki (Sony) urezki@gmail.com Cc: Alexander Potapenko glider@google.com Cc: Daniel Axtens dja@axtens.net Cc: Qian Cai cai@lca.pw Cc: Andrey Ryabinin aryabinin@virtuozzo.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/mm.h | 3 + mm/memory.c | 136 +++++++++++++++++++++++++++++++-------------- 2 files changed, 97 insertions(+), 42 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index d26d870f68cc..0ff3de89f897 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2642,6 +2642,9 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern int apply_to_existing_page_range(struct mm_struct *mm, + unsigned long address, unsigned long size, + pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING diff --git a/mm/memory.c b/mm/memory.c index 2c5a66af85bb..20d94eed5de9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1956,18 +1956,24 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pte_t *pte; - int err; + int err = 0; pgtable_t token; spinlock_t *uninitialized_var(ptl);
- pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; + if (create) { + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + } else { + pte = (mm == &init_mm) ? + pte_offset_kernel(pmd, addr) : + pte_offset_map_lock(mm, pmd, addr, &ptl); + }
BUG_ON(pmd_huge(*pmd));
@@ -1976,9 +1982,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd);
do { - err = fn(pte++, token, addr, data); - if (err) - break; + if (create || !pte_none(*pte)) { + err = fn(pte++, token, addr, data); + if (err) + break; + } } while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode(); @@ -1990,77 +1998,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pmd_t *pmd; unsigned long next; - int err; + int err = 0;
BUG_ON(pud_huge(*pud));
- pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return -ENOMEM; + if (create) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + } else { + pmd = pmd_offset(pud, addr); + } do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); - if (err) - break; + if (create || !pmd_none_or_clear_bad(pmd)) { + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, + create); + if (err) + break; + } } while (pmd++, addr = next, addr != end); return err; }
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pud_t *pud; unsigned long next; - int err; + int err = 0;
- pud = pud_alloc(mm, p4d, addr); - if (!pud) - return -ENOMEM; + if (create) { + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return -ENOMEM; + } else { + pud = pud_offset(p4d, addr); + } do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); - if (err) - break; + if (create || !pud_none_or_clear_bad(pud)) { + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, + create); + if (err) + break; + } } while (pud++, addr = next, addr != end); return err; }
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { p4d_t *p4d; unsigned long next; - int err; + int err = 0;
- p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return -ENOMEM; + if (create) { + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + } else { + p4d = p4d_offset(pgd, addr); + } do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); - if (err) - break; + if (create || !p4d_none_or_clear_bad(p4d)) { + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, + create); + if (err) + break; + } } while (p4d++, addr = next, addr != end); return err; }
-/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. - */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, + void *data, bool create) { pgd_t *pgd; unsigned long next; unsigned long end = addr + size; - int err; + int err = 0;
if (WARN_ON(addr >= end)) return -EINVAL; @@ -2068,15 +2094,41 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!create && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); if (err) break; } while (pgd++, addr = next, addr != end);
return err; } + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, true); +} EXPORT_SYMBOL_GPL(apply_to_page_range);
+/* + * Scan a region of virtual memory, calling a provided function on + * each leaf page table where it exists. + * + * Unlike apply_to_page_range, this does _not_ fill in page tables + * where they are absent. + */ +int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, false); +} +EXPORT_SYMBOL_GPL(apply_to_existing_page_range); + /* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures