From: Fang Lijun fanglijun3@huawei.com
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4D63I
CVE: NA
-------------------------------------------------
An interface, do_vm_mmap(), is added to support allocation in the address space of another process.
Signed-off-by: Fang Lijun fanglijun3@huawei.com Signed-off-by: Zhou Guanghui zhouguanghui1@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/mm.h | 3 +++ mm/mmap.c | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75d94ea5d1c20..58fe28dd959b9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2436,6 +2436,9 @@ static inline void mm_populate(unsigned long addr, unsigned long len) {}
 extern int __must_check vm_brk(unsigned long, unsigned long);
 extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
+extern unsigned long do_vm_mmap(struct mm_struct *mm, unsigned long addr,
+			unsigned long len, unsigned long prot,
+			unsigned long flag, unsigned long pgoff);
 extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
diff --git a/mm/mmap.c b/mm/mmap.c
index 80779bbb1c048..f7f1fd3b5fa39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3094,6 +3094,40 @@ int vm_munmap(unsigned long start, size_t len)
 }
 EXPORT_SYMBOL(vm_munmap);
+/*
+ * Must acquire an additional reference to the mm struct to prevent the
+ * mm struct of other process from being released.
+ *
+ * This interface is applicable only to kernel thread scenarios.
+ */
+unsigned long do_vm_mmap(struct mm_struct *mm, unsigned long addr,
+			 unsigned long len, unsigned long prot,
+			 unsigned long flag, unsigned long pgoff)
+{
+	unsigned long ret;
+	unsigned long populate;
+	LIST_HEAD(uf);
+
+	if (mm == NULL || current->mm)
+		return -EINVAL;
+
+	if (down_write_killable(&mm->mmap_sem))
+		return -EINTR;
+
+	current->mm = mm;
+	ret = do_mmap_pgoff(0, addr, len, prot, flag, pgoff,
+			    &populate, &uf);
+
+	current->mm = NULL;
+	up_write(&mm->mmap_sem);
+	userfaultfd_unmap_complete(mm, &uf);
+	if (populate)
+		mm_populate(ret, populate);
+
+	return ret;
+}
+EXPORT_SYMBOL(do_vm_mmap);
+
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
 	profile_munmap(addr);
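For illustration, a minimal usage sketch of the new interface (not part of the patch): a kernel thread (current->mm == NULL) takes a reference on the target mm and maps anonymous memory into it. get_target_task() is a hypothetical helper, and a real caller should check the result with IS_ERR_VALUE().

/* Sketch only: map 2 MiB of anonymous memory into another process. */
static unsigned long map_into_target(void)
{
	struct task_struct *tsk = get_target_task();	/* hypothetical */
	struct mm_struct *mm;
	unsigned long va;

	mm = get_task_mm(tsk);		/* hold a reference on the target mm */
	if (!mm)
		return -ESRCH;

	va = do_vm_mmap(mm, 0, SZ_2M, PROT_READ | PROT_WRITE,
			MAP_ANONYMOUS | MAP_PRIVATE, 0);

	mmput(mm);			/* drop the reference */
	return va;
}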
From: Will Deacon will.deacon@arm.com
mainline inclusion
from mainline-v5.0-rc1
commit d239865ac804c91a621294ca7bece4241b006fae
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
The recently merged API for ensuring break-before-make on page-table entries when installing huge mappings in the vmalloc/ioremap region is fairly counter-intuitive, resulting in the arch freeing functions (e.g. pmd_free_pte_page()) being called even on entries that aren't present. This resulted in a minor bug in the arm64 implementation, giving rise to spurious VM_WARN messages.
This patch moves the pXd_present() checks out into the core code, refactoring the callsites at the same time so that we avoid the complex conjunctions when determining whether or not we can put down a huge mapping.
Link: http://lkml.kernel.org/r/1544120495-17438-2-git-send-email-will.deacon@arm.c... Signed-off-by: Will Deacon will.deacon@arm.com Reviewed-by: Toshi Kani toshi.kani@hpe.com Suggested-by: Linus Torvalds torvalds@linux-foundation.org Cc: Chintan Pandya cpandya@codeaurora.org Cc: Toshi Kani toshi.kani@hpe.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Michal Hocko mhocko@suse.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@elte.hu Cc: Sean Christopherson sean.j.christopherson@intel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- lib/ioremap.c | 56 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 14 deletions(-)
diff --git a/lib/ioremap.c b/lib/ioremap.c index 517f5853ffed1..6c72764af19c2 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -76,6 +76,25 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, return 0; }
+static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, + pgprot_t prot) +{ + if (!ioremap_pmd_enabled()) + return 0; + + if ((end - addr) != PMD_SIZE) + return 0; + + if (!IS_ALIGNED(phys_addr, PMD_SIZE)) + return 0; + + if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) + return 0; + + return pmd_set_huge(pmd, phys_addr, prot); +} + static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -89,13 +108,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end);
- if (ioremap_pmd_enabled() && - ((next - addr) == PMD_SIZE) && - IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd, addr)) { - if (pmd_set_huge(pmd, phys_addr + addr, prot)) - continue; - } + if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr + addr, prot)) + continue;
if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) return -ENOMEM; @@ -103,6 +117,25 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return 0; }
+static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, + pgprot_t prot) +{ + if (!ioremap_pud_enabled()) + return 0; + + if ((end - addr) != PUD_SIZE) + return 0; + + if (!IS_ALIGNED(phys_addr, PUD_SIZE)) + return 0; + + if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) + return 0; + + return pud_set_huge(pud, phys_addr, prot); +} + static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -116,13 +149,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end);
- if (ioremap_pud_enabled() && - ((next - addr) == PUD_SIZE) && - IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud, addr)) { - if (pud_set_huge(pud, phys_addr + addr, prot)) - continue; - } + if (ioremap_try_huge_pud(pud, addr, next, phys_addr + addr, prot)) + continue;
if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) return -ENOMEM;
From: Will Deacon will.deacon@arm.com
mainline inclusion
from mainline-v5.0-rc1
commit 36ddc5a78c878e9b10c323d2fe88b0dae2f487eb
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
The current ioremap() code uses a phys_addr variable at each level of page table, which is confusingly offset by subtracting the base virtual address being mapped so that adding the current virtual address back on when iterating through the page table entries gives back the corresponding physical address.
This is fairly confusing and results in all users of phys_addr having to add the current virtual address back on. Instead, this patch just updates phys_addr when iterating over the page table entries, ensuring that it's always up-to-date and doesn't require explicit offsetting.
Link: http://lkml.kernel.org/r/1544120495-17438-5-git-send-email-will.deacon@arm.c... Signed-off-by: Will Deacon will.deacon@arm.com Tested-by: Sean Christopherson sean.j.christopherson@intel.com Reviewed-by: Sean Christopherson sean.j.christopherson@intel.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Toshi Kani toshi.kani@hpe.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Michal Hocko mhocko@suse.com Cc: Sean Christopherson sean.j.christopherson@intel.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@elte.hu Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- lib/ioremap.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-)
diff --git a/lib/ioremap.c b/lib/ioremap.c index 6c72764af19c2..10d7c5485c395 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -101,19 +101,18 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, pmd_t *pmd; unsigned long next;
- phys_addr -= addr; pmd = pmd_alloc(&init_mm, pud, addr); if (!pmd) return -ENOMEM; do { next = pmd_addr_end(addr, end);
- if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr + addr, prot)) + if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) continue;
- if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) + if (ioremap_pte_range(pmd, addr, next, phys_addr, prot)) return -ENOMEM; - } while (pmd++, addr = next, addr != end); + } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
@@ -142,19 +141,18 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, pud_t *pud; unsigned long next;
- phys_addr -= addr; pud = pud_alloc(&init_mm, p4d, addr); if (!pud) return -ENOMEM; do { next = pud_addr_end(addr, end);
- if (ioremap_try_huge_pud(pud, addr, next, phys_addr + addr, prot)) + if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) continue;
- if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) + if (ioremap_pmd_range(pud, addr, next, phys_addr, prot)) return -ENOMEM; - } while (pud++, addr = next, addr != end); + } while (pud++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
@@ -164,7 +162,6 @@ static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, p4d_t *p4d; unsigned long next;
- phys_addr -= addr; p4d = p4d_alloc(&init_mm, pgd, addr); if (!p4d) return -ENOMEM; @@ -173,14 +170,14 @@ static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr,
if (ioremap_p4d_enabled() && ((next - addr) == P4D_SIZE) && - IS_ALIGNED(phys_addr + addr, P4D_SIZE)) { - if (p4d_set_huge(p4d, phys_addr + addr, prot)) + IS_ALIGNED(phys_addr, P4D_SIZE)) { + if (p4d_set_huge(p4d, phys_addr, prot)) continue; }
- if (ioremap_pud_range(p4d, addr, next, phys_addr + addr, prot)) + if (ioremap_pud_range(p4d, addr, next, phys_addr, prot)) return -ENOMEM; - } while (p4d++, addr = next, addr != end); + } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
@@ -196,14 +193,13 @@ int ioremap_page_range(unsigned long addr, BUG_ON(addr >= end);
start = addr; - phys_addr -= addr; pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = ioremap_p4d_range(pgd, addr, next, phys_addr+addr, prot); + err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot); if (err) break; - } while (pgd++, addr = next, addr != end); + } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
flush_cache_vmap(start, end);
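To make the new convention concrete, here is a small illustrative walker (not taken from the patch): the physical address advances in lock-step with the virtual address, so callees always receive the real physical address with no offsetting. STEP and emit_mapping() are made-up stand-ins for PMD_SIZE and pmd_set_huge()/ioremap_pte_range().

#define STEP	0x200000UL	/* stand-in for PMD_SIZE */

static void emit_mapping(unsigned long va, unsigned long va_end,
			 unsigned long pa)
{
	pr_info("map VA [%#lx, %#lx) -> PA %#lx\n", va, va_end, pa);
}

static void walk_example(unsigned long addr, unsigned long end,
			 unsigned long phys_addr)
{
	unsigned long next;

	do {
		/* next STEP boundary, capped at end (like pmd_addr_end()) */
		next = min(((addr + STEP) & ~(STEP - 1)), end);
		emit_mapping(addr, next, phys_addr);
	} while (phys_addr += (next - addr), addr = next, addr != end);
}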
From: Will Deacon will.deacon@arm.com
mainline inclusion
from mainline-v5.0-rc1
commit 8e2d43405b22e98cf5f3730c1829ec1fdbe17ae7
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
Whilst no architectures actually enable support for huge p4d mappings in the vmap area, the code that is implemented should be using break-before-make, as we do for pud and pmd huge entries.
Link: http://lkml.kernel.org/r/1544120495-17438-6-git-send-email-will.deacon@arm.c... Signed-off-by: Will Deacon will.deacon@arm.com Reviewed-by: Toshi Kani toshi.kani@hpe.com Cc: Chintan Pandya cpandya@codeaurora.org Cc: Toshi Kani toshi.kani@hpe.com Cc: Thomas Gleixner tglx@linutronix.de Cc: Michal Hocko mhocko@suse.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@elte.hu Cc: Sean Christopherson sean.j.christopherson@intel.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/mm/mmu.c | 5 +++++ arch/x86/mm/pgtable.c | 8 ++++++++ include/asm-generic/pgtable.h | 5 +++++ lib/ioremap.c | 27 +++++++++++++++++++++------ 4 files changed, 39 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 5673db3b4cc43..99a86326a301b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1036,6 +1036,11 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) return 1; }
+int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; /* Don't attempt a block mapping */ +} + #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock) diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index bf52106ab9c49..5b97717b389bb 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -794,6 +794,14 @@ int pmd_clear_huge(pmd_t *pmd) return 0; }
+/* + * Until we support 512GB pages, skip them in the vmap area. + */ +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} + #ifdef CONFIG_X86_64 /** * pud_free_pmd_page - Clear pud entry and free pmd page. diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 15fd0277ffa69..fa90bdd4089fc 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1019,6 +1019,7 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); +int p4d_free_pud_page(p4d_t *p4d, unsigned long addr); int pud_free_pmd_page(pud_t *pud, unsigned long addr); int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ @@ -1046,6 +1047,10 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } +static inline int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) +{ + return 0; +} static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; diff --git a/lib/ioremap.c b/lib/ioremap.c index 10d7c5485c395..063213685563c 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -156,6 +156,25 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, return 0; }
+static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, + pgprot_t prot) +{ + if (!ioremap_p4d_enabled()) + return 0; + + if ((end - addr) != P4D_SIZE) + return 0; + + if (!IS_ALIGNED(phys_addr, P4D_SIZE)) + return 0; + + if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) + return 0; + + return p4d_set_huge(p4d, phys_addr, prot); +} + static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -168,12 +187,8 @@ static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end);
- if (ioremap_p4d_enabled() && - ((next - addr) == P4D_SIZE) && - IS_ALIGNED(phys_addr, P4D_SIZE)) { - if (p4d_set_huge(p4d, phys_addr, prot)) - continue; - } + if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) + continue;
if (ioremap_pud_range(p4d, addr, next, phys_addr, prot)) return -ENOMEM;
From: Ingo Molnar mingo@kernel.org
mainline inclusion
from mainline v5.6-rc1
commit 1f059dfdf5d170dccbac92193be2fee3c1763384
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
In the x86 MM code we'd like to untangle various types of historic header dependency spaghetti, but for this we'd need to pass to the generic vmalloc code various vmalloc related defines that customarily come via the <asm/page.h> low level arch header.
Signed-off-by: Ingo Molnar mingo@kernel.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/alpha/include/asm/vmalloc.h | 4 ++++ arch/arc/include/asm/vmalloc.h | 4 ++++ arch/arm/include/asm/vmalloc.h | 4 ++++ arch/arm64/include/asm/vmalloc.h | 4 ++++ arch/c6x/include/asm/vmalloc.h | 4 ++++ arch/csky/include/asm/vmalloc.h | 4 ++++ arch/h8300/include/asm/vmalloc.h | 4 ++++ arch/hexagon/include/asm/vmalloc.h | 4 ++++ arch/ia64/include/asm/vmalloc.h | 4 ++++ arch/m68k/include/asm/vmalloc.h | 4 ++++ arch/microblaze/include/asm/vmalloc.h | 4 ++++ arch/mips/include/asm/vmalloc.h | 4 ++++ arch/nds32/include/asm/vmalloc.h | 4 ++++ arch/nios2/include/asm/vmalloc.h | 4 ++++ arch/openrisc/include/asm/vmalloc.h | 4 ++++ arch/parisc/include/asm/vmalloc.h | 4 ++++ arch/powerpc/include/asm/vmalloc.h | 4 ++++ arch/riscv/include/asm/vmalloc.h | 4 ++++ arch/s390/include/asm/vmalloc.h | 4 ++++ arch/sh/include/asm/vmalloc.h | 4 ++++ arch/sparc/include/asm/vmalloc.h | 4 ++++ arch/um/include/asm/vmalloc.h | 4 ++++ arch/unicore32/include/asm/vmalloc.h | 4 ++++ arch/x86/include/asm/vmalloc.h | 4 ++++ arch/xtensa/include/asm/vmalloc.h | 4 ++++ include/linux/vmalloc.h | 2 ++ 26 files changed, 102 insertions(+) create mode 100644 arch/alpha/include/asm/vmalloc.h create mode 100644 arch/arc/include/asm/vmalloc.h create mode 100644 arch/arm/include/asm/vmalloc.h create mode 100644 arch/arm64/include/asm/vmalloc.h create mode 100644 arch/c6x/include/asm/vmalloc.h create mode 100644 arch/csky/include/asm/vmalloc.h create mode 100644 arch/h8300/include/asm/vmalloc.h create mode 100644 arch/hexagon/include/asm/vmalloc.h create mode 100644 arch/ia64/include/asm/vmalloc.h create mode 100644 arch/m68k/include/asm/vmalloc.h create mode 100644 arch/microblaze/include/asm/vmalloc.h create mode 100644 arch/mips/include/asm/vmalloc.h create mode 100644 arch/nds32/include/asm/vmalloc.h create mode 100644 arch/nios2/include/asm/vmalloc.h create mode 100644 arch/openrisc/include/asm/vmalloc.h create mode 100644 arch/parisc/include/asm/vmalloc.h create mode 100644 arch/powerpc/include/asm/vmalloc.h create mode 100644 arch/riscv/include/asm/vmalloc.h create mode 100644 arch/s390/include/asm/vmalloc.h create mode 100644 arch/sh/include/asm/vmalloc.h create mode 100644 arch/sparc/include/asm/vmalloc.h create mode 100644 arch/um/include/asm/vmalloc.h create mode 100644 arch/unicore32/include/asm/vmalloc.h create mode 100644 arch/x86/include/asm/vmalloc.h create mode 100644 arch/xtensa/include/asm/vmalloc.h
diff --git a/arch/alpha/include/asm/vmalloc.h b/arch/alpha/include/asm/vmalloc.h new file mode 100644 index 0000000000000..0a9a366a4d344 --- /dev/null +++ b/arch/alpha/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ALPHA_VMALLOC_H +#define _ASM_ALPHA_VMALLOC_H + +#endif /* _ASM_ALPHA_VMALLOC_H */ diff --git a/arch/arc/include/asm/vmalloc.h b/arch/arc/include/asm/vmalloc.h new file mode 100644 index 0000000000000..973095aad665a --- /dev/null +++ b/arch/arc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARC_VMALLOC_H +#define _ASM_ARC_VMALLOC_H + +#endif /* _ASM_ARC_VMALLOC_H */ diff --git a/arch/arm/include/asm/vmalloc.h b/arch/arm/include/asm/vmalloc.h new file mode 100644 index 0000000000000..a9b3718b8600b --- /dev/null +++ b/arch/arm/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM_VMALLOC_H +#define _ASM_ARM_VMALLOC_H + +#endif /* _ASM_ARM_VMALLOC_H */ diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h new file mode 100644 index 0000000000000..2ca708ab9b20b --- /dev/null +++ b/arch/arm64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_ARM64_VMALLOC_H +#define _ASM_ARM64_VMALLOC_H + +#endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/c6x/include/asm/vmalloc.h b/arch/c6x/include/asm/vmalloc.h new file mode 100644 index 0000000000000..26c6c6696bbd9 --- /dev/null +++ b/arch/c6x/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_C6X_VMALLOC_H +#define _ASM_C6X_VMALLOC_H + +#endif /* _ASM_C6X_VMALLOC_H */ diff --git a/arch/csky/include/asm/vmalloc.h b/arch/csky/include/asm/vmalloc.h new file mode 100644 index 0000000000000..43dca6336b4c6 --- /dev/null +++ b/arch/csky/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_CSKY_VMALLOC_H +#define _ASM_CSKY_VMALLOC_H + +#endif /* _ASM_CSKY_VMALLOC_H */ diff --git a/arch/h8300/include/asm/vmalloc.h b/arch/h8300/include/asm/vmalloc.h new file mode 100644 index 0000000000000..08a55c1dfa23c --- /dev/null +++ b/arch/h8300/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_H8300_VMALLOC_H +#define _ASM_H8300_VMALLOC_H + +#endif /* _ASM_H8300_VMALLOC_H */ diff --git a/arch/hexagon/include/asm/vmalloc.h b/arch/hexagon/include/asm/vmalloc.h new file mode 100644 index 0000000000000..7b04609e525c4 --- /dev/null +++ b/arch/hexagon/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_HEXAGON_VMALLOC_H +#define _ASM_HEXAGON_VMALLOC_H + +#endif /* _ASM_HEXAGON_VMALLOC_H */ diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h new file mode 100644 index 0000000000000..a2b51141ad285 --- /dev/null +++ b/arch/ia64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_IA64_VMALLOC_H +#define _ASM_IA64_VMALLOC_H + +#endif /* _ASM_IA64_VMALLOC_H */ diff --git a/arch/m68k/include/asm/vmalloc.h b/arch/m68k/include/asm/vmalloc.h new file mode 100644 index 0000000000000..bc1dca6cf134f --- /dev/null +++ b/arch/m68k/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_M68K_VMALLOC_H +#define _ASM_M68K_VMALLOC_H + +#endif /* _ASM_M68K_VMALLOC_H */ diff --git a/arch/microblaze/include/asm/vmalloc.h b/arch/microblaze/include/asm/vmalloc.h new file mode 100644 index 0000000000000..04013a42b0fec --- /dev/null +++ b/arch/microblaze/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MICROBLAZE_VMALLOC_H +#define _ASM_MICROBLAZE_VMALLOC_H + +#endif /* _ASM_MICROBLAZE_VMALLOC_H */ diff --git a/arch/mips/include/asm/vmalloc.h b/arch/mips/include/asm/vmalloc.h new file mode 100644 index 0000000000000..25dc09b25eaf9 --- /dev/null +++ b/arch/mips/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_MIPS_VMALLOC_H 
+#define _ASM_MIPS_VMALLOC_H + +#endif /* _ASM_MIPS_VMALLOC_H */ diff --git a/arch/nds32/include/asm/vmalloc.h b/arch/nds32/include/asm/vmalloc.h new file mode 100644 index 0000000000000..caeed38984194 --- /dev/null +++ b/arch/nds32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NDS32_VMALLOC_H +#define _ASM_NDS32_VMALLOC_H + +#endif /* _ASM_NDS32_VMALLOC_H */ diff --git a/arch/nios2/include/asm/vmalloc.h b/arch/nios2/include/asm/vmalloc.h new file mode 100644 index 0000000000000..ec7a9260090bc --- /dev/null +++ b/arch/nios2/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_NIOS2_VMALLOC_H +#define _ASM_NIOS2_VMALLOC_H + +#endif /* _ASM_NIOS2_VMALLOC_H */ diff --git a/arch/openrisc/include/asm/vmalloc.h b/arch/openrisc/include/asm/vmalloc.h new file mode 100644 index 0000000000000..75435eceec32a --- /dev/null +++ b/arch/openrisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_OPENRISC_VMALLOC_H +#define _ASM_OPENRISC_VMALLOC_H + +#endif /* _ASM_OPENRISC_VMALLOC_H */ diff --git a/arch/parisc/include/asm/vmalloc.h b/arch/parisc/include/asm/vmalloc.h new file mode 100644 index 0000000000000..1088ae4e7af9f --- /dev/null +++ b/arch/parisc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_PARISC_VMALLOC_H +#define _ASM_PARISC_VMALLOC_H + +#endif /* _ASM_PARISC_VMALLOC_H */ diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h new file mode 100644 index 0000000000000..b992dfaaa1618 --- /dev/null +++ b/arch/powerpc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_POWERPC_VMALLOC_H +#define _ASM_POWERPC_VMALLOC_H + +#endif /* _ASM_POWERPC_VMALLOC_H */ diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h new file mode 100644 index 0000000000000..ff9abc00d1394 --- /dev/null +++ b/arch/riscv/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_RISCV_VMALLOC_H +#define _ASM_RISCV_VMALLOC_H + +#endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/s390/include/asm/vmalloc.h b/arch/s390/include/asm/vmalloc.h new file mode 100644 index 0000000000000..3ba3a6bdca254 --- /dev/null +++ b/arch/s390/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_S390_VMALLOC_H +#define _ASM_S390_VMALLOC_H + +#endif /* _ASM_S390_VMALLOC_H */ diff --git a/arch/sh/include/asm/vmalloc.h b/arch/sh/include/asm/vmalloc.h new file mode 100644 index 0000000000000..716b774726465 --- /dev/null +++ b/arch/sh/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SH_VMALLOC_H +#define _ASM_SH_VMALLOC_H + +#endif /* _ASM_SH_VMALLOC_H */ diff --git a/arch/sparc/include/asm/vmalloc.h b/arch/sparc/include/asm/vmalloc.h new file mode 100644 index 0000000000000..04b8ab9518b88 --- /dev/null +++ b/arch/sparc/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_SPARC_VMALLOC_H +#define _ASM_SPARC_VMALLOC_H + +#endif /* _ASM_SPARC_VMALLOC_H */ diff --git a/arch/um/include/asm/vmalloc.h b/arch/um/include/asm/vmalloc.h new file mode 100644 index 0000000000000..9a7b9ed937337 --- /dev/null +++ b/arch/um/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UM_VMALLOC_H +#define _ASM_UM_VMALLOC_H + +#endif /* _ASM_UM_VMALLOC_H */ diff --git a/arch/unicore32/include/asm/vmalloc.h b/arch/unicore32/include/asm/vmalloc.h new file mode 100644 index 0000000000000..054435818a146 --- /dev/null +++ b/arch/unicore32/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_UNICORE32_VMALLOC_H +#define _ASM_UNICORE32_VMALLOC_H + +#endif /* _ASM_UNICORE32_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h new file mode 100644 index 0000000000000..ba6295fa58762 --- 
/dev/null +++ b/arch/x86/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_X86_VMALLOC_H +#define _ASM_X86_VMALLOC_H + +#endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/xtensa/include/asm/vmalloc.h b/arch/xtensa/include/asm/vmalloc.h new file mode 100644 index 0000000000000..0eb94b70be55d --- /dev/null +++ b/arch/xtensa/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_XTENSA_VMALLOC_H +#define _ASM_XTENSA_VMALLOC_H + +#endif /* _ASM_XTENSA_VMALLOC_H */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 239571c69d6bd..ab60c69a40975 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -10,6 +10,8 @@ #include <linux/rbtree.h> #include <linux/overflow.h>
+#include <asm/vmalloc.h> + struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */
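The new headers are intentionally empty; the point is to give each architecture a dedicated place for vmalloc-related definitions that currently reach the generic code indirectly via <asm/page.h>. A hypothetical example of what such a header could carry later (the architecture, macro name, and value are purely illustrative, not from this patch):

#ifndef _ASM_FOO_VMALLOC_H
#define _ASM_FOO_VMALLOC_H

/* Illustrative only: an arch-specific vmalloc tunable for a made-up arch. */
#define FOO_VMALLOC_DEFAULT_ALIGN	SZ_2M

#endif /* _ASM_FOO_VMALLOC_H */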
From: Daniel Axtens dja@axtens.net
mainline inclusion
from mainline-v5.5-rc3
commit be1db4753ee6a0db80a900df9dbbf6ad2acc4bd1
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
apply_to_page_range() takes an address range, and if any parts of it are not covered by the existing page table hierarchy, it allocates memory to fill them in.
In some use cases, this is not what we want - we want to be able to operate exclusively on PTEs that are already in the tables.
Add apply_to_existing_page_range() for this. Adjust the walker functions for apply_to_page_range to take 'create', which switches them between the old and new modes.
This will be used in KASAN vmalloc.
[akpm@linux-foundation.org: reduce code duplication] [akpm@linux-foundation.org: s/apply_to_existing_pages/apply_to_existing_page_range/] [akpm@linux-foundation.org: initialize __apply_to_page_range::err] Link: http://lkml.kernel.org/r/20191205140407.1874-1-dja@axtens.net Signed-off-by: Daniel Axtens dja@axtens.net Cc: Dmitry Vyukov dvyukov@google.com Cc: Uladzislau Rezki (Sony) urezki@gmail.com Cc: Alexander Potapenko glider@google.com Cc: Daniel Axtens dja@axtens.net Cc: Qian Cai cai@lca.pw Cc: Andrey Ryabinin aryabinin@virtuozzo.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/mm.h | 3 + mm/memory.c | 136 +++++++++++++++++++++++++++++++-------------- 2 files changed, 97 insertions(+), 42 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 58fe28dd959b9..8aa492fb7d538 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2709,6 +2709,9 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +extern int apply_to_existing_page_range(struct mm_struct *mm, + unsigned long address, unsigned long size, + pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING diff --git a/mm/memory.c b/mm/memory.c index 508e7867378c6..8365bcac60bae 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1967,18 +1967,24 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pte_t *pte; - int err; + int err = 0; pgtable_t token; spinlock_t *uninitialized_var(ptl);
- pte = (mm == &init_mm) ? - pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); - if (!pte) - return -ENOMEM; + if (create) { + pte = (mm == &init_mm) ? + pte_alloc_kernel(pmd, addr) : + pte_alloc_map_lock(mm, pmd, addr, &ptl); + if (!pte) + return -ENOMEM; + } else { + pte = (mm == &init_mm) ? + pte_offset_kernel(pmd, addr) : + pte_offset_map_lock(mm, pmd, addr, &ptl); + }
BUG_ON(pmd_huge(*pmd));
@@ -1987,9 +1993,11 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd);
do { - err = fn(pte++, token, addr, data); - if (err) - break; + if (create || !pte_none(*pte)) { + err = fn(pte++, token, addr, data); + if (err) + break; + } } while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode(); @@ -2001,77 +2009,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pmd_t *pmd; unsigned long next; - int err; + int err = 0;
BUG_ON(pud_huge(*pud));
- pmd = pmd_alloc(mm, pud, addr); - if (!pmd) - return -ENOMEM; + if (create) { + pmd = pmd_alloc(mm, pud, addr); + if (!pmd) + return -ENOMEM; + } else { + pmd = pmd_offset(pud, addr); + } do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); - if (err) - break; + if (create || !pmd_none_or_clear_bad(pmd)) { + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, + create); + if (err) + break; + } } while (pmd++, addr = next, addr != end); return err; }
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { pud_t *pud; unsigned long next; - int err; + int err = 0;
- pud = pud_alloc(mm, p4d, addr); - if (!pud) - return -ENOMEM; + if (create) { + pud = pud_alloc(mm, p4d, addr); + if (!pud) + return -ENOMEM; + } else { + pud = pud_offset(p4d, addr); + } do { next = pud_addr_end(addr, end); - err = apply_to_pmd_range(mm, pud, addr, next, fn, data); - if (err) - break; + if (create || !pud_none_or_clear_bad(pud)) { + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, + create); + if (err) + break; + } } while (pud++, addr = next, addr != end); return err; }
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, unsigned long addr, unsigned long end, - pte_fn_t fn, void *data) + pte_fn_t fn, void *data, bool create) { p4d_t *p4d; unsigned long next; - int err; + int err = 0;
- p4d = p4d_alloc(mm, pgd, addr); - if (!p4d) - return -ENOMEM; + if (create) { + p4d = p4d_alloc(mm, pgd, addr); + if (!p4d) + return -ENOMEM; + } else { + p4d = p4d_offset(pgd, addr); + } do { next = p4d_addr_end(addr, end); - err = apply_to_pud_range(mm, p4d, addr, next, fn, data); - if (err) - break; + if (create || !p4d_none_or_clear_bad(p4d)) { + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, + create); + if (err) + break; + } } while (p4d++, addr = next, addr != end); return err; }
-/* - * Scan a region of virtual memory, filling in page tables as necessary - * and calling a provided function on each leaf page table. - */ -int apply_to_page_range(struct mm_struct *mm, unsigned long addr, - unsigned long size, pte_fn_t fn, void *data) +static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, + void *data, bool create) { pgd_t *pgd; unsigned long next; unsigned long end = addr + size; - int err; + int err = 0;
if (WARN_ON(addr >= end)) return -EINVAL; @@ -2079,15 +2105,41 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - err = apply_to_p4d_range(mm, pgd, addr, next, fn, data); + if (!create && pgd_none_or_clear_bad(pgd)) + continue; + err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); if (err) break; } while (pgd++, addr = next, addr != end);
return err; } + +/* + * Scan a region of virtual memory, filling in page tables as necessary + * and calling a provided function on each leaf page table. + */ +int apply_to_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, true); +} EXPORT_SYMBOL_GPL(apply_to_page_range);
+/* + * Scan a region of virtual memory, calling a provided function on + * each leaf page table where it exists. + * + * Unlike apply_to_page_range, this does _not_ fill in page tables + * where they are absent. + */ +int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr, + unsigned long size, pte_fn_t fn, void *data) +{ + return __apply_to_page_range(mm, addr, size, fn, data, false); +} +EXPORT_SYMBOL_GPL(apply_to_existing_page_range); + /* * handle_pte_fault chooses page fault handler according to an entry which was * read non-atomically. Before making any commitment, on those architectures
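A minimal usage sketch (not part of the patch): count how many PTEs already exist in a kernel virtual range. The pte_fn_t signature is the one in this tree (it still takes a pgtable_t token); the helper names are made up.

static int count_present_pte(pte_t *pte, pgtable_t token,
			     unsigned long addr, void *data)
{
	unsigned long *count = data;

	/* In the "existing" walk the core already skips pte_none() slots. */
	(*count)++;
	return 0;
}

static unsigned long count_mapped(unsigned long start, unsigned long size)
{
	unsigned long count = 0;

	/* Never allocates page tables; unpopulated ranges are skipped. */
	apply_to_existing_page_range(&init_mm, start, size,
				     count_present_pte, &count);
	return count;
}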
From: Steven Price steven.price@arm.com
mainline inclusion
from mainline-v5.6-rc1
commit 93fab1b22ef7c4abcbc760ce4432762b02e7f3d1
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
Patch series "Generic page walk and ptdump", v17.
Many architectures currently have a debugfs file for dumping the kernel page tables. Each architecture has to implement custom functions for this because the details of walking the page tables used by the kernel are different between architectures.
This series extends the capabilities of walk_page_range() so that it can deal with the page tables of the kernel (which have no VMAs and can contain larger huge pages than exist for user space). A generic PTDUMP implementation is then built on top of the new walk_page_range() functionality, and finally arm64 and x86 are switched to using it, removing the custom table walkers.
To enable a generic page table walker to walk the unusual mappings of the kernel we need to implement a set of functions which let us know when the walker has reached the leaf entry. After a suggestion from Will Deacon I've chosen the name p?d_leaf() as this (hopefully) describes the purpose (and is a new name so has no historic baggage). Some architectures have p?d_large macros but this is easily confused with "large pages".
This series ends with a generic PTDUMP implementation for arm64 and x86.
Mostly this is a clean up and there should be very little functional change. The exceptions are:
* arm64 PTDUMP debugfs now displays pages which aren't present (patch 22).
* arm64 has the ability to efficiently process KASAN pages (which previously only x86 implemented). This means that the combination of KASAN and DEBUG_WX is now useable.
This patch (of 23):
Exposing the pud/pgd levels of the page tables to walk_page_range() means we may come across the exotic large mappings that come with large areas of contiguous memory (such as the kernel's linear map).
For architectures that don't provide all p?d_leaf() macros, provide generic do-nothing defaults that are suitable where there cannot be leaf pages at that level. Further patches will add implementations for individual architectures.
The name p?d_leaf() is chosen to minimize confusion with existing uses of "large" pages and "huge" pages, which do not necessarily mean that the entry is a leaf (for example, it may be a set of contiguous entries that only take one TLB slot). For the purpose of walking the page tables we don't need to know how it will be represented in the TLB, but we do need to know for sure whether it is a leaf of the tree.
Link: http://lkml.kernel.org/r/20191218162402.45610-2-steven.price@arm.com Signed-off-by: Steven Price steven.price@arm.com Acked-by: Mark Rutland mark.rutland@arm.com Cc: Andy Lutomirski luto@kernel.org Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Arnd Bergmann arnd@arndb.de Cc: Borislav Petkov bp@alien8.de Cc: Catalin Marinas catalin.marinas@arm.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Ingo Molnar mingo@redhat.com Cc: James Morse james.morse@arm.com Cc: Jerome Glisse jglisse@redhat.com Cc: Peter Zijlstra peterz@infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Will Deacon will@kernel.org Cc: "H. Peter Anvin" hpa@zytor.com Cc: "Liang, Kan" kan.liang@linux.intel.com Cc: Albert Ou aou@eecs.berkeley.edu Cc: Alexandre Ghiti alex@ghiti.fr Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: David S. Miller davem@davemloft.net Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: James Hogan jhogan@kernel.org Cc: Michael Ellerman mpe@ellerman.id.au Cc: Paul Burton paul.burton@mips.com Cc: Paul Mackerras paulus@samba.org Cc: Paul Walmsley paul.walmsley@sifive.com Cc: Ralf Baechle ralf@linux-mips.org Cc: Russell King linux@armlinux.org.uk Cc: Vasily Gorbik gor@linux.ibm.com Cc: Vineet Gupta vgupta@synopsys.com Cc: Zong Li zong.li@sifive.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/asm-generic/pgtable.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index fa90bdd4089fc..0d17410962b9d 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1148,4 +1148,24 @@ static inline bool arch_has_pfn_modify_check(void) #define mm_pmd_folded(mm) __is_defined(__PAGETABLE_PMD_FOLDED) #endif
+/*
+ * p?d_leaf() - true if this entry is a final mapping to a physical address.
+ * This differs from p?d_huge() by the fact that they are always available (if
+ * the architecture supports large pages at the appropriate level) even
+ * if CONFIG_HUGETLB_PAGE is not defined.
+ * Only meaningful when called on a valid entry.
+ */
+#ifndef pgd_leaf
+#define pgd_leaf(x)	0
+#endif
+#ifndef p4d_leaf
+#define p4d_leaf(x)	0
+#endif
+#ifndef pud_leaf
+#define pud_leaf(x)	0
+#endif
+#ifndef pmd_leaf
+#define pmd_leaf(x)	0
+#endif
+
 #endif /* _ASM_GENERIC_PGTABLE_H */
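For illustration, this is how a generic walker is expected to consult these macros (sketch only; note_block_mapping() and walk_pte_level() are hypothetical helpers):

static int walk_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end)
{
	if (pmd_leaf(*pmd)) {
		/* Final mapping at this level: no PTE table below it. */
		return note_block_mapping(addr, end);	/* hypothetical */
	}

	/* Not a leaf: descend to the PTE level as usual. */
	return walk_pte_level(pmd, addr, end);		/* hypothetical */
}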
From: Steven Price steven.price@arm.com
mainline inclusion
from mainline-v5.6-rc1
commit 8aa82df3c123129025a364d8f823929cc488b834
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
walk_page_range() is going to be allowed to walk page tables other than those of user space. For this it needs to know when it has reached a 'leaf' entry in the page tables. This information will be provided by the p?d_leaf() functions/macros.
For arm64, we already have p?d_sect() macros which we can reuse for p?d_leaf().
pud_sect() is defined as a dummy function when CONFIG_PGTABLE_LEVELS < 3 or CONFIG_ARM64_64K_PAGES is defined. However when the kernel is configured this way then architecturally it isn't allowed to have a large page at this level, and any code using these page walking macros is implicitly relying on the page size/number of levels being the same as the kernel. So it is safe to reuse this for p?d_leaf() as it is an architectural restriction.
Link: http://lkml.kernel.org/r/20191218162402.45610-5-steven.price@arm.com Signed-off-by: Steven Price steven.price@arm.com Acked-by: Catalin Marinas catalin.marinas@arm.com Cc: Catalin Marinas catalin.marinas@arm.com Cc: Will Deacon will@kernel.org Cc: Albert Ou aou@eecs.berkeley.edu Cc: Alexandre Ghiti alex@ghiti.fr Cc: Andy Lutomirski luto@kernel.org Cc: Ard Biesheuvel ard.biesheuvel@linaro.org Cc: Arnd Bergmann arnd@arndb.de Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Borislav Petkov bp@alien8.de Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: David S. Miller davem@davemloft.net Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: "H. Peter Anvin" hpa@zytor.com Cc: Ingo Molnar mingo@redhat.com Cc: James Hogan jhogan@kernel.org Cc: James Morse james.morse@arm.com Cc: Jerome Glisse jglisse@redhat.com Cc: "Liang, Kan" kan.liang@linux.intel.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Ellerman mpe@ellerman.id.au Cc: Paul Burton paul.burton@mips.com Cc: Paul Mackerras paulus@samba.org Cc: Paul Walmsley paul.walmsley@sifive.com Cc: Peter Zijlstra peterz@infradead.org Cc: Ralf Baechle ralf@linux-mips.org Cc: Russell King linux@armlinux.org.uk Cc: Thomas Gleixner tglx@linutronix.de Cc: Vasily Gorbik gor@linux.ibm.com Cc: Vineet Gupta vgupta@synopsys.com Cc: Zong Li zong.li@sifive.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/pgtable.h | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 85af92cb24560..f4abd58262403 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -417,6 +417,7 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, PMD_TYPE_TABLE) #define pmd_sect(pmd) ((pmd_val(pmd) & PMD_TYPE_MASK) == \ PMD_TYPE_SECT) +#define pmd_leaf(pmd) pmd_sect(pmd)
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } @@ -482,6 +483,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & PUD_TABLE_BIT)) #define pud_present(pud) pte_present(pud_pte(pud)) +#define pud_leaf(pud) pud_sect(pud) #define pud_valid(pud) pte_valid(pud_pte(pud))
static inline void set_pud(pud_t *pudp, pud_t pud)
From: Christoph Hellwig hch@lst.de
mainline inclusion
from mainline-v5.8-rc1
commit 4926627793c0a7e7db2bc674e1d06777e86d8dab
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
Switch the two remaining callers to use __get_vm_area_caller instead.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-9-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/powerpc/kernel/pci_64.c | 3 ++- arch/sh/kernel/cpu/sh4/sq.c | 3 ++- include/linux/vmalloc.h | 2 -- mm/vmalloc.c | 8 -------- 4 files changed, 4 insertions(+), 12 deletions(-)
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index dff28f9035124..ac956dfdbbde6 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -144,7 +144,8 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) * with incomplete address decoding but I'd rather not deal with * those outside of the reserved 64K legacy region. */ - area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); + area = __get_vm_area_caller(size_page, 0, PHB_IO_BASE, PHB_IO_END, + __builtin_return_address(0)); if (area == NULL) return -ENOMEM; hose->io_base_alloc = area->addr; diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 4ca78ed71ad2c..fb97dab3bad7d 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -106,7 +106,8 @@ static int __sq_remap(struct sq_mapping *map, pgprot_t prot) #if defined(CONFIG_MMU) struct vm_struct *vma;
- vma = __get_vm_area(map->size, VM_ALLOC, map->sq_addr, SQ_ADDRMAX); + vma = __get_vm_area_caller(map->size, VM_ALLOC, map->sq_addr, + SQ_ADDRMAX, __builtin_return_address(0)); if (!vma) return -ENOMEM;
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ab60c69a40975..b789a8fd51381 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -133,8 +133,6 @@ static inline size_t get_vm_area_size(const struct vm_struct *area) extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); -extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, - unsigned long start, unsigned long end); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, diff --git a/mm/vmalloc.c b/mm/vmalloc.c index c42d784f03b8f..57ab4726aeb34 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2000,14 +2000,6 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, return area; }
-struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, - unsigned long start, unsigned long end) -{ - return __get_vm_area_node(size, 1, flags, start, end, NUMA_NO_NODE, - GFP_KERNEL, __builtin_return_address(0)); -} -EXPORT_SYMBOL_GPL(__get_vm_area); - struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller)
From: Christoph Hellwig hch@lst.de
mainline inclusion
from mainline-v5.8-rc1
commit 8f87cc9386dc7965de151605637eee939ea0d098
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
This commit will break KABI by unexporting an interface.

---------------------------
There are no modular users of this function.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-10-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 57ab4726aeb34..1648d7f14a475 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1910,7 +1910,6 @@ void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { vunmap_page_range(addr, addr + size); } -EXPORT_SYMBOL_GPL(unmap_kernel_range_noflush);
/** * unmap_kernel_range - unmap kernel VM area and flush cache and TLB
From: Christoph Hellwig hch@lst.de
mainline inclusion
from mainline-v5.8-rc1
commit b607e6d17db5b91e6a807b4f9a2e849219d720a0
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
This allows unexporting map_vm_area and unmap_kernel_range, which are rather deep internals and should not be available to modules: they allow, for example, fine-grained control of mapping permissions, and they also allow splitting the setup of a vmalloc area from the actual mapping, thus exposing vmalloc internals.
zsmalloc is typically built-in and continues to work (just like the percpu-vm code, which uses a similar pattern), while modular zsmalloc also continues to work, but must use copies.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-12-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/Kconfig | 2 +- mm/vmalloc.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-)
diff --git a/mm/Kconfig b/mm/Kconfig index 7edf3c4c1252b..0434aef47b44c 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -638,7 +638,7 @@ config ZSMALLOC
config PGTABLE_MAPPING bool "Use page table mapping to access object in zsmalloc" - depends on ZSMALLOC + depends on ZSMALLOC=y help By default, zsmalloc uses a copy-based object mapping method to access allocations that span two pages. However, if a particular diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1648d7f14a475..1f6a8b1eae5fe 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1927,7 +1927,6 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) vunmap_page_range(addr, end); flush_tlb_kernel_range(addr, end); } -EXPORT_SYMBOL_GPL(unmap_kernel_range);
int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { @@ -1939,7 +1938,6 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages)
return err > 0 ? 0 : err; } -EXPORT_SYMBOL_GPL(map_vm_area);
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller)
From: Christoph Hellwig hch@lst.de
mainline inclusion
from mainline-v5.8-rc1
commit 78a0e8c4837f42e9c2b1127e9c450ceeb0efbde6
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
Every use of addr in vb_free() casts it to unsigned long first, and the caller has an unsigned long version of the address available anyway. Just pass that and avoid all the casts.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-13-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1f6a8b1eae5fe..56e1c55e73f47 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1556,7 +1556,7 @@ static void *vb_alloc(unsigned long size, gfp_t gfp_mask) return vaddr; }
-static void vb_free(const void *addr, unsigned long size) +static void vb_free(unsigned long addr, unsigned long size) { unsigned long offset; unsigned long vb_idx; @@ -1566,24 +1566,22 @@ static void vb_free(const void *addr, unsigned long size) BUG_ON(offset_in_page(size)); BUG_ON(size > PAGE_SIZE*VMAP_MAX_ALLOC);
- flush_cache_vunmap((unsigned long)addr, (unsigned long)addr + size); + flush_cache_vunmap(addr, addr + size);
order = get_order(size);
- offset = (unsigned long)addr & (VMAP_BLOCK_SIZE - 1); - offset >>= PAGE_SHIFT; + offset = (addr & (VMAP_BLOCK_SIZE - 1)) >> PAGE_SHIFT;
- vb_idx = addr_to_vb_idx((unsigned long)addr); + vb_idx = addr_to_vb_idx(addr); rcu_read_lock(); vb = radix_tree_lookup(&vmap_block_tree, vb_idx); rcu_read_unlock(); BUG_ON(!vb);
- vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); + vunmap_page_range(addr, addr + size);
if (debug_pagealloc_enabled()) - flush_tlb_kernel_range((unsigned long)addr, - (unsigned long)addr + size); + flush_tlb_kernel_range(addr, addr + size);
spin_lock(&vb->lock);
@@ -1675,7 +1673,7 @@ void vm_unmap_ram(const void *mem, unsigned int count)
if (likely(count <= VMAP_MAX_ALLOC)) { debug_check_no_locks_freed(mem, size); - vb_free(mem, size); + vb_free(addr, size); return; }
From: Christoph Hellwig hch@lst.de
mainline inclusion
from mainline-v5.8-rc1
commit b521c43f58e5234ee9b29817ed5e93523abcffa9
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI
CVE: NA
---------------------------
These have non-static aliases called map_kernel_range_noflush and unmap_kernel_range_noflush that differ only slightly in calling convention: they take addr + size instead of an end address.
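A short usage sketch of the addr/size convention, following the caller duties spelled out in the kerneldoc added below (the helper is made up; the _noflush variants leave cache flushing to the caller):

static int map_area(unsigned long addr, struct page **pages,
		    unsigned int nr_pages, pgprot_t prot)
{
	unsigned long size = (unsigned long)nr_pages << PAGE_SHIFT;
	int ret;

	/* Caller is responsible for cache maintenance around _noflush calls. */
	flush_cache_vmap(addr, addr + size);

	ret = map_kernel_range_noflush(addr, size, prot, pages);
	return ret < 0 ? ret : 0;
}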
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-14-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 98 +++++++++++++++++++++------------------------------- 1 file changed, 40 insertions(+), 58 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 56e1c55e73f47..f9652503896af 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -118,10 +118,24 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end) } while (p4d++, addr = next, addr != end); }
-static void vunmap_page_range(unsigned long addr, unsigned long end) +/** + * unmap_kernel_range_noflush - unmap kernel VM area + * @addr: start of the VM area to unmap + * @size: size of the VM area to unmap + * + * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size specify + * should have been allocated using get_vm_area() and its friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is responsible + * for calling flush_cache_vunmap() on to-be-mapped areas before calling this + * function and flush_tlb_kernel_range() after. + */ +void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { - pgd_t *pgd; + unsigned long end = addr + size; unsigned long next; + pgd_t *pgd;
BUG_ON(addr >= end); pgd = pgd_offset_k(addr); @@ -210,18 +224,30 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, return 0; }
-/* - * Set up page tables in kva (addr, end). The ptes shall have prot "prot", and - * will have pfns corresponding to the "pages" array. +/** + * map_kernel_range_noflush - map kernel VM area with the specified pages + * @addr: start of the VM area to map + * @size: size of the VM area to map + * @prot: page protection flags to use + * @pages: pages to map * - * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] + * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size specify should + * have been allocated using get_vm_area() and its friends. + * + * NOTE: + * This function does NOT do any cache flushing. The caller is responsible for + * calling flush_cache_vmap() on to-be-mapped areas before calling this + * function. + * + * RETURNS: + * The number of pages mapped on success, -errno on failure. */ -static int vmap_page_range_noflush(unsigned long start, unsigned long end, - pgprot_t prot, struct page **pages) +int map_kernel_range_noflush(unsigned long addr, unsigned long size, + pgprot_t prot, struct page **pages) { - pgd_t *pgd; + unsigned long end = addr + size; unsigned long next; - unsigned long addr = start; + pgd_t *pgd; int err = 0; int nr = 0;
@@ -242,7 +268,7 @@ static int vmap_page_range(unsigned long start, unsigned long end, { int ret;
- ret = vmap_page_range_noflush(start, end, prot, pages); + ret = map_kernel_range_noflush(start, end - start, prot, pages); flush_cache_vmap(start, end); return ret; } @@ -1147,7 +1173,7 @@ static void free_vmap_area(struct vmap_area *va) */ static void unmap_vmap_area(struct vmap_area *va) { - vunmap_page_range(va->va_start, va->va_end); + unmap_kernel_range_noflush(va->va_start, va->va_end - va->va_start); }
/* @@ -1578,7 +1604,7 @@ static void vb_free(unsigned long addr, unsigned long size) rcu_read_unlock(); BUG_ON(!vb);
- vunmap_page_range(addr, addr + size); + unmap_kernel_range_noflush(addr, size);
if (debug_pagealloc_enabled()) flush_tlb_kernel_range(addr, addr + size); @@ -1865,50 +1891,6 @@ void __init vmalloc_init(void) vmap_initialized = true; }
-/** - * map_kernel_range_noflush - map kernel VM area with the specified pages - * @addr: start of the VM area to map - * @size: size of the VM area to map - * @prot: page protection flags to use - * @pages: pages to map - * - * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size - * specify should have been allocated using get_vm_area() and its - * friends. - * - * NOTE: - * This function does NOT do any cache flushing. The caller is - * responsible for calling flush_cache_vmap() on to-be-mapped areas - * before calling this function. - * - * RETURNS: - * The number of pages mapped on success, -errno on failure. - */ -int map_kernel_range_noflush(unsigned long addr, unsigned long size, - pgprot_t prot, struct page **pages) -{ - return vmap_page_range_noflush(addr, addr + size, prot, pages); -} - -/** - * unmap_kernel_range_noflush - unmap kernel VM area - * @addr: start of the VM area to unmap - * @size: size of the VM area to unmap - * - * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size - * specify should have been allocated using get_vm_area() and its - * friends. - * - * NOTE: - * This function does NOT do any cache flushing. The caller is - * responsible for calling flush_cache_vunmap() on to-be-mapped areas - * before calling this function and flush_tlb_kernel_range() after. - */ -void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) -{ - vunmap_page_range(addr, addr + size); -} - /** * unmap_kernel_range - unmap kernel VM area and flush cache and TLB * @addr: start of the VM area to unmap @@ -1922,7 +1904,7 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) unsigned long end = addr + size;
flush_cache_vunmap(addr, end); - vunmap_page_range(addr, end); + unmap_kernel_range_noflush(addr, size); flush_tlb_kernel_range(addr, end); }
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v5.8-rc1 commit a29adb6209cead1f6c34a8d72481fb183bfc2d68 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
---------------------------
Rename vmap_page_range() to map_kernel_range() so that it matches the map_kernel_range_noflush() API. Also change it to take a size instead of an end, similar to the noflush version.
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-15-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f9652503896af..bbdaf00b5db91 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -263,13 +263,13 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, return nr; }
-static int vmap_page_range(unsigned long start, unsigned long end, +static int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages) { int ret;
- ret = map_kernel_range_noflush(start, end - start, prot, pages); - flush_cache_vmap(start, end); + ret = map_kernel_range_noflush(start, size, prot, pages); + flush_cache_vmap(start, start + size); return ret; }
@@ -1747,7 +1747,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro addr = va->va_start; mem = (void *)addr; } - if (vmap_page_range(addr, addr + size, prot, pages) < 0) { + if (map_kernel_range(addr, size, prot, pages) < 0) { vm_unmap_ram(mem, count); return NULL; } @@ -1911,10 +1911,9 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) { unsigned long addr = (unsigned long)area->addr; - unsigned long end = addr + get_vm_area_size(area); int err;
- err = vmap_page_range(addr, end, prot, pages); + err = map_kernel_range(addr, get_vm_area_size(area), prot, pages);
return err > 0 ? 0 : err; }
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v5.8-rc1 commit 60bb44652a0dcc44acfc2ed8ebb35e4a389e5421 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
---------------------------
None of the callers needs the number of pages returned by map_kernel_range_noflush(), and a 0 / -errno return value is a lot more intuitive.
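A minimal caller sketch (the helper name is invented, and the pages array is assumed to cover size bytes) showing how the new return convention is consumed:

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/cacheflush.h>

/* Map size bytes worth of pages at addr; 0 on success, -errno on failure. */
static int example_map(unsigned long addr, unsigned long size,
		       pgprot_t prot, struct page **pages)
{
	int err;

	err = map_kernel_range_noflush(addr, size, prot, pages);
	if (err)
		return err;
	flush_cache_vmap(addr, addr + size);
	return 0;
}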
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-16-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index bbdaf00b5db91..7eb558a9053c6 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -240,7 +240,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, * function. * * RETURNS: - * The number of pages mapped on success, -errno on failure. + * 0 on success, -errno on failure. */ int map_kernel_range_noflush(unsigned long addr, unsigned long size, pgprot_t prot, struct page **pages) @@ -260,7 +260,7 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, return err; } while (pgd++, addr = next, addr != end);
- return nr; + return 0; }
static int map_kernel_range(unsigned long start, unsigned long size,
From: Christoph Hellwig hch@lst.de
mainline inclusion from mainline-v5.8-rc1 commit ed1f324c5fed06c91f30a36aedb66f34244ab86e category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
Backport adjustment: convert the map_vm_area() caller in the dma mapping code, and do not remove the code in ceph_common.
---------------------------
Switch all callers to map_kernel_range(), which is symmetric to the unmap side (as well as to the _noflush versions).
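The conversion applied to each caller is mechanical; as a sketch (the wrapper name is invented), a former map_vm_area(area, prot, pages) call becomes:

#include <linux/vmalloc.h>

/* Illustration only: the open-coded replacement for map_vm_area(). */
static int example_map_area(struct vm_struct *area, pgprot_t prot,
			    struct page **pages)
{
	return map_kernel_range((unsigned long)area->addr,
				get_vm_area_size(area), prot, pages);
}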
Signed-off-by: Christoph Hellwig hch@lst.de Signed-off-by: Andrew Morton akpm@linux-foundation.org Acked-by: Peter Zijlstra (Intel) peterz@infradead.org Cc: Christian Borntraeger borntraeger@de.ibm.com Cc: Christophe Leroy christophe.leroy@c-s.fr Cc: Daniel Vetter daniel.vetter@ffwll.ch Cc: David Airlie airlied@linux.ie Cc: Gao Xiang xiang@kernel.org Cc: Greg Kroah-Hartman gregkh@linuxfoundation.org Cc: Haiyang Zhang haiyangz@microsoft.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: "K. Y. Srinivasan" kys@microsoft.com Cc: Laura Abbott labbott@redhat.com Cc: Mark Rutland mark.rutland@arm.com Cc: Michael Kelley mikelley@microsoft.com Cc: Minchan Kim minchan@kernel.org Cc: Nitin Gupta ngupta@vflare.org Cc: Robin Murphy robin.murphy@arm.com Cc: Sakari Ailus sakari.ailus@linux.intel.com Cc: Stephen Hemminger sthemmin@microsoft.com Cc: Sumit Semwal sumit.semwal@linaro.org Cc: Wei Liu wei.liu@kernel.org Cc: Benjamin Herrenschmidt benh@kernel.crashing.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Heiko Carstens heiko.carstens@de.ibm.com Cc: Paul Mackerras paulus@ozlabs.org Cc: Vasily Gorbik gor@linux.ibm.com Cc: Will Deacon will@kernel.org Link: http://lkml.kernel.org/r/20200414131348.444715-17-hch@lst.de Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- Documentation/core-api/cachetlb.rst | 2 +- include/linux/vmalloc.h | 10 ++++------ kernel/dma/mapping.c | 3 ++- mm/vmalloc.c | 21 +++++++-------------- mm/zsmalloc.c | 4 +++- 5 files changed, 17 insertions(+), 23 deletions(-)
diff --git a/Documentation/core-api/cachetlb.rst b/Documentation/core-api/cachetlb.rst index 6eb9d3f090cdf..a2fed63e69290 100644 --- a/Documentation/core-api/cachetlb.rst +++ b/Documentation/core-api/cachetlb.rst @@ -223,7 +223,7 @@ Here are the routines, one by one: there will be no entries in the cache for the kernel address space for virtual addresses in the range 'start' to 'end-1'.
- The first of these two routines is invoked after map_vm_area() + The first of these two routines is invoked after map_kernel_range() has installed the page table entries. The second is invoked before unmap_kernel_range() deletes the page table entries.
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index b789a8fd51381..086013413a1c3 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -140,11 +140,11 @@ extern struct vm_struct *__get_vm_area_caller(unsigned long size, extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr);
-extern int map_vm_area(struct vm_struct *area, pgprot_t prot, - struct page **pages); #ifdef CONFIG_MMU extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); +int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, + struct page **pages); extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); extern void unmap_kernel_range(unsigned long addr, unsigned long size); #else @@ -154,14 +154,12 @@ map_kernel_range_noflush(unsigned long start, unsigned long size, { return size >> PAGE_SHIFT; } +#define map_kernel_range map_kernel_range_noflush static inline void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) { } -static inline void -unmap_kernel_range(unsigned long addr, unsigned long size) -{ -} +#define unmap_kernel_range unmap_kernel_range_noflush #endif
/* Allocate/destroy a 'vmalloc' VM area. */ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index d2a92ddaac4d1..9cc1b34cab8f4 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -255,7 +255,8 @@ static struct vm_struct *__dma_common_pages_remap(struct page **pages, if (!area) return NULL;
- if (map_vm_area(area, prot, pages)) { + if (map_kernel_range((unsigned long)area->addr, size, prot, + pages) < 0) { vunmap(area->addr); return NULL; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 7eb558a9053c6..9d205edc4f984 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -263,8 +263,8 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, return 0; }
-static int map_kernel_range(unsigned long start, unsigned long size, - pgprot_t prot, struct page **pages) +int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, + struct page **pages) { int ret;
@@ -1908,16 +1908,6 @@ void unmap_kernel_range(unsigned long addr, unsigned long size) flush_tlb_kernel_range(addr, end); }
-int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages) -{ - unsigned long addr = (unsigned long)area->addr; - int err; - - err = map_kernel_range(addr, get_vm_area_size(area), prot, pages); - - return err > 0 ? 0 : err; -} - static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va, unsigned long flags, const void *caller) { @@ -2202,7 +2192,8 @@ void *vmap(struct page **pages, unsigned int count, if (!area) return NULL;
- if (map_vm_area(area, prot, pages)) { + if (map_kernel_range((unsigned long)area->addr, size, prot, + pages) < 0) { vunmap(area->addr); return NULL; } @@ -2265,8 +2256,10 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, cond_resched(); }
- if (map_vm_area(area, prot, pages)) + if (map_kernel_range((unsigned long)area->addr, get_vm_area_size(area), + prot, pages) < 0) goto fail; + return area->addr;
fail: diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 85cc29c93d93a..45a53ffd7c974 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -1156,7 +1156,9 @@ static inline void __zs_cpu_down(struct mapping_area *area) static inline void *__zs_map_object(struct mapping_area *area, struct page *pages[2], int off, int size) { - BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, pages)); + unsigned long addr = (unsigned long)area->vm->addr; + + BUG_ON(map_kernel_range(addr, PAGE_SIZE * 2, PAGE_KERNEL, pages) < 0); area->vm_addr = area->vm->addr; return area->vm_addr + off; }
From: Anshuman Khandual anshuman.khandual@arm.com
mainline inclusion from mainline-v5.3-rc1 commit 0f472d04f59ff89d15b2a1c4eafde7317ddd67a2 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
Drop the powerpc hunks in this backport. ---------------------------
Finish up what commit c2febafc6773 ("mm: convert generic code to 5-level paging") started by bringing P4D huge mapping support up to par with PUD and PMD. A new arch callback arch_ioremap_p4d_supported() is added which just maintains the status quo (P4D huge map not supported) on x86, arm64 and powerpc.
When HAVE_ARCH_HUGE_VMAP is enabled, this is just a simple check of the arch's support, so the runtime effects are minimal.
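For context, the check reduces to reading a capability flag that ioremap_huge_init() sets once at boot; the pre-existing helper in lib/ioremap.c (visible where a later patch in this series removes it) has this shape:

/* Set once by ioremap_huge_init(), only read on the mapping path. */
static inline int ioremap_p4d_enabled(void)
{
	return ioremap_p4d_capable;
}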
Link: http://lkml.kernel.org/r/1561699231-20991-1-git-send-email-anshuman.khandual... Signed-off-by: Anshuman Khandual anshuman.khandual@arm.com Acked-by: Thomas Gleixner tglx@linutronix.de Acked-by: Michael Ellerman mpe@ellerman.id.au (powerpc) Cc: Catalin Marinas catalin.marinas@arm.com Cc: Will Deacon will.deacon@arm.com Cc: Dave Hansen dave.hansen@linux.intel.com Cc: Andy Lutomirski luto@kernel.org Cc: Peter Zijlstra peterz@infradead.org Cc: Ingo Molnar mingo@redhat.com Cc: Kirill A. Shutemov kirill.shutemov@linux.intel.com Cc: Michal Hocko mhocko@kernel.org Cc: Stephen Rothwell sfr@canb.auug.org.au Signed-off-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/mm/mmu.c | 5 +++++ arch/x86/mm/ioremap.c | 5 +++++ include/linux/io.h | 1 + lib/ioremap.c | 2 ++ 4 files changed, 13 insertions(+)
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 99a86326a301b..437ee4ab41a19 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -921,6 +921,11 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) return dt_virt; }
+int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} + int __init arch_ioremap_pud_supported(void) { /* diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index adc77904fc3ed..6ce8a2b593e09 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -431,6 +431,11 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap);
+int __init arch_ioremap_p4d_supported(void) +{ + return 0; +} + int __init arch_ioremap_pud_supported(void) { #ifdef CONFIG_X86_64 diff --git a/include/linux/io.h b/include/linux/io.h index 32e30e8fb9db9..58514cebfce6e 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -45,6 +45,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end,
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP void __init ioremap_huge_init(void); +int arch_ioremap_p4d_supported(void); int arch_ioremap_pud_supported(void); int arch_ioremap_pmd_supported(void); #else diff --git a/lib/ioremap.c b/lib/ioremap.c index 063213685563c..c3dc213b69800 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -30,6 +30,8 @@ early_param("nohugeiomap", set_nohugeiomap); void __init ioremap_huge_init(void) { if (!ioremap_huge_disabled) { + if (arch_ioremap_p4d_supported()) + ioremap_p4d_capable = 1; if (arch_ioremap_pud_supported()) ioremap_pud_capable = 1; if (arch_ioremap_pmd_supported())
From: Mike Rapoport rppt@linux.ibm.com
mainline inclusion from mainline-v5.9-rc1 commit ab05eabfa18a6a537a73408642177e4a803317dc category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
---------------------------
The functionality in lib/ioremap.c deals with pagetables, vmalloc and caches, so it naturally belongs in mm/. Moving it there will also allow declaring the p?d_alloc_track functions in a header file inside mm/ rather than having those declarations in include/linux/mm.h.
Suggested-by: Andrew Morton akpm@linux-foundation.org Signed-off-by: Mike Rapoport rppt@linux.ibm.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Reviewed-by: Pekka Enberg penberg@kernel.org Cc: Abdul Haleem abdhalee@linux.vnet.ibm.com Cc: Andy Lutomirski luto@kernel.org Cc: Arnd Bergmann arnd@arndb.de Cc: Christophe Leroy christophe.leroy@csgroup.eu Cc: Joerg Roedel joro@8bytes.org Cc: Joerg Roedel jroedel@suse.de Cc: Max Filippov jcmvbkbc@gmail.com Cc: Peter Zijlstra (Intel) peterz@infradead.org Cc: Satheesh Rajendran sathnaga@linux.vnet.ibm.com Cc: Stafford Horne shorne@gmail.com Cc: Stephen Rothwell sfr@canb.auug.org.au Cc: Steven Rostedt rostedt@goodmis.org Cc: Geert Uytterhoeven geert@linux-m68k.org Cc: Matthew Wilcox willy@infradead.org Link: http://lkml.kernel.org/r/20200627143453.31835-8-rppt@kernel.org Signed-off-by: Linus Torvalds torvalds@linux-foundation.org Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- lib/Makefile | 1 - mm/Makefile | 2 +- {lib => mm}/ioremap.c | 0 3 files changed, 1 insertion(+), 2 deletions(-) rename {lib => mm}/ioremap.c (100%)
diff --git a/lib/Makefile b/lib/Makefile index 01f8051b483d5..e33238bff52e3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -38,7 +38,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ nmi_backtrace.o nodemask.o win_minmax.o
lib-$(CONFIG_PRINTK) += dump_stack.o -lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o
lib-y += kobject.o klist.o diff --git a/mm/Makefile b/mm/Makefile index bf2e26994c820..876359db8b05f 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -25,7 +25,7 @@ mmu-y := nommu.o mmu-$(CONFIG_MMU) := gup.o highmem.o memory.o mincore.o \ mlock.o mmap.o mmu_gather.o mprotect.o mremap.o \ msync.o page_vma_mapped.o pagewalk.o \ - pgtable-generic.o rmap.o vmalloc.o + pgtable-generic.o rmap.o vmalloc.o ioremap.o
ifdef CONFIG_CROSS_MEMORY_ATTACH diff --git a/lib/ioremap.c b/mm/ioremap.c similarity index 100% rename from lib/ioremap.c rename to mm/ioremap.c
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-2-npiggin@gmail.com/ --------------
vmalloc_to_page returns NULL for addresses mapped by larger pages[*]. Whether or not a vmap is huge depends on architecture details, alignments, boot options, etc., which the caller cannot be expected to know. Therefore HUGE_VMAP is a regression for vmalloc_to_page.
This change teaches vmalloc_to_page about larger pages, and returns the struct page that corresponds to the offset within the large page. This makes the API agnostic to mapping implementation details.
[*] As explained by commit 029c54b095995 ("mm/vmalloc.c: huge-vmap: fail gracefully on unexpected huge vmap mappings")
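A hedged usage sketch (the helper name is invented): a caller that walks a vmalloc'ed buffer one PAGE_SIZE step at a time now gets a valid struct page for every step, whether it is backed by a small pte or falls inside a PMD- or PUD-sized block mapping.

#include <linux/mm.h>
#include <linux/vmalloc.h>

/* Return the struct page backing the n-th PAGE_SIZE chunk of vaddr. */
static struct page *example_nth_page(const void *vaddr, unsigned long n)
{
	return vmalloc_to_page((const char *)vaddr + n * PAGE_SIZE);
}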
Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 9d205edc4f984..e71195027a8d4 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -37,6 +37,7 @@ #include <linux/uaccess.h> #include <asm/tlbflush.h> #include <asm/shmparam.h> +#include <asm/pgtable.h>
#include "internal.h"
@@ -289,7 +290,9 @@ int is_vmalloc_or_module_addr(const void *x) }
/* - * Walk a vmap address to the struct page it maps. + * Walk a vmap address to the struct page it maps. Huge vmap mappings will + * return the tail page that corresponds to the base page address, which + * matches small vmap mappings. */ struct page *vmalloc_to_page(const void *vmalloc_addr) { @@ -309,25 +312,33 @@ struct page *vmalloc_to_page(const void *vmalloc_addr)
if (pgd_none(*pgd)) return NULL; + if (WARN_ON_ONCE(pgd_leaf(*pgd))) + return NULL; /* XXX: no allowance for huge pgd */ + if (WARN_ON_ONCE(pgd_bad(*pgd))) + return NULL; + p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) return NULL; - pud = pud_offset(p4d, addr); + if (p4d_leaf(*p4d)) + return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT); + if (WARN_ON_ONCE(p4d_bad(*p4d))) + return NULL;
- /* - * Don't dereference bad PUD or PMD (below) entries. This will also - * identify huge mappings, which we may encounter on architectures - * that define CONFIG_HAVE_ARCH_HUGE_VMAP=y. Such regions will be - * identified as vmalloc addresses by is_vmalloc_addr(), but are - * not [unambiguously] associated with a struct page, so there is - * no correct value to return for them. - */ - WARN_ON_ONCE(pud_bad(*pud)); - if (pud_none(*pud) || pud_bad(*pud)) + pud = pud_offset(p4d, addr); + if (pud_none(*pud)) + return NULL; + if (pud_leaf(*pud)) + return pud_page(*pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + if (WARN_ON_ONCE(pud_bad(*pud))) return NULL; + pmd = pmd_offset(pud, addr); - WARN_ON_ONCE(pmd_bad(*pmd)); - if (pmd_none(*pmd) || pmd_bad(*pmd)) + if (pmd_none(*pmd)) + return NULL; + if (pmd_leaf(*pmd)) + return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + if (WARN_ON_ONCE(pmd_bad(*pmd))) return NULL;
ptep = pte_offset_map(pmd, addr); @@ -335,6 +346,7 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) if (pte_present(pte)) page = pte_page(pte); pte_unmap(ptep); + return page; } EXPORT_SYMBOL(vmalloc_to_page);
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-3-npiggin@gmail.com/ --------------
apply_to_pte_range might mistake a large pte for bad, or treat it as a page table, resulting in a crash or corruption. Add a test to warn and return an error if large entries are found.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/memory.c | 60 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 44 insertions(+), 16 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c index 8365bcac60bae..56e57897d565f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2026,13 +2026,20 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, } do { next = pmd_addr_end(addr, end); - if (create || !pmd_none_or_clear_bad(pmd)) { - err = apply_to_pte_range(mm, pmd, addr, next, fn, data, - create); - if (err) - break; + if (pmd_none(*pmd) && !create) + continue; + if (WARN_ON_ONCE(pmd_leaf(*pmd))) + return -EINVAL; + if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) { + if (!create) + continue; + pmd_clear_bad(pmd); } + err = apply_to_pte_range(mm, pmd, addr, next, fn, data, create); + if (err) + break; } while (pmd++, addr = next, addr != end); + return err; }
@@ -2053,13 +2060,20 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, } do { next = pud_addr_end(addr, end); - if (create || !pud_none_or_clear_bad(pud)) { - err = apply_to_pmd_range(mm, pud, addr, next, fn, data, - create); - if (err) - break; + if (pud_none(*pud) && !create) + continue; + if (WARN_ON_ONCE(pud_leaf(*pud))) + return -EINVAL; + if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) { + if (!create) + continue; + pud_clear_bad(pud); } + err = apply_to_pmd_range(mm, pud, addr, next, fn, data, create); + if (err) + break; } while (pud++, addr = next, addr != end); + return err; }
@@ -2080,13 +2094,20 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, } do { next = p4d_addr_end(addr, end); - if (create || !p4d_none_or_clear_bad(p4d)) { - err = apply_to_pud_range(mm, p4d, addr, next, fn, data, - create); - if (err) - break; + if (p4d_none(*p4d) && !create) + continue; + if (WARN_ON_ONCE(p4d_leaf(*p4d))) + return -EINVAL; + if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) { + if (!create) + continue; + p4d_clear_bad(p4d); } + err = apply_to_pud_range(mm, p4d, addr, next, fn, data, create); + if (err) + break; } while (p4d++, addr = next, addr != end); + return err; }
@@ -2105,8 +2126,15 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - if (!create && pgd_none_or_clear_bad(pgd)) + if (pgd_none(*pgd) && !create) continue; + if (WARN_ON_ONCE(pgd_leaf(*pgd))) + return -EINVAL; + if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { + if (!create) + continue; + pgd_clear_bad(pgd); + } err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create); if (err) break;
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-4-npiggin@gmail.com/ --------------
The vmalloc mapper operates on a struct page * array rather than a linear physical address; rename its helpers to vmap_pages_*_range() to make this distinction clear.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index e71195027a8d4..6ed7f5f9d742f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -148,7 +148,7 @@ void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) } while (pgd++, addr = next, addr != end); }
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr, +static int vmap_pages_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pte_t *pte; @@ -174,7 +174,7 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, return 0; }
-static int vmap_pmd_range(pud_t *pud, unsigned long addr, +static int vmap_pages_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pmd_t *pmd; @@ -185,13 +185,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, return -ENOMEM; do { next = pmd_addr_end(addr, end); - if (vmap_pte_range(pmd, addr, next, prot, pages, nr)) + if (vmap_pages_pte_range(pmd, addr, next, prot, pages, nr)) return -ENOMEM; } while (pmd++, addr = next, addr != end); return 0; }
-static int vmap_pud_range(p4d_t *p4d, unsigned long addr, +static int vmap_pages_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { pud_t *pud; @@ -202,13 +202,13 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr, return -ENOMEM; do { next = pud_addr_end(addr, end); - if (vmap_pmd_range(pud, addr, next, prot, pages, nr)) + if (vmap_pages_pmd_range(pud, addr, next, prot, pages, nr)) return -ENOMEM; } while (pud++, addr = next, addr != end); return 0; }
-static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, +static int vmap_pages_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, int *nr) { p4d_t *p4d; @@ -219,7 +219,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, return -ENOMEM; do { next = p4d_addr_end(addr, end); - if (vmap_pud_range(p4d, addr, next, prot, pages, nr)) + if (vmap_pages_pud_range(p4d, addr, next, prot, pages, nr)) return -ENOMEM; } while (p4d++, addr = next, addr != end); return 0; @@ -256,7 +256,7 @@ int map_kernel_range_noflush(unsigned long addr, unsigned long size, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = vmap_p4d_range(pgd, addr, next, prot, pages, &nr); + err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr); if (err) return err; } while (pgd++, addr = next, addr != end);
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-5-npiggin@gmail.com/ --------------
This code will be used as a generic kernel virtual mapping facility, so rename the ioremap_*_range() helpers to vmap_*_range() in preparation.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/ioremap.c | 51 +++++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/mm/ioremap.c b/mm/ioremap.c index c3dc213b69800..0297d2e840327 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -60,7 +60,7 @@ static inline int ioremap_pud_enabled(void) { return 0; } static inline int ioremap_pmd_enabled(void) { return 0; } #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, +static int vmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pte_t *pte; @@ -78,9 +78,8 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, return 0; }
-static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) +static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) { if (!ioremap_pmd_enabled()) return 0; @@ -97,7 +96,7 @@ static int ioremap_try_huge_pmd(pmd_t *pmd, unsigned long addr, return pmd_set_huge(pmd, phys_addr, prot); }
-static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, +static int vmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { pmd_t *pmd; @@ -109,18 +108,17 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end);
- if (ioremap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) + if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) continue;
- if (ioremap_pte_range(pmd, addr, next, phys_addr, prot)) + if (vmap_pte_range(pmd, addr, next, phys_addr, prot)) return -ENOMEM; } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
-static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) +static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) { if (!ioremap_pud_enabled()) return 0; @@ -137,8 +135,8 @@ static int ioremap_try_huge_pud(pud_t *pud, unsigned long addr, return pud_set_huge(pud, phys_addr, prot); }
-static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) { pud_t *pud; unsigned long next; @@ -149,18 +147,17 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, do { next = pud_addr_end(addr, end);
- if (ioremap_try_huge_pud(pud, addr, next, phys_addr, prot)) + if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot)) continue;
- if (ioremap_pmd_range(pud, addr, next, phys_addr, prot)) + if (vmap_pmd_range(pud, addr, next, phys_addr, prot)) return -ENOMEM; } while (pud++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
-static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, - pgprot_t prot) +static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) { if (!ioremap_p4d_enabled()) return 0; @@ -177,8 +174,8 @@ static int ioremap_try_huge_p4d(p4d_t *p4d, unsigned long addr, return p4d_set_huge(p4d, phys_addr, prot); }
-static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) { p4d_t *p4d; unsigned long next; @@ -189,17 +186,17 @@ static inline int ioremap_p4d_range(pgd_t *pgd, unsigned long addr, do { next = p4d_addr_end(addr, end);
- if (ioremap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) + if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) continue;
- if (ioremap_pud_range(p4d, addr, next, phys_addr, prot)) + if (vmap_pud_range(p4d, addr, next, phys_addr, prot)) return -ENOMEM; } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
-int ioremap_page_range(unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +static int vmap_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot) { pgd_t *pgd; unsigned long start; @@ -213,7 +210,7 @@ int ioremap_page_range(unsigned long addr, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = ioremap_p4d_range(pgd, addr, next, phys_addr, prot); + err = vmap_p4d_range(pgd, addr, next, phys_addr, prot); if (err) break; } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); @@ -222,3 +219,9 @@ int ioremap_page_range(unsigned long addr,
return err; } + +int ioremap_page_range(unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + return vmap_range(addr, end, phys_addr, prot); +}
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-6-npiggin@gmail.com/ --------------
This changes the awkward approach where architectures provide init functions to determine which levels they can provide large mappings for, to one where the arch is queried for each call.
This removes code and indirection, and allows constant-folding of dead code for unsupported levels.
This also adds a prot argument to the arch query. This is unused currently but could help with some architectures (e.g., some powerpc processors can't map uncacheable memory with large pages).
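A purely hypothetical sketch of how the prot argument could eventually be used; _ARCH_NONCACHEABLE below is an invented bit rather than a real define, and no architecture in this series inspects prot yet:

#include <linux/types.h>
#include <asm/pgtable.h>

/* Hypothetical arch implementation: refuse huge PMDs for uncached prot. */
bool arch_vmap_pmd_supported(pgprot_t prot)
{
	if (pgprot_val(prot) & _ARCH_NONCACHEABLE)	/* invented bit */
		return false;
	return true;
}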
Cc: linuxppc-dev@lists.ozlabs.org Cc: Catalin Marinas catalin.marinas@arm.com Cc: Will Deacon will@kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: Thomas Gleixner tglx@linutronix.de Cc: Ingo Molnar mingo@redhat.com Cc: Borislav Petkov bp@alien8.de Cc: x86@kernel.org Cc: "H. Peter Anvin" hpa@zytor.com Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/vmalloc.h | 8 +++ arch/arm64/mm/mmu.c | 10 ++-- arch/powerpc/include/asm/vmalloc.h | 8 +++ arch/x86/include/asm/vmalloc.h | 8 +++ arch/x86/mm/ioremap.c | 10 ++-- include/linux/io.h | 9 --- include/linux/vmalloc.h | 6 ++ init/main.c | 1 - mm/ioremap.c | 91 ++++++++++++------------------ 9 files changed, 77 insertions(+), 74 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 2ca708ab9b20b..597b40405319d 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -1,4 +1,12 @@ #ifndef _ASM_ARM64_VMALLOC_H #define _ASM_ARM64_VMALLOC_H
+#include <asm/page.h> + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +bool arch_vmap_p4d_supported(pgprot_t prot); +bool arch_vmap_pud_supported(pgprot_t prot); +bool arch_vmap_pmd_supported(pgprot_t prot); +#endif + #endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 437ee4ab41a19..cc44645a997bc 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -921,12 +921,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) return dt_virt; }
-int __init arch_ioremap_p4d_supported(void) +bool arch_vmap_p4d_supported(pgprot_t prot) { - return 0; + return false; }
-int __init arch_ioremap_pud_supported(void) +bool arch_vmap_pud_supported(pgprot_t prot) { /* * Only 4k granule supports level 1 block mappings. @@ -936,9 +936,9 @@ int __init arch_ioremap_pud_supported(void) !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); }
-int __init arch_ioremap_pmd_supported(void) +bool arch_vmap_pmd_supported(pgprot_t prot) { - /* See arch_ioremap_pud_supported() */ + /* See arch_vmap_pud_supported() */ return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); }
diff --git a/arch/powerpc/include/asm/vmalloc.h b/arch/powerpc/include/asm/vmalloc.h index b992dfaaa1618..105abb73f075c 100644 --- a/arch/powerpc/include/asm/vmalloc.h +++ b/arch/powerpc/include/asm/vmalloc.h @@ -1,4 +1,12 @@ #ifndef _ASM_POWERPC_VMALLOC_H #define _ASM_POWERPC_VMALLOC_H
+#include <asm/page.h> + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +bool arch_vmap_p4d_supported(pgprot_t prot); +bool arch_vmap_pud_supported(pgprot_t prot); +bool arch_vmap_pmd_supported(pgprot_t prot); +#endif + #endif /* _ASM_POWERPC_VMALLOC_H */ diff --git a/arch/x86/include/asm/vmalloc.h b/arch/x86/include/asm/vmalloc.h index ba6295fa58762..17864fb62e4b3 100644 --- a/arch/x86/include/asm/vmalloc.h +++ b/arch/x86/include/asm/vmalloc.h @@ -1,4 +1,12 @@ #ifndef _ASM_X86_VMALLOC_H #define _ASM_X86_VMALLOC_H
+#include <asm/page.h> + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +bool arch_vmap_p4d_supported(pgprot_t prot); +bool arch_vmap_pud_supported(pgprot_t prot); +bool arch_vmap_pmd_supported(pgprot_t prot); +#endif + #endif /* _ASM_X86_VMALLOC_H */ diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6ce8a2b593e09..944de9aaa0cd5 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -431,21 +431,21 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap);
-int __init arch_ioremap_p4d_supported(void) +bool arch_vmap_p4d_supported(pgprot_t prot) { - return 0; + return false; }
-int __init arch_ioremap_pud_supported(void) +bool arch_vmap_pud_supported(pgprot_t prot) { #ifdef CONFIG_X86_64 return boot_cpu_has(X86_FEATURE_GBPAGES); #else - return 0; + return false; #endif }
-int __init arch_ioremap_pmd_supported(void) +bool arch_vmap_pmd_supported(pgprot_t prot) { return boot_cpu_has(X86_FEATURE_PSE); } diff --git a/include/linux/io.h b/include/linux/io.h index 58514cebfce6e..890c1913969da 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -43,15 +43,6 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, } #endif
-#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -void __init ioremap_huge_init(void); -int arch_ioremap_p4d_supported(void); -int arch_ioremap_pud_supported(void); -int arch_ioremap_pmd_supported(void); -#else -static inline void ioremap_huge_init(void) { } -#endif - /* * Managed iomap interface */ diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 086013413a1c3..6cbbebc97ea89 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -59,6 +59,12 @@ struct vmap_area { struct vm_struct *vm; };
+#ifndef CONFIG_HAVE_ARCH_HUGE_VMAP +static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; } +static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; } +static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; } +#endif + /* * Highlevel APIs for driver use */ diff --git a/init/main.c b/init/main.c index 664f8a3d6c073..4e041fc2a689c 100644 --- a/init/main.c +++ b/init/main.c @@ -521,7 +521,6 @@ static void __init mm_init(void) kmem_cache_init(); pgtable_init(); vmalloc_init(); - ioremap_huge_init(); /* Should be run before the first non-init thread is created */ init_espfix_bsp(); /* Should be run after espfix64 is set up. */ diff --git a/mm/ioremap.c b/mm/ioremap.c index 0297d2e840327..e4c5044b1ab21 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -15,49 +15,16 @@ #include <asm/pgtable.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -static int __read_mostly ioremap_p4d_capable; -static int __read_mostly ioremap_pud_capable; -static int __read_mostly ioremap_pmd_capable; -static int __read_mostly ioremap_huge_disabled; +static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
static int __init set_nohugeiomap(char *str) { - ioremap_huge_disabled = 1; + iomap_max_page_shift = P4D_SHIFT; return 0; } early_param("nohugeiomap", set_nohugeiomap); - -void __init ioremap_huge_init(void) -{ - if (!ioremap_huge_disabled) { - if (arch_ioremap_p4d_supported()) - ioremap_p4d_capable = 1; - if (arch_ioremap_pud_supported()) - ioremap_pud_capable = 1; - if (arch_ioremap_pmd_supported()) - ioremap_pmd_capable = 1; - } -} - -static inline int ioremap_p4d_enabled(void) -{ - return ioremap_p4d_capable; -} - -static inline int ioremap_pud_enabled(void) -{ - return ioremap_pud_capable; -} - -static inline int ioremap_pmd_enabled(void) -{ - return ioremap_pmd_capable; -} - -#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ -static inline int ioremap_p4d_enabled(void) { return 0; } -static inline int ioremap_pud_enabled(void) { return 0; } -static inline int ioremap_pmd_enabled(void) { return 0; } +#else /* CONFIG_HAVE_ARCH_HUGE_VMAP */ +static const bool iomap_max_page_shift = PAGE_SHIFT; #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
static int vmap_pte_range(pmd_t *pmd, unsigned long addr, @@ -79,9 +46,13 @@ static int vmap_pte_range(pmd_t *pmd, unsigned long addr, }
static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot) + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { - if (!ioremap_pmd_enabled()) + if (max_page_shift < PMD_SHIFT) + return 0; + + if (!arch_vmap_pmd_supported(prot)) return 0;
if ((end - addr) != PMD_SIZE) @@ -97,7 +68,8 @@ static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, }
static int vmap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { pmd_t *pmd; unsigned long next; @@ -108,7 +80,7 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end);
- if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot)) + if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) continue;
if (vmap_pte_range(pmd, addr, next, phys_addr, prot)) @@ -118,9 +90,13 @@ static int vmap_pmd_range(pud_t *pud, unsigned long addr, }
static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot) + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { - if (!ioremap_pud_enabled()) + if (max_page_shift < PUD_SHIFT) + return 0; + + if (!arch_vmap_pud_supported(prot)) return 0;
if ((end - addr) != PUD_SIZE) @@ -136,7 +112,8 @@ static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, }
static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot) + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { pud_t *pud; unsigned long next; @@ -147,19 +124,23 @@ static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, do { next = pud_addr_end(addr, end);
- if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot)) + if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) continue;
- if (vmap_pmd_range(pud, addr, next, phys_addr, prot)) + if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift)) return -ENOMEM; } while (pud++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot) + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { - if (!ioremap_p4d_enabled()) + if (max_page_shift < P4D_SHIFT) + return 0; + + if (!arch_vmap_p4d_supported(prot)) return 0;
if ((end - addr) != P4D_SIZE) @@ -175,7 +156,8 @@ static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, }
static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot) + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { p4d_t *p4d; unsigned long next; @@ -186,17 +168,18 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, do { next = p4d_addr_end(addr, end);
- if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot)) + if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) continue;
- if (vmap_pud_range(p4d, addr, next, phys_addr, prot)) + if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift)) return -ENOMEM; } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); return 0; }
static int vmap_range(unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot) + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) { pgd_t *pgd; unsigned long start; @@ -210,7 +193,7 @@ static int vmap_range(unsigned long addr, unsigned long end, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - err = vmap_p4d_range(pgd, addr, next, phys_addr, prot); + err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift); if (err) break; } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); @@ -223,5 +206,5 @@ static int vmap_range(unsigned long addr, unsigned long end, int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { - return vmap_range(addr, end, phys_addr, prot); + return vmap_range(addr, end, phys_addr, prot, iomap_max_page_shift); }
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-8-npiggin@gmail.com/ --------------
This allows unsupported levels to be constant-folded away, and p4d_free_pud_page() can be removed because nothing references it any more.
Cc: Catalin Marinas catalin.marinas@arm.com Cc: Will Deacon will@kernel.org Cc: linux-arm-kernel@lists.infradead.org Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/include/asm/vmalloc.h | 23 ++++++++++++++++++++--- arch/arm64/mm/mmu.c | 26 -------------------------- 2 files changed, 20 insertions(+), 29 deletions(-)
diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h index 597b40405319d..fc9a12d6cc1ac 100644 --- a/arch/arm64/include/asm/vmalloc.h +++ b/arch/arm64/include/asm/vmalloc.h @@ -4,9 +4,26 @@ #include <asm/page.h>
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP -bool arch_vmap_p4d_supported(pgprot_t prot); -bool arch_vmap_pud_supported(pgprot_t prot); -bool arch_vmap_pmd_supported(pgprot_t prot); +static inline bool arch_vmap_p4d_supported(pgprot_t prot) +{ + return false; +} + +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + /* + * Only 4k granule supports level 1 block mappings. + * SW table walks can't handle removal of intermediate entries. + */ + return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && + !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); +} + +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + /* See arch_vmap_pud_supported() */ + return !IS_ENABLED(CONFIG_PTDUMP_DEBUGFS); +} #endif
#endif /* _ASM_ARM64_VMALLOC_H */ diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index cc44645a997bc..300972b01939b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -921,27 +921,6 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys) return dt_virt; }
-bool arch_vmap_p4d_supported(pgprot_t prot) -{ - return false; -} - -bool arch_vmap_pud_supported(pgprot_t prot) -{ - /* - * Only 4k granule supports level 1 block mappings. - * SW table walks can't handle removal of intermediate entries. - */ - return IS_ENABLED(CONFIG_ARM64_4K_PAGES) && - !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); -} - -bool arch_vmap_pmd_supported(pgprot_t prot) -{ - /* See arch_vmap_pud_supported() */ - return !IS_ENABLED(CONFIG_ARM64_PTDUMP_DEBUGFS); -} - int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | @@ -1041,11 +1020,6 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr) return 1; }
-int p4d_free_pud_page(p4d_t *p4d, unsigned long addr) -{ - return 0; /* Don't attempt a block mapping */ -} - #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap, bool want_memblock)
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-10-npiggin@gmail.com/ --------------
vmap_range() is a generic kernel virtual memory mapper, not specific to ioremap; move it from mm/ioremap.c to mm/vmalloc.c.
Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/vmalloc.h | 3 + mm/ioremap.c | 176 ---------------------------------------- mm/vmalloc.c | 175 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 178 insertions(+), 176 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 6cbbebc97ea89..496ac80046c01 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -147,6 +147,9 @@ extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr);
#ifdef CONFIG_MMU +int vmap_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift); extern int map_kernel_range_noflush(unsigned long start, unsigned long size, pgprot_t prot, struct page **pages); int map_kernel_range(unsigned long start, unsigned long size, pgprot_t prot, diff --git a/mm/ioremap.c b/mm/ioremap.c index e4c5044b1ab21..8b1905bf03d26 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -27,182 +27,6 @@ early_param("nohugeiomap", set_nohugeiomap); static const bool iomap_max_page_shift = PAGE_SHIFT; #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static int vmap_pte_range(pmd_t *pmd, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot) -{ - pte_t *pte; - u64 pfn; - - pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(pmd, addr); - if (!pte) - return -ENOMEM; - do { - BUG_ON(!pte_none(*pte)); - set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); - pfn++; - } while (pte++, addr += PAGE_SIZE, addr != end); - return 0; -} - -static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - if (max_page_shift < PMD_SHIFT) - return 0; - - if (!arch_vmap_pmd_supported(prot)) - return 0; - - if ((end - addr) != PMD_SIZE) - return 0; - - if (!IS_ALIGNED(phys_addr, PMD_SIZE)) - return 0; - - if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) - return 0; - - return pmd_set_huge(pmd, phys_addr, prot); -} - -static int vmap_pmd_range(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - pmd_t *pmd; - unsigned long next; - - pmd = pmd_alloc(&init_mm, pud, addr); - if (!pmd) - return -ENOMEM; - do { - next = pmd_addr_end(addr, end); - - if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) - continue; - - if (vmap_pte_range(pmd, addr, next, phys_addr, prot)) - return -ENOMEM; - } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - if (max_page_shift < PUD_SHIFT) - return 0; - - if (!arch_vmap_pud_supported(prot)) - return 0; - - if ((end - addr) != PUD_SIZE) - return 0; - - if (!IS_ALIGNED(phys_addr, PUD_SIZE)) - return 0; - - if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) - return 0; - - return pud_set_huge(pud, phys_addr, prot); -} - -static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - pud_t *pud; - unsigned long next; - - pud = pud_alloc(&init_mm, p4d, addr); - if (!pud) - return -ENOMEM; - do { - next = pud_addr_end(addr, end); - - if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) - continue; - - if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift)) - return -ENOMEM; - } while (pud++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - if (max_page_shift < P4D_SHIFT) - return 0; - - if (!arch_vmap_p4d_supported(prot)) - return 0; - - if ((end - addr) != P4D_SIZE) - return 0; - - if (!IS_ALIGNED(phys_addr, P4D_SIZE)) - return 0; - - if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) - return 0; - - return p4d_set_huge(p4d, phys_addr, prot); -} - -static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - p4d_t *p4d; - unsigned long next; - - p4d = p4d_alloc(&init_mm, pgd, addr); - if (!p4d) - return -ENOMEM; - do { - next = p4d_addr_end(addr, end); - - if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) - continue; - - if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift)) - return -ENOMEM; - } while (p4d++, phys_addr += (next - addr), addr = next, addr != end); - return 0; -} - -static 
int vmap_range(unsigned long addr, unsigned long end, - phys_addr_t phys_addr, pgprot_t prot, - unsigned int max_page_shift) -{ - pgd_t *pgd; - unsigned long start; - unsigned long next; - int err; - - might_sleep(); - BUG_ON(addr >= end); - - start = addr; - pgd = pgd_offset_k(addr); - do { - next = pgd_addr_end(addr, end); - err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift); - if (err) - break; - } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); - - flush_cache_vmap(start, end); - - return err; -} - int ioremap_page_range(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6ed7f5f9d742f..5c7340299b27b 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -59,6 +59,181 @@ static void free_work(struct work_struct *w) }
/*** Page table manipulation functions ***/ +static int vmap_pte_range(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot) +{ + pte_t *pte; + u64 pfn; + + pfn = phys_addr >> PAGE_SHIFT; + pte = pte_alloc_kernel(pmd, addr); + if (!pte) + return -ENOMEM; + do { + BUG_ON(!pte_none(*pte)); + set_pte_at(&init_mm, addr, pte, pfn_pte(pfn, prot)); + pfn++; + } while (pte++, addr += PAGE_SIZE, addr != end); + return 0; +} + +static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + if (max_page_shift < PMD_SHIFT) + return 0; + + if (!arch_vmap_pmd_supported(prot)) + return 0; + + if ((end - addr) != PMD_SIZE) + return 0; + + if (!IS_ALIGNED(phys_addr, PMD_SIZE)) + return 0; + + if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) + return 0; + + return pmd_set_huge(pmd, phys_addr, prot); +} + +static int vmap_pmd_range(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + pmd_t *pmd; + unsigned long next; + + pmd = pmd_alloc(&init_mm, pud, addr); + if (!pmd) + return -ENOMEM; + do { + next = pmd_addr_end(addr, end); + + if (vmap_try_huge_pmd(pmd, addr, next, phys_addr, prot, max_page_shift)) + continue; + + if (vmap_pte_range(pmd, addr, next, phys_addr, prot)) + return -ENOMEM; + } while (pmd++, phys_addr += (next - addr), addr = next, addr != end); + return 0; +} + +static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + if (max_page_shift < PUD_SHIFT) + return 0; + + if (!arch_vmap_pud_supported(prot)) + return 0; + + if ((end - addr) != PUD_SIZE) + return 0; + + if (!IS_ALIGNED(phys_addr, PUD_SIZE)) + return 0; + + if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) + return 0; + + return pud_set_huge(pud, phys_addr, prot); +} + +static int vmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + pud_t *pud; + unsigned long next; + + pud = pud_alloc(&init_mm, p4d, addr); + if (!pud) + return -ENOMEM; + do { + next = pud_addr_end(addr, end); + + if (vmap_try_huge_pud(pud, addr, next, phys_addr, prot, max_page_shift)) + continue; + + if (vmap_pmd_range(pud, addr, next, phys_addr, prot, max_page_shift)) + return -ENOMEM; + } while (pud++, phys_addr += (next - addr), addr = next, addr != end); + return 0; +} + +static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + if (max_page_shift < P4D_SHIFT) + return 0; + + if (!arch_vmap_p4d_supported(prot)) + return 0; + + if ((end - addr) != P4D_SIZE) + return 0; + + if (!IS_ALIGNED(phys_addr, P4D_SIZE)) + return 0; + + if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) + return 0; + + return p4d_set_huge(p4d, phys_addr, prot); +} + +static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + p4d_t *p4d; + unsigned long next; + + p4d = p4d_alloc(&init_mm, pgd, addr); + if (!p4d) + return -ENOMEM; + do { + next = p4d_addr_end(addr, end); + + if (vmap_try_huge_p4d(p4d, addr, next, phys_addr, prot, max_page_shift)) + continue; + + if (vmap_pud_range(p4d, addr, next, phys_addr, prot, max_page_shift)) + return -ENOMEM; + } while (p4d++, phys_addr += (next - addr), addr = 
next, addr != end); + return 0; +} + +int vmap_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + pgd_t *pgd; + unsigned long start; + unsigned long next; + int err; + + might_sleep(); + BUG_ON(addr >= end); + + start = addr; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, end); + err = vmap_p4d_range(pgd, addr, next, phys_addr, prot, max_page_shift); + if (err) + break; + } while (pgd++, phys_addr += (next - addr), addr = next, addr != end); + + flush_cache_vmap(start, end); + + return err; +}
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) {
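As an aside (illustration only, not from the patch): the max_page_shift argument is what bounds the largest entry the walk may install. A hypothetical helper mapping one PMD_SIZE-aligned region shows the two extremes:

	/*
	 * Hypothetical helper: 'addr' and 'phys' are assumed PMD_SIZE-aligned.
	 * With allow_huge the walk may install a single PMD block entry
	 * (subject to arch_vmap_pmd_supported()); capping max_page_shift at
	 * PAGE_SHIFT forces base-page PTEs instead.
	 */
	static int example_map_pmd_region(unsigned long addr, phys_addr_t phys,
					  bool allow_huge)
	{
		unsigned int shift = allow_huge ? PMD_SHIFT : PAGE_SHIFT;

		return vmap_range(addr, addr + PMD_SIZE, phys, PAGE_KERNEL, shift);
	}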
From: Nicholas Piggin npiggin@gmail.com
ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4EUVI CVE: NA
https://lwn.net/ml/linux-kernel/20200825145753.529284-11-npiggin@gmail.com/ --------------
Split vmap_range() into a static vmap_range_noflush() helper and a vmap_range() wrapper that performs the cache flush, so the flush can be issued separately from the page-table population. As a side-effect, the order of the flush_cache_vmap() and arch_sync_kernel_mappings() calls is switched, but that now matches the other callers in this file.
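A rough sketch of the pattern this enables (hypothetical code, not part of the patch; such a caller would have to live in mm/vmalloc.c, since vmap_range_noflush() is static there): build up a mapping from several physically discontiguous chunks and flush the cache once over the whole virtual range.

	/*
	 * Hypothetical: 'chunks' holds page-aligned physical addresses and
	 * 'chunk_size' is page-aligned; the chunks are mapped back to back
	 * starting at 'addr'.
	 */
	static int example_map_chunks(unsigned long addr, const phys_addr_t *chunks,
				      int nr, unsigned long chunk_size)
	{
		unsigned long start = addr;
		int i, err;

		for (i = 0; i < nr; i++) {
			err = vmap_range_noflush(addr, addr + chunk_size, chunks[i],
						 PAGE_KERNEL, PAGE_SHIFT);
			if (err)
				return err;
			addr += chunk_size;
		}

		flush_cache_vmap(start, addr);	/* one flush for the whole range */
		return 0;
	}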
Signed-off-by: Nicholas Piggin npiggin@gmail.com Signed-off-by: Rui Xiang rui.xiang@huawei.com Reviewed-by: Ding Tianhong dingtianhong@huawei.com Reviewed-by: Zefan Li lizefan@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- mm/vmalloc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5c7340299b27b..fc6394184a1ba 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -209,7 +209,7 @@ static int vmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, return 0; }
-int vmap_range(unsigned long addr, unsigned long end, +static int vmap_range_noflush(unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot, unsigned int max_page_shift) { @@ -230,7 +230,17 @@ int vmap_range(unsigned long addr, unsigned long end, break; } while (pgd++, phys_addr += (next - addr), addr = next, addr != end);
- flush_cache_vmap(start, end); + return err; +} + +int vmap_range(unsigned long addr, unsigned long end, + phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift) +{ + int err; + + err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift); + flush_cache_vmap(addr, end);
return err; }