From: Ding Tianhong <dingtianhong@huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
do_mmap/mmap_region/__mm_populate can only operate on the current process. The share pool now needs to create memory mappings on behalf of other processes, so export new variants that take the target mm/task explicitly. This does not change the existing logic and only takes effect for the share pool.
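As a rough sketch of the intended calling pattern (illustrative only, not part of this patch; the file, length, protection and flag values are placeholders and error handling is omitted), a share pool style caller would drive the new helpers against a target task's mm like this:

    struct mm_struct *mm = get_task_mm(tsk);
    unsigned long populate = 0;
    unsigned long addr;
    LIST_HEAD(uf);

    down_write(&mm->mmap_sem);
    addr = __do_mmap(mm, file, 0, len, PROT_READ | PROT_WRITE,
                     MAP_SHARED | MAP_POPULATE, 0, 0, &populate, &uf);
    up_write(&mm->mmap_sem);

    if (!IS_ERR_VALUE(addr) && populate)
        do_mm_populate(mm, addr, populate, 0);  /* fault in the new range */

    mmput(mm);

The mapping can later be torn down for that task with do_vm_munmap(tsk, addr, len).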
The share pool also needs to remap vmalloc pages into user space, so introduce hugetlb_insert_hugepage to support hugepage remapping.
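A minimal sketch of the remapping path (illustrative only; kva is assumed to point into a hugepage-backed vmalloc area and vma to be a suitably sized and aligned user VMA):

    struct page *hpage = vmalloc_to_hugepage(kva);
    int ret = -EINVAL;

    if (hpage)
        ret = hugetlb_insert_hugepage(vma, addr, hpage, vma->vm_page_prot);

In this patch the same path is reached through vm_insert_page(), which diverts to hugetlb_insert_hugepage() when sp_check_hugepage() recognises the page as a share pool huge page.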
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Signed-off-by: Li Ming <limingming.li@huawei.com>
Signed-off-by: Zefan Li <lizefan@huawei.com>
Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/hugetlb.h |  4 ++-
 include/linux/mm.h      | 25 +++++++++++++-
 mm/gup.c                | 28 ++++++++++-----
 mm/hugetlb.c            | 42 +++++++++++++++++++++++
 mm/memory.c             |  7 +++-
 mm/mmap.c               | 76 +++++++++++++++++++++++++++++++++++------
 6 files changed, 160 insertions(+), 22 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 870c29d8b8e4..bd658f44e133 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -382,8 +382,10 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 const struct hstate *hugetlb_get_hstate(void);
 struct page *hugetlb_alloc_hugepage(int nid);
 int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
-                               pgprot_t prot, struct page *hpage);
+                               pgprot_t prot, struct page *hpage);
 #endif
+int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
+                           struct page *hpage, pgprot_t prot);
 
 /* arch callback */
 int __init __alloc_bootmem_huge_page(struct hstate *h);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0ff3de89f897..35c2225c6e57 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -230,6 +230,10 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_MERGEABLE    0x80000000      /* KSM may merge identical pages */
 #define VM_PA32BIT      0x400000000     /* Physical address is within 4G */
 
+#ifdef CONFIG_ASCEND_SHARE_POOL
+#define VM_HUGE_SPECIAL 0x800000000     /* Special hugepage flag used by share pool */
+#endif
+
 #ifdef CONFIG_COHERENT_DEVICE
 #define VM_CDM          0x100000000     /* Contains coherent device memory */
 #endif
@@ -240,11 +244,13 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HIGH_ARCH_BIT_2      34      /* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_3      35      /* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_BIT_4      36      /* bit only usable on 64-bit architectures */
+#define VM_HIGH_ARCH_BIT_5      37      /* bit only usable on 64-bit architectures */
 #define VM_HIGH_ARCH_0  BIT(VM_HIGH_ARCH_BIT_0)
 #define VM_HIGH_ARCH_1  BIT(VM_HIGH_ARCH_BIT_1)
 #define VM_HIGH_ARCH_2  BIT(VM_HIGH_ARCH_BIT_2)
 #define VM_HIGH_ARCH_3  BIT(VM_HIGH_ARCH_BIT_3)
 #define VM_HIGH_ARCH_4  BIT(VM_HIGH_ARCH_BIT_4)
+#define VM_HIGH_ARCH_5  BIT(VM_HIGH_ARCH_BIT_5)
 #endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
@@ -260,6 +266,12 @@ extern unsigned int kobjsize(const void *objp);
 #endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
 
+#if defined(CONFIG_ASCEND_SHARE_POOL)
+# define VM_SHARE_POOL  VM_HIGH_ARCH_5
+#else
+# define VM_SHARE_POOL  VM_NONE
+#endif
+
 #if defined(CONFIG_X86)
 # define VM_PAT         VM_ARCH_1       /* PAT reserves whole VMA at once (x86) */
 #elif defined(CONFIG_PPC)
@@ -559,7 +571,7 @@ int region_intersects(resource_size_t offset, size_t size, unsigned long flags,
 /* Support for virtually mapped pages */
 struct page *vmalloc_to_page(const void *addr);
 unsigned long vmalloc_to_pfn(const void *addr);
-
+struct page *vmalloc_to_hugepage(const void *addr);
 /*
  * Determine if an address is within the vmalloc range
  *
@@ -2344,6 +2356,10 @@ extern unsigned long do_mmap(struct file *file, unsigned long addr,
        unsigned long len, unsigned long prot, unsigned long flags,
        vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate,
        struct list_head *uf);
+extern unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+       unsigned long addr, unsigned long len, unsigned long prot,
+       unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff,
+       unsigned long *populate, struct list_head *uf);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
                      struct list_head *uf);
 
@@ -2364,14 +2380,21 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
        /* Ignore errors */
        (void) __mm_populate(addr, len, 1);
 }
+extern int do_mm_populate(struct mm_struct *mm, unsigned long addr, unsigned long len,
+                          int ignore_errors);
 #else
 static inline void mm_populate(unsigned long addr, unsigned long len) {}
+int do_mm_populate(struct mm_struct *mm, unsigned long addr, unsigned long len,
+                   int ignore_errors)
+{
+}
 #endif
 
 /* These take the mm semaphore themselves */
 extern int __must_check vm_brk(unsigned long, unsigned long);
 extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
 extern int vm_munmap(unsigned long, size_t);
+extern int do_vm_munmap(struct task_struct *tsk, unsigned long start, size_t len);
 extern unsigned long __must_check vm_mmap(struct file *, unsigned long,
        unsigned long, unsigned long,
        unsigned long, unsigned long);
diff --git a/mm/gup.c b/mm/gup.c
index 3fc585282f24..707c374d4f6b 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -16,6 +16,7 @@
 #include <linux/migrate.h>
 #include <linux/mm_inline.h>
 #include <linux/sched/mm.h>
+#include <linux/share_pool.h>
 
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
@@ -1355,6 +1356,7 @@ long populate_vma_page_range(struct vm_area_struct *vma,
        struct mm_struct *mm = vma->vm_mm;
        unsigned long nr_pages = (end - start) / PAGE_SIZE;
        int gup_flags;
+       struct task_struct *tsk;
 
        VM_BUG_ON(start & ~PAGE_MASK);
        VM_BUG_ON(end & ~PAGE_MASK);
@@ -1380,24 +1382,22 @@ long populate_vma_page_range(struct vm_area_struct *vma,
        if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
                gup_flags |= FOLL_FORCE;
 
+       tsk = sp_get_task(mm);
        /*
         * We made sure addr is within a VMA, so the following will
         * not result in a stack expansion that recurses back here.
         */
-       return __get_user_pages(current, mm, start, nr_pages, gup_flags,
+       return __get_user_pages(tsk, mm, start, nr_pages, gup_flags,
                                NULL, NULL, nonblocking);
 }
 
 /*
- * __mm_populate - populate and/or mlock pages within a range of address space.
- *
- * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
- * flags.  VMAs must be already marked with the desired vm_flags, and
- * mmap_sem must not be held.
+ * do_mm_populate - populate and/or mlock pages within a range of
+ * address space for the specified mm_struct.
  */
-int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+int do_mm_populate(struct mm_struct *mm, unsigned long start, unsigned long len,
+                   int ignore_errors)
 {
-       struct mm_struct *mm = current->mm;
        unsigned long end, nstart, nend;
        struct vm_area_struct *vma = NULL;
        int locked = 0;
@@ -1448,6 +1448,18 @@ int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
        return ret;     /* 0 or negative error code */
 }
 
+/*
+ * __mm_populate - populate and/or mlock pages within a range of address space.
+ *
+ * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
+ * flags.  VMAs must be already marked with the desired vm_flags, and
+ * mmap_sem must not be held.
+ */
+int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
+{
+       return do_mm_populate(current->mm, start, len, ignore_errors);
+}
+
 /**
  * get_dump_page() - pin user page in memory while writing it to core dump
  * @addr: user address
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 225dcf7536ae..9d2035632aed 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -25,6 +25,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/jhash.h>
+#include <linux/share_pool.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -3961,6 +3962,12 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
        }
 
        page = alloc_huge_page(vma, haddr, 0);
+       if (IS_ERR(page) && sp_check_vm_share_pool(vma->vm_flags)) {
+               page = alloc_huge_page_node(hstate_file(vma->vm_file),
+                                           numa_mem_id());
+               if (!page)
+                       page = ERR_PTR(-ENOMEM);
+       }
        if (IS_ERR(page)) {
                /*
                 * Returning error will result in faulting task being
@@ -5265,6 +5272,41 @@ int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
 }
 EXPORT_SYMBOL_GPL(hugetlb_insert_hugepage_pte_by_pa);
 
+int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
+                           struct page *hpage, pgprot_t prot)
+{
+       struct hstate *h = hstate_vma(vma);
+       int anon_rmap = 0;
+       spinlock_t *ptl;
+       pte_t *ptep;
+       pte_t pte;
+       struct mm_struct *mm = vma->vm_mm;
+
+       ptep = hugetlb_huge_pte_alloc(mm, addr, huge_page_size(h));
+       if (!ptep)
+               return -ENXIO;
+
+       get_page(hpage);
+
+       ptl = huge_pte_lock(h, mm, ptep);
+       if (anon_rmap) {
+               ClearPagePrivate(hpage);
+               hugepage_add_new_anon_rmap(hpage, vma, addr);
+       } else {
+               page_dup_rmap(hpage, true);
+       }
+
+       pte = make_huge_pte(vma, hpage, ((vma->vm_flags & VM_WRITE)
+                               && (vma->vm_flags & VM_SHARED)));
+       set_huge_pte_at(mm, addr, ptep, pte);
+
+       hugetlb_count_add(pages_per_huge_page(h), mm);
+
+       spin_unlock(ptl);
+
+       return 0;
+}
+
 #ifdef CONFIG_ASCEND_CHARGE_MIGRATE_HUGEPAGES
 
 static int __init ascend_enable_charge_migrate_hugepages(char *s)
diff --git a/mm/memory.c b/mm/memory.c
index 7503203c8436..e369f3961ad2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -70,6 +70,7 @@
 #include <linux/dax.h>
 #include <linux/oom.h>
 #include <linux/ktask.h>
+#include <linux/share_pool.h>
 
 #include <asm/io.h>
 #include <asm/mmu_context.h>
@@ -1529,7 +1530,11 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
                BUG_ON(vma->vm_flags & VM_PFNMAP);
                vma->vm_flags |= VM_MIXEDMAP;
        }
-       return insert_page(vma, addr, page, vma->vm_page_prot);
+
+       if (sp_check_hugepage(page))
+               return hugetlb_insert_hugepage(vma, addr, page, vma->vm_page_prot);
+       else
+               return insert_page(vma, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 00702331afc1..e1069c42ec8e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -46,6 +46,7 @@
 #include <linux/pkeys.h>
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
+#include <linux/share_pool.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -178,6 +179,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
        if (vma->vm_file)
                fput(vma->vm_file);
        mpol_put(vma_policy(vma));
+       sp_area_drop(vma);
        vm_area_free(vma);
        return next;
 }
@@ -1119,6 +1121,10 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
        if (vm_flags & VM_SPECIAL)
                return NULL;
 
+       /* don't merge this kind of vma as sp_area couldn't be merged */
+       if (sp_check_vm_share_pool(vm_flags))
+               return NULL;
+
        if (prev)
                next = prev->vm_next;
        else
@@ -1373,16 +1379,20 @@ int unregister_mmap_notifier(struct notifier_block *nb)
 EXPORT_SYMBOL_GPL(unregister_mmap_notifier);
 #endif
 
+static unsigned long __mmap_region(struct mm_struct *mm,
+                                  struct file *file, unsigned long addr,
+                                  unsigned long len, vm_flags_t vm_flags,
+                                  unsigned long pgoff, struct list_head *uf);
+
 /*
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
-unsigned long do_mmap(struct file *file, unsigned long addr,
-                       unsigned long len, unsigned long prot,
-                       unsigned long flags, vm_flags_t vm_flags,
-                       unsigned long pgoff, unsigned long *populate,
-                       struct list_head *uf)
+unsigned long __do_mmap(struct mm_struct *mm, struct file *file,
+                       unsigned long addr, unsigned long len,
+                       unsigned long prot, unsigned long flags,
+                       vm_flags_t vm_flags, unsigned long pgoff,
+                       unsigned long *populate, struct list_head *uf)
 {
-       struct mm_struct *mm = current->mm;
        int pkey = 0;
 
        *populate = 0;
@@ -1407,6 +1417,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
        if (!(flags & MAP_FIXED))
                addr = round_hint_to_min(addr);
 
+       /* the MAP_DVPP couldn't work with MAP_SHARE_POOL */
+       if ((flags & MAP_DVPP) && sp_mmap_check(flags))
+               return -EINVAL;
+
        /* Careful about overflows.. */
        len = PAGE_ALIGN(len);
        if (!len)
@@ -1568,7 +1582,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
        if (is_set_cdmmask())
                vm_flags |= ((numanode << CHECKNODE_BITS) & CHECKNODE_MASK);
 
-       addr = mmap_region(file, addr, len, vm_flags, pgoff, uf);
+       addr = __mmap_region(mm, file, addr, len, vm_flags, pgoff, uf);
        if (!IS_ERR_VALUE(addr) &&
            ((vm_flags & VM_LOCKED) ||
             (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
@@ -1576,6 +1590,17 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
        return addr;
 }
 
+/*
+ * The caller must hold down_write(&current->mm->mmap_sem).
+ */
+unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len,
+       unsigned long prot, unsigned long flags, vm_flags_t vm_flags,
+       unsigned long pgoff, unsigned long *populate, struct list_head *uf)
+{
+       return __do_mmap(current->mm, file, addr, len, prot, flags, vm_flags, pgoff, populate, uf);
+}
+
+
 unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
                              unsigned long prot, unsigned long flags,
                              unsigned long fd, unsigned long pgoff)
@@ -1716,11 +1741,11 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
        return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
-               unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
-               struct list_head *uf)
+static unsigned long __mmap_region(struct mm_struct *mm, struct file *file,
+               unsigned long addr, unsigned long len,
+               vm_flags_t vm_flags, unsigned long pgoff,
+               struct list_head *uf)
 {
-       struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
        int error;
        struct rb_node **rb_link, *rb_parent;
@@ -1882,6 +1907,13 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        return error;
 }
 
+unsigned long mmap_region(struct file *file, unsigned long addr,
+               unsigned long len, vm_flags_t vm_flags,
+               unsigned long pgoff, struct list_head *uf)
+{
+       return __mmap_region(current->mm, file, addr, len, vm_flags, pgoff, uf);
+}
+
 unsigned long unmapped_area(struct vm_unmapped_area_info *info)
 {
        /*
@@ -2133,6 +2165,8 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
        if (enable_mmap_dvpp)
                dvpp_mmap_get_area(&info, flags);
 
+       sp_area_work_around(&info);
+
        return vm_unmapped_area(&info);
 }
 #endif
@@ -2183,6 +2217,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
        if (enable_mmap_dvpp)
                dvpp_mmap_get_area(&info, flags);
 
+       sp_area_work_around(&info);
+
        addr = vm_unmapped_area(&info);
 
        /*
@@ -2200,6 +2236,8 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                if (enable_mmap_dvpp)
                        dvpp_mmap_get_area(&info, flags);
 
+               sp_area_work_around(&info);
+
                addr = vm_unmapped_area(&info);
        }
 
@@ -2871,6 +2909,22 @@ int vm_munmap(unsigned long start, size_t len)
 }
 EXPORT_SYMBOL(vm_munmap);
 
+int do_vm_munmap(struct task_struct *tsk, unsigned long start, size_t len)
+{
+       int ret;
+       struct mm_struct *mm = tsk->mm;
+       LIST_HEAD(uf);
+
+       if (down_write_killable(&mm->mmap_sem))
+               return -EINTR;
+
+       ret = do_munmap(mm, start, len, &uf);
+       up_write(&mm->mmap_sem);
+       userfaultfd_unmap_complete(mm, &uf);
+       return ret;
+}
+EXPORT_SYMBOL(do_vm_munmap);
+
 SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
 {
        profile_munmap(addr);