From: Weilong Chen <chenweilong@huawei.com>
ascend inclusion
category: feature
bugzilla: NA
CVE: NA
-------------------------------------------------
Add a flag VM_SHAREPOOL to avoid calling vfree() on a shared kva.
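For illustration only, a minimal usage sketch of the lifetime rule the new flag enforces, assuming the sp_make_share_k2u()/sp_unshare() prototypes from include/linux/share_pool.h; the size, pid and spg_id values are placeholders:

	/*
	 * Illustrative sketch, not part of this patch.
	 */
	void *kva = vmalloc(size);	/* kernel buffer to be shared */
	void *uva;

	if (!kva)
		return -ENOMEM;

	uva = sp_make_share_k2u((unsigned long)kva, size, 0, pid, spg_id);
	if (IS_ERR(uva)) {
		vfree(kva);		/* not shared yet, freeing is safe */
		return PTR_ERR(uva);
	}

	/*
	 * k2u has now set VM_SHAREPOOL on the kva's vm_struct, so a stray
	 * vfree(kva) here is rejected by __vunmap() with a WARN instead of
	 * tearing down pages that are still mapped into user space.
	 */

	sp_unshare((unsigned long)uva, size, pid, spg_id); /* clears VM_SHAREPOOL */
	vfree(kva);			/* safe again after unshare */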
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 include/linux/hugetlb.h    |  3 +++
 include/linux/share_pool.h |  2 --
 include/linux/vmalloc.h    |  4 +++
 mm/hugetlb.c               |  7 +++++
 mm/memory.c                |  3 ++-
 mm/share_pool.c            | 53 +++++++++++++++++++++++++++++++++-----
 mm/vmalloc.c               |  8 ++++++
 7 files changed, 71 insertions(+), 9 deletions(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index be649e5ba322..debd4603991e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -384,6 +384,9 @@ struct page *hugetlb_alloc_hugepage(int nid);
 int hugetlb_insert_hugepage_pte(struct mm_struct *mm, unsigned long addr,
 		pgprot_t prot, struct page *hpage);
 #endif
+int hugetlb_insert_hugepage_pte_by_pa(struct mm_struct *mm,
+		unsigned long vir_addr,
+		pgprot_t prot, unsigned long phy_addr);
 int hugetlb_insert_hugepage(struct vm_area_struct *vma, unsigned long addr,
 		struct page *hpage, pgprot_t prot);
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 47d8579c23ec..933b77be8ff8 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -24,8 +24,6 @@
 
 #define MAX_DEVID 1	/* the max num of Da-vinci devices */
 
-#define VM_HUGE_PAGES	0x00001000	/* use for huge pages */
-
 /* to align the pointer to the (next) PMD boundary */
 #define PMD_ALIGN(addr)		ALIGN(addr, PMD_SIZE)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 244eedb7591a..6383d6989c0f 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -23,6 +23,10 @@ struct notifier_block;		/* in notifier.h */
 #define VM_UNINITIALIZED	0x00000020	/* vm_struct is not fully initialized */
 #define VM_NO_GUARD		0x00000040	/* don't add guard page */
 #define VM_KASAN		0x00000080	/* has allocated kasan shadow memory */
+#ifdef CONFIG_ASCEND_SHARE_POOL
+#define VM_HUGE_PAGES		0x00001000	/* use for huge pages */
+#define VM_SHAREPOOL		0x00002000	/* remapped to sharepool */
+#endif
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a68e7e1afd82..4cc8e041299a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3598,6 +3598,13 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
+
+		/* sharepool k2u mapped pages are marked special */
+		if (sp_check_vm_share_pool(vma->vm_flags) && pte_special(pte)) {
+			spin_unlock(ptl);
+			continue;
+		}
+
 		if (huge_pte_dirty(pte))
 			set_page_dirty(page);
diff --git a/mm/memory.c b/mm/memory.c
index e369f3961ad2..b69f8bd23ca6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1532,7 +1532,8 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 	}
 
 	if (sp_check_hugepage(page))
-		return hugetlb_insert_hugepage(vma, addr, page, vma->vm_page_prot);
+		return hugetlb_insert_hugepage_pte_by_pa(vma->vm_mm, addr,
+				vma->vm_page_prot, page_to_phys(page));
 	else
 		return insert_page(vma, addr, page, vma->vm_page_prot);
 }
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 286e74f99360..d39c2c3d728c 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -178,6 +178,7 @@ struct sp_area {
 	struct sp_group *spg;
 	enum spa_type type;	/* where spa born from */
 	struct mm_struct *mm;	/* owner of k2u(task) */
+	unsigned long kva;	/* shared kva */
 };
 static DEFINE_SPINLOCK(sp_area_lock);
 static struct rb_root sp_area_root = RB_ROOT;
@@ -1393,6 +1394,17 @@ static int is_vmap_hugepage(unsigned long addr)
 		return 0;
 }
 
+static unsigned long __sp_remap_get_pfn(unsigned long kva)
+{
+	unsigned long pfn;
+	if (is_vmalloc_addr((void *)kva))
+		pfn = vmalloc_to_pfn((void *)kva);
+	else
+		pfn = virt_to_pfn(kva);
+
+	return pfn;
+}
+
 static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 					 struct mm_struct *mm)
 {
@@ -1403,6 +1415,7 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 	int ret = 0;
 	struct user_struct *user = NULL;
 	int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
+	unsigned long addr, buf, offset;
 
 	if (spa->is_hugepage) {
 		file = hugetlb_file_setup(HUGETLB_ANON_FILE, spa_size(spa), VM_NORESERVE,
@@ -1437,13 +1450,23 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 			ret_addr = ret;
 			goto out;
 		}
+		vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	} else {
-		ret = remap_vmalloc_range(vma, (void *)kva, 0);
-		if (ret) {
-			pr_err("share pool: remap vmalloc failed, ret %d\n", ret);
-			ret_addr = ret;
-			goto out;
-		}
+		buf = ret_addr;
+		addr = kva;
+		offset = 0;
+		do {
+			ret = remap_pfn_range(vma, buf, __sp_remap_get_pfn(addr), PAGE_SIZE,
+					__pgprot(vma->vm_page_prot.pgprot));
+			if (ret) {
+				pr_err("share pool: remap_pfn_range failed, ret %d\n", ret);
+				ret_addr = ret;
+				goto out;
+			}
+			offset += PAGE_SIZE;
+			buf += PAGE_SIZE;
+			addr += PAGE_SIZE;
+		} while (offset < spa_size(spa));
 	}
 
 out:
@@ -1551,6 +1574,7 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
 	unsigned int page_size = PAGE_SIZE;
 	enum spa_type type;
 	int ret;
+	struct vm_struct *area;
 
 	if (sp_flags & ~SP_DVPP) {
 		if (printk_ratelimit())
@@ -1632,6 +1656,12 @@ void *sp_make_share_k2u(unsigned long kva, unsigned long size,
 			type, current->comm, current->tgid, current->pid, spg_id,
 			(void *)spa->va_start, spa->real_size);
 		sp_dump_stack();
+
+		/* associate vma and spa */
+		area = find_vm_area((void *)kva);
+		if (area)
+			area->flags |= VM_SHAREPOOL;
+		spa->kva = kva;
 	}
 
 	return uva;
@@ -1901,6 +1931,7 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
 	unsigned long uva_aligned;
 	unsigned long size_aligned;
 	unsigned int page_size;
+	struct vm_struct *area;
 
 	mutex_lock(&sp_mutex);
 	/*
@@ -2031,6 +2062,10 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int pid, int sp
 	}
 
 out_drop_area:
+	/* deassociate vma and spa */
+	area = find_vm_area((void *)spa->kva);
+	if (area)
+		area->flags &= ~VM_SHAREPOOL;
 	__sp_area_drop(spa);
 out_unlock:
 	mutex_unlock(&sp_mutex);
@@ -2045,6 +2080,7 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size)
 	unsigned long step;
 	bool is_hugepage = true;
 	int ret;
+	struct vm_struct *area;
 
 	ret = is_vmap_hugepage(kva);
 	if (ret > 0) {
@@ -2080,6 +2116,11 @@ static int sp_unshare_kva(unsigned long kva, unsigned long size)
 				(void *)addr);
 	}
 
+	/* deassociate vma and spa */
+	area = find_vm_area((void *)kva_aligned);
+	if (area)
+		area->flags &= ~VM_SHAREPOOL;
+
 	vunmap((void *)kva_aligned);
 
 	return 0;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 78f56e719e1d..aa2415741d13 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2369,6 +2369,14 @@ static void __vunmap(const void *addr, int deallocate_pages)
 		return;
 	}
 
+#ifdef CONFIG_ASCEND_SHARE_POOL
+	/* unmapping a sharepool vm area will cause a memory leak! */
+	if (area->flags & VM_SHAREPOOL) {
+		WARN(1, KERN_ERR "Memory leak due to vfree() sharepool vm area (%p)!\n", addr);
+		return;
+	}
+#endif
+
 	debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
 	debug_check_no_obj_freed(area->addr, get_vm_area_size(area));