[PATCH OLK-6.6 4/13] mm/sharepool: Use hugetlb_insert_hugepage_pte for sharepool hugepage

11 Jun 2026

From: Wang Wensheng <wangwensheng4@huawei.com>

hulk inclusion
category: feature
category: bugfix
bugzilla: NA

----------------------------------------

Don't use hugetlb_no_page() to allocate sharepool hugepages, because we
want to be able to demote 1G-sized hugetlb pages when there are no
2M-sized hugetlb pages available. Instead, allocate the hugepages directly
via alloc_hugetlb_folio_nodemask_size() and map them via
hugetlb_insert_hugepage_pte(), just as we already do in k2u.

Fixes: 00c7c3d64806 ("hugetlb: support auto demote and promote")
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 mm/share_pool.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 2 deletions(-)

diff --git a/mm/share_pool.c b/mm/share_pool.c
index 100446279620..9ff7a2da3dfc 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -700,7 +700,10 @@ struct sp_area {
 	struct sp_group *spg;
 	struct sp_mapping *spm;		/* where spa born from */
 	enum spa_type type;
-	unsigned long kva;		/* shared kva */
+	union {
+		unsigned long kva;	/* shared kva */
+		struct page **pages;	/* for hugetlb alloc */
+	};
 	pid_t applier;			/* the original applier process */
 	int preferred_node_id;		/* memory node */
 	struct work_struct work;
@@ -1497,6 +1500,65 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa)
 	return atomic_dec_and_test(&spa->spg->spa_num);
 }
 
+/* Cleared by the "sp_area_alloc_hugepage_disable" boot option; true by default. */
+static bool sp_area_alloc_hugepage_enable __read_mostly = true;
+
+static int __init sp_area_alloc_hugepage_disable(char *p)
+{
+	sp_area_alloc_hugepage_enable = false;
+	return 1;
+}
+__setup("sp_area_alloc_hugepage_disable", sp_area_alloc_hugepage_disable);
+/* True for a hugepage SPA_TYPE_ALLOC area while sp_area_alloc_hugepage_enable is set. */
+static bool sp_area_need_hugepage(struct sp_area *spa)
+{
+	return spa->is_hugepage && spa->type == SPA_TYPE_ALLOC && sp_area_alloc_hugepage_enable;
+}
+
+/* Fill spa->pages with one zeroed PMD_SIZE page per PMD_SIZE slice of @spa. */
+static bool sp_area_alloc_hugepages(struct sp_area *spa, int nid, nodemask_t *nodemask)
+{
+	int idx, count = ALIGN(spa_size(spa), PMD_SIZE) / PMD_SIZE;
+	struct page **array = kvmalloc_array(count, sizeof(*array), GFP_KERNEL);
+
+	if (!array)
+		return false;
+
+	for (idx = 0; idx < count; idx++) {
+		array[idx] = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE,
+									      nid, nodemask);
+		if (!array[idx])
+			goto rollback;
+		memset(page_to_virt(array[idx]), 0, PMD_SIZE);
+	}
+	spa->pages = array;
+	return true;
+
+rollback:
+	/* Release only the entries filled before the failing allocation. */
+	while (idx--)
+		put_page(array[idx]);
+	kvfree(array);
+	return false;
+}
+
+/* Drop the references taken by sp_area_alloc_hugepages() and free the array. */
+static void sp_area_free_hugepages(struct sp_area *spa)
+{
+	/* Same count expression as the allocator, so both walk the same slots. */
+	int nr_pages = ALIGN(spa_size(spa), PMD_SIZE) / PMD_SIZE;
+
+	/* spa->pages shares a union with kva; only trust it for hugepage areas. */
+	if (!sp_area_need_hugepage(spa) || !spa->pages)
+		return;
+
+	while (nr_pages--)
+		put_page(spa->pages[nr_pages]);
+
+	kvfree(spa->pages);
+	spa->pages = NULL;
+}
+
 /**
  * sp_area_alloc() - Allocate a region of VA from the share pool.
  * @size: the size of VA to allocate.
@@ -1721,6 +1783,7 @@ static void sp_area_free(struct sp_area *spa)
 	rb_erase(&spa->rb_node, &spm->area_root);
 	spin_unlock(&spm->sp_mapping_lock);
 	RB_CLEAR_NODE(&spa->rb_node);
+	sp_area_free_hugepages(spa);
 	kfree(spa);
 }
 
@@ -1924,11 +1987,32 @@ int mg_sp_free(unsigned long addr, int id)
 }
 EXPORT_SYMBOL_GPL(mg_sp_free);
 
+/* Map @pages at @uaddr, one per PMD_SIZE step; returns 0 or the first error. */
+static int sp_vma_insert_hugepages(struct vm_area_struct *vma, struct page **pages,
+				   unsigned long uaddr, unsigned long size)
+{
+	unsigned long i, nr_pages = ALIGN(size, PMD_SIZE) / PMD_SIZE;
+
+	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
+	/*
+	 * Walk a page count rather than decrementing the unsigned @size, which
+	 * would wrap instead of reaching zero if it is not a PMD_SIZE multiple.
+	 */
+	for (i = 0; i < nr_pages; i++, uaddr += PMD_SIZE) {
+		int ret = hugetlb_insert_hugepage_pte(vma->vm_mm, uaddr,
+				vma->vm_page_prot, pages[i]);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
 /* wrapper of __do_mmap() and the caller must hold mmap_write_lock(mm). */
 static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
 			     struct sp_area *spa, unsigned long *populate,
 			     unsigned long prot)
 {
+	int ret = 0;
 	unsigned long addr = spa->va_start;
 	unsigned long size = spa_size(spa);
 	unsigned long flags = MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_POPULATE |
@@ -1959,6 +2043,14 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file,
 	else
 		vm_flags_clear(vma, VM_MAYWRITE);
 
+	if (sp_area_need_hugepage(spa)) {
+		ret  = sp_vma_insert_hugepages(vma, spa->pages, addr, size);
+		if (ret) {
+			do_munmap(mm, addr, size, NULL);
+			return (unsigned long)ret;
+		}
+	}
+
 	return addr;
 }
 
@@ -2156,7 +2248,7 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 			    unsigned long prot, struct sp_alloc_context *ac,
 			    const char *str)
 {
-	int ret;
+	int ret = 0;
 	unsigned long mmap_addr;
 	unsigned long populate = 0;
 
@@ -2179,6 +2271,13 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa,
 	switch (spa->type) {
 	case SPA_TYPE_ALLOC:
 		mmap_write_unlock(mm);
+		/*
+		 * If an spa of SPA_TYPE_ALLOC has a non-NULL pages array, it
+		 * has already been populated in sp_mmap(). So just break and
+		 * don't populate it again.
+		 */
+		if (spa->pages)
+			break;
 		ret = sp_alloc_populate(mm, spa, populate, ac);
 		if (ret) {
 			mmap_write_lock(mm);
@@ -2234,6 +2333,10 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context *
 	struct mm_struct *mm;
 	bool reach_current = false;
 
+	if (sp_area_need_hugepage(spa) &&
+	    !sp_area_alloc_hugepages(spa, ac->preferred_node_id, ac->nodemask))
+		return -ENOMEM;
+
 	mmap_ret = sp_map_spa_to_mm(current->mm, spa, spg_node->prot, ac, "sp_alloc");
 	if (mmap_ret) {
 		/* Don't skip error for current process */
-- 
2.43.0

    

[PATCH OLK-6.6 4/13] mm/sharepool: Use hugetlb_insert_hugepage_pte for sharepool hugepage

Yin Tirui