From: Chen Jun <chenjun102@huawei.com> hulk inclusion category: feature bugzilla: NA -------------------------------- To avoid page faults within the virtual address range of the sharepool, support using `remap_pfn_range` to establish page table mapping. Enable this feature by adding the following boot argument: sharepool.remap=normal The former `sp_area_alloc_hugepage_disable` boot argument is replaced by the `nohuge` token of the same option; tokens may be combined with commas, e.g. sharepool.remap=nohuge,normal Signed-off-by: Chen Jun <chenjun102@huawei.com> --- mm/share_pool.c | 108 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index fb92eb5094b5..d380fa74bb95 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -717,7 +717,7 @@ static unsigned long spa_size(struct sp_area *spa) return spa->real_size; } -static struct file *spa_file(struct sp_area *spa) +static inline struct file *spa_file(struct sp_area *spa) { if (spa->is_hugepage) return spa->spg->file_hugetlb; @@ -725,6 +725,19 @@ static struct file *spa_file(struct sp_area *spa) return spa->spg->file; } +static inline unsigned long spa_page_size(struct sp_area *spa) +{ + if (spa->is_hugepage) + return PMD_SIZE; + else + return PAGE_SIZE; +} + +static inline unsigned long spa_nr_pages(struct sp_area *spa) +{ + return DIV_ROUND_UP(spa_size(spa), spa_page_size(spa)); +} + /* the caller should hold sp_area_lock */ static void spa_inc_usage(struct sp_area *spa) { @@ -1500,44 +1513,70 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa) return atomic_dec_and_test(&spa->spg->spa_num); } -static bool sp_area_alloc_hugepage_enable __read_mostly = true; +static bool spa_remap_huge __ro_after_init = true; +static bool spa_remap_normal __ro_after_init; -static int __init sp_area_alloc_hugepage_disable(char *p) +static int __init sharepool_use_remap_setup(char *s) { - sp_area_alloc_hugepage_enable = false; + char *token; + + while ((token = strsep(&s, ",")) != NULL) { + if (!*token) + continue; + + if (!strcmp(token, "nohuge")) + spa_remap_huge = false; + else if (!strcmp(token, 
"normal")) + spa_remap_normal = true; + } return 1; } -__setup("sp_area_alloc_hugepage_disable", sp_area_alloc_hugepage_disable); +__setup("sharepool.remap=", sharepool_use_remap_setup); -static bool sp_area_need_hugepage(struct sp_area *spa) +static bool sp_area_use_remap(struct sp_area *spa) { - return sp_area_alloc_hugepage_enable && spa->type == SPA_TYPE_ALLOC && spa->is_hugepage; + return ((spa_remap_huge && spa->is_hugepage) || + (spa_remap_normal && !spa->is_hugepage)) && + spa->type == SPA_TYPE_ALLOC; } -static bool sp_area_alloc_hugepages(struct sp_area *spa, int nid, nodemask_t *nodemask) +static struct page *sp_area_alloc_hugepages(int nid, nodemask_t *nodemask) { - int i; + struct page *page; + + page = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, nid, nodemask); + if (!page) + page = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, nodemask, + GFP_HIGHUSER_MOVABLE | __GFP_ACCOUNT | __GFP_COMP); + + return page; +} + +static bool sp_area_prepare_pages(struct sp_area *spa, int nid, nodemask_t *nodemask) +{ + int nr_pages = spa_nr_pages(spa); struct page **pages; - int nr_pages = ALIGN(spa_size(spa), PMD_SIZE) / PMD_SIZE; + int i; pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return false; for (i = 0; i < nr_pages; i++) { - pages[i] = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, - nid, nodemask); - if (!pages[i]) - pages[i] = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, - nodemask, GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ACCOUNT); + if (spa->is_hugepage) + pages[i] = sp_area_alloc_hugepages(nid, nodemask); + else + pages[i] = __alloc_pages(GFP_HIGHUSER_MOVABLE | __GFP_ACCOUNT, + 0, nid, nodemask); + if (!pages[i]) { while (i--) put_page(pages[i]); kvfree(pages); return false; } - memset(page_to_virt(pages[i]), 0, PMD_SIZE); + memset(page_to_virt(pages[i]), 0, spa_page_size(spa)); } spa->pages = pages; @@ -1545,11 +1584,11 @@ static bool sp_area_alloc_hugepages(struct sp_area 
*spa, int nid, nodemask_t *no return true; } -static void sp_area_free_hugepages(struct sp_area *spa) +static void sp_area_free_pages(struct sp_area *spa) { - int nr_pages = ALIGN(spa->real_size, PMD_SIZE) / PMD_SIZE; + int nr_pages = spa_nr_pages(spa); - if (!sp_area_need_hugepage(spa)) + if (!sp_area_use_remap(spa)) return; if (!spa->pages) @@ -1786,7 +1825,7 @@ static void sp_area_free(struct sp_area *spa) rb_erase(&spa->rb_node, &spm->area_root); spin_unlock(&spm->sp_mapping_lock); RB_CLEAR_NODE(&spa->rb_node); - sp_area_free_hugepages(spa); + sp_area_free_pages(spa); kfree(spa); } @@ -1987,20 +2026,25 @@ int mg_sp_free(unsigned long addr, int id) } EXPORT_SYMBOL_GPL(mg_sp_free); -static int sp_vma_insert_hugepages(struct vm_area_struct *vma, struct page **pages, +static int sp_vma_populate_pages(struct vm_area_struct *vma, struct sp_area *spa, unsigned long uaddr, unsigned long size) { + unsigned long page_size = spa_page_size(spa); int i = 0; + int ret; - vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); do { - int ret = remap_pfn_range_try_pmd(vma, uaddr, page_to_pfn(pages[i]), - PMD_SIZE, vma->vm_page_prot); + if (spa->is_hugepage) + ret = remap_pfn_range_try_pmd(vma, uaddr, + page_to_pfn(spa->pages[i]), page_size, vma->vm_page_prot); + else + ret = remap_pfn_range(vma, uaddr, + page_to_pfn(spa->pages[i]), page_size, vma->vm_page_prot); if (ret) return ret; - uaddr += PMD_SIZE; - size -= PMD_SIZE; + uaddr += page_size; + size -= page_size; i++; } while (size > 0); @@ -2043,8 +2087,8 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, else vm_flags_clear(vma, VM_MAYWRITE); - if (sp_area_need_hugepage(spa)) { - ret = sp_vma_insert_hugepages(vma, spa->pages, addr, size); + if (sp_area_use_remap(spa)) { + ret = sp_vma_populate_pages(vma, spa, addr, size); if (ret) { do_munmap(mm, addr, size, NULL); return (unsigned long)ret; @@ -2225,8 +2269,8 @@ static int sp_nc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned 
long p vm_flags_clear(vma, VM_MAYWRITE); vma->vm_page_prot = sp_pgprot_writethrough(vma->vm_page_prot); - if (sp_area_need_hugepage(spa)) { - ret = sp_vma_insert_hugepages(vma, spa->pages, addr, size); + if (sp_area_use_remap(spa)) { + ret = sp_vma_populate_pages(vma, spa, addr, size); if (ret) do_munmap(mm, addr, spa_size(spa), NULL); up_write(&mm->mmap_lock); @@ -2339,8 +2383,8 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context * struct mm_struct *mm; bool reach_current = false; - if (sp_area_need_hugepage(spa) && - !sp_area_alloc_hugepages(spa, ac->preferred_node_id, ac->nodemask)) + if (sp_area_use_remap(spa) && + !sp_area_prepare_pages(spa, ac->preferred_node_id, ac->nodemask)) return -ENOMEM; mmap_ret = sp_map_spa_to_mm(current->mm, spa, spg_node->prot, ac, "sp_alloc"); -- 2.43.0