From: Tang Yizhou <tangyizhou@huawei.com>
ascend inclusion
category: feature
bugzilla: 46819
CVE: NA
-------------------------------------------------

The reason for exporting buff_vzalloc_user() is that the gfp_mask
__GFP_ACCOUNT can be used to limit memory usage with the memory cgroup.
The same reason applies to buff_vzalloc_hugepage_user(), its hugepage
version.

With HAVE_ARCH_HUGE_VMALLOC selected and the boot argument
enable_share_pool set, buff_vzalloc_user() and vmalloc_hugepage_user()
can allocate hugepage memory. vmalloc() will also allocate hugepage
memory when possible.

Reference: https://lwn.net/Articles/839107/
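As a rough usage sketch (illustration only, not part of this patch): a
driver could back a per-device buffer with buff_vzalloc_user() and hand
it to userspace via remap_vmalloc_range(); the example_* names and the
mmap hook below are hypothetical.

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/share_pool.h>

static void *example_buf;	/* hypothetical per-device buffer */

static int example_alloc(unsigned long size)
{
	/*
	 * Zeroed, VM_USERMAP-tagged allocation; charged to the caller's
	 * memory cgroup because buff_vzalloc_user() passes __GFP_ACCOUNT.
	 */
	example_buf = buff_vzalloc_user(size);
	return example_buf ? 0 : -ENOMEM;
}

static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Map the whole buffer into the calling process. */
	return remap_vmalloc_range(vma, example_buf, 0);
}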
Signed-off-by: Tang Yizhou <tangyizhou@huawei.com>
Reviewed-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
---
 arch/Kconfig               |  4 ++
 arch/arm64/Kconfig         |  2 +
 include/linux/share_pool.h | 29 ++++++++++++
 include/linux/vmalloc.h    |  3 +-
 mm/share_pool.c            | 91 ++++++++++++++++++++++++++++++++++++++
 mm/vmalloc.c               | 52 +---------------------
 6 files changed, 129 insertions(+), 52 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index bf32f02845c7..de33474c4381 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -544,6 +544,10 @@ config HAVE_ARCH_HUGE_VMAP
 config HAVE_ARCH_HUGE_VMALLOC
 	depends on HAVE_ARCH_HUGE_VMAP
 	bool
+	help
+	  Archs that select this would be capable of PMD-sized vmaps (i.e.,
+	  arch_vmap_pmd_supported() returns true), and they must make no
+	  assumptions that vmalloc memory is mapped with PAGE_SIZE ptes.
 
 config HAVE_ARCH_SOFT_DIRTY
 	bool
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d93dae75f1a6..478205b50196 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -106,6 +106,7 @@ config ARM64
 	select HAVE_ARCH_BITREVERSE
 	select HAVE_ARCH_COMPILER_H
 	select HAVE_ARCH_HUGE_VMAP
+	select HAVE_ARCH_HUGE_VMALLOC
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
 	select HAVE_ARCH_KGDB
@@ -1557,6 +1558,7 @@ config ASCEND_SHARE_POOL
 	select ARCH_USES_HIGH_VMA_FLAGS
 	select MM_OWNER
 	depends on HUGETLBFS
+	depends on HAVE_ARCH_HUGE_VMALLOC
 	help
 	  This feature allows multiple processes to share virtual memory both
 	  in kernel and user level, which is only enabled for ascend platform.
diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h
index 933b77be8ff8..3c5a41ae5bd1 100644
--- a/include/linux/share_pool.h
+++ b/include/linux/share_pool.h
@@ -38,6 +38,10 @@ extern int sysctl_sp_debug_mode;
 
 extern int enable_ascend_share_pool;
 
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
+extern bool vmap_allow_huge;
+#endif
+
 /* Processes in the same sp_group can share memory.
  * Memory layout for share pool:
  *
@@ -223,6 +227,11 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm,
 			struct address_space *mapping, pgoff_t idx,
 			unsigned long address, pte_t *ptep, unsigned int flags);
 
+extern void *vmalloc_hugepage(unsigned long size);
+extern void *vmalloc_hugepage_user(unsigned long size);
+extern void *buff_vzalloc_user(unsigned long size);
+extern void *buff_vzalloc_hugepage_user(unsigned long size);
+
 #else
 
 static inline int sp_group_add_task(int pid, int spg_id)
@@ -365,6 +374,26 @@ static inline bool sp_mmap_check(unsigned long flags)
 static inline void sp_dump_stack(void)
 {
 }
+
+static inline void *vmalloc_hugepage(unsigned long size)
+{
+	return NULL;
+}
+
+static inline void *vmalloc_hugepage_user(unsigned long size)
+{
+	return NULL;
+}
+
+static inline void *buff_vzalloc_user(unsigned long size)
+{
+	return NULL;
+}
+
+static inline void *buff_vzalloc_hugepage_user(unsigned long size)
+{
+	return NULL;
+}
 #endif
 
 #endif /* LINUX_SHARE_POOL_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 6383d6989c0f..bb814f6418fd 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -99,8 +99,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 			unsigned long start, unsigned long end, gfp_t gfp_mask,
 			pgprot_t prot, unsigned long vm_flags, int node,
 			const void *caller);
-extern void *vmalloc_hugepage(unsigned long size);
-extern void *vmalloc_hugepage_user(unsigned long size);
+
 #ifndef CONFIG_MMU
 extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags);
 static inline void *__vmalloc_node_flags_caller(unsigned long size, int node,
diff --git a/mm/share_pool.c b/mm/share_pool.c
index 72e46686566a..4fa539e452ef 100644
--- a/mm/share_pool.c
+++ b/mm/share_pool.c
@@ -2674,11 +2674,102 @@ struct page *sp_alloc_pages(struct vm_struct *area, gfp_t mask,
 	return alloc_pages_node(node, mask, page_order);
 }
 
+/**
+ * vmalloc_hugepage - allocate virtually contiguous hugetlb memory
+ * @size: allocation size
+ *
+ * Allocate enough huge pages to cover @size and map them into
+ * contiguous kernel virtual space.
+ *
+ * The allocation size is aligned to PMD_SIZE automatically
+ */
+void *vmalloc_hugepage(unsigned long size)
+{
+	/* PMD hugepage aligned */
+	size = PMD_ALIGN(size);
+
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+			GFP_KERNEL, PAGE_KERNEL,
+			VM_HUGE_PAGES, NUMA_NO_NODE,
+			__builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_hugepage);
+
+/**
+ * vmalloc_hugepage_user - allocate virtually contiguous hugetlb memory
+ *	for userspace
+ * @size: allocation size
+ *
+ * Allocate enough huge pages to cover @size and map them into
+ * contiguous kernel virtual space. The resulting memory area
+ * is zeroed so it can be mapped to userspace without leaking data.
+ *
+ * The allocation size is aligned to PMD_SIZE automatically
+ */
+void *vmalloc_hugepage_user(unsigned long size)
+{
+	/* PMD hugepage aligned */
+	size = PMD_ALIGN(size);
+
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+			GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
+			VM_HUGE_PAGES | VM_USERMAP, NUMA_NO_NODE,
+			__builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_hugepage_user);
+
+/**
+ * buff_vzalloc_user - allocate zeroed virtually contiguous memory
+ *	for userspace
+ * @size: allocation size
+ *
+ * The resulting memory area is zeroed so it can be mapped to userspace
+ * without leaking data.
+ *
+ * Compare to vmalloc_user(), this is a customized function because
+ * __GFP_ACCOUNT is used to limit memory usage.
+ */
+void *buff_vzalloc_user(unsigned long size)
+{
+	return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
+			GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, PAGE_KERNEL,
+			VM_USERMAP, NUMA_NO_NODE,
+			__builtin_return_address(0));
+}
+EXPORT_SYMBOL(buff_vzalloc_user);
+
+/**
+ * buff_vzalloc_hugepage_user - allocate virtually contiguous hugetlb memory
+ *	for userspace
+ * @size: allocation size
+ *
+ * Allocate enough huge pages to cover @size and map them into
+ * contiguous kernel virtual space. The resulting memory area
+ * is zeroed so it can be mapped to userspace without leaking data.
+ *
+ * The allocation size is aligned to PMD_SIZE automatically
+ *
+ * Compare to vmalloc_hugepage_user(), this is a customized function because
+ * __GFP_ACCOUNT is used to limit memory usage.
+ */
+void *buff_vzalloc_hugepage_user(unsigned long size)
+{
+	/* PMD hugepage aligned */
+	size = PMD_ALIGN(size);
+
+	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
+			GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT, PAGE_KERNEL,
+			VM_HUGE_PAGES | VM_USERMAP, NUMA_NO_NODE,
+			__builtin_return_address(0));
+}
+EXPORT_SYMBOL(buff_vzalloc_hugepage_user);
+
 int enable_ascend_share_pool;
 
 static int __init enable_share_pool(char *s)
 {
 	enable_ascend_share_pool = 1;
+	vmap_allow_huge = true;
 
 	pr_info("Ascend enable share pool features\n");
 
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index aa2415741d13..6bebb7b52448 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -43,7 +43,7 @@
 #include "internal.h"
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
-static bool __ro_after_init vmap_allow_huge = true;
+bool __ro_after_init vmap_allow_huge;
 
 static int __init set_nohugevmalloc(char *str)
 {
@@ -52,7 +52,7 @@ static int __init set_nohugevmalloc(char *str)
 }
 early_param("nohugevmalloc", set_nohugevmalloc);
 #else /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
-static const bool vmap_allow_huge = false;
+static const bool vmap_allow_huge;
 #endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
 
 struct vfree_deferred {
@@ -2931,54 +2931,6 @@ void *vmalloc_32_user(unsigned long size)
 }
 EXPORT_SYMBOL(vmalloc_32_user);
 
-/**
- * vmalloc_hugepage - allocate virtually contiguous hugetlb memory
- * @size: allocation size
- *
- * Allocate enough huge pages to cover @size and map them into
- * contiguous kernel virtual space.
- *
- * The allocation size is aligned to PMD_SIZE automatically
- */
-void *vmalloc_hugepage(unsigned long size)
-{
-	/* PMD hugepage aligned */
-	size = PMD_ALIGN(size);
-
-	return __vmalloc_node(size, 1, GFP_KERNEL, PAGE_KERNEL,
-		NUMA_NO_NODE, __builtin_return_address(0));
-}
-EXPORT_SYMBOL(vmalloc_hugepage);
-
-/**
- * vmalloc_hugepage_user - allocate virtually contiguous hugetlb memory
- *	for userspace
- * @size: allocation size
- *
- * Allocate enough huge pages to cover @size and map them into
- * contiguous kernel virtual space. The resulting memory area
- * is zeroed so it can be mapped to userspace without leaking data.
- *
- * The allocation size is aligned to PMD_SIZE automatically
- */
-void *vmalloc_hugepage_user(unsigned long size)
-{
-	struct vm_struct *area;
-	void *ret;
-
-	/* 2M hugepa aligned */
-	size = PMD_ALIGN(size);
-
-	ret = __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
-		NUMA_NO_NODE, __builtin_return_address(0));
-	if (ret) {
-		area = find_vm_area(ret);
-		area->flags |= VM_USERMAP;
-	}
-	return ret;
-}
-EXPORT_SYMBOL(vmalloc_hugepage_user);
-
 /*
  * small helper routine , copy contents to buf from addr.