On 2023/12/7 21:43, Wang Wensheng wrote:
ascend inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8LNGH
This allows the user to do a huge vmalloc and remap those hugepage ranges into userspace.

Some devices cannot handle mixed page-table levels. They want to know exactly whether the memory they allocated is backed by hugepages or not. Introduce vmalloc/vmap/remap interfaces that handle only hugepages.

Introduce the VM_HUGE_PAGES flag. __vmalloc_node_range() allocates PMD_SIZE hugepages when VM_HUGE_PAGES is specified.
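For context, my understanding of the intended kernel-side usage is something like the sketch below (my own hypothetical driver code, not from this patch):

	/* Hypothetical driver helper, assuming CONFIG_EXTEND_HUGEPAGE_MAPPING=y. */
	#include <linux/vmalloc.h>

	static void *example_alloc_huge_buffer(void)
	{
		/* Size is rounded up to PMD_SIZE; mapping uses PMD-level entries only. */
		void *buf = vmalloc_hugepage_user(16UL << 20);	/* 16 MiB */

		if (!buf)
			return NULL;

		/*
		 * The buffer is later handed to userspace from the driver's mmap
		 * handler via remap_vmalloc_hugepage_range(), and released with
		 * vfree() like any other vmalloc allocation.
		 */
		return buf;
	}

Is that roughly what the callers do?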
Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com>
---
 include/linux/vmalloc.h |  18 ++++
 mm/Kconfig              |   8 ++
 mm/vmalloc.c            | 205 +++++++++++++++++++++++++++++++++++++++-
 3 files changed, 229 insertions(+), 2 deletions(-)
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index c720be70c8dd..e7db501b7602 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -36,6 +36,12 @@ struct iov_iter;		/* in uio.h */
 #define VM_DEFER_KMEMLEAK	0
 #endif
 
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING
+#define VM_HUGE_PAGES		0x00004000	/* vmalloc hugepage mapping only */
+#else
+#define VM_HUGE_PAGES		0
+#endif
+
 /* bits [20..32] reserved for arch specific ioremap internals */
 
 /*
@@ -173,6 +179,18 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
 extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 							unsigned long pgoff);
 
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING
+extern void *vmalloc_hugepage(unsigned long size);
+extern void *vmalloc_hugepage_user(unsigned long size);
+extern void *vmap_hugepage(struct page **pages, unsigned int count,
+			   unsigned long flags, pgprot_t prot);
+extern int remap_vmalloc_hugepage_range_partial(struct vm_area_struct *vma,
+						 unsigned long uaddr, void *kaddr,
+						 unsigned long pgoff, unsigned long size);
+extern int remap_vmalloc_hugepage_range(struct vm_area_struct *vma,
+					void *addr, unsigned long pgoff);
+#endif
+
 /*
  * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
  * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings()
diff --git a/mm/Kconfig b/mm/Kconfig
index 3dead7328cd5..64a8aea7f67a 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1273,6 +1273,7 @@ menuconfig ASCEND_FEATURES
 	bool "Support Ascend Features"
 	depends on ARM64
 	select HUGETLB_INSERT_PAGE
+	select EXTEND_HUGEPAGE_MAPPING
 	help
 	  The Ascend chip use the Hisilicon DaVinci architecture, and mainly
 	  focus on AI and machine leanring area, contains many external features.
@@ -1284,6 +1285,13 @@ config HUGETLB_INSERT_PAGE
 	help
 	  This allowed a driver to insert hugetlb mapping into user address space.
 
+config EXTEND_HUGEPAGE_MAPPING
+	bool
+	help
+	  Introduce vmalloc/vmap/remap interfaces that handle only hugepages.
+	  This allows the user to do huge vmalloc and remap those hugepage ranges
+	  into userspace.
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a3fedb3ee0db..7794d2b0db64 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3267,7 +3267,9 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
Please make this check a variable, e.g.:

	bool try_pmd_mapping = IS_ENABLED(CONFIG_EXTEND_HUGEPAGE_MAPPING) &&
			       (vm_flags & VM_HUGE_PAGES);

 		size_per_node = size;
 		if (node == NUMA_NO_NODE)
 			size_per_node /= num_online_nodes();
-		if (arch_vmap_pmd_supported(prot) && size_per_node >= PMD_SIZE)
+		if (arch_vmap_pmd_supported(prot) && (size_per_node >= PMD_SIZE ||
+		    (IS_ENABLED(CONFIG_EXTEND_HUGEPAGE_MAPPING) &&
+		    (vm_flags & VM_HUGE_PAGES))))
 			shift = PMD_SHIFT;
 		else
 			shift = arch_vmap_pte_supported_shift(size_per_node);
@@ -3350,7 +3352,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	return area->addr;
 
 fail:
-	if (shift > PAGE_SHIFT) {
+	if (shift > PAGE_SHIFT && !(IS_ENABLED(CONFIG_EXTEND_HUGEPAGE_MAPPING) &&
+	    (vm_flags & VM_HUGE_PAGES))) {
 		shift = PAGE_SHIFT;
 		align = real_align;
 		size = real_size;
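
Applied to both hunks, the suggested variable would read roughly like this (untested sketch; surrounding lines of __vmalloc_node_range() elided):

	/* try_pmd_mapping as suggested above, declared once and reused at both sites. */
	if (arch_vmap_pmd_supported(prot) &&
	    (size_per_node >= PMD_SIZE || try_pmd_mapping))
		shift = PMD_SHIFT;
	else
		shift = arch_vmap_pte_supported_shift(size_per_node);

	/* ... */

fail:
	/* Never fall back to 4K pages when the caller asked for hugepages only. */
	if (shift > PAGE_SHIFT && !try_pmd_mapping) {
		shift = PAGE_SHIFT;
		align = real_align;
		size = real_size;
		goto again;
	}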
@@ -4308,6 +4311,204 @@ bool vmalloc_dump_obj(void *object)
 }
 #endif
 
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING

This part looks a bit odd: the size does not really seem to be used here. What sizes do the products actually pass in?

+/**
+ * vmalloc_hugepage - allocate virtually contiguous hugetlb memory
+ * @size: allocation size
+ *
+ * Allocate enough huge pages to cover @size and map them into
+ * contiguous kernel virtual space.
+ *
+ * The allocation size is aligned to PMD_SIZE automatically
+ */
+void *vmalloc_hugepage(unsigned long size)
+{
+	return __vmalloc_node_range(size, PMD_SIZE, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL, PAGE_KERNEL,
+				    VM_ALLOW_HUGE_VMAP | VM_HUGE_PAGES, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_hugepage);
+
+/**
+ * vmalloc_hugepage_user - allocate virtually contiguous hugetlb memory
+ * for userspace
+ * @size: allocation size
+ *
+ * Allocate enough huge pages to cover @size and map them into
+ * contiguous kernel virtual space. The resulting memory area
+ * is zeroed so it can be mapped to userspace without leaking data.
+ *
+ * The allocation size is aligned to PMD_SIZE automatically
+ */
+void *vmalloc_hugepage_user(unsigned long size)
+{
+	return __vmalloc_node_range(size, PMD_SIZE, VMALLOC_START, VMALLOC_END,
+				    GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
+				    VM_ALLOW_HUGE_VMAP | VM_USERMAP | VM_HUGE_PAGES, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_hugepage_user);
+
+static int vmap_hugepages_range_noflush(unsigned long addr, unsigned long end,
+			pgprot_t prot, struct page **pages, unsigned int page_shift)
+{
+	unsigned int i, nr = (end - addr) >> page_shift;
+
+	for (i = 0; i < nr; i++) {
+		int err;
+
+		err = vmap_range_noflush(addr, addr + (1UL << page_shift),
+					 __pa(page_address(pages[i])), prot,
+					 page_shift);
+		if (err)
+			return err;
+
+		addr += 1UL << page_shift;
+	}
+
+	return 0;
+}
+
+static int vmap_hugepages_range(unsigned long addr, unsigned long end,
+				pgprot_t prot, struct page **pages,
+				unsigned int page_shift)
+{
+	int err;
+
+	err = vmap_hugepages_range_noflush(addr, end, prot, pages, page_shift);
+	flush_cache_vmap(addr, end);
+
+	return err;
+}
+
+/**
+ * vmap_hugepage - map an array of huge pages into virtually contiguous space
+ * @pages: array of huge page pointers (only the header)
+ * @count: number of pages to map
+ * @flags: vm_area->flags
+ * @prot: page protection for the mapping
+ *
+ * Maps @count pages from @pages into contiguous kernel virtual
+ * space.
+ */
+void *vmap_hugepage(struct page **pages, unsigned int count,
+		    unsigned long flags, pgprot_t prot)
+{
+	struct vm_struct *area;
+	unsigned long size;		/* In bytes */
+
+	might_sleep();
+
+	if (count > totalram_pages())
+		return NULL;
+
+	size = (unsigned long)count << PMD_SHIFT;
+	area = __get_vm_area_node(size, PMD_SIZE, PMD_SHIFT, flags | VM_HUGE_PAGES,
+				  VMALLOC_START, VMALLOC_END,
+				  NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0));
+	if (!area)
+		return NULL;
+
+	if (vmap_hugepages_range((unsigned long)area->addr,
+				 (unsigned long)area->addr + size, prot,
+				 pages, PMD_SHIFT) < 0) {
+		vunmap(area->addr);
+		return NULL;
+	}
+
+	return area->addr;
+}
+EXPORT_SYMBOL(vmap_hugepage);
+
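
If I read vmap_hugepage() right, every @pages entry has to be the head page of a PMD_SIZE physically contiguous allocation. A hypothetical caller might look like the sketch below (my own example, not from the patch; error unwinding omitted):

	static void *example_vmap_two_hugepages(struct page **pages)
	{
		int i;

		for (i = 0; i < 2; i++) {
			/* Head page of a PMD_SIZE compound allocation. */
			pages[i] = alloc_pages(GFP_KERNEL | __GFP_COMP,
					       PMD_SHIFT - PAGE_SHIFT);
			if (!pages[i])
				return NULL;
		}

		return vmap_hugepage(pages, 2, VM_MAP, PAGE_KERNEL);
	}
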
+/**
+ * remap_vmalloc_hugepage_range_partial - map vmalloc hugepages
+ * to userspace
+ * @vma:	vma to cover
+ * @uaddr:	target user address to start at
+ * @kaddr:	virtual address of vmalloc hugepage kernel memory
+ * @size:	size of map area
+ *
+ * Returns:	0 for success, -Exxx on failure
+ *
+ * This function checks that @kaddr is a valid vmalloc'ed area,
+ * and that it is big enough to cover the range starting at
+ * @uaddr in @vma. Will return failure if that criteria isn't
+ * met.
+ *
+ * Similar to remap_pfn_range() (see mm/memory.c)
+ */
+int remap_vmalloc_hugepage_range_partial(struct vm_area_struct *vma, unsigned long uaddr,
+					 void *kaddr, unsigned long pgoff, unsigned long size)
+{
+	struct vm_struct *area;
+	unsigned long off;
+	unsigned long end_index;
+
+	if (check_shl_overflow(pgoff, PMD_SHIFT, &off))
+		return -EINVAL;
+
+	size = ALIGN(size, PMD_SIZE);
+
+	if (!IS_ALIGNED(uaddr, PMD_SIZE) || !IS_ALIGNED((unsigned long)kaddr, PMD_SIZE))
+		return -EINVAL;
+
+	area = find_vm_area(kaddr);
+	if (!area)
+		return -EINVAL;
+
+	if (!(area->flags & VM_USERMAP))
+		return -EINVAL;
+
+	if (check_add_overflow(size, off, &end_index) ||
+	    end_index > get_vm_area_size(area))
+		return -EINVAL;
+	kaddr += off;
+
+	do {
+		struct page *page = vmalloc_to_page(kaddr);
+		int ret;
+
+		ret = hugetlb_insert_hugepage_pte_by_pa(vma->vm_mm, uaddr,
+							vma->vm_page_prot, page_to_phys(page));
+		if (ret)
+			return ret;
+
+		uaddr += PMD_SIZE;
+		kaddr += PMD_SIZE;
+		size -= PMD_SIZE;
+	} while (size > 0);
+
+	vm_flags_set(vma, VM_DONTEXPAND | VM_DONTDUMP);
+
+	return 0;
+}
+EXPORT_SYMBOL(remap_vmalloc_hugepage_range_partial);
+
+/**
+ * remap_vmalloc_hugepage_range - map vmalloc hugepages to userspace
+ * @vma:	vma to cover (map full range of vma)
+ * @addr:	vmalloc memory
+ * @pgoff:	number of hugepages into addr before first page to map
+ *
+ * Returns:	0 for success, -Exxx on failure
+ *
+ * This function checks that addr is a valid vmalloc'ed area, and
+ * that it is big enough to cover the vma. Will return failure if
+ * that criteria isn't met.
+ *
+ * Similar to remap_pfn_range() (see mm/memory.c)
+ */
+int remap_vmalloc_hugepage_range(struct vm_area_struct *vma, void *addr,
+				 unsigned long pgoff)
+{
+	return remap_vmalloc_hugepage_range_partial(vma, vma->vm_start,
+						    addr, pgoff,
+						    vma->vm_end - vma->vm_start);
+}
+EXPORT_SYMBOL(remap_vmalloc_hugepage_range);
+#endif
+
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
 	__acquires(&vmap_purge_lock)
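
Also, for the remap path I assume the expected caller is a driver mmap handler, roughly like the following sketch (example_dev and its fields are made up here for illustration; the buffer comes from vmalloc_hugepage_user()):

	/* Hypothetical driver types, for illustration only. */
	struct example_dev {
		void *huge_buf;			/* from vmalloc_hugepage_user() */
		unsigned long huge_size;	/* PMD_SIZE-aligned length */
	};

	static int example_dev_mmap(struct file *file, struct vm_area_struct *vma)
	{
		struct example_dev *hdev = file->private_data;
		unsigned long len = vma->vm_end - vma->vm_start;

		if (len > hdev->huge_size)
			return -EINVAL;

		/* vma->vm_start must be PMD-aligned, or the helper returns -EINVAL. */
		return remap_vmalloc_hugepage_range(vma, hdev->huge_buf, 0);
	}

Please confirm whether that matches how the products use these interfaces.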