Zhou Guanghui (1): mm: reserve address range for svsp fs/hugetlbfs/inode.c | 13 +++++ include/linux/ascend_vm.h | 69 ++++++++++++++++++++++++++ include/linux/share_pool.h | 11 ++-- include/uapi/asm-generic/mman-common.h | 1 + mm/Makefile | 1 + mm/ascend_vm.c | 21 ++++++++ mm/mmap.c | 15 ++++-- 7 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 include/linux/ascend_vm.h create mode 100644 mm/ascend_vm.c -- 2.43.0
From: Zhou Guanghui <zhouguanghui1@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I7SZC2 ----------------------------------------------------------------- A range is reserved from the user address space to ensure that the process cannot apply for the address range. Signed-off-by: Zhou Guanghui <zhouguanghui1@huawei.com> --- fs/hugetlbfs/inode.c | 13 +++++ include/linux/ascend_vm.h | 69 ++++++++++++++++++++++++++ include/linux/share_pool.h | 11 ++-- include/uapi/asm-generic/mman-common.h | 1 + mm/Makefile | 1 + mm/ascend_vm.c | 21 ++++++++ mm/mmap.c | 15 ++++-- 7 files changed, 125 insertions(+), 6 deletions(-) create mode 100644 include/linux/ascend_vm.h create mode 100644 mm/ascend_vm.c diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c4f3c5d631f8..ebe4e0f60441 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -36,6 +36,7 @@ #include <linux/migrate.h> #include <linux/uio.h> #include <linux/dynamic_pool.h> +#include <linux/ascend_vm.h> #include <linux/uaccess.h> #include <linux/sched/mm.h> @@ -208,6 +209,9 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr, info.high_limit = arch_get_mmap_end(addr, len, flags); info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; + + ascend_mmap_get_area(&info, flags); + return vm_unmapped_area(&info); } @@ -224,6 +228,9 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base); info.align_mask = PAGE_MASK & ~huge_page_mask(h); info.align_offset = 0; + + ascend_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info); /* @@ -237,6 +244,9 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr, info.flags = 0; info.low_limit = current->mm->mmap_base; info.high_limit = arch_get_mmap_end(addr, len, flags); + + ascend_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info); } @@ -258,6 +268,9 @@ 
generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr, if (len > TASK_SIZE) return -ENOMEM; + if (ascend_mmap_check_addr(addr, len, flags)) + return -EINVAL; + if (flags & MAP_FIXED) { if (prepare_hugepage_range(file, addr, len)) return -EINVAL; diff --git a/include/linux/ascend_vm.h b/include/linux/ascend_vm.h new file mode 100644 index 000000000000..f099d933333f --- /dev/null +++ b/include/linux/ascend_vm.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASCEND_VM__ +#define __ASCEND_VM__ + +#include <uapi/linux/mman.h> +#include <linux/share_pool.h> + +#ifdef CONFIG_SVSP +extern struct static_key_false enable_mmap_svsp; + +/* skip 7T for stack */ +#define MMAP_SVSP_SKIP 0x070000000000UL +/* reserve 1T size*/ +#define SVSP_MMAP_SIZE 0x010000000000UL +#define SVSP_MMAP_END (TASK_SIZE - MMAP_SVSP_SKIP) +#define SVSP_MMAP_BASE (SVSP_MMAP_END - SVSP_MMAP_SIZE) + +static inline int svsp_mmap_check(unsigned long addr, unsigned long len) +{ + /* svsp does not support fix addr, so we do not check flag here */ + if (static_branch_likely(&enable_mmap_svsp) && + addr + len > SVSP_MMAP_BASE && addr < SVSP_MMAP_END) + return -EINVAL; + else + return 0; +} + +static inline void svsp_mmap_get_area(struct vm_unmapped_area_info *info, + unsigned long flags) +{ + if (static_branch_likely(&enable_mmap_svsp) && (flags & MAP_SVSP)) { + info->low_limit = SVSP_MMAP_BASE; + info->high_limit = SVSP_MMAP_END; + } else { + info->high_limit = min(info->high_limit, SVSP_MMAP_BASE); + } +} + +#else /* CONFIG_SVSP */ +static inline int svsp_mmap_check(unsigned long addr, unsigned long len) +{ + return 0; +} + +static inline void svsp_mmap_get_area(struct vm_unmapped_area_info *info, + unsigned long flags) +{ } +#endif /* CONFIG_SVSP */ + +static inline void ascend_mmap_get_area(struct vm_unmapped_area_info *info, + unsigned long flags) +{ + svsp_mmap_get_area(info, flags); + sp_area_work_around(info, flags); +} + +static inline int 
ascend_mmap_check_addr(unsigned long addr, unsigned long len, + unsigned long flags) +{ + if (sp_check_mmap_addr(addr, flags)) + return -EINVAL; + + if (svsp_mmap_check(addr, len)) + return -EINVAL; + + return 0; +} + +#endif /* __ASCEND_VM__ */ diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index f177a9e9d204..2db4689616e4 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -131,9 +131,13 @@ static inline void sp_mm_clean(struct mm_struct *mm) __sp_mm_clean(mm); } -static inline void sp_area_work_around(struct vm_unmapped_area_info *info) +static inline void sp_area_work_around(struct vm_unmapped_area_info *info, + unsigned long flags) { - if (sp_is_enabled()) + /* the MAP_SVSP couldn't work with MAP_SHARE_POOL. In addition, the + * address ranges corresponding to the two flags must not overlap. + */ + if (sp_is_enabled() && !(flags & MAP_SVSP)) info->high_limit = min(info->high_limit, MMAP_SHARE_POOL_START); } @@ -244,7 +248,8 @@ static inline bool sp_is_enabled(void) return false; } -static inline void sp_area_work_around(struct vm_unmapped_area_info *info) +static inline void sp_area_work_around(struct vm_unmapped_area_info *info, + unsigned long flags) { } diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h index 14e5498efd7a..096c9018d2d5 100644 --- a/include/uapi/asm-generic/mman-common.h +++ b/include/uapi/asm-generic/mman-common.h @@ -30,6 +30,7 @@ #define MAP_SYNC 0x080000 /* perform synchronous page faults for the mapping */ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ +#define MAP_SVSP 0x400000 #define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be * uninitialized */ diff --git a/mm/Makefile b/mm/Makefile index 08be0241b94b..cd0df6e57417 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -141,6 +141,7 @@ obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o 
obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o obj-$(CONFIG_SHARE_POOL) += share_pool.o +obj-$(CONFIG_ASCEND_FEATURES) += ascend_vm.o obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o obj-$(CONFIG_ETMEM) += etmem.o obj-$(CONFIG_PAGE_CACHE_LIMIT) += page_cache_limit.o diff --git a/mm/ascend_vm.c b/mm/ascend_vm.c new file mode 100644 index 000000000000..84c3612fb9ca --- /dev/null +++ b/mm/ascend_vm.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mm_inline.h> +#include <linux/ascend_vm.h> + +#ifdef CONFIG_SVSP + +DEFINE_STATIC_KEY_FALSE(enable_mmap_svsp); + +static int __init ascend_enable_mmap_svsp(char *s) +{ + static_branch_enable(&enable_mmap_svsp); + + pr_info("Ascend enable svsp mmap features\n"); + + return 1; +} +__setup("enable_mmap_svsp", ascend_enable_mmap_svsp); + +#endif diff --git a/mm/mmap.c b/mm/mmap.c index 8b1a182eebb7..f0baaec7e3ad 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -48,6 +48,7 @@ #include <linux/sched/mm.h> #include <linux/ksm.h> #include <linux/share_pool.h> +#include <linux/ascend_vm.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> @@ -1697,7 +1698,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) { unsigned long addr; - sp_area_work_around(info); if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) addr = unmapped_area_topdown(info); else @@ -1731,7 +1731,7 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr, if (len > mmap_end - mmap_min_addr) return -ENOMEM; - if (sp_check_mmap_addr(addr, flags)) + if (ascend_mmap_check_addr(addr, len, flags)) return -EINVAL; if (flags & MAP_FIXED) @@ -1752,6 +1752,9 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr, info.high_limit = mmap_end; info.align_mask = 0; info.align_offset = 0; + + ascend_mmap_get_area(&info, flags); + return vm_unmapped_area(&info); } @@ -1783,7 +1786,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, if (len > 
mmap_end - mmap_min_addr) return -ENOMEM; - if (sp_check_mmap_addr(addr, flags)) + if (ascend_mmap_check_addr(addr, len, flags)) return -EINVAL; if (flags & MAP_FIXED) @@ -1805,6 +1808,9 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.high_limit = arch_get_mmap_base(addr, mm->mmap_base); info.align_mask = 0; info.align_offset = 0; + + ascend_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info); /* @@ -1818,6 +1824,9 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr, info.flags = 0; info.low_limit = TASK_UNMAPPED_BASE; info.high_limit = mmap_end; + + ascend_mmap_get_area(&info, flags); + addr = vm_unmapped_area(&info); } -- 2.43.0
hulk inclusion category: feature bugzilla: NA ---------------------------------------- Since the ascend hardware only supports a fixed offset for nc mapping, devm allocates an 8T address space with a fixed start of 0x100000000000, and ascend svsp reserves 8T after the devm end. We define the offset as 16T, which is the same as the share pool size. Virtual address space has been adjusted as follows: Virtual address space before: 0x100000000000: devm start 0x180000000000: devm end and svsp start 0x200000000000: svsp end 0xe00000000000: nc map start 0xe80000000000: share pool normal start and nc map end Virtual address space after: 0x100000000000: devm start 0x180000000000: devm end and svsp start 0x200000000000: svsp end and nc map start 0x280000000000: nc map end 0xe00000000000: share pool normal start 0xe7f000000000: share pool normal end and share pool ro start 0xe80000000000: share pool ro end and share pool dvpp start 0xf00000000000: share pool dvpp end and share pool nc map start 0xf80000000000: share pool nc map end Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com> Signed-off-by: Yin Tirui <yintirui@huawei.com> --- arch/arm64/include/asm/ascend_vm.h | 24 +++++++ arch/arm64/mm/ascend_vm.c | 34 +++++++++ fs/Kconfig | 2 +- include/linux/ascend_vm.h | 28 ++++++++ include/linux/share_pool.h | 13 +--- mm/mmap.c | 70 +++++++++++++++++++ mm/share_pool.c | 107 +++++++++++++++++++++++++++++ 7 files changed, 267 insertions(+), 11 deletions(-) diff --git a/arch/arm64/include/asm/ascend_vm.h b/arch/arm64/include/asm/ascend_vm.h index ab2a23ba8b91..1019e0fb75fb 100644 --- a/arch/arm64/include/asm/ascend_vm.h +++ b/arch/arm64/include/asm/ascend_vm.h @@ -6,12 +6,25 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_SHARE_POOL +DECLARE_STATIC_KEY_FALSE(share_pool_enabled_key); +static inline bool sp_is_enabled(void) +{ + return static_branch_likely(&share_pool_enabled_key); +} +#endif #ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW DECLARE_STATIC_KEY_FALSE(enable_pt_multi_view); #define 
N_PT_MULTI_VIEW_MMAP_END 0x280000000000UL #define N_PT_MULTI_VIEW_MMAP_SIZE 0x080000000000UL /* 8T */ #define N_PT_MULTI_VIEW_MMAP_START (N_PT_MULTI_VIEW_MMAP_END - N_PT_MULTI_VIEW_MMAP_SIZE) +#ifdef CONFIG_SHARE_POOL +#define MMAP_SHARE_POOL_NC_START 0xf00000000000UL /* share pool end */ +#define MMAP_SHARE_POOL_NC_END 0xf80000000000UL +#define MMAP_SHARE_POOL_NC_OFFSET 0x100000000000UL /* 16T */ +#endif + #define ASCEND_PTMV_PGD_ORDER 1 static inline int ascend_pt_multi_view_enabled(void) @@ -28,6 +41,13 @@ static inline unsigned long ascend_p4d_offset(unsigned long addr) return (addr >> P4D_SHIFT) * sizeof(p4d_t); } +#ifdef CONFIG_SHARE_POOL +static inline bool ascend_spnc_offset_valid(unsigned long offset) +{ + return offset >= ascend_p4d_offset(MMAP_SHARE_POOL_NC_START) && + offset < ascend_p4d_offset(MMAP_SHARE_POOL_NC_END); +} +#endif static inline bool ascend_nc_offset_valid(unsigned long offset) { @@ -46,6 +66,10 @@ static inline bool ascend_check_p4d_nocopy(p4d_t *p4dp) return true; offset = ((unsigned long)p4dp) & ~PAGE_MASK; +#ifdef CONFIG_SHARE_POOL + if (sp_is_enabled() && ascend_spnc_offset_valid(offset)) + return true; +#endif return ascend_nc_offset_valid(offset); } diff --git a/arch/arm64/mm/ascend_vm.c b/arch/arm64/mm/ascend_vm.c index a33c7046f57f..5c5e119193a7 100644 --- a/arch/arm64/mm/ascend_vm.c +++ b/arch/arm64/mm/ascend_vm.c @@ -31,6 +31,20 @@ static inline bool ascend_check_range_valid(unsigned long addr, unsigned long le { struct ascend_vm_area range; +#ifdef CONFIG_SHARE_POOL + if (!sp_is_enabled()) + goto skip; + + range.va_start = MMAP_SHARE_POOL_NC_START; + range.va_end = MMAP_SHARE_POOL_NC_END; + + if ((flags & MAP_NPTMV) && (flags & MAP_SHARE_POOL)) + return check_range_valid(addr, addr + len, &range); + + if (check_range_overlap(addr, addr + len, &range)) + return false; +skip: +#endif range.va_start = N_PT_MULTI_VIEW_MMAP_START; range.va_end = N_PT_MULTI_VIEW_MMAP_END; @@ -66,10 +80,30 @@ void n_pt_multi_view_mmap_get_area(struct 
vm_unmapped_area_info *info, info->high_limit = N_PT_MULTI_VIEW_MMAP_END; info->flags |= VM_UNMAPPED_AREA_NC; } +#ifdef CONFIG_SHARE_POOL + if (!sp_is_enabled()) + return; + + if ((flags & MAP_NPTMV) && (flags & MAP_SHARE_POOL)) { + info->low_limit = MMAP_SHARE_POOL_NC_START; + info->high_limit = MMAP_SHARE_POOL_NC_END; + } +#endif +} + +#ifdef CONFIG_SHARE_POOL +static bool ascend_spnc_map_valid(unsigned long addr) +{ + return addr >= MMAP_SHARE_POOL_NC_START && addr < MMAP_SHARE_POOL_NC_END; } +#endif bool ascend_nc_map_valid(unsigned long addr) { +#ifdef CONFIG_SHARE_POOL + if (sp_is_enabled() && ascend_spnc_map_valid(addr)) + return true; +#endif return addr >= N_PT_MULTI_VIEW_MMAP_START && addr < N_PT_MULTI_VIEW_MMAP_END; } diff --git a/fs/Kconfig b/fs/Kconfig index f80b4f76093c..fb395d8b060e 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -300,7 +300,7 @@ config HUGETLB_PAGE_OPTIMIZE_VMEMMAP config HUGETLB_PMD_PAGE_TABLE_SHARING def_bool HUGETLB_PAGE - depends on ARCH_WANT_HUGE_PMD_SHARE && SPLIT_PMD_PTLOCKS + depends on ARCH_WANT_HUGE_PMD_SHARE && SPLIT_PMD_PTLOCKS && !ASCEND_PAGE_TABLE_MULTI_VIEW config HUGETLB_ALLOC_LIMIT bool "Limit hugeTLB pages allocation" diff --git a/include/linux/ascend_vm.h b/include/linux/ascend_vm.h index fd596e77b0f3..f959800492e0 100644 --- a/include/linux/ascend_vm.h +++ b/include/linux/ascend_vm.h @@ -95,6 +95,34 @@ static inline int ascend_mmap_check_addr(unsigned long addr, unsigned long len, #endif #ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + +#define ASCEND_VMA_INFO_LOW_END N_PT_MULTI_VIEW_MMAP_START +#define ASCEND_VMA_INFO_HIGH_BASE N_PT_MULTI_VIEW_MMAP_END +#ifdef CONFIG_SHARE_POOL +#define ASCEND_VMA_INFO_HIGH_END MMAP_SHARE_POOL_NC_START +#endif + +static inline void ascend_set_vma_info_low(struct vm_unmapped_area_info *info) +{ + info->high_limit = min(info->high_limit, ASCEND_VMA_INFO_LOW_END); +} + +static inline void ascend_set_vma_info_high(struct vm_unmapped_area_info *info) +{ + info->low_limit = 
max(info->low_limit, ASCEND_VMA_INFO_HIGH_BASE); +#ifdef CONFIG_SHARE_POOL + if (!sp_is_enabled()) + return; + + info->high_limit = min(info->high_limit, ASCEND_VMA_INFO_HIGH_END); +#endif +} + +static inline bool ascend_vm_info_valid(struct vm_unmapped_area_info *info) +{ + return info->flags & ASCEND_VM_INFO_MASK; +} + void n_pt_multi_view_mmap_get_area(struct vm_unmapped_area_info *info, unsigned long flags); #else diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 2db4689616e4..a3caedb99c5c 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -50,8 +50,6 @@ #define MAX_DEVID 8 /* the max num of Da-vinci devices */ -extern struct static_key_false share_pool_enabled_key; - struct sp_walk_data { struct page **pages; unsigned int page_count; @@ -74,8 +72,8 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DVPP_SIZE 0x80000000000UL /* 16G size */ #define MMAP_SHARE_POOL_16G_SIZE 0x400000000UL -/* skip 8T for stack */ -#define MMAP_SHARE_POOL_SKIP 0x80000000000UL +/* skip 16T for stack */ +#define MMAP_SHARE_POOL_SKIP 0x100000000000UL #define MMAP_SHARE_POOL_END (TASK_SIZE - MMAP_SHARE_POOL_SKIP) #define MMAP_SHARE_POLL_DVPP_END (MMAP_SHARE_POOL_END) /* MMAP_SHARE_POOL_DVPP_START should be align to 16G */ @@ -119,11 +117,6 @@ extern bool mg_is_sharepool_addr(unsigned long addr); extern int mg_sp_id_of_current(void); -static inline bool sp_is_enabled(void) -{ - return static_branch_likely(&share_pool_enabled_key); -} - extern void __sp_mm_clean(struct mm_struct *mm); static inline void sp_mm_clean(struct mm_struct *mm) { @@ -137,7 +130,7 @@ static inline void sp_area_work_around(struct vm_unmapped_area_info *info, /* the MAP_SVSP couldn't work with MAP_SHARE_POOL. In addition, the * address ranges corresponding to the two flags must not overlap. 
*/ - if (sp_is_enabled() && !(flags & MAP_SVSP)) + if (sp_is_enabled() && !(flags & (MAP_SVSP | MAP_NPTMV))) info->high_limit = min(info->high_limit, MMAP_SHARE_POOL_START); } diff --git a/mm/mmap.c b/mm/mmap.c index fbdefa4af6c3..a0c230400223 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1734,6 +1734,71 @@ static unsigned long unmapped_area_topdown(struct vm_unmapped_area_info *info) return gap; } +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + +unsigned long vm_unmapped_area_no_reserve(struct vm_unmapped_area_info *info) +{ + unsigned long addr; + unsigned long low_limit, high_limit; + + low_limit = info->low_limit; + high_limit = info->high_limit; + + if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) { + ascend_set_vma_info_high(info); + addr = unmapped_area_topdown(info); + if (IS_ERR_VALUE(addr)) { + info->low_limit = low_limit; +#ifdef CONFIG_SHARE_POOL + if (sp_is_enabled()) + info->high_limit = high_limit; +#endif + ascend_set_vma_info_low(info); + addr = unmapped_area_topdown(info); + return addr; + } + + return addr; + } + + ascend_set_vma_info_low(info); + addr = unmapped_area(info); + if (IS_ERR_VALUE(addr)) { + info->high_limit = high_limit; + ascend_set_vma_info_high(info); + addr = unmapped_area(info); + return addr; + } + + return addr; +} + +static unsigned long vm_unmapped_area_common(struct vm_unmapped_area_info *info) +{ + unsigned long addr; + + if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) + addr = unmapped_area_topdown(info); + else + addr = unmapped_area(info); + + return addr; +} + +static unsigned long ascend_vm_unmapped_area(struct vm_unmapped_area_info *info) +{ + unsigned long addr; + + if (ascend_vm_info_valid(info)) + addr = vm_unmapped_area_common(info); + else + addr = vm_unmapped_area_no_reserve(info); + + trace_vm_unmapped_area(addr, info); + return addr; +} +#endif + /* * Search for an unmapped address range. 
* @@ -1747,6 +1812,11 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info) { unsigned long addr; +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + if (ascend_pt_multi_view_enabled()) + return ascend_vm_unmapped_area(info); +#endif + if (info->flags & VM_UNMAPPED_AREA_TOPDOWN) addr = unmapped_area_topdown(info); else diff --git a/mm/share_pool.c b/mm/share_pool.c index 548b85e61989..e2369747c983 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -704,6 +704,9 @@ struct sp_area { pid_t applier; /* the original applier process */ int preferred_node_id; /* memory node */ struct work_struct work; +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + unsigned long nc_va_start; /* nocache vma start */ +#endif }; static unsigned long spa_size(struct sp_area *spa) @@ -1637,6 +1640,9 @@ static struct sp_area *sp_area_alloc(unsigned long size, unsigned long flags, spa->applier = applier; spa->preferred_node_id = node_id; atomic_set(&spa->use_count, 1); +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + spa->nc_va_start = spa->va_start + MMAP_SHARE_POOL_NC_OFFSET; +#endif /* the link location could be saved before, to be optimized */ spm_insert_area(mapping, spa); @@ -1802,6 +1808,14 @@ static void __sp_free(struct sp_area *spa, struct mm_struct *stop) if (mm == stop) break; sp_munmap(mm, spa->va_start, spa_size(spa)); +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + if (ascend_pt_multi_view_enabled()) { + if (spa->type != SPA_TYPE_ALLOC || spa->flags & SP_DVPP) + continue; + + sp_munmap(mm, spa->nc_va_start, spa_size(spa)); + } +#endif } } @@ -2075,6 +2089,62 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, return ret; } +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW +#ifndef PTE_PBHA0 +#define PTE_PBHA0 (_AT(pteval_t, 1) << 59) /* PBHA 59 bit */ +#endif +#define sp_pgprot_writethrough(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, \ + PTE_ATTRINDX(MT_NORMAL_WT) | PTE_PXN | PTE_UXN | PTE_PBHA0) + +static int sp_nc_mmap(struct mm_struct *mm, struct 
sp_area *spa, unsigned long prot) +{ + int ret; + unsigned long populate = 0; + unsigned long size = spa_size(spa); + unsigned long addr = spa->nc_va_start; + unsigned long flags = MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_POPULATE | + MAP_SHARE_POOL | MAP_NPTMV; + unsigned long vm_flags = VM_NORESERVE | VM_SHARE_POOL | VM_DONTCOPY; + unsigned long pgoff = addr_offset(spa) >> PAGE_SHIFT; + struct vm_area_struct *vma; + + down_write(&mm->mmap_lock); + if (spa->flags & SP_PROT_RO) + prot &= ~PROT_WRITE; + + atomic_inc(&spa->use_count); + addr = __do_mmap_mm(mm, spa_file(spa), addr, size, prot, flags, vm_flags, pgoff, + &populate, NULL); + if (IS_ERR_VALUE(addr)) { + atomic_dec(&spa->use_count); + up_write(&mm->mmap_lock); + return -ENOMEM; + } + + BUG_ON(addr != spa->nc_va_start); + vma = find_vma(mm, addr); + vma->spa = spa; + + if (prot & PROT_WRITE) + vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & + vma->vm_page_prot.pgprot) | PTE_DIRTY); + else + vm_flags_clear(vma, VM_MAYWRITE); + + vma->vm_page_prot = sp_pgprot_writethrough(vma->vm_page_prot); + up_write(&mm->mmap_lock); + + ret = do_mm_populate(mm, addr, populate, 0); + if (ret) { + down_write(&mm->mmap_lock); + do_munmap(mm, addr, spa_size(spa), NULL); + up_write(&mm->mmap_lock); + } + + return ret; +} +#endif static int sp_k2u_populate(struct mm_struct *mm, struct sp_area *spa); #define SP_SKIP_ERR 1 @@ -2122,6 +2192,29 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, } mmap_write_unlock(mm); } + +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + if (ret) + goto out; + + if (!ascend_pt_multi_view_enabled()) + goto out; + + /* bypass dvpp allocation */ + if (spa->flags & SP_DVPP) + goto out; + + if (spa->type != SPA_TYPE_ALLOC) + goto out; + + ret = sp_nc_mmap(mm, spa, prot); + if (ret) { + down_write(&mm->mmap_lock); + do_munmap(mm, mmap_addr, spa_size(spa), NULL); + up_write(&mm->mmap_lock); + } +out: +#endif mmput_async(mm); return ret; @@ -3001,8 +3094,22 @@ bool 
mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid) } EXPORT_SYMBOL_GPL(mg_sp_config_dvpp_range); +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW +static bool is_spnc_reserve_addr(unsigned long addr) +{ + if (!ascend_pt_multi_view_enabled()) + return false; + + return addr >= MMAP_SHARE_POOL_NC_START && addr < MMAP_SHARE_POOL_NC_END; +} +#endif + static bool is_sp_reserve_addr(unsigned long addr) { +#ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW + if (is_spnc_reserve_addr(addr)) + return true; +#endif return addr >= MMAP_SHARE_POOL_START && addr < MMAP_SHARE_POOL_END; } -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> hulk inclusion category: feature category: bugfix bugzilla: NA --------------------------------------------- Prepare for later patch and no logic change here. Fixes: 00c7c3d64806 ("hugetlb: support auto demote and promote") Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index e2369747c983..100446279620 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2176,7 +2176,8 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, return (int)mmap_addr; } - if (spa->type == SPA_TYPE_ALLOC) { + switch (spa->type) { + case SPA_TYPE_ALLOC: mmap_write_unlock(mm); ret = sp_alloc_populate(mm, spa, populate, ac); if (ret) { @@ -2184,13 +2185,18 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, do_munmap(mm, mmap_addr, spa_size(spa), NULL); mmap_write_unlock(mm); } - } else { + break; + case SPA_TYPE_K2SPG: + case SPA_TYPE_K2TASK: ret = sp_k2u_populate(mm, spa); if (ret) { do_munmap(mm, mmap_addr, spa_size(spa), NULL); pr_info("k2u populate failed, %d\n", ret); } mmap_write_unlock(mm); + break; + default: + break; } #ifdef CONFIG_ASCEND_PAGE_TABLE_MULTI_VIEW -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> hulk inclusion category: feature category: bugfix bugzilla: NA ---------------------------------------- Don't use hugetlb_no_page() to allocate sharepool hugepages. Because we want to demote 1G-sized hugetlb pages while there is no 2M-sized hugetlb pages. We allocate hugepages directly via alloc_hugetlb_folio_nodemask_size() and map those hugepages via hugetlb_insert_hugepage_pte(), just the same as what we do in k2u. Fixes: 00c7c3d64806 ("hugetlb: support auto demote and promote") Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 100446279620..9ff7a2da3dfc 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -700,7 +700,10 @@ struct sp_area { struct sp_group *spg; struct sp_mapping *spm; /* where spa born from */ enum spa_type type; - unsigned long kva; /* shared kva */ + union { + unsigned long kva; /* shared kva */ + struct page **pages; /* for hugetlb alloc */ + }; pid_t applier; /* the original applier process */ int preferred_node_id; /* memory node */ struct work_struct work; @@ -1497,6 +1500,65 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa) return atomic_dec_and_test(&spa->spg->spa_num); } +static bool sp_area_alloc_hugepage_enable __read_mostly = true; + +static int __init sp_area_alloc_hugepage_disable(char *p) +{ + sp_area_alloc_hugepage_enable = false; + + return 1; +} +__setup("sp_area_alloc_hugepage_disable", sp_area_alloc_hugepage_disable); + +static bool sp_area_need_hugepage(struct sp_area *spa) +{ + return sp_area_alloc_hugepage_enable && spa->type == SPA_TYPE_ALLOC && spa->is_hugepage; +} + +static bool sp_area_alloc_hugepages(struct sp_area *spa, int nid, nodemask_t *nodemask) +{ + int i; + struct page **pages; + int nr_pages = ALIGN(spa_size(spa), PMD_SIZE) / PMD_SIZE; + + pages = 
kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return false; + + for (i = 0; i < nr_pages; i++) { + pages[i] = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, + nid, nodemask); + if (!pages[i]) { + while (i--) + put_page(pages[i]); + kvfree(pages); + return false; + } + memset(page_to_virt(pages[i]), 0, PMD_SIZE); + } + + spa->pages = pages; + + return true; +} + +static void sp_area_free_hugepages(struct sp_area *spa) +{ + int nr_pages = ALIGN(spa->real_size, PMD_SIZE) / PMD_SIZE; + + if (!sp_area_need_hugepage(spa)) + return; + + if (!spa->pages) + return; + + while (nr_pages--) + put_page(spa->pages[nr_pages]); + + kvfree(spa->pages); + spa->pages = NULL; +} + /** * sp_area_alloc() - Allocate a region of VA from the share pool. * @size: the size of VA to allocate. @@ -1721,6 +1783,7 @@ static void sp_area_free(struct sp_area *spa) rb_erase(&spa->rb_node, &spm->area_root); spin_unlock(&spm->sp_mapping_lock); RB_CLEAR_NODE(&spa->rb_node); + sp_area_free_hugepages(spa); kfree(spa); } @@ -1924,11 +1987,32 @@ int mg_sp_free(unsigned long addr, int id) } EXPORT_SYMBOL_GPL(mg_sp_free); +static int sp_vma_insert_hugepages(struct vm_area_struct *vma, struct page **pages, + unsigned long uaddr, unsigned long size) +{ + int i = 0; + + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); + do { + int ret = hugetlb_insert_hugepage_pte(vma->vm_mm, uaddr, + vma->vm_page_prot, pages[i]); + if (ret) + return ret; + + uaddr += PMD_SIZE; + size -= PMD_SIZE; + i++; + } while (size > 0); + + return 0; +} + /* wrapper of __do_mmap() and the caller must hold mmap_write_lock(mm). 
*/ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, struct sp_area *spa, unsigned long *populate, unsigned long prot) { + int ret = 0; unsigned long addr = spa->va_start; unsigned long size = spa_size(spa); unsigned long flags = MAP_FIXED_NOREPLACE | MAP_SHARED | MAP_POPULATE | @@ -1959,6 +2043,14 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, else vm_flags_clear(vma, VM_MAYWRITE); + if (sp_area_need_hugepage(spa)) { + ret = sp_vma_insert_hugepages(vma, spa->pages, addr, size); + if (ret) { + do_munmap(mm, addr, size, NULL); + return (unsigned long)ret; + } + } + return addr; } @@ -2156,7 +2248,7 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, unsigned long prot, struct sp_alloc_context *ac, const char *str) { - int ret; + int ret = 0; unsigned long mmap_addr; unsigned long populate = 0; @@ -2179,6 +2271,13 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, switch (spa->type) { case SPA_TYPE_ALLOC: mmap_write_unlock(mm); + /* + * If spa of SP_TYPE_ALLOC has unzero pages, we must have + * populated it in sp_mmap() before. So just break and don't + * pouplate it again. + */ + if (spa->pages) + break; ret = sp_alloc_populate(mm, spa, populate, ac); if (ret) { mmap_write_lock(mm); @@ -2234,6 +2333,10 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context * struct mm_struct *mm; bool reach_current = false; + if (sp_area_need_hugepage(spa) && + !sp_area_alloc_hugepages(spa, ac->preferred_node_id, ac->nodemask)) + return -ENOMEM; + mmap_ret = sp_map_spa_to_mm(current->mm, spa, spg_node->prot, ac, "sp_alloc"); if (mmap_ret) { /* Don't skip error for current process */ -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> hulk inclusion category: feature category: bugfix bugzilla: NA ---------------------------------------- Do the same in sp_nc_mmap() as we do in sp_mmap(). Fixes: 00c7c3d64806 ("hugetlb: support auto demote and promote") Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 9ff7a2da3dfc..e359d188ebba 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2225,13 +2225,19 @@ static int sp_nc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned long p vm_flags_clear(vma, VM_MAYWRITE); vma->vm_page_prot = sp_pgprot_writethrough(vma->vm_page_prot); - up_write(&mm->mmap_lock); - - ret = do_mm_populate(mm, addr, populate, 0); - if (ret) { - down_write(&mm->mmap_lock); - do_munmap(mm, addr, spa_size(spa), NULL); + if (sp_area_need_hugepage(spa)) { + ret = sp_vma_insert_hugepages(vma, spa->pages, addr, size); + if (ret) + do_munmap(mm, addr, spa_size(spa), NULL); up_write(&mm->mmap_lock); + } else { + up_write(&mm->mmap_lock); + ret = do_mm_populate(mm, addr, populate, 0); + if (ret) { + down_write(&mm->mmap_lock); + do_munmap(mm, addr, spa_size(spa), NULL); + up_write(&mm->mmap_lock); + } } return ret; -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> hulk inclusion category: feature category: bugfix bugzilla: NA ---------------------------------------- Allocate buddy pages if no hugetlbfs pages are reserved. Here we don't need to set vm.nr_overcommit_hugepages. Even if it is set to non-zero, we just ignore it and allocate hugepages from buddy. Fixes: 00c7c3d64806 ("hugetlb: support auto demote and promote") Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/share_pool.c b/mm/share_pool.c index e359d188ebba..fb662732f671 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1528,6 +1528,9 @@ static bool sp_area_alloc_hugepages(struct sp_area *spa, int nid, nodemask_t *no for (i = 0; i < nr_pages; i++) { pages[i] = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, nid, nodemask); + if (!pages[i]) + pages[i] = __alloc_pages(GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ACCOUNT, + get_order(PMD_SIZE), nid, nodemask); if (!pages[i]) { while (i--) put_page(pages[i]); -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> hulk inclusion category: feature bugzilla: NA ---------------------------------------- This is used to allocate a 2M-sized compound page from buddy. It acts like a hugetlbfs hugepage in that it is accounted in the HugePages_Total item in /proc/meminfo. HPageTemporary is set so that the page will be returned to buddy after its refcount reaches zero. Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- include/linux/hugetlb.h | 25 +++++++++++++++++++++++++ mm/hugetlb.c | 39 +++++++++++++++++++++++++++++++++++++++ mm/share_pool.c | 4 ++-- 3 files changed, 66 insertions(+), 2 deletions(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 46785a6cd9d6..038c6343749b 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -317,6 +317,9 @@ static inline int hugetlb_insert_hugepage_pte_size(struct mm_struct *mm, unsigne struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size); struct folio *alloc_hugetlb_folio_nodemask_size(unsigned long size, int nid, nodemask_t *nodemask); +struct folio *alloc_temporary_hugetlb_folio_nodemask(int nid, nodemask_t *nodemask, + gfp_t gfp_mask); +struct folio *alloc_temporary_hugetlb_folio(int nid, gfp_t gfp_mask); #else static inline struct folio *alloc_hugetlb_folio_nodemask_size(unsigned long size, int nid, nodemask_t *nodemask) @@ -328,6 +331,17 @@ static inline struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size { return NULL; } + +static inline struct folio *alloc_temporary_hugetlb_folio_nodemask(int nid, + nodemask_t *nodemask, gfp_t gfp_mask) +{ + return NULL; +} + +static inline struct folio *alloc_temporary_hugetlb_folio(int nid, gfp_t gfp_mask) +{ + return NULL; +} #endif #else /* !CONFIG_HUGETLB_PAGE */ @@ -568,6 +582,17 @@ static inline struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size return NULL; } +static inline struct folio *alloc_temporary_hugetlb_folio_nodemask(int nid, + nodemask_t
*nodemask, gfp_t gfp_mask) +{ + return NULL; +} + +static inline struct folio *alloc_temporary_hugetlb_folio(int nid, gfp_t gfp_mask) +{ + return NULL; +} + static inline void hugetlb_split(struct vm_area_struct *vma, unsigned long addr) {} #endif /* !CONFIG_HUGETLB_PAGE */ diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a1e096e48e83..f782af1c8ce8 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -8444,4 +8444,43 @@ struct folio *alloc_hugetlb_folio_size(int nid, unsigned long size) return alloc_hugetlb_folio_nodemask_size(size, nid, &nodemask); } EXPORT_SYMBOL(alloc_hugetlb_folio_size); + +struct folio *alloc_temporary_hugetlb_folio_nodemask(int nid, nodemask_t *nodemask, + gfp_t gfp_mask) +{ + struct hstate *h; + struct folio *folio; + + h = size_to_hstate(PMD_SIZE); + if (!h) + return NULL; + + folio = only_alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nodemask, NULL); + if (!folio) + return NULL; + + spin_lock_irq(&hugetlb_lock); + __prep_account_new_huge_page(h, folio_nid(folio)); + spin_unlock_irq(&hugetlb_lock); + + folio_ref_unfreeze(folio, 1); + /* + * We do not account these pages as surplus because they are only + * temporary and will be released properly on the last reference + */ + folio_set_hugetlb_temporary(folio); + + return folio; +} + +struct folio *alloc_temporary_hugetlb_folio(int nid, gfp_t gfp_mask) +{ + nodemask_t nodemask; + + nodes_clear(nodemask); + node_set(nid, nodemask); + + return alloc_temporary_hugetlb_folio_nodemask(nid, &nodemask, gfp_mask); +} +EXPORT_SYMBOL(alloc_temporary_hugetlb_folio); #endif diff --git a/mm/share_pool.c b/mm/share_pool.c index fb662732f671..58aede9b3325 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1529,8 +1529,8 @@ static bool sp_area_alloc_hugepages(struct sp_area *spa, int nid, nodemask_t *no pages[i] = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, nid, nodemask); if (!pages[i]) - pages[i] = __alloc_pages(GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ACCOUNT, - get_order(PMD_SIZE), nid, 
nodemask); + pages[i] = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, + nodemask, GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ACCOUNT); if (!pages[i]) { while (i--) put_page(pages[i]); -- 2.43.0
From: Wang Wensheng <wangwensheng4@huawei.com> hulk inclusion category: feature bugzilla: NA ---------------------------------------- This check is used to indicate that the caller is freeing an sp_area allocated by another process. This is allowed as long as the caller is in the same group. But current->mm is NULL in do_exit(), so we cannot judge whether the caller is in the group or not. Previously we returned an error in this case; this patch changes that to a rate-limited warning, because if the process is killed with SIGKILL, the driver can free the sharepool memory only in the release hook of the xsmem device file, which is called in do_exit(). Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 58aede9b3325..5cd4efce8f68 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1942,12 +1942,9 @@ static int sp_free_inner(unsigned long addr, int spg_id, bool is_sp_free) goto drop_spa; } - if (!current->mm && spa->applier != current->tgid) { - ret = -EPERM; - pr_err("%s, free a spa allocated by other process(%d), current(%d)\n", + if (!current->mm && spa->applier != current->tgid) + pr_warn_ratelimited("%s, free a spa allocated by other process(%d), current(%d)\n", str, spa->applier, current->tgid); - goto drop_spa; - } __sp_free(spa, NULL); if (spa->type == SPA_TYPE_ALLOC) -- 2.43.0
From: Yuan Can <yuancan@huawei.com> hulk inclusion category: feature category: bugfix bugzilla: NA ------------------ This log was used to notify the user that they are using sharepool in a non-recommended manner, i.e. allocating memory in one process and freeing it in another. But this use case is needed in the real world, and the message seems useless and confuses the user. It also produces too much log output in the mdc scenario. Just delete this log. Fixes: cda50cb21293 ("mm/sharepool: support fork() and exit() to handle the mm") Signed-off-by: Yuan Can <yuancan@huawei.com> Signed-off-by: Wang Wensheng <wangwensheng4@huawei.com> --- mm/share_pool.c | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 5cd4efce8f68..f52705ac814b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -3573,8 +3573,6 @@ static void __init proc_sharepool_init(void) void __sp_mm_clean(struct mm_struct *mm) { - struct sp_meminfo *meminfo; - long alloc_size, k2u_size; /* lockless visit */ struct sp_group_master *master = mm->sp_group_master; struct sp_group_node *spg_node, *tmp; @@ -3583,29 +3581,6 @@ void __sp_mm_clean(struct mm_struct *mm) if (!master) return; - /* - * There are two basic scenarios when a process in the share pool is - * exiting but its share pool memory usage is not 0. - * 1. Process A called sp_alloc(), but it terminates without calling - * sp_free(). Then its share pool memory usage is a positive number. - * 2. Process A never called sp_alloc(), and process B in the same spg - * called sp_alloc() to get an addr u. Then A gets u somehow and - * called sp_free(u). Now A's share pool memory usage is a negative - * number. Notice B's memory usage will be a positive number. - * - * We decide to print an info when seeing both of the scenarios. - * - * A process not in an sp group doesn't need to print because there - * wont't be any memory which is not freed.
- */ - meminfo = &master->meminfo; - alloc_size = meminfo_alloc_sum(meminfo); - k2u_size = atomic64_read(&meminfo->k2u_size); - if (alloc_size != 0 || k2u_size != 0) - pr_info("process %s(%d) exits. It applied %ld aligned KB, k2u shared %ld aligned KB\n", - master->comm, master->tgid, - byte2kb(alloc_size), byte2kb(k2u_size)); - down_write(&sp_global_sem); list_for_each_entry_safe(spg_node, tmp, &master->group_head, group_node) { spg = spg_node->spg; -- 2.43.0
hulk inclusion category: feature bugzilla: NA ---------------------------------------- sharepool remove the CONFIG_HUGETLB_INSERT_PAGE dependency by using the remap_pfn_range_try_pmd() interface for hugepages mappings. Signed-off-by: Yin Tirui <yintirui@huawei.com> --- mm/Kconfig | 1 - mm/share_pool.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/Kconfig b/mm/Kconfig index 2465b06e8d84..15b5e9f57f80 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1346,7 +1346,6 @@ config IOMMU_MM_DATA menuconfig ASCEND_FEATURES bool "Support Ascend Features" depends on ARM64 - select HUGETLB_INSERT_PAGE select EXTEND_HUGEPAGE_MAPPING select SHARE_POOL help diff --git a/mm/share_pool.c b/mm/share_pool.c index f52705ac814b..fb92eb5094b5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1994,8 +1994,8 @@ static int sp_vma_insert_hugepages(struct vm_area_struct *vma, struct page **pag vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); do { - int ret = hugetlb_insert_hugepage_pte(vma->vm_mm, uaddr, - vma->vm_page_prot, pages[i]); + int ret = remap_pfn_range_try_pmd(vma, uaddr, page_to_pfn(pages[i]), + PMD_SIZE, vma->vm_page_prot); if (ret) return ret; -- 2.43.0
From: Chen Jun <chenjun102@huawei.com> hulk inclusion category: feature bugzilla: NA -------------------------------- To avoid page faults within the virtual address range of the sharepool, support using `remap_pfn_range` to establish page table mapping. Enable this feature by adding the bootargs as: sharepool.remap=normal Signed-off-by: Chen Jun <chenjun102@huawei.com> --- mm/share_pool.c | 108 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index fb92eb5094b5..d380fa74bb95 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -717,7 +717,7 @@ static unsigned long spa_size(struct sp_area *spa) return spa->real_size; } -static struct file *spa_file(struct sp_area *spa) +static inline struct file *spa_file(struct sp_area *spa) { if (spa->is_hugepage) return spa->spg->file_hugetlb; @@ -725,6 +725,19 @@ static struct file *spa_file(struct sp_area *spa) return spa->spg->file; } +static inline unsigned long spa_page_size(struct sp_area *spa) +{ + if (spa->is_hugepage) + return PMD_SIZE; + else + return PAGE_SIZE; +} + +static inline unsigned long spa_nr_pages(struct sp_area *spa) +{ + return DIV_ROUND_UP(spa_size(spa), spa_page_size(spa)); +} + /* the caller should hold sp_area_lock */ static void spa_inc_usage(struct sp_area *spa) { @@ -1500,44 +1513,70 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa) return atomic_dec_and_test(&spa->spg->spa_num); } -static bool sp_area_alloc_hugepage_enable __read_mostly = true; +static bool spa_remap_huge __ro_after_init = true; +static bool spa_remap_normal __ro_after_init; -static int __init sp_area_alloc_hugepage_disable(char *p) +static int __init sharepool_use_remap_setup(char *s) { - sp_area_alloc_hugepage_enable = false; + char *token; + + while ((token = strsep(&s, ",")) != NULL) { + if (!*token) + continue; + + if (!strcmp(token, "nohuge")) + spa_remap_huge = false; + else if (!strcmp(token, 
"normal")) + spa_remap_normal = true; + } return 1; } -__setup("sp_area_alloc_hugepage_disable", sp_area_alloc_hugepage_disable); +__setup("sharepool.remap=", sharepool_use_remap_setup); -static bool sp_area_need_hugepage(struct sp_area *spa) +static bool sp_area_use_remap(struct sp_area *spa) { - return sp_area_alloc_hugepage_enable && spa->type == SPA_TYPE_ALLOC && spa->is_hugepage; + return ((spa_remap_huge && spa->is_hugepage) || + (spa_remap_normal && !spa->is_hugepage)) && + spa->type == SPA_TYPE_ALLOC; } -static bool sp_area_alloc_hugepages(struct sp_area *spa, int nid, nodemask_t *nodemask) +static struct page *sp_area_alloc_hugepages(int nid, nodemask_t *nodemask) { - int i; + struct page *page; + + page = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, nid, nodemask); + if (!page) + page = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, nodemask, + GFP_HIGHUSER_MOVABLE | __GFP_ACCOUNT | __GFP_COMP); + + return page; +} + +static bool sp_area_prepare_pages(struct sp_area *spa, int nid, nodemask_t *nodemask) +{ + int nr_pages = spa_nr_pages(spa); struct page **pages; - int nr_pages = ALIGN(spa_size(spa), PMD_SIZE) / PMD_SIZE; + int i; pages = kvmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL); if (!pages) return false; for (i = 0; i < nr_pages; i++) { - pages[i] = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, - nid, nodemask); - if (!pages[i]) - pages[i] = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, - nodemask, GFP_HIGHUSER_MOVABLE | __GFP_COMP | __GFP_ACCOUNT); + if (spa->is_hugepage) + pages[i] = sp_area_alloc_hugepages(nid, nodemask); + else + pages[i] = __alloc_pages(GFP_HIGHUSER_MOVABLE | __GFP_ACCOUNT, + 0, nid, nodemask); + if (!pages[i]) { while (i--) put_page(pages[i]); kvfree(pages); return false; } - memset(page_to_virt(pages[i]), 0, PMD_SIZE); + memset(page_to_virt(pages[i]), 0, spa_page_size(spa)); } spa->pages = pages; @@ -1545,11 +1584,11 @@ static bool sp_area_alloc_hugepages(struct sp_area 
*spa, int nid, nodemask_t *no return true; } -static void sp_area_free_hugepages(struct sp_area *spa) +static void sp_area_free_pages(struct sp_area *spa) { - int nr_pages = ALIGN(spa->real_size, PMD_SIZE) / PMD_SIZE; + int nr_pages = spa_nr_pages(spa); - if (!sp_area_need_hugepage(spa)) + if (!sp_area_use_remap(spa)) return; if (!spa->pages) @@ -1786,7 +1825,7 @@ static void sp_area_free(struct sp_area *spa) rb_erase(&spa->rb_node, &spm->area_root); spin_unlock(&spm->sp_mapping_lock); RB_CLEAR_NODE(&spa->rb_node); - sp_area_free_hugepages(spa); + sp_area_free_pages(spa); kfree(spa); } @@ -1987,20 +2026,25 @@ int mg_sp_free(unsigned long addr, int id) } EXPORT_SYMBOL_GPL(mg_sp_free); -static int sp_vma_insert_hugepages(struct vm_area_struct *vma, struct page **pages, +static int sp_vma_populate_pages(struct vm_area_struct *vma, struct sp_area *spa, unsigned long uaddr, unsigned long size) { + unsigned long page_size = spa_page_size(spa); int i = 0; + int ret; - vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); do { - int ret = remap_pfn_range_try_pmd(vma, uaddr, page_to_pfn(pages[i]), - PMD_SIZE, vma->vm_page_prot); + if (spa->is_hugepage) + ret = remap_pfn_range_try_pmd(vma, uaddr, + page_to_pfn(spa->pages[i]), page_size, vma->vm_page_prot); + else + ret = remap_pfn_range(vma, uaddr, + page_to_pfn(spa->pages[i]), page_size, vma->vm_page_prot); if (ret) return ret; - uaddr += PMD_SIZE; - size -= PMD_SIZE; + uaddr += page_size; + size -= page_size; i++; } while (size > 0); @@ -2043,8 +2087,8 @@ static unsigned long sp_mmap(struct mm_struct *mm, struct file *file, else vm_flags_clear(vma, VM_MAYWRITE); - if (sp_area_need_hugepage(spa)) { - ret = sp_vma_insert_hugepages(vma, spa->pages, addr, size); + if (sp_area_use_remap(spa)) { + ret = sp_vma_populate_pages(vma, spa, addr, size); if (ret) { do_munmap(mm, addr, size, NULL); return (unsigned long)ret; @@ -2225,8 +2269,8 @@ static int sp_nc_mmap(struct mm_struct *mm, struct sp_area *spa, unsigned 
long p vm_flags_clear(vma, VM_MAYWRITE); vma->vm_page_prot = sp_pgprot_writethrough(vma->vm_page_prot); - if (sp_area_need_hugepage(spa)) { - ret = sp_vma_insert_hugepages(vma, spa->pages, addr, size); + if (sp_area_use_remap(spa)) { + ret = sp_vma_populate_pages(vma, spa, addr, size); if (ret) do_munmap(mm, addr, spa_size(spa), NULL); up_write(&mm->mmap_lock); @@ -2339,8 +2383,8 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, struct sp_alloc_context * struct mm_struct *mm; bool reach_current = false; - if (sp_area_need_hugepage(spa) && - !sp_area_alloc_hugepages(spa, ac->preferred_node_id, ac->nodemask)) + if (sp_area_use_remap(spa) && + !sp_area_prepare_pages(spa, ac->preferred_node_id, ac->nodemask)) return -ENOMEM; mmap_ret = sp_map_spa_to_mm(current->mm, spa, spg_node->prot, ac, "sp_alloc"); -- 2.43.0
hulk inclusion category: feature bugzilla: NA -------------------------------- Introduce sp_register_hugepage_allocator() to let drivers replace the default sharepool hugepage allocation functions. Signed-off-by: Yin Tirui <yintirui@huawei.com> --- include/linux/share_pool.h | 9 +++++++ mm/share_pool.c | 51 +++++++++++++++++++++++++++++++++++--- 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index a3caedb99c5c..147e99620b9d 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -88,6 +88,8 @@ struct sp_walk_data { #define MMAP_SHARE_POOL_DYNAMIC_DVPP_END (MMAP_SHARE_POOL_DYNAMIC_DVPP_BASE + \ MMAP_SHARE_POOL_16G_SIZE * 64) +typedef unsigned long (*sp_alloc_hugepage_fn)(unsigned long size, int nid, nodemask_t *nodemask); + #ifdef CONFIG_SHARE_POOL /* @@ -161,6 +163,8 @@ static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) return sp_is_enabled() && mg_is_sharepool_addr(addr) && !(flags & MAP_SHARE_POOL); } +int sp_register_hugepage_allocator(sp_alloc_hugepage_fn alloc_func); + #else /* CONFIG_SHARE_POOL */ static inline int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id) @@ -260,6 +264,11 @@ static inline bool sp_check_mmap_addr(unsigned long addr, unsigned long flags) { return false; } + +static inline int sp_register_hugepage_allocator(sp_alloc_hugepage_fn alloc_func) +{ + return -EPERM; +} #endif /* !CONFIG_SHARE_POOL */ #endif /* LINUX_SHARE_POOL_H */ diff --git a/mm/share_pool.c b/mm/share_pool.c index d380fa74bb95..cec673f18160 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -88,6 +88,8 @@ static DEFINE_MUTEX(master_list_lock); static LIST_HEAD(spm_dvpp_list); /* mutex to protect insert/delete ops from master_list */ static DEFINE_MUTEX(spm_list_lock); +/* mutex to protect hugepage allocator registration */ +static DEFINE_MUTEX(sp_hugepage_reg_mutex); #define SEQ_printf(m, x...) 
\ do { \ @@ -1513,6 +1515,36 @@ static bool sp_group_delete_area(struct sp_group *spg, struct sp_area *spa) return atomic_dec_and_test(&spa->spg->spa_num); } +unsigned long sp_alloc_hugepage(unsigned long size, int nid, nodemask_t *nodemask) +{ + struct folio *folio = alloc_hugetlb_folio_nodemask_size(size, nid, nodemask); + + if (unlikely(!folio)) + return -ENOMEM; + + return folio_pfn(folio); +} + +static sp_alloc_hugepage_fn sp_alloc_hugepage_func = sp_alloc_hugepage; +static bool allow_hugepage_allocator_register = true; + +int sp_register_hugepage_allocator(sp_alloc_hugepage_fn alloc_func) +{ + if (unlikely(!alloc_func)) + return -EINVAL; + + mutex_lock(&sp_hugepage_reg_mutex); + if (!allow_hugepage_allocator_register) { + mutex_unlock(&sp_hugepage_reg_mutex); + return -EBUSY; + } + sp_alloc_hugepage_func = alloc_func; + allow_hugepage_allocator_register = false; + mutex_unlock(&sp_hugepage_reg_mutex); + return 0; +} +EXPORT_SYMBOL_GPL(sp_register_hugepage_allocator); + static bool spa_remap_huge __ro_after_init = true; static bool spa_remap_normal __ro_after_init; @@ -1544,13 +1576,16 @@ static bool sp_area_use_remap(struct sp_area *spa) static struct page *sp_area_alloc_hugepages(int nid, nodemask_t *nodemask) { struct page *page; + unsigned long pfn; - page = (struct page *)alloc_hugetlb_folio_nodemask_size(PMD_SIZE, nid, nodemask); - if (!page) + pfn = sp_alloc_hugepage_func(PMD_SIZE, nid, nodemask); + if (IS_ERR_VALUE(pfn)) { page = (struct page *)alloc_temporary_hugetlb_folio_nodemask(nid, nodemask, GFP_HIGHUSER_MOVABLE | __GFP_ACCOUNT | __GFP_COMP); + return page; + } - return page; + return pfn_to_page(pfn); } static bool sp_area_prepare_pages(struct sp_area *spa, int nid, nodemask_t *nodemask) @@ -2447,6 +2482,16 @@ static void *__mg_sp_alloc_nodemask(unsigned long size, unsigned long sp_flags, struct sp_alloc_context ac; struct sp_group_node *spg_node; + /** + * To avoid atomic risks, once share pool alloc memory, + * registering hugepage allocation 
hook is no longer supported. + */ + if (unlikely(allow_hugepage_allocator_register)) { + mutex_lock(&sp_hugepage_reg_mutex); + allow_hugepage_allocator_register = false; + mutex_unlock(&sp_hugepage_reg_mutex); + } + ret = sp_alloc_prepare(size, sp_flags, spg_id, &ac); if (ret) return ERR_PTR(ret); -- 2.43.0
From: Lin Ruifeng <linruifeng4@huawei.com> hulk inclusion category: feature bugzilla: N/A -------------------------------- Excessive printing may cause the following soft lockup, so we use pr_warn_ratelimited and pr_err_ratelimited to limit the rate of printing. watchdog: BUG: soft lockup - CPU#55 stuck for 23s! [test_u2k_add_an:50936] rcu: 55-....: (1447 ticks this GP) idle=cb2/1/0x4000000000000002 softirq=94729/94729 fqs=2509 Modules linked in: rcu: (detected by 12, t=5555 jiffies, g=123873, q=11191) sharepool_dev(O) Sending NMI from CPU 12 to CPUs 55: NMI backtrace for cpu 55 CPU: 55 PID: 50936 Comm: test_u2k_add_an Tainted: G W O 5.10.0+ #9 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 Call trace: console_unlock+0x2d0/0x65c vprintk_emit+0x208/0x2fc vprintk_default+0x40/0x50 vprintk_func+0xf4/0x1a0 printk+0x64/0x8c sp_munmap+0xec/0x110 sp_free_inner+0x1b0/0x320 mg_sp_free+0x38/0x60 dev_ioctl+0x540/0x1b14 [sharepool_dev] __arm64_sys_ioctl+0xb0/0xfc el0_svc_common.constprop.0+0x88/0x250 do_el0_svc+0x2c/0x90 el0_svc+0x20/0x30 el0_sync_handler+0xb0/0xb4 el0_sync+0x180/0x1c0 Sample time: 1644475978700 ns(HZ: 250) Sample stat: curr: user: 69196000000, nice: 0, sys: 109948000000, idle: 1438412458000, iowait: 446000, irq: 0, softirq: 5740000000, st: 0 deta: user: 69196000000, nice: 0, sys: 109948000000, idle: 1438412458000, iowait: 446000, irq: 0, softirq: 5740000000, st: 0 Sample softirq: TIMER: 806 TASKLET: 7 SCHED: 9345 RCU: 94729 Kernel panic - not syncing: softlockup: hung tasks Signed-off-by: Lin Ruifeng <linruifeng4@huawei.com> Signed-off-by: Yin Tirui <yintirui@huawei.com> --- mm/share_pool.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index cec673f18160..bef8d470246d 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -1212,7 +1212,7 @@ static int sp_group_link_task(struct mm_struct *mm, struct sp_group *spg, } if (is_process_in_group(spg, mm)) { - pr_err("task already in target group(%d)\n",
spg->id); + pr_warn_ratelimited("task already in target group(%d)\n", spg->id); return -EEXIST; } @@ -1924,7 +1924,7 @@ static void sp_munmap(struct mm_struct *mm, unsigned long addr, mmap_write_lock(mm); if (unlikely(!mmget_not_zero(mm))) { mmap_write_unlock(mm); - pr_warn("munmap: target mm is exiting\n"); + pr_warn_ratelimited("munmap: target mm is exiting\n"); return; } @@ -2340,7 +2340,7 @@ static int sp_map_spa_to_mm(struct mm_struct *mm, struct sp_area *spa, mmap_write_lock(mm); if (unlikely(!mmget_not_zero(mm))) { mmap_write_unlock(mm); - pr_warn("sp_map: target mm is exiting\n"); + pr_warn_ratelimited("sp_map: target mm is exiting\n"); return SP_SKIP_ERR; } -- 2.43.0
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,转换为PR失败! 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/H3R... 失败原因:应用补丁/补丁集失败,Patch failed at 0001 mm: reserve address range for svsp 建议解决方法:请查看失败原因, 确认补丁是否可以应用在当前期望分支的最新代码上 FeedBack: The patch(es) which you have sent to kernel@openeuler.org has been converted to PR failed! Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/H3R... Failed Reason: apply patch(es) failed, Patch failed at 0001 mm: reserve address range for svsp Suggest Solution: please checkout if the failed patch(es) can work on the newest codes in expected branch
participants (2)
-
patchwork bot -
Yin Tirui