From: Kemeng Shi shikemeng@huawei.com
euleros inclusion category: feature feature: etmem bugzilla: https://gitee.com/openeuler/kernel/issues/I4OODH?from=project-issue CVE: NA
-------------------------------------------------
Add the /proc/sys/vm/hugepage_pmem_allocall switch. Set it to 1 to allow all memory on PMEM nodes to be allocated for hugepages. When set to 0 (default), hugepage allocation is limited by the zone watermarks as usual. Add the /proc/sys/vm/hugepage_mig_noalloc switch. Set it to 1 to forbid allocating new hugepages during hugepage migration when the destination node has run out of hugepages. When set to 0 (default), new hugepages may be allocated during hugepage migration as usual.
Signed-off-by: Yuchen Tang tangyuchen5@huawei.com Signed-off-by: Kemeng Shi shikemeng@huawei.com Reviewed-by: louhongxiang louhongxiang@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/etmem.h | 3 +++ include/linux/highmem.h | 4 ++-- mm/etmem.c | 37 +++++++++++++++++++++++++++++++++++++ mm/hugetlb.c | 9 +++++++++ 4 files changed, 51 insertions(+), 2 deletions(-)
diff --git a/include/linux/etmem.h b/include/linux/etmem.h index a274785216d1..a51df74c28e1 100644 --- a/include/linux/etmem.h +++ b/include/linux/etmem.h @@ -39,6 +39,9 @@ static inline struct kvm *mm_kvm(struct mm_struct *mm) } #endif
+extern int sysctl_hugetlb_mig_noalloc; +extern int sysctl_hugetlb_pmem_allocall; + enum node_type { NODE_TYPE_DRAM, NODE_TYPE_PMEM, diff --git a/include/linux/highmem.h b/include/linux/highmem.h index fe5b75dd7647..ea88d7239195 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -565,6 +565,7 @@ static inline void unmap_and_put_page(struct page *page, void *addr) put_page(page); }
+#ifdef CONFIG_ETMEM #ifndef __HAVE_ARCH_COPY_HUGEPAGES
static inline void copy_highpages(struct folio *dst, struct folio *src) @@ -579,7 +580,6 @@ static inline void copy_highpages(struct folio *dst, struct folio *src) cond_resched(); } } - #endif /* __HAVE_ARCH_COPY_HUGEPAGES */ - +#endif /* CONFIG_ETMEM */ #endif /* _LINUX_HIGHMEM_H */ diff --git a/mm/etmem.c b/mm/etmem.c index 90abf8ac4596..37e389acb463 100644 --- a/mm/etmem.c +++ b/mm/etmem.c @@ -9,6 +9,10 @@ #include <linux/etmem.h> #include "internal.h"
+#ifdef CONFIG_ETMEM +int sysctl_hugetlb_mig_noalloc; +int sysctl_hugetlb_pmem_allocall; +#endif static bool enable_kernel_swap __read_mostly = true; unsigned int sysctl_vmemmap_block_from_dram; #ifdef CONFIG_NUMA @@ -94,6 +98,39 @@ static int __init vmemmap_sysctl_init(void) late_initcall(vmemmap_sysctl_init); #endif
+#ifdef CONFIG_HUGETLBFS
+/* 0/1 switches (proc_dointvec_minmax clamps to [0, 1]); mode 0600 = owner read/write only. */
+static struct ctl_table hugetlb_pmem_table[] = {
+	{
+		.procname	= "hugepage_mig_noalloc",
+		.data		= &sysctl_hugetlb_mig_noalloc,
+		.maxlen		= sizeof(sysctl_hugetlb_mig_noalloc),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hugepage_pmem_allocall",
+		.data		= &sysctl_hugetlb_pmem_allocall,
+		.maxlen		= sizeof(sysctl_hugetlb_pmem_allocall),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static int __init hugetlb_pmem_init(void)
+{
+	register_sysctl_init("vm", hugetlb_pmem_table);	/* files live in /proc/sys/vm/ */
+	return 0;
+}
+late_initcall(hugetlb_pmem_init);
+#endif
+
+
 int add_page_for_swap(struct page *page, struct list_head *pagelist)
 {
 	int err = -EBUSY;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1a4d388b6a3b..1b714e3ea680 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -35,6 +35,7 @@
 #include <linux/delayacct.h>
 #include <linux/memory.h>
 #include <linux/mm_inline.h>
+#include <linux/etmem.h>
#include <asm/page.h> #include <asm/pgalloc.h> @@ -2233,6 +2234,10 @@ static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { +#ifdef CONFIG_ETMEM + if (get_node_type(node) == NODE_TYPE_PMEM && sysctl_hugetlb_pmem_allocall) + gfp_mask |= __GFP_MEMALLOC; +#endif folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node, nodes_allowed, node_alloc_noretry); if (folio) { @@ -2501,7 +2506,11 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid, nmask); +#ifdef CONFIG_ETMEM + if (folio || sysctl_hugetlb_mig_noalloc) { +#else if (folio) { +#endif spin_unlock_irq(&hugetlb_lock); return folio; }