From: Kemeng Shi shikemeng@huawei.com
euleros inclusion category: feature feature: etmem bugzilla: https://gitee.com/openeuler/kernel/issues/I7RO5Q CVE: NA Reference: https://gitee.com/openeuler/kernel/commit/80ed6b3203c960b286e1fd5d8db1b39a19...
-------------------------------------------------
Add the /proc/sys/vm/hugepage_pmem_allocall switch (the entry is added to vm_table). Set it to 1 to allow all memory on pmem nodes to be allocated for hugepages. When set to 0 (the default), hugepage allocation is limited by the zone watermark as usual. Add the /proc/sys/vm/hugepage_mig_noalloc switch. Set it to 1 to forbid allocating new hugepages during hugepage migration when the destination node has run out of hugepages. When set to 0 (the default), new hugepages may be allocated during hugepage migration as usual.
Signed-off-by: Kemeng Shi shikemeng@huawei.com Reviewed-by: louhongxiang louhongxiang@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/hugetlb.h | 6 ++++++ kernel/sysctl.c | 20 ++++++++++++++++++++ mm/hugetlb.c | 7 ++++++- 3 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 6d041aa9f0fe..1c5b9123378f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -181,6 +181,12 @@ struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage); extern int sysctl_hugetlb_shm_group; extern struct list_head huge_boot_pages;
+#ifdef CONFIG_HUGETLBFS +extern int sysctl_hugetlb_mig_noalloc; +extern int sysctl_hugetlb_pmem_allocall; +#endif + + /* arch callbacks */
#ifndef CONFIG_HIGHPTE diff --git a/kernel/sysctl.c b/kernel/sysctl.c index bfe53e835524..e3b4c07422ff 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2327,6 +2327,26 @@ static struct ctl_table vm_table[] = { .extra1 = (void *)&mmap_rnd_compat_bits_min, .extra2 = (void *)&mmap_rnd_compat_bits_max, }, +#endif +#ifdef CONFIG_HUGETLBFS + { + .procname = "hugepage_mig_noalloc", + .data = &sysctl_hugetlb_mig_noalloc, + .maxlen = sizeof(sysctl_hugetlb_mig_noalloc), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + { + .procname = "hugepage_pmem_allocall", + .data = &sysctl_hugetlb_pmem_allocall, + .maxlen = sizeof(sysctl_hugetlb_pmem_allocall), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, #endif { } }; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index f154019e6b84..54808c78baa9 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -89,6 +89,9 @@ DEFINE_SPINLOCK(hugetlb_lock); static int num_fault_mutexes; struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
+int sysctl_hugetlb_mig_noalloc; +int sysctl_hugetlb_pmem_allocall; + /* Forward declaration */ static int hugetlb_acct_memory(struct hstate *h, long delta); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); @@ -2218,6 +2221,8 @@ static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed, gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) { + if (get_node_type(node) == NODE_TYPE_PMEM && sysctl_hugetlb_pmem_allocall) + gfp_mask |= __GFP_MEMALLOC; folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node, nodes_allowed, node_alloc_noretry); if (folio) { @@ -2486,7 +2491,7 @@ struct folio *alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
folio = dequeue_hugetlb_folio_nodemask(h, gfp_mask, preferred_nid, nmask); - if (folio) { + if (folio || sysctl_hugetlb_mig_noalloc) { spin_unlock_irq(&hugetlb_lock); return folio; }