hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9Q9DF
CVE: NA
-------------------------------------------------
Storing high-order pages on the PCP list is not always a win, so it is disabled by default for high orders, except for PMD_ORDER.
Add a new control, pcp_allow_high_order, which lets the user enable or disable storing pages of the specified high order (only order 4 for now) on the PCP list. Note that all pages on the pcplists are drained when it is disabled.
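For reference, the allocator-side check after this change is expected to reduce to roughly the sketch below; the lines outside the diff hunks are paraphrased from the existing pcp_allowed_order() and may differ slightly in this tree:

  static inline bool pcp_allowed_order(unsigned int order)
  {
          /* low orders are always cached on the per-cpu lists */
          if (order <= PAGE_ALLOC_COSTLY_ORDER)
                  return true;
  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
          /* PMD-sized pages were already allowed */
          if (order == HPAGE_PMD_ORDER)
                  return true;
          /* new: the user-selected high order (only order 4 for now) */
          if (order == READ_ONCE(huge_pcp_allow_orders))
                  return true;
  #endif
          return false;
  }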
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 Documentation/admin-guide/mm/transhuge.rst |  9 +++++++
 include/linux/gfp.h                        |  1 +
 include/linux/huge_mm.h                    |  1 +
 mm/huge_memory.c                           | 31 ++++++++++++++++++++++
 mm/page_alloc.c                            | 18 ++++++++++++-
 5 files changed, 59 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index e52cd57bb512..0dc0b4dab621 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -220,6 +220,15 @@ writing the corresponding bit to 1::
 	echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
 	echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align
 
+The kernel can allow high-order pages (greater than PAGE_ALLOC_COSTLY_ORDER
+and other than PMD order, which is always allowed; only order 4 is supported
+for now) to be stored on PCP lists, which can reduce zone lock contention
+when high-order pages are allocated frequently. Enable storing order-4 pages
+on PCP lists by writing 4, or disable it again by writing 0::
+
+	echo 0 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
+	echo 4 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
+
 khugepaged will be automatically started when one or more hugepage sizes are
 enabled (either by directly setting "always" or "madvise", or by setting
 "inherit" while the top-level enabled is set to "always"
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index b18b7e3758be..b2d4f45a866b 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -335,6 +335,7 @@ extern void page_frag_free(void *addr);
 void page_alloc_init_cpuhp(void);
 int decay_pcp_high(struct zone *zone, struct per_cpu_pages *pcp);
+void drain_all_zone_pages(void);
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 8fdf17e80359..056f6918eeed 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -104,6 +104,7 @@ extern unsigned long transparent_hugepage_flags;
 extern unsigned long huge_anon_orders_always;
 extern unsigned long huge_anon_orders_madvise;
 extern unsigned long huge_anon_orders_inherit;
+extern unsigned long huge_pcp_allow_orders;
 
 static inline bool hugepage_global_enabled(void)
 {
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9f1100dfee66..508155fe9830 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -74,6 +74,7 @@ unsigned long huge_zero_pfn __read_mostly = ~0UL;
 unsigned long huge_anon_orders_always __read_mostly;
 unsigned long huge_anon_orders_madvise __read_mostly;
 unsigned long huge_anon_orders_inherit __read_mostly;
+unsigned long huge_pcp_allow_orders __read_mostly;
 
 unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
					 unsigned long vm_flags, bool smaps,
@@ -417,6 +418,35 @@ static ssize_t use_zero_page_store(struct kobject *kobj,
 }
 static struct kobj_attribute use_zero_page_attr = __ATTR_RW(use_zero_page);
 
+static ssize_t pcp_allow_high_order_show(struct kobject *kobj,
+					 struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%lu\n", READ_ONCE(huge_pcp_allow_orders));
+}
+
+static ssize_t pcp_allow_high_order_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &value);
+	if (ret < 0)
+		return ret;
+
+	/* Only enable order 4 now, 0 is to disable it */
+	if (value != 0 && value != (PAGE_ALLOC_COSTLY_ORDER + 1))
+		return -EINVAL;
+
+	if (value == 0)
+		drain_all_zone_pages();
+
+	WRITE_ONCE(huge_pcp_allow_orders, value);
+
+	return count;
+}
+static struct kobj_attribute pcp_allow_high_order_attr =
+	__ATTR_RW(pcp_allow_high_order);
+
 static ssize_t hpage_pmd_size_show(struct kobject *kobj,
				    struct kobj_attribute *attr, char *buf)
 {
@@ -531,6 +561,7 @@ static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
 	&defrag_attr.attr,
 	&use_zero_page_attr.attr,
+	&pcp_allow_high_order_attr.attr,
 	&hpage_pmd_size_attr.attr,
 #ifdef CONFIG_SHMEM
 	&shmem_enabled_attr.attr,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4652dc453964..f225f412e71d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -528,7 +528,7 @@ static void bad_page(struct page *page, const char *reason)
 static inline unsigned int order_to_pindex(int migratetype, int order)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	if (order > PAGE_ALLOC_COSTLY_ORDER) {
+	if (order > PAGE_ALLOC_COSTLY_ORDER + 1) {
 		VM_BUG_ON(order != HPAGE_PMD_ORDER);
 		return NR_LOWORDER_PCP_LISTS;
 	}
@@ -560,6 +560,8 @@ static inline bool pcp_allowed_order(unsigned int order)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (order == HPAGE_PMD_ORDER)
 		return true;
+	if (order == READ_ONCE(huge_pcp_allow_orders))
+		return true;
 #endif
 	return false;
 }
@@ -6829,6 +6831,20 @@ void zone_pcp_reset(struct zone *zone)
 	}
 }
 
+void drain_all_zone_pages(void)
+{
+	struct zone *zone;
+
+	mutex_lock(&pcp_batch_high_lock);
+	for_each_populated_zone(zone)
+		__zone_set_pageset_high_and_batch(zone, 0, 0, 1);
+	__drain_all_pages(NULL, true);
+	for_each_populated_zone(zone)
+		__zone_set_pageset_high_and_batch(zone, zone->pageset_high_min,
+				zone->pageset_high_max, zone->pageset_batch);
+	mutex_unlock(&pcp_batch_high_lock);
+}
+
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /*
  * All pages in the range must be in a single zone, must not contain holes,
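Not part of the patch itself: a minimal userspace sketch of how the new knob could be exercised from a test program, assuming only the pcp_allow_high_order sysfs path documented above (error handling kept to the essentials):

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  /* Write "4" to enable order-4 PCP caching, "0" to disable it (pcplists drained). */
  static int set_pcp_allow_high_order(const char *val)
  {
          const char *path = "/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order";
          int fd = open(path, O_WRONLY);

          if (fd < 0) {
                  perror("open");
                  return -1;
          }
          if (write(fd, val, strlen(val)) < 0) {
                  perror("write");
                  close(fd);
                  return -1;
          }
          return close(fd);
  }

  int main(void)
  {
          if (set_pcp_allow_high_order("4"))      /* enable order 4 */
                  return 1;
          /* ... run the high-order allocation workload of interest ... */
          return set_pcp_allow_high_order("0") ? 1 : 0;   /* disable and drain */
  }

The same effect can be had from a shell with the echo commands shown in the documentation hunk.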