hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAHJKC
CVE: NA
--------------------------------
Sometimes migrating a THP is not beneficial; for example, when the 64K page size is set on ARM64, a THP will be 512M, and migration may result in a performance regression. This feature adds an interface to control THP migration during NUMA balancing: /sys/kernel/mm/transparent_hugepage/numa_control
The default value is 0, which keeps the default policy (migrate). Writing 1 disables THP migration, while tasks still have a chance to collect NUMA group info and may migrate.
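As a back-of-envelope check of the 512M figure above, the arithmetic can be sketched in a few lines of userspace C; the page-table geometry is assumed here (8-byte table entries), not read from kernel headers:

/*
 * Sanity-check the PMD-sized THP size for 4K and 64K base pages.
 * One PMD entry maps PAGE_SIZE / 8 base pages on a 64-bit kernel.
 */
#include <stdio.h>

int main(void)
{
	unsigned long page_sizes[] = { 4096, 65536 };	/* 4K and 64K granules */

	for (int i = 0; i < 2; i++) {
		unsigned long ptrs_per_pte = page_sizes[i] / 8;	/* 8-byte entries */
		unsigned long thp_size = page_sizes[i] * ptrs_per_pte;

		printf("base page %3luK -> PMD-sized THP %4luM\n",
		       page_sizes[i] >> 10, thp_size >> 20);
	}
	return 0;	/* prints 2M for 4K pages, 512M for 64K pages */
}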
The control logic applies to both autonuma and SPE-based NUMA affinity.
A Spark benchmark shows a 5% performance improvement after writing 1 to numa_control.
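For completeness, a minimal userspace sketch (not part of this patch) that flips the knob and reads it back; it assumes root privileges and a kernel built with CONFIG_THP_NUMA_CONTROL=y:

#include <stdio.h>
#include <stdlib.h>

#define KNOB "/sys/kernel/mm/transparent_hugepage/numa_control"

int main(void)
{
	char buf[16] = "";
	FILE *f = fopen(KNOB, "w");

	if (!f) {
		perror(KNOB);	/* missing file: kernel lacks THP_NUMA_CONTROL */
		return EXIT_FAILURE;
	}
	/* 1 == THP_DISABLE_NUMA_MIGRATE: skip THP migration on NUMA faults */
	if (fputs("1", f) == EOF || fclose(f) == EOF) {
		perror("write");
		return EXIT_FAILURE;
	}

	f = fopen(KNOB, "r");
	if (!f || !fgets(buf, sizeof(buf), f)) {
		perror("read back");
		return EXIT_FAILURE;
	}
	fclose(f);
	printf("numa_control = %s", buf);	/* expect "1" */
	return EXIT_SUCCESS;
}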
Fixes: 34387bcad1cd ("mm: numa-affinity: support THP migration")
Signed-off-by: Nanyong Sun <sunnanyong@huawei.com>
---
 Documentation/admin-guide/mm/transhuge.rst | 8 ++++++
 arch/arm64/Kconfig                         | 1 +
 arch/arm64/configs/openeuler_defconfig     | 1 +
 arch/x86/configs/openeuler_defconfig       | 1 +
 include/linux/huge_mm.h                    | 13 +++++++++
 mm/Kconfig                                 | 10 +++++++
 mm/huge_memory.c                           | 33 ++++++++++++++++++++++
 mm/migrate.c                               | 3 ++
 8 files changed, 70 insertions(+)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 2bfb380e8380..fdff6c4247db 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -160,6 +160,14 @@ library) may want to know the size (in bytes) of a transparent hugepage::
 
 	cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
 
+If CONFIG_THP_NUMA_CONTROL is on, users can control THP migration during
+NUMA balancing: 0 (the default) keeps the default behavior (migrate),
+while writing 1 disables THP migration, although tasks still have a
+chance to migrate::
+
+	echo 0 > /sys/kernel/mm/transparent_hugepage/numa_control
+	echo 1 > /sys/kernel/mm/transparent_hugepage/numa_control
+
 khugepaged will be automatically started when
 transparent_hugepage/enabled is set to "always" or "madvise", and it'll
 be automatically shutdown if it's set to "never".
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cae54a9bf65d..8b8f48b2a51e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -216,6 +216,7 @@ config ARM64
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
 	select HAVE_LIVEPATCH_WO_FTRACE
+	select THP_NUMA_CONTROL if ARM64_64K_PAGES && NUMA_BALANCING && TRANSPARENT_HUGEPAGE
 	help
 	  ARM 64-bit (AArch64) Linux support.
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 5b928488b4c0..c26a9a7379a9 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -1182,6 +1182,7 @@ CONFIG_MEMORY_RELIABLE=y
 CONFIG_EXTEND_HUGEPAGE_MAPPING=y
 CONFIG_MEM_SAMPLING=y
 CONFIG_NUMABALANCING_MEM_SAMPLING=y
+# CONFIG_THP_NUMA_CONTROL is not set
 
 #
 # Data Access Monitoring
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index c522018b6481..c399055a52be 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -1110,6 +1110,7 @@ CONFIG_ARCH_HAS_PTE_SPECIAL=y
 CONFIG_MAPPING_DIRTY_HELPERS=y
 CONFIG_MEMORY_RELIABLE=y
 # CONFIG_CLEAR_FREELIST_PAGE is not set
+# CONFIG_THP_NUMA_CONTROL is not set
 
 #
 # Data Access Monitoring
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index efb370e79ac3..d9dde313d267 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -498,6 +498,19 @@ static inline unsigned long thp_size(struct page *page)
 	return PAGE_SIZE << thp_order(page);
 }
 
+#ifdef CONFIG_THP_NUMA_CONTROL
+#define THP_DISABLE_NUMA_MIGRATE 1
+extern unsigned long thp_numa_control;
+static inline bool thp_numa_migrate_disabled(void)
+{
+	return thp_numa_control == THP_DISABLE_NUMA_MIGRATE;
+}
+#else
+static inline bool thp_numa_migrate_disabled(void)
+{
+	return false;
+}
+#endif
 /*
  * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
  * limitations in the implementation like arm64 MTE can override this to
diff --git a/mm/Kconfig b/mm/Kconfig
index ccbad233f2b1..cc43f5124cb3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1038,6 +1038,16 @@ config NUMABALANCING_MEM_SAMPLING
 	  if unsure, say N to disable the NUMABALANCING_MEM_SAMPLING.
 
+config THP_NUMA_CONTROL
+	bool "Control THP migration when doing NUMA balancing"
+	depends on NUMA_BALANCING && TRANSPARENT_HUGEPAGE
+	default n
+	help
+	  Sometimes migrating a THP is not beneficial; for example, when the
+	  64K page size is set on ARM64, a THP will be 512M and migration is
+	  expensive. This feature adds a switch to control the behavior of
+	  THP migration when doing NUMA balancing.
+
 source "mm/damon/Kconfig"
 
 endmenu
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb293d17a104..f286261f5525 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -316,6 +316,36 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj,
 static struct kobj_attribute hpage_pmd_size_attr =
 	__ATTR_RO(hpage_pmd_size);
 
+#ifdef CONFIG_THP_NUMA_CONTROL
+unsigned long thp_numa_control __read_mostly;
+
+static ssize_t numa_control_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%lu\n", READ_ONCE(thp_numa_control));
+}
+
+static ssize_t numa_control_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	unsigned long value;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &value);
+	if (ret < 0)
+		return ret;
+	if (value > THP_DISABLE_NUMA_MIGRATE)
+		return -EINVAL;
+
+	WRITE_ONCE(thp_numa_control, value);
+
+	return count;
+}
+
+static struct kobj_attribute numa_control_attr =
+	__ATTR(numa_control, 0644, numa_control_show, numa_control_store);
+#endif
+
 static struct attribute *hugepage_attr[] = {
 	&enabled_attr.attr,
 	&defrag_attr.attr,
@@ -323,6 +353,9 @@ static struct attribute *hugepage_attr[] = {
 	&hpage_pmd_size_attr.attr,
 #ifdef CONFIG_SHMEM
 	&shmem_enabled_attr.attr,
+#endif
+#ifdef CONFIG_THP_NUMA_CONTROL
+	&numa_control_attr.attr,
 #endif
 	NULL,
 };
diff --git a/mm/migrate.c b/mm/migrate.c
index 857c15e43497..cff5e11437d9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2161,6 +2161,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
 	 */
 	compound = PageTransHuge(page);
 
+	if (compound && thp_numa_migrate_disabled())
+		return 0;
+
 	if (compound)
 		new = alloc_misplaced_dst_page_thp;
 	else
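One way to observe the effect, as an illustration rather than part of this patch: sample numa_pages_migrated from /proc/vmstat while a NUMA-bound workload runs, once with numa_control set to 0 and once with 1; with the knob set to 1, PMD-sized migrations are skipped, so the counter should grow more slowly.

/*
 * Print the numa_pages_migrated counter from /proc/vmstat.
 * The counter exists when CONFIG_NUMA_BALANCING is enabled.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	char line[128];
	FILE *f = fopen("/proc/vmstat", "r");

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "numa_pages_migrated", 19))
			fputs(line, stdout);
	}
	fclose(f);
	return 0;
}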