hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAHJKC CVE: NA
--------------------------------
Sometimes migrating THP is not beneficial; for example, when 64K page size is set on ARM64, a THP will be 512M, and migration may result in performance regression. This feature adds an interface to control THP migration when doing numa balancing: /sys/kernel/mm/transparent_hugepage/numa_control
The default value is 0, which means do nothing. Write 1 to disable migrating THP while tasks still have a chance to migrate. Write 2 to disable autonuma for THP totally.
Signed-off-by: Nanyong Sun sunnanyong@huawei.com --- Documentation/admin-guide/mm/transhuge.rst | 9 ++++++ arch/arm64/Kconfig | 1 + include/linux/huge_mm.h | 24 +++++++++++++++ mm/Kconfig | 10 ++++++ mm/huge_memory.c | 36 ++++++++++++++++++++++ mm/mem_sampling.c | 3 ++ mm/migrate.c | 3 ++ 7 files changed, 86 insertions(+)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index 2bfb380e8380..1038ff8e184d 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -160,6 +160,15 @@ library) may want to know the size (in bytes) of a transparent hugepage::
cat /sys/kernel/mm/transparent_hugepage/hpage_pmd_size
+If CONFIG_THP_NUMA_CONTROL is on, users can control THP migration when +doing numa balancing, 0 is the default which means keep the default behavior, +writing 1 will disable THP migration while tasks still have a chance to +migrate, writing 2 will skip THP totally from numa balancing:: + + echo 0 > /sys/kernel/mm/transparent_hugepage/numa_control + echo 1 > /sys/kernel/mm/transparent_hugepage/numa_control + echo 2 > /sys/kernel/mm/transparent_hugepage/numa_control + khugepaged will be automatically started when transparent_hugepage/enabled is set to "always" or "madvise, and it'll be automatically shutdown if it's set to "never". diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cae54a9bf65d..bde9ec4af773 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -216,6 +216,7 @@ config ARM64 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select HAVE_LIVEPATCH_WO_FTRACE + select THP_NUMA_CONTROL if ARM64_64K_PAGES help ARM 64-bit (AArch64) Linux support.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index efb370e79ac3..c2bf15d2d969 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -498,6 +498,30 @@ static inline unsigned long thp_size(struct page *page) return PAGE_SIZE << thp_order(page); }
+#ifdef CONFIG_THP_NUMA_CONTROL
+#define THP_DISABLE_NUMA_MIGRATE 1
+#define THP_DISABLE_AUTONUMA 2
+extern unsigned long thp_numa_control;
+/* True when numa_control == 1: keep NUMA faults but never migrate a THP. */
+static inline bool thp_numa_migrate_disabled(void)
+{
+	return thp_numa_control == THP_DISABLE_NUMA_MIGRATE;
+}
+
+/* True when numa_control == 2: skip THP entirely during numa balancing. */
+static inline bool thp_autonuma_disabled(void)
+{
+	return thp_numa_control == THP_DISABLE_AUTONUMA;
+}
+#else
+static inline bool thp_numa_migrate_disabled(void)
+{
+	return false;
+}
+
+/*
+ * Stub for !CONFIG_THP_NUMA_CONTROL builds; the original patch wrongly
+ * defined thp_numa_migrate_disabled() twice here, which both breaks the
+ * build (redefinition) and leaves thp_autonuma_disabled() undefined for
+ * its callers in change_huge_pmd() and do_thp_numa_access().
+ */
+static inline bool thp_autonuma_disabled(void)
+{
+	return false;
+}
+#endif
 /*
  * archs that select ARCH_WANTS_THP_SWAP but don't support THP_SWP due to
  * limitations in the implementation like arm64 MTE can override this to
diff --git a/mm/Kconfig b/mm/Kconfig
index ccbad233f2b1..cc43f5124cb3 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1038,6 +1038,16 @@ config NUMABALANCING_MEM_SAMPLING
if unsure, say N to disable the NUMABALANCING_MEM_SAMPLING.
+config THP_NUMA_CONTROL + bool "Control THP migration when numa balancing" + depends on NUMA_BALANCING && TRANSPARENT_HUGEPAGE + default n + help + Sometimes migrating THP is not beneficial, for example, when 64K page + size is set on ARM64, THP will be 512M, migration will be expensive. + This feature adds a switch to control the behavior of THP migration + when doing numa balancing. + source "mm/damon/Kconfig"
endmenu diff --git a/mm/huge_memory.c b/mm/huge_memory.c index eb293d17a104..332f712906e1 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -316,6 +316,36 @@ static ssize_t hpage_pmd_size_show(struct kobject *kobj, static struct kobj_attribute hpage_pmd_size_attr = __ATTR_RO(hpage_pmd_size);
+#ifdef CONFIG_THP_NUMA_CONTROL +unsigned long thp_numa_control; + +static ssize_t numa_control_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + return sprintf(buf, "%lu\n", thp_numa_control); +} + +static ssize_t numa_control_store(struct kobject *kobj, + struct kobj_attribute *attr, const char *buf, size_t count) +{ + unsigned long value; + int ret; + + ret = kstrtoul(buf, 10, &value); + if (ret < 0) + return ret; + if (value > THP_DISABLE_AUTONUMA) + return -EINVAL; + + thp_numa_control = value; + + return count; +} + +static struct kobj_attribute numa_control_attr = + __ATTR(numa_control, 0644, numa_control_show, numa_control_store); +#endif + static struct attribute *hugepage_attr[] = { &enabled_attr.attr, &defrag_attr.attr, @@ -323,6 +353,9 @@ static struct attribute *hugepage_attr[] = { &hpage_pmd_size_attr.attr, #ifdef CONFIG_SHMEM &shmem_enabled_attr.attr, +#endif +#ifdef CONFIG_THP_NUMA_CONTROL + &numa_control_attr.attr, #endif NULL, }; @@ -1743,6 +1776,9 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, if (prot_numa && !thp_migration_supported()) return 1;
+ if (prot_numa && thp_autonuma_disabled()) + return 1; + ptl = __pmd_trans_huge_lock(pmd, vma); if (!ptl) return 0; diff --git a/mm/mem_sampling.c b/mm/mem_sampling.c index 1d8a831be531..ffc0e4964cad 100644 --- a/mm/mem_sampling.c +++ b/mm/mem_sampling.c @@ -145,6 +145,9 @@ static inline void do_thp_numa_access(struct mm_struct *mm, pmd_t *pmd, pmde; spinlock_t *ptl;
+ if (thp_autonuma_disabled()) + return; + pgd = pgd_offset(mm, vaddr); if (!pgd_present(*pgd)) return; diff --git a/mm/migrate.c b/mm/migrate.c index 3f5b217d5af1..faaa7b790da0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -2157,6 +2157,9 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma, */ compound = PageTransHuge(page);
+ if (compound && thp_numa_migrate_disabled()) + return 0; + if (compound) new = alloc_misplaced_dst_page_thp; else
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/10633 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/10633 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/5...