Liu Shixin (2): mm/huge_memory: mTHP user controls to pagecache large folio mm/huge_memory: allow to enable 64K anonymous mapping align alone
Documentation/admin-guide/mm/transhuge.rst | 16 +-- include/linux/huge_mm.h | 7 +- mm/filemap.c | 2 + mm/huge_memory.c | 118 ++++++++++++--------- mm/mmap.c | 3 +- mm/readahead.c | 2 + 6 files changed, 84 insertions(+), 64 deletions(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAKLTF
--------------------------------
Add mTHP controls to sysfs to allow user space to disable large folio. For now, the control can be set to either `always` or `never` to enable or disable.
By default, large folio is enabled if it is supported by the filesystem.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- Documentation/admin-guide/mm/transhuge.rst | 6 ++++ include/linux/huge_mm.h | 7 ++++ mm/filemap.c | 2 ++ mm/huge_memory.c | 38 +++++++++++++++++++++- mm/readahead.c | 2 ++ 5 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index f9dc42f4451f5..f25381671dec2 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -232,6 +232,12 @@ it back by writing 0:: echo 0 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order echo 4 >/sys/kernel/mm/transparent_hugepage/pcp_allow_high_order
+The kernel could enable or disable file-backed hugepages, which has no +effect on existing pagecache:: + + echo always >/sys/kernel/mm/transparent_hugepage/file_enabled + echo never >/sys/kernel/mm/transparent_hugepage/file_enabled + khugepaged will be automatically started when PMD-sized THP is enabled (either of the per-size anon control or the top-level control are set to "always" or "madvise"), and it'll be automatically shutdown when diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 1474fd9c63ad2..5122ecbcf4597 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -51,6 +51,7 @@ enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG, + TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG, TRANSPARENT_HUGEPAGE_FILE_EXEC_THP_FLAG, TRANSPARENT_HUGEPAGE_FILE_EXEC_MTHP_FLAG, TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG, @@ -308,6 +309,10 @@ static inline void count_mthp_stat(int order, enum mthp_stat_item item) (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
+#define file_mthp_enabled() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG)) + #define thp_anon_mapping_pmd_align() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_ANON_MAPPING_PMD_ALIGN_FLAG)) @@ -465,6 +470,8 @@ static inline void folio_prep_large_rmappable(struct folio *folio) {}
#define transparent_hugepage_flags 0UL
+#define file_mthp_enabled() false + #define thp_anon_mapping_pmd_align() NULL
#define thp_get_unmapped_area NULL diff --git a/mm/filemap.c b/mm/filemap.c index 630a3eec5a881..1d5d5f1c2b541 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1932,6 +1932,8 @@ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
if (!mapping_large_folio_support(mapping)) order = 0; + if (order && !file_mthp_enabled()) + order = 0; if (order && mm_in_dynamic_pool(current->mm)) order = 0; if (order > MAX_PAGECACHE_ORDER) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index a62c4dc2b9da7..fcc881a5a4af9 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -65,7 +65,8 @@ unsigned long transparent_hugepage_flags __read_mostly = #endif (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)| (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)| - (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG); + (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)| + (1<<TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG);
static struct shrinker deferred_split_shrinker;
@@ -489,6 +490,40 @@ static void thp_flag_set(enum transparent_hugepage_flag flag, bool enable) clear_bit(flag, &transparent_hugepage_flags); }
+static ssize_t file_enabled_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + const char *output; + + if (test_bit(TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG, + &transparent_hugepage_flags)) + output = "[always] never"; + else + output = "always [never]"; + + return sysfs_emit(buf, "%s\n", output); +} + +static ssize_t file_enabled_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, size_t count) +{ + ssize_t ret = count; + + if (sysfs_streq(buf, "always")) { + set_bit(TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG, + &transparent_hugepage_flags); + } else if (sysfs_streq(buf, "never")) { + clear_bit(TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG, + &transparent_hugepage_flags); + } else + ret = -EINVAL; + + return ret; +} + +static struct kobj_attribute file_enabled_attr = __ATTR_RW(file_enabled); + static ssize_t thp_exec_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -592,6 +627,7 @@ static struct attribute *hugepage_attr[] = { #ifdef CONFIG_SHMEM &shmem_enabled_attr.attr, #endif + &file_enabled_attr.attr, &thp_exec_enabled_attr.attr, &thp_mapping_align_attr.attr, NULL, diff --git a/mm/readahead.c b/mm/readahead.c index d0b3de43cf23b..ab1c61f0c0360 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -504,6 +504,8 @@ void page_cache_ra_order(struct readahead_control *ractl,
if (!mapping_large_folio_support(mapping) || ra->size < 4) goto fallback; + if (!file_mthp_enabled()) + goto fallback; if (mm_in_dynamic_pool(current->mm)) goto fallback;
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/IAKLTF
--------------------------------
In commit 08f7407a9f04 ("mm: add thp anon pmd size mapping align control"), we added a control for PMD-size anonymous mapping alignment. However, contrary to what the name suggests, it also controls 64K anonymous mapping alignment, which is confusing.
To fix it, we move thp_anon_mapping_pmd_align into thp_get_unmapped_area(), and allow users to enable 64K anonymous mapping alignment without enabling PMD-size anonymous mapping alignment.
By the way, refactor the code of mapping align to make it more readable.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- Documentation/admin-guide/mm/transhuge.rst | 10 ++- include/linux/huge_mm.h | 6 -- mm/huge_memory.c | 80 ++++++++-------------- mm/mmap.c | 3 +- 4 files changed, 33 insertions(+), 66 deletions(-)
diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst index f25381671dec2..e569df5c999d6 100644 --- a/Documentation/admin-guide/mm/transhuge.rst +++ b/Documentation/admin-guide/mm/transhuge.rst @@ -211,12 +211,10 @@ possible to enable/disable it by configurate the corresponding bit:: echo 0x2 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled echo 0x3 >/sys/kernel/mm/transparent_hugepage/thp_exec_enabled
-The kernel could try to enable mappings for different sizes, eg, 64K on -arm64, BIT0 for file mapping, BIT1 for anonymous mapping, and THP size -page, BIT2 for anonymous mapping, where 2M anonymous mapping for arm64 -is dependent on BIT2 being turned on, the above feature are disabled by -default, and could enable the above feature by writing the corresponding -bit to 1:: +The kernel could try to enable mappings for different sizes, BIT0 for +64K file mapping, BIT1 for 64K anonymous mapping, and BIT2 for PMD size +anonymous mapping, the above features are disabled by default, and can +be enabled by writing the corresponding bit to 1::
echo 0x1 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align echo 0x4 >/sys/kernel/mm/transparent_hugepage/thp_mapping_align diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 5122ecbcf4597..ae45391c693dc 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -313,10 +313,6 @@ static inline void count_mthp_stat(int order, enum mthp_stat_item item) (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_FILE_MTHP_FLAG))
-#define thp_anon_mapping_pmd_align() \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_ANON_MAPPING_PMD_ALIGN_FLAG)) - unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags);
@@ -472,8 +468,6 @@ static inline void folio_prep_large_rmappable(struct folio *folio) {}
#define file_mthp_enabled() false
-#define thp_anon_mapping_pmd_align() NULL - #define thp_get_unmapped_area NULL
static inline bool diff --git a/mm/huge_memory.c b/mm/huge_memory.c index fcc881a5a4af9..862e1c1359671 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -602,8 +602,7 @@ static ssize_t thp_mapping_align_store(struct kobject *kobj, ret = kstrtoul(buf, 16, &val); if (ret < 0) return ret; - if ((val & ~THP_MAPPING_ALIGN_ALL) || (!(val & ANON_MAPPING_PMD_ALIGN) && - (val & ANON_MAPPING_ALIGN))) + if (val & ~THP_MAPPING_ALIGN_ALL) return -EINVAL;
thp_flag_set(TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG, @@ -1088,64 +1087,31 @@ static unsigned long __thp_get_unmapped_area(struct file *filp, return ret; }
-#define thp_file_mapping_align_enabled() \ +#define mthp_file_mapping_align() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_FILE_MAPPING_ALIGN_FLAG))
-#define thp_anon_mapping_align_enabled() \ +#define mthp_anon_mapping_align() \ (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_ANON_MAPPING_ALIGN_FLAG))
-static bool file_mapping_align_enabled(struct file *filp) +#define thp_anon_mapping_align() \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_ANON_MAPPING_PMD_ALIGN_FLAG)) + +static bool mthp_mapping_align_enabled(struct file *filp) { struct address_space *mapping;
- if (!thp_file_mapping_align_enabled()) - return false; - if (!filp) - return false; - - mapping = filp->f_mapping; - if (!mapping || !mapping_large_folio_support(mapping)) - return false; - - return true; -} + return mthp_anon_mapping_align();
-static bool anon_mapping_align_enabled(int order) -{ - unsigned long mask; - - if (!thp_anon_mapping_align_enabled()) - return 0; - - mask = READ_ONCE(huge_anon_orders_always) | - READ_ONCE(huge_anon_orders_madvise); - - if (hugepage_global_enabled()) - mask |= READ_ONCE(huge_anon_orders_inherit); - - mask = BIT(order) & mask; - if (!mask) + if (!mthp_file_mapping_align()) return false;
- return true; -} - -static unsigned long folio_get_unmapped_area(struct file *filp, unsigned long addr, - unsigned long len, unsigned long pgoff, unsigned long flags) -{ - int order = arch_wants_exec_folio_order(); - - if (order < 0) - return 0; + mapping = filp->f_mapping;
- if (file_mapping_align_enabled(filp) || - (!filp && anon_mapping_align_enabled(order))) - return __thp_get_unmapped_area(filp, addr, len, pgoff, flags, - PAGE_SIZE << order); - return 0; + return mapping && mapping_large_folio_support(mapping); }
unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, @@ -1154,13 +1120,23 @@ unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr, unsigned long ret; loff_t off = (loff_t)pgoff << PAGE_SHIFT;
- ret = __thp_get_unmapped_area(filp, addr, len, off, flags, PMD_SIZE); - if (ret) - return ret; + if (filp || thp_anon_mapping_align()) { + ret = __thp_get_unmapped_area(filp, addr, len, off, flags, + PMD_SIZE); + if (ret) + return ret; + }
- ret = folio_get_unmapped_area(filp, addr, len, off, flags); - if (ret) - return ret; + if (mthp_mapping_align_enabled(filp)) { + int order = arch_wants_exec_folio_order(); + + if (order >= 0) { + ret = __thp_get_unmapped_area(filp, addr, len, off, + flags, PAGE_SIZE << order); + if (ret) + return ret; + } + }
return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags); } diff --git a/mm/mmap.c b/mm/mmap.c index 27ba0bb1acde0..230065e77fc38 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1859,8 +1859,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, * so use shmem's get_unmapped_area in case it can be huge. */ get_area = shmem_get_unmapped_area; - } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && - thp_anon_mapping_pmd_align()) { + } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) { /* Ensures that larger anonymous mappings are THP aligned. */ get_area = thp_get_unmapped_area; }
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/11503 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/11503 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/E...