From: Bin Wang <wangbin224@huawei.com>
When a process releases part of a transparent huge page, the kernel does not split the huge page and reclaim the memory immediately. Instead, the huge page is added to the deferred_split_queue and is only reclaimed later, when shrink_slab() runs.
This behaviour causes a problem: processes have released memory, but the system's available memory does not increase, which makes users suspect a memory leak. To solve this, add a kernel command-line parameter, force_split_huge_page, that makes the kernel split and reclaim transparent huge pages as soon as they are partially freed.
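For example, on a kernel built with CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE=y, the forced split is enabled by appending the parameter to the boot command line (illustrative; the rest of the command line is system specific):

    ... force_split_huge_page

The parameter takes no value; its presence alone sets force_split_huge_page to true via the __setup() handler added below.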
Signed-off-by: Bin Wang <wangbin224@huawei.com>
Reviewed-by: Pan Zhang <zhangpan26@huawei.com>
---
 mm/Kconfig       |  7 +++++++
 mm/huge_memory.c |  5 +++++
 mm/memory.c      | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+)

diff --git a/mm/Kconfig b/mm/Kconfig
index b367e0cf9..42fdd0e8d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -961,4 +961,11 @@ config EULEROS_EXTEND_MODULE_AREA
           Say Y here to extend the module area. Reserve enough memory for
           loading modules when the KASAN is open.
 
+config EULEROS_FORCE_SPLIT_HUGE_PAGE
+        bool "force split huge page"
+        depends on TRANSPARENT_HUGEPAGE
+        default y
+        help
+          Say Y here to force splitting of transparent huge pages when they are partially freed.
+
 endmenu
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index acb2e2c9e..cd71bc08f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2850,8 +2850,13 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
         return READ_ONCE(ds_queue->split_queue_len);
 }
 
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+unsigned long deferred_split_scan(struct shrinker *shrink,
+                                  struct shrink_control *sc)
+#else
 static unsigned long deferred_split_scan(struct shrinker *shrink,
                                          struct shrink_control *sc)
+#endif
 {
         struct pglist_data *pgdata = NODE_DATA(sc->nid);
         struct deferred_split *ds_queue = &pgdata->deferred_split_queue;
diff --git a/mm/memory.c b/mm/memory.c
index 58e3e276d..7fcc4fa44 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -146,6 +146,18 @@ EXPORT_SYMBOL(zero_pfn);
 
 unsigned long highest_memmap_pfn __read_mostly;
 
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+extern unsigned long deferred_split_scan(struct shrinker *shrink,
+                                         struct shrink_control *sc);
+bool force_split_huge_page = false;
+static int __init set_force_split_huge_page(char *s)
+{
+        force_split_huge_page = true;
+        return 1;
+}
+__setup("force_split_huge_page", set_force_split_huge_page);
+#endif
+
 /*
  * CONFIG_MMU architectures set up ZERO_PAGE in their paging_init()
  */
@@ -1216,6 +1228,10 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
         pte_t *start_pte;
         pte_t *pte;
         swp_entry_t entry;
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+        bool is_trans_compound = false;
+        struct page *tmp_page = NULL;
+#endif
 
         tlb_change_page_size(tlb, PAGE_SIZE);
 again:
@@ -1263,6 +1279,13 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                         rss[mm_counter(page)]--;
                         reliable_page_counter(page, mm, -1);
                         page_remove_rmap(page, false);
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+                        if (unlikely(force_split_huge_page) &&
+                            unlikely(PageTransCompound(page))) {
+                                is_trans_compound = true;
+                                tmp_page = page;
+                        }
+#endif
                         if (unlikely(page_mapcount(page) < 0))
                                 print_bad_pte(vma, addr, ptent, page);
                         if (unlikely(__tlb_remove_page(tlb, page))) {
@@ -1320,6 +1343,23 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
                 tlb_flush_mmu_tlbonly(tlb);
         pte_unmap_unlock(start_pte, ptl);
 
+#ifdef CONFIG_EULEROS_FORCE_SPLIT_HUGE_PAGE
+        if (unlikely(force_split_huge_page) && unlikely(is_trans_compound)) {
+                struct shrink_control sc = {
+                        /* all subpages of a compound page are on the same node */
+                        .nid = page_to_nid(tmp_page),
+                        /* force split at most 256 THPs in one pass */
+                        .nr_to_scan = 256,
+#ifdef CONFIG_MEMCG
+                        /* make the scan memcg aware */
+                        .memcg = page_memcg(compound_head(tmp_page)),
+#endif
+                };
+
+                deferred_split_scan(NULL, &sc);
+        }
+#endif
+
         /*
          * If we forced a TLB flush (either due to running out of
          * batch buffers or because we needed to flush dirty TLB
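For reference, a minimal userspace sketch of the scenario from the commit message (not part of the patch; it assumes a 2MB THP size and THP enabled in "always" or "madvise" mode). After the partial munmap(), comparing AnonHugePages and MemAvailable in /proc/meminfo with and without force_split_huge_page shows whether the huge page was split immediately or left on deferred_split_queue:

/*
 * Illustrative only: map a THP-aligned anonymous region, fault it in,
 * then unmap half of it. Without force_split_huge_page the compound
 * page stays on deferred_split_queue until the shrinker runs; with it,
 * the zap path above splits the page right away.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

#define THP_SIZE (2UL << 20)    /* assumed THP size: 2MB */

int main(void)
{
        /* Over-allocate so the buffer can be aligned to a THP boundary. */
        void *raw = mmap(NULL, 2 * THP_SIZE, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (raw == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        void *buf = (void *)(((unsigned long)raw + THP_SIZE - 1) &
                             ~(THP_SIZE - 1));

        madvise(buf, THP_SIZE, MADV_HUGEPAGE);
        memset(buf, 1, THP_SIZE);       /* fault in, ideally as one THP */

        /* Release only the first half of the huge page. */
        munmap(buf, THP_SIZE / 2);

        /* Inspect /proc/meminfo (AnonHugePages, MemAvailable) now. */
        printf("partially unmapped, pid %d; press Enter to exit\n", getpid());
        getchar();
        return 0;
}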