Recently, we found some cases where the dynamic pool does not work as expected and pages are allocated from the buddy system instead. There are two main reasons:

1. THP is not supported on dynamic pool, but some paths are not restricted from allocating THP; the allocation from the dynamic pool then fails and falls back to the buddy system.
2. NUMA balancing tries to migrate pages that came from the dynamic pool and replaces them with pages from the buddy system.
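To make the intended behavior concrete, here is a minimal sketch of the two checks this series puts in place (simplified: mm_in_dynamic_pool() and page_from_or_in_dynamic_pool() are the helpers introduced by the two patches, and the call sites are abbreviated):

        /* 1. THP paths: gate on the mm that owns the vma, not on
         *    current, so THP allocation is refused for every
         *    dpool-attached mm, whichever task triggers it.
         */
        if (mm_in_dynamic_pool(vma->vm_mm))
                return -EINVAL;

        /* 2. Migration/isolation paths: skip pages allocated from a
         *    dpool as well as pages lying in a dpool range, so they
         *    are never migrated and replaced by buddy pages.
         */
        if (page_from_or_in_dynamic_pool(page))
                return -EBUSY;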
Liu Shixin (2):
  mm/dynamic_pool: replace task_in_dynamic_pool() with mm_in_dynamic_pool()
  mm/dynamic_pool: check page_from_dynamic_pool() on migration
 include/linux/dynamic_pool.h | 18 ++++++++++++++----
 mm/compaction.c              |  2 +-
 mm/dynamic_pool.c            | 19 +++++++++++++++++--
 mm/hugetlb.c                 |  4 ++--
 mm/khugepaged.c              |  4 ++--
 mm/memory.c                  |  4 ++--
 mm/migrate.c                 |  4 ++--
 mm/page_isolation.c          |  4 ++--
 mm/shmem.c                   |  2 +-
 9 files changed, 43 insertions(+), 18 deletions(-)
FeedBack: The patch(es) you sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully!
Pull request link: https://gitee.com/openeuler/kernel/pulls/10517
Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/7...
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAF8L3
--------------------------------
It is not appropriate to use the current task to decide whether THP is allowed for a vma, because the vma does not belong to the current task when we are called from process_madvise() or damos_madvise(). Use vma->vm_mm instead of the current task.

For shmem, there is no vma when called from file operations. In that case, using current->mm is sufficient when pages need to be allocated.
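For reference, a condensed view of the new lookup path (taken from the mm/dynamic_pool.c hunk below): the dpool is now resolved from the mm's memcg, so remote callers such as process_madvise() check the owner of the vma rather than themselves.

        static struct dynamic_pool *dpool_get_from_mm(struct mm_struct *mm)
        {
                struct dynamic_pool *dpool = NULL;
                struct mem_cgroup *memcg;

                /* Resolve the memcg of the mm that owns the memory... */
                memcg = get_mem_cgroup_from_mm(mm);
                if (!memcg)
                        return NULL;

                /* ...and look up the dpool attached to that memcg. */
                dpool = dpool_get_from_memcg(memcg);
                css_put(&memcg->css);

                return dpool;
        }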
Fixes: 35e812f6fd13 ("mm/dynamic_pool: disable THP for task attached with dpool")
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/dynamic_pool.h |  8 ++++----
 mm/dynamic_pool.c            | 19 +++++++++++++++++--
 mm/khugepaged.c              |  4 ++--
 mm/memory.c                  |  4 ++--
 mm/shmem.c                   |  2 +-
 5 files changed, 26 insertions(+), 11 deletions(-)
diff --git a/include/linux/dynamic_pool.h b/include/linux/dynamic_pool.h
index 1d41c6a853c3..3b54c384ec5a 100644
--- a/include/linux/dynamic_pool.h
+++ b/include/linux/dynamic_pool.h
@@ -77,13 +77,13 @@ struct dpool_info {
         struct range pfn_ranges[];
 };
 
-bool __task_in_dynamic_pool(struct task_struct *tsk);
-static inline bool task_in_dynamic_pool(struct task_struct *tsk)
+bool __mm_in_dynamic_pool(struct mm_struct *mm);
+static inline bool mm_in_dynamic_pool(struct mm_struct *mm)
 {
         if (!dpool_enabled)
                 return false;
 
-        return __task_in_dynamic_pool(tsk);
+        return __mm_in_dynamic_pool(mm);
 }
 
 static inline bool page_from_dynamic_pool(struct page *page)
@@ -140,7 +140,7 @@ static inline bool page_from_dynamic_pool(struct page *page)
         return false;
 }
 
-static inline bool task_in_dynamic_pool(struct task_struct *tsk)
+static inline bool mm_in_dynamic_pool(struct mm_struct *mm)
 {
         return false;
 }
diff --git a/mm/dynamic_pool.c b/mm/dynamic_pool.c
index 2d41bb61ceb6..7b0bbf0c348f 100644
--- a/mm/dynamic_pool.c
+++ b/mm/dynamic_pool.c
@@ -137,14 +137,29 @@ static struct dynamic_pool *dpool_get_from_page(struct page *page)
         return dpool;
 }
 
-bool __task_in_dynamic_pool(struct task_struct *tsk)
+static struct dynamic_pool *dpool_get_from_mm(struct mm_struct *mm)
+{
+        struct dynamic_pool *dpool = NULL;
+        struct mem_cgroup *memcg;
+
+        memcg = get_mem_cgroup_from_mm(mm);
+        if (!memcg)
+                return NULL;
+
+        dpool = dpool_get_from_memcg(memcg);
+        css_put(&memcg->css);
+
+        return dpool;
+}
+
+bool __mm_in_dynamic_pool(struct mm_struct *mm)
 {
         struct dynamic_pool *dpool;
 
         if (!dpool_enabled)
                 return false;
 
-        dpool = dpool_get_from_task(tsk);
+        dpool = dpool_get_from_mm(mm);
         dpool_put(dpool);
 
         return !!dpool;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 06f31ad4452e..bc1aaf5b99ed 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -363,7 +363,7 @@ int hugepage_madvise(struct vm_area_struct *vma,
                 return 0;
 #endif
 
-        if (task_in_dynamic_pool(current))
+        if (mm_in_dynamic_pool(vma->vm_mm))
                 return -EINVAL;
 
         *vm_flags &= ~VM_NOHUGEPAGE;
@@ -2743,7 +2743,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev,
         if (!thp_vma_allowable_order(vma, vma->vm_flags, 0, PMD_ORDER))
                 return -EINVAL;
 
-        if (task_in_dynamic_pool(current))
+        if (mm_in_dynamic_pool(mm))
                 return -EINVAL;
 
         cc = kmalloc(sizeof(*cc), GFP_KERNEL);
diff --git a/mm/memory.c b/mm/memory.c
index 49a5618661d8..63493bef46e9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5455,7 +5455,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
         if (pud_none(*vmf.pud) &&
             thp_vma_allowable_order(vma, vm_flags,
                                 TVA_IN_PF | TVA_ENFORCE_SYSFS, PUD_ORDER) &&
-            !task_in_dynamic_pool(current)) {
+            !mm_in_dynamic_pool(mm)) {
                 ret = create_huge_pud(&vmf);
                 if (!(ret & VM_FAULT_FALLBACK))
                         return ret;
@@ -5491,7 +5491,7 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma,
         if (pmd_none(*vmf.pmd) &&
             thp_vma_allowable_order(vma, vm_flags,
                                 TVA_IN_PF | TVA_ENFORCE_SYSFS, PMD_ORDER) &&
-            !task_in_dynamic_pool(current)) {
+            !mm_in_dynamic_pool(mm)) {
                 ret = create_huge_pmd(&vmf);
                 if (!(ret & VM_FAULT_FALLBACK))
                         return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index 079f47192bdb..af01e8d283f4 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2031,7 +2031,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
         if (!shmem_is_huge(inode, index, false,
                            vma ? vma->vm_mm : NULL, vma ? vma->vm_flags : 0))
                 goto alloc_nohuge;
-        if (task_in_dynamic_pool(current))
+        if (mm_in_dynamic_pool(vma ? vma->vm_mm : current->mm))
                 goto alloc_nohuge;
 
         huge_gfp = vma_thp_gfp_mask(vma);
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/IAF8L3
--------------------------------
To skip unexpected migration of dynamic pool pages, we should check not only whether a page lies in a dynamic pool, but also whether it was allocated from one, i.e. page_from_dynamic_pool(). Add a page_from_or_in_dynamic_pool() helper to simplify the call sites that need both checks.
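Judging by the call sites, the two predicates are complementary: page_from_dynamic_pool() tests whether the page itself was allocated from a dpool, while page_in_dynamic_pool() tests whether the page lies in a dpool's pfn range. The new helper simply combines them (matching the include/linux/dynamic_pool.h hunk below):

        static inline bool page_from_or_in_dynamic_pool(struct page *page)
        {
                return page_from_dynamic_pool(page) ||
                       page_in_dynamic_pool(page);
        }

Note that the migration-target paths (alloc_migration_target() and numamigrate_isolate_folio()) check only page_from_dynamic_pool(), since it is the pages actually handed out by a dpool that must not be migrated and replaced with buddy pages.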
Fixes: 9532dabee631 ("mm/dynamic_pool: skip unexpected migration")
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/dynamic_pool.h | 10 ++++++++++
 mm/compaction.c              |  2 +-
 mm/hugetlb.c                 |  4 ++--
 mm/migrate.c                 |  4 ++--
 mm/page_isolation.c          |  4 ++--
 5 files changed, 17 insertions(+), 7 deletions(-)
diff --git a/include/linux/dynamic_pool.h b/include/linux/dynamic_pool.h
index 3b54c384ec5a..1d12d76405cf 100644
--- a/include/linux/dynamic_pool.h
+++ b/include/linux/dynamic_pool.h
@@ -103,6 +103,11 @@ static inline bool file_in_dynamic_pool(struct hugetlbfs_inode_info *p)
 }
 
 bool page_in_dynamic_pool(struct page *page);
+static inline bool page_from_or_in_dynamic_pool(struct page *page)
+{
+        return page_from_dynamic_pool(page) || page_in_dynamic_pool(page);
+}
+
 int dynamic_pool_can_attach(struct task_struct *tsk, struct mem_cgroup *memcg);
 struct page *dynamic_pool_alloc_page(gfp_t gfp, unsigned int order,
                                      unsigned int alloc_flags);
@@ -150,6 +155,11 @@ static inline bool page_in_dynamic_pool(const struct page *page)
         return false;
 }
 
+static inline bool page_from_or_in_dynamic_pool(struct page *page)
+{
+        return false;
+}
+
 static inline int dynamic_pool_can_attach(struct task_struct *tsk,
                                           struct mem_cgroup *memcg)
 {
diff --git a/mm/compaction.c b/mm/compaction.c
index 09424cb5418f..e2735752c374 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2135,7 +2135,7 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
                         continue;
                 }
 
-                if (page_in_dynamic_pool(page))
+                if (page_from_or_in_dynamic_pool(page))
                         continue;
 
                 /*
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 360ac804e68d..57c81f87d474 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2368,7 +2368,7 @@ int dissolve_free_huge_page(struct page *page)
         if (!folio_test_hugetlb(folio))
                 return 0;
 
-        if (page_from_dynamic_pool(page) || page_in_dynamic_pool(page))
+        if (page_from_or_in_dynamic_pool(page))
                 return -EBUSY;
 
         spin_lock_irq(&hugetlb_lock);
@@ -3079,7 +3079,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
         struct folio *folio = page_folio(page);
         int ret = -EBUSY;
 
-        if (page_from_dynamic_pool(page) || page_in_dynamic_pool(page))
+        if (page_from_or_in_dynamic_pool(page))
                 return -EBUSY;
 
         /*
diff --git a/mm/migrate.c b/mm/migrate.c
index 78c5b4aaf60d..b5d9d8feacfa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -2050,7 +2050,7 @@ struct folio *alloc_migration_target(struct folio *src, unsigned long private)
         if (folio_test_hugetlb(src)) {
                 struct hstate *h = folio_hstate(src);
 
-                if (page_in_dynamic_pool(folio_page(src, 0)))
+                if (page_from_dynamic_pool(folio_page(src, 0)))
                         return NULL;
 
                 gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
@@ -2569,7 +2569,7 @@ static int numamigrate_isolate_folio(pg_data_t *pgdat, struct folio *folio)
                 return 0;
         }
 
-        if (page_in_dynamic_pool(folio_page(folio, 0)))
+        if (page_from_dynamic_pool(folio_page(folio, 0)))
                 return 0;
 
         if (!folio_isolate_lru(folio))
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 09eb445cfde9..03381be87b28 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -330,7 +330,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
         start_pfn = max(ALIGN_DOWN(isolate_pageblock, MAX_ORDER_NR_PAGES),
                         zone->zone_start_pfn);
 
-        if (page_in_dynamic_pool(pfn_to_page(isolate_pageblock)))
+        if (page_from_or_in_dynamic_pool(pfn_to_page(isolate_pageblock)))
                 return -EBUSY;
 
         if (skip_isolation) {
@@ -562,7 +562,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
              pfn < isolate_end - pageblock_nr_pages;
              pfn += pageblock_nr_pages) {
                 page = __first_valid_page(pfn, pageblock_nr_pages);
-                if (page && (page_in_dynamic_pool(page) ||
+                if (page && (page_from_or_in_dynamic_pool(page) ||
                              set_migratetype_isolate(page, migratetype, flags,
                                                      start_pfn, end_pfn))) {
                         undo_isolate_page_range(isolate_start, pfn, migratetype);