Support dynamic_hugetlb on arm64 and fix some bugs.
Liu Shixin (6): mm/dynamic_hugetlb: fix kabi broken when enable CONFIG_DYNAMIC_HUGETLB on arm64 mm/dynamic_hugetlb: support dynamic hugetlb on arm64 mm/dynamic_hugetlb: isolate hugepage without dissolve mm/dynamic_hugetlb: replace spin_lock with mutex_lock and fix kabi broken mm/dynamic_hugetlb: set PagePool to bad page mm/dynamic_hugetlb: fix type error of pfn in __hpool_split_gigantic_page()
fs/Kconfig | 2 +- fs/hugetlbfs/inode.c | 2 +- include/linux/dynamic_hugetlb.h | 24 +++++++++++++-- include/linux/memcontrol.h | 6 +++- mm/dynamic_hugetlb.c | 54 +++++++++++++++++++++++++++------ mm/hugetlb.c | 7 +++++ 6 files changed, 80 insertions(+), 15 deletions(-)
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/1062 邮件列表地址: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/HY...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/1062 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/HY...
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6XOIE CVE: NA
--------------------------------
When dynamic hugetlb is enabled on arm64, the new member struct dhugetlb_pool* will be added to mem_cgroup. We need to use a KABI_RESERVE to fix the KABI breakage. The previous struct dhugetlb_pool* is only used on x86_64.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- include/linux/memcontrol.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 600cda4ea1be..3056b0985c1b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -372,7 +372,7 @@ struct mem_cgroup { struct deferred_split deferred_split_queue; #endif
-#ifdef CONFIG_DYNAMIC_HUGETLB +#if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_X86_64) struct dhugetlb_pool *hpool; #endif #ifndef __GENKSYMS__ @@ -392,7 +392,11 @@ struct mem_cgroup { KABI_RESERVE(3) KABI_RESERVE(4) #endif +#if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_ARM64) + KABI_USE(5, struct dhugetlb_pool *hpool) +#else KABI_RESERVE(5) +#endif KABI_RESERVE(6) KABI_RESERVE(7) KABI_RESERVE(8)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6XOIE CVE: NA
--------------------------------
To support dynamic hugetlb on arm64, we need to do two more things. The first one is to fix the KABI breakage in mem_cgroup; we used kabi_reserve_5 to fix it in the previous patch. The second one is to reject cont-bit hugetlb, since this feature only supports PMD-size and PUD-size hugepages.
This feature only supports a 4KB page size; 16KB and 64KB page sizes are not supported.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- fs/Kconfig | 2 +- fs/hugetlbfs/inode.c | 2 +- include/linux/dynamic_hugetlb.h | 4 ++-- mm/dynamic_hugetlb.c | 13 ++++++++++--- 4 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/fs/Kconfig b/fs/Kconfig index a5ed26b093b7..385602ba0d99 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -262,7 +262,7 @@ config HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON
config DYNAMIC_HUGETLB bool "Dynamic HugeTLB" - depends on X86_64 + depends on X86_64 || (ARM64 && ARM64_4K_PAGES) depends on HUGETLBFS depends on MEMCG && CGROUP_HUGETLB help diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index cfdd8cffe6d7..2c101a812dee 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1202,7 +1202,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) */ mpol_shared_policy_init(&p->policy, NULL); /* Initialize hpool here in case of a quick call to destroy */ - link_hpool(p); + link_hpool(p, sbinfo->hstate);
return &p->vfs_inode; } diff --git a/include/linux/dynamic_hugetlb.h b/include/linux/dynamic_hugetlb.h index 5dcba8e8b933..af523139ab3a 100644 --- a/include/linux/dynamic_hugetlb.h +++ b/include/linux/dynamic_hugetlb.h @@ -97,7 +97,7 @@ bool free_page_to_dhugetlb_pool(struct page *page); void free_page_list_to_dhugetlb_pool(struct list_head *list); int task_has_mem_in_hpool(struct task_struct *tsk);
-void link_hpool(struct hugetlbfs_inode_info *p); +void link_hpool(struct hugetlbfs_inode_info *p, struct hstate *h); void unlink_hpool(struct hugetlbfs_inode_info *p); bool file_has_mem_in_hpool(struct hugetlbfs_inode_info *p); int dhugetlb_acct_memory(struct hstate *h, long delta, struct hugetlbfs_inode_info *p); @@ -147,7 +147,7 @@ static inline int task_has_mem_in_hpool(struct task_struct *tsk) }
#ifdef CONFIG_HUGETLBFS -static inline void link_hpool(struct hugetlbfs_inode_info *p) +static inline void link_hpool(struct hugetlbfs_inode_info *p, struct hstate *h) { } static inline void unlink_hpool(struct hugetlbfs_inode_info *p) diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index 6b615009c3a4..ff4fd0c9f11b 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -5,6 +5,7 @@
#include <linux/rmap.h> #include <linux/migrate.h> +#include <linux/memblock.h> #include <linux/memory_hotplug.h> #include <linux/dynamic_hugetlb.h>
@@ -618,13 +619,19 @@ void free_page_list_to_dhugetlb_pool(struct list_head *list) } }
-void link_hpool(struct hugetlbfs_inode_info *p) +void link_hpool(struct hugetlbfs_inode_info *p, struct hstate *h) { + unsigned long size; + if (!dhugetlb_enabled || !p) return;
- p->hpool = find_hpool_by_task(current); - if (!get_hpool_unless_zero(p->hpool)) + size = huge_page_size(h); + if (size == PMD_SIZE || size == PUD_SIZE) { + p->hpool = find_hpool_by_task(current); + if (!get_hpool_unless_zero(p->hpool)) + p->hpool = NULL; + } else p->hpool = NULL; }
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I6XOIE CVE: NA
--------------------------------
Memory hotplug and memory failure will dissolve freed hugepages into the buddy system; this is not the expected behavior for dynamic hugetlb. Skip the dissolve operation for hugepages belonging to dynamic hugetlb. For memory hotplug, the hotplug operation is not allowed if a dhugetlb pool exists. For memory failure, the hugepage will be discarded directly.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- include/linux/dynamic_hugetlb.h | 6 ++++++ mm/dynamic_hugetlb.c | 23 ++++++++++++++++++++++- mm/hugetlb.c | 7 +++++++ 3 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/include/linux/dynamic_hugetlb.h b/include/linux/dynamic_hugetlb.h index af523139ab3a..476a9014a83a 100644 --- a/include/linux/dynamic_hugetlb.h +++ b/include/linux/dynamic_hugetlb.h @@ -104,6 +104,7 @@ int dhugetlb_acct_memory(struct hstate *h, long delta, struct hugetlbfs_inode_in struct page *alloc_huge_page_from_dhugetlb_pool(struct hstate *h, struct dhugetlb_pool *hpool, bool need_unreserved); void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve); +bool page_belong_to_dynamic_hugetlb(struct page *page);
#else
@@ -171,6 +172,11 @@ static inline void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve) { } +static inline +bool page_belong_to_dynamic_hugetlb(struct page *page) +{ + return false; +} #endif
#endif /* CONFIG_DYNAMIC_HUGETLB */ diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index ff4fd0c9f11b..228b04b9c7b7 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -448,6 +448,19 @@ static struct dhugetlb_pool *find_hpool_by_dhugetlb_pagelist(struct page *page) return hpool; }
+bool page_belong_to_dynamic_hugetlb(struct page *page) +{ + struct dhugetlb_pool *hpool; + + if (!dhugetlb_enabled) + return false; + + hpool = find_hpool_by_dhugetlb_pagelist(page); + if (hpool) + return true; + return false; +} + static struct dhugetlb_pool *find_hpool_by_task(struct task_struct *tsk) { struct mem_cgroup *memcg; @@ -740,8 +753,15 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve) }
spin_lock(&hpool->lock); + /* + * memory_failure will free the hwpoison hugepage, and then try to + * dissolve it and free its subpages to the buddy system. Since pages in + * a dhugetlb_pool should not be freed to the buddy system, we isolate the + * hugepage here directly, and skip the later dissolution. + */ + if (PageHWPoison(page)) + goto out; ClearPagePool(page); - set_compound_page_dtor(page, NULL_COMPOUND_DTOR); if (hstate_is_gigantic(h)) hpages_pool = &hpool->hpages_pool[HUGE_PAGES_POOL_1G]; else @@ -757,6 +777,7 @@ void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve) } trace_dynamic_hugetlb_alloc_free(hpool, page, hpages_pool->free_huge_pages, DHUGETLB_FREE, huge_page_size(h)); +out: spin_unlock(&hpool->lock); put_hpool(hpool); }
+ /* + * A page belonging to dynamic hugetlb will be isolated as a whole + * when freed. See free_huge_page_to_dhugetlb_pool() for details. + */ + if (page_belong_to_dynamic_hugetlb(page)) + return -EBUSY; + spin_lock_irq(&hugetlb_lock); if (!PageHuge(page)) { rc = 0;
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6MH03 CVE: NA
--------------------------------
When memory is fragmented, update_reserved_pages() may call migrate_pages() to collect contiguous memory. This function can sleep, so we should use a mutex lock instead of a spin lock. Use KABI_EXTEND to fix the KABI breakage.
Fixes: 0c06a1c068ab ("mm/dynamic_hugetlb: add interface to configure the count of hugepages") Signed-off-by: Liu Shixin liushixin2@huawei.com --- include/linux/dynamic_hugetlb.h | 14 +++++++++++++- mm/dynamic_hugetlb.c | 6 +++--- 2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/include/linux/dynamic_hugetlb.h b/include/linux/dynamic_hugetlb.h index 476a9014a83a..eff31669e210 100644 --- a/include/linux/dynamic_hugetlb.h +++ b/include/linux/dynamic_hugetlb.h @@ -66,7 +66,7 @@ enum huge_pages_pool_type { struct dhugetlb_pool { int nid; spinlock_t lock; - spinlock_t reserved_lock; + KABI_DEPRECATE(spinlock_t, reserved_lock) atomic_t refcnt; unsigned long normal_pages_disabled;
@@ -74,6 +74,18 @@ struct dhugetlb_pool {
unsigned long total_huge_pages; struct huge_pages_pool hpages_pool[HUGE_PAGES_POOL_MAX]; + + /* The dhugetlb_pool structure is only used by the core kernel. It is + * accessed only by the memory cgroup and hugetlb core code, so + * changes made to the dhugetlb_pool structure should not affect + * third-party kernel modules. + */ + KABI_EXTEND(struct mutex reserved_lock) + + /* + * The percpu_pool[] should only be used by the dynamic hugetlb core. + * External kernel modules should not use it. + */ struct percpu_pages_pool percpu_pool[0]; };
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index 228b04b9c7b7..65b379632bcf 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -887,7 +887,7 @@ static int hugetlb_pool_create(struct mem_cgroup *memcg, unsigned long nid) return -ENOMEM;
spin_lock_init(&hpool->lock); - spin_lock_init(&hpool->reserved_lock); + mutex_init(&hpool->reserved_lock); hpool->nid = nid; atomic_set(&hpool->refcnt, 1);
@@ -1000,7 +1000,7 @@ static ssize_t update_reserved_pages(struct mem_cgroup *memcg, char *buf, int hp if (!get_hpool_unless_zero(hpool)) return -EINVAL;
- spin_lock(&hpool->reserved_lock); + mutex_lock(&hpool->reserved_lock); spin_lock(&hpool->lock); hpages_pool = &hpool->hpages_pool[hpages_pool_idx]; if (nr_pages > hpages_pool->nr_huge_pages) { @@ -1036,7 +1036,7 @@ static ssize_t update_reserved_pages(struct mem_cgroup *memcg, char *buf, int hp hpages_pool->free_normal_pages += delta; } spin_unlock(&hpool->lock); - spin_unlock(&hpool->reserved_lock); + mutex_unlock(&hpool->reserved_lock); put_hpool(hpool); return 0; }
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6XOIE CVE: NA
--------------------------------
Before discarding the bad page, set the PagePool flag to distinguish it from a free page, and increase used_pages to guarantee used + freed = total.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- mm/dynamic_hugetlb.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index 65b379632bcf..526d84870a60 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -529,6 +529,13 @@ static struct page *__alloc_page_from_dhugetlb_pool(void) spin_lock_irqsave(&percpu_pool->lock, flags);
do { + /* + * Before discarding the bad page, set the PagePool flag to + * distinguish it from a free page, and increase used_pages + * to guarantee used + freed = total. + */ + if (page) + SetPagePool(page); page = NULL; if (percpu_pool->free_pages == 0) { int ret; @@ -544,8 +551,8 @@ static struct page *__alloc_page_from_dhugetlb_pool(void) page = list_entry(percpu_pool->head_page.next, struct page, lru); list_del(&page->lru); percpu_pool->free_pages--; + percpu_pool->used_pages++; } while (page && check_new_page(page)); - percpu_pool->used_pages++; SetPagePool(page);
unlock:
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6XOIE CVE: NA
--------------------------------
The type of pfn is int, which can result in truncation. Change its type to unsigned long to fix the problem.
Fixes: eef7b4fd04a0 ("mm/dynamic_hugetlb: use pfn to traverse subpages") Signed-off-by: Liu Shixin liushixin2@huawei.com --- mm/dynamic_hugetlb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c index 526d84870a60..8199ef893f4a 100644 --- a/mm/dynamic_hugetlb.c +++ b/mm/dynamic_hugetlb.c @@ -55,7 +55,8 @@ static void __hpool_split_gigantic_page(struct dhugetlb_pool *hpool, struct page { int nr_pages = 1 << (PUD_SHIFT - PAGE_SHIFT); int nr_blocks = 1 << (PMD_SHIFT - PAGE_SHIFT); - int i, pfn = page_to_pfn(page); + unsigned long pfn = page_to_pfn(page); + int i;
lockdep_assert_held(&hpool->lock); atomic_set(compound_mapcount_ptr(page), 0);