hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8S9BY CVE: NA
--------------------------------
The HugeTLB system use resv_huge_pages to avoid SIGBUS, introduce the mechainism(simplified version) to dpool to protect HugeTLB allocation.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- fs/hugetlbfs/inode.c | 5 +++ include/linux/dynamic_pool.h | 34 +++++++++++++++++++ include/linux/hugetlb.h | 3 ++ mm/dynamic_pool.c | 62 ++++++++++++++++++++++++++++++++++ mm/hugetlb.c | 64 +++++++++++++++++++++++------------- 5 files changed, 145 insertions(+), 23 deletions(-)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b0edf5fe8132..86a56890ce01 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -35,6 +35,7 @@ #include <linux/magic.h> #include <linux/migrate.h> #include <linux/uio.h> +#include <linux/dynamic_pool.h>
#include <linux/uaccess.h> #include <linux/sched/mm.h> @@ -1294,6 +1295,9 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) */ mpol_shared_policy_init(&p->policy, NULL);
+ /* Initialize hpool here in case of a quick call to destroy */ + dynamic_pool_bind_file(p, sbinfo->hstate); + return &p->vfs_inode; }
@@ -1306,6 +1310,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); + dynamic_pool_unbind_file(HUGETLBFS_I(inode)); }
static const struct address_space_operations hugetlbfs_aops = { diff --git a/include/linux/dynamic_pool.h b/include/linux/dynamic_pool.h index b85aea9cd98e..8216e2019678 100644 --- a/include/linux/dynamic_pool.h +++ b/include/linux/dynamic_pool.h @@ -71,10 +71,22 @@ static inline bool page_from_dynamic_pool(struct page *page) return PagePool(page); }
+static inline bool file_in_dynamic_pool(struct hugetlbfs_inode_info *p) +{ + if (!dpool_enabled) + return false; + + return p && p->dpool; +} + int dynamic_pool_can_attach(struct task_struct *tsk, struct mem_cgroup *memcg); struct page *dynamic_pool_alloc_page(gfp_t gfp, unsigned int order, unsigned int alloc_flags); void dynamic_pool_free_page(struct page *page); +void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p, struct hstate *h); +void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p); +int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta, + struct hugetlbfs_inode_info *p);
void dynamic_pool_inherit(struct mem_cgroup *parent, struct mem_cgroup *memcg); int dynamic_pool_destroy(struct cgroup *cgrp, bool *clear_css_online); @@ -112,6 +124,28 @@ static inline void dynamic_pool_free_page(struct page *page) { }
+#ifdef CONFIG_HUGETLBFS +static inline bool file_in_dynamic_pool(struct hugetlbfs_inode_info *p) +{ + return false; +{ + +static inline void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p, + struct hstate *h) +{ +} + +static inline void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p) +{ +} + +static inline int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta, + struct hugetlbfs_inode_info *p) +{ + return -ENOMEM; +} +#endif + static inline void dynamic_pool_inherit(struct mem_cgroup *parent, struct mem_cgroup *memcg) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index f61aae3cfcd3..f582d53492a8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -596,6 +596,9 @@ struct hugetlbfs_inode_info { struct shared_policy policy; struct inode vfs_inode; unsigned int seals; +#ifdef CONFIG_DYNAMIC_POOL + struct dynamic_pool *dpool; +#endif };
static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) diff --git a/mm/dynamic_pool.c b/mm/dynamic_pool.c index d4f9dfede6cc..18eb03a192b7 100644 --- a/mm/dynamic_pool.c +++ b/mm/dynamic_pool.c @@ -731,6 +731,68 @@ void dynamic_pool_free_page(struct page *page) dpool_put(dpool); }
+void dynamic_pool_bind_file(struct hugetlbfs_inode_info *p, struct hstate *h) +{ + unsigned long size; + + if (!dpool_enabled || !p) + return; + + size = huge_page_size(h); + if (size == PMD_SIZE || size == PUD_SIZE) + p->dpool = dpool_get_from_task(current); + else + p->dpool = NULL; +} + +void dynamic_pool_unbind_file(struct hugetlbfs_inode_info *p) +{ + struct dynamic_pool *dpool; + + if (!dpool_enabled || !p || !p->dpool) + return; + + dpool = p->dpool; + p->dpool = NULL; + dpool_put(dpool); +} + +int dynamic_pool_hugetlb_acct_memory(struct hstate *h, long delta, + struct hugetlbfs_inode_info *p) +{ + struct dynamic_pool *dpool; + struct pages_pool *pool; + unsigned long flags; + int type; + int ret = -ENOMEM; + + if (!dpool_enabled || !p || !p->dpool) + return 0; + + dpool = p->dpool; + spin_lock_irqsave(&dpool->lock, flags); + + if (hstate_is_gigantic(h)) + type = PAGES_POOL_1G; + else + type = PAGES_POOL_2M; + pool = &dpool->pool[type]; + + if (delta > 0) { + if (delta <= pool->free_huge_pages - pool->resv_huge_pages) { + pool->resv_huge_pages += delta; + ret = 0; + } + } else { + pool->resv_huge_pages -= (unsigned long)(-delta); + WARN_ON(pool->resv_huge_pages < 0); + ret = 0; + } + spin_unlock_irqrestore(&dpool->lock, flags); + + return ret; +} + /* === dynamic pool function ========================================== */
static void dpool_dump_child_memcg(struct mem_cgroup *memcg, void *message) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0a657e684660..def6959ef574 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -36,6 +36,7 @@ #include <linux/memory.h> #include <linux/mm_inline.h> #include <linux/share_pool.h> +#include <linux/dynamic_pool.h>
#include <asm/page.h> #include <asm/pgalloc.h> @@ -92,7 +93,8 @@ static int num_fault_mutexes; struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
/* Forward declaration */ -static int hugetlb_acct_memory(struct hstate *h, long delta); +static int hugetlb_acct_memory(struct hstate *h, long delta, + struct hugetlbfs_inode_info *info); static void hugetlb_vma_lock_free(struct vm_area_struct *vma); static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma); static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma); @@ -123,7 +125,7 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool, if (subpool_is_free(spool)) { if (spool->min_hpages != -1) hugetlb_acct_memory(spool->hstate, - -spool->min_hpages); + -spool->min_hpages, NULL); kfree(spool); } } @@ -143,7 +145,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, spool->hstate = h; spool->min_hpages = min_hpages;
- if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) { + if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages, NULL)) { kfree(spool); return NULL; } @@ -171,13 +173,16 @@ void hugepage_put_subpool(struct hugepage_subpool *spool) * a subpool minimum size must be maintained. */ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool, - long delta) + long delta, struct hugetlbfs_inode_info *info) { long ret = delta;
if (!spool) return ret;
+ if (file_in_dynamic_pool(info)) + return ret; + spin_lock_irq(&spool->lock);
if (spool->max_hpages != -1) { /* maximum size accounting */ @@ -216,7 +221,7 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool, * in the case where a subpool minimum size must be maintained. */ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool, - long delta) + long delta, struct hugetlbfs_inode_info *info) { long ret = delta; unsigned long flags; @@ -224,6 +229,9 @@ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool, if (!spool) return delta;
+ if (file_in_dynamic_pool(info)) + return ret; + spin_lock_irqsave(&spool->lock, flags);
if (spool->max_hpages != -1) /* maximum size accounting */ @@ -935,14 +943,15 @@ static long region_del(struct resv_map *resv, long f, long t) void hugetlb_fix_reserve_counts(struct inode *inode) { struct hugepage_subpool *spool = subpool_inode(inode); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); long rsv_adjust; bool reserved = false;
- rsv_adjust = hugepage_subpool_get_pages(spool, 1); + rsv_adjust = hugepage_subpool_get_pages(spool, 1, info); if (rsv_adjust > 0) { struct hstate *h = hstate_inode(inode);
- if (!hugetlb_acct_memory(h, 1)) + if (!hugetlb_acct_memory(h, 1, info)) reserved = true; } else if (!rsv_adjust) { reserved = true; @@ -1928,7 +1937,7 @@ void free_huge_folio(struct folio *folio) * after page is free. Therefore, force restore_reserve * operation. */ - if (hugepage_subpool_put_pages(spool, 1) == 0) + if (hugepage_subpool_put_pages(spool, 1, NULL) == 0) restore_reserve = true; }
@@ -3118,6 +3127,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { struct hugepage_subpool *spool = subpool_vma(vma); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(file_inode(vma->vm_file)); struct hstate *h = hstate_vma(vma); struct folio *folio; long map_chg, map_commit; @@ -3144,7 +3154,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, * checked against any subpool limit. */ if (map_chg || avoid_reserve) { - gbl_chg = hugepage_subpool_get_pages(spool, 1); + gbl_chg = hugepage_subpool_get_pages(spool, 1, info); if (gbl_chg < 0) { vma_end_reservation(h, vma, addr); return ERR_PTR(-ENOSPC); @@ -3224,8 +3234,8 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, */ long rsv_adjust;
- rsv_adjust = hugepage_subpool_put_pages(spool, 1); - hugetlb_acct_memory(h, -rsv_adjust); + rsv_adjust = hugepage_subpool_put_pages(spool, 1, info); + hugetlb_acct_memory(h, -rsv_adjust, info); if (deferred_reserve) hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h), pages_per_huge_page(h), folio); @@ -3240,7 +3250,7 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma, h_cg); out_subpool_put: if (map_chg || avoid_reserve) - hugepage_subpool_put_pages(spool, 1); + hugepage_subpool_put_pages(spool, 1, info); vma_end_reservation(h, vma, addr); return ERR_PTR(-ENOSPC); } @@ -4927,13 +4937,17 @@ unsigned long hugetlb_total_pages(void) return nr_total_pages; }
-static int hugetlb_acct_memory(struct hstate *h, long delta) +static int hugetlb_acct_memory(struct hstate *h, long delta, + struct hugetlbfs_inode_info *info) { int ret = -ENOMEM;
if (!delta) return 0;
+ if (file_in_dynamic_pool(info)) + return dynamic_pool_hugetlb_acct_memory(h, delta, info); + spin_lock_irq(&hugetlb_lock); /* * When cpuset is configured, it breaks the strict hugetlb page @@ -5020,6 +5034,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) struct hstate *h = hstate_vma(vma); struct resv_map *resv; struct hugepage_subpool *spool = subpool_vma(vma); + struct hugetlbfs_inode_info *info; unsigned long reserve, start, end; long gbl_reserve;
@@ -5039,8 +5054,9 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) * Decrement reserve counts. The global reserve count may be * adjusted if the subpool has a minimum size. */ - gbl_reserve = hugepage_subpool_put_pages(spool, reserve); - hugetlb_acct_memory(h, -gbl_reserve); + info = HUGETLBFS_I(file_inode(vma->vm_file)); + gbl_reserve = hugepage_subpool_put_pages(spool, reserve, info); + hugetlb_acct_memory(h, -gbl_reserve, info); }
kref_put(&resv->refs, resv_map_release); @@ -6864,6 +6880,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long chg = -1, add = -1; struct hstate *h = hstate_inode(inode); struct hugepage_subpool *spool = subpool_inode(inode); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); struct resv_map *resv_map; struct hugetlb_cgroup *h_cg = NULL; long gbl_reserve, regions_needed = 0; @@ -6934,7 +6951,7 @@ bool hugetlb_reserve_pages(struct inode *inode, * the subpool has a minimum size, there may be some global * reservations already in place (gbl_reserve). */ - gbl_reserve = hugepage_subpool_get_pages(spool, chg); + gbl_reserve = hugepage_subpool_get_pages(spool, chg, info); if (gbl_reserve < 0) goto out_uncharge_cgroup;
@@ -6942,7 +6959,7 @@ bool hugetlb_reserve_pages(struct inode *inode, * Check enough hugepages are available for the reservation. * Hand the pages back to the subpool if there are not */ - if (hugetlb_acct_memory(h, gbl_reserve) < 0) + if (hugetlb_acct_memory(h, gbl_reserve, info) < 0) goto out_put_pages;
/* @@ -6960,7 +6977,7 @@ bool hugetlb_reserve_pages(struct inode *inode, add = region_add(resv_map, from, to, regions_needed, h, h_cg);
if (unlikely(add < 0)) { - hugetlb_acct_memory(h, -gbl_reserve); + hugetlb_acct_memory(h, -gbl_reserve, info); goto out_put_pages; } else if (unlikely(chg > add)) { /* @@ -6981,8 +6998,8 @@ bool hugetlb_reserve_pages(struct inode *inode, (chg - add) * pages_per_huge_page(h), h_cg);
rsv_adjust = hugepage_subpool_put_pages(spool, - chg - add); - hugetlb_acct_memory(h, -rsv_adjust); + chg - add, info); + hugetlb_acct_memory(h, -rsv_adjust, info); } else if (h_cg) { /* * The file_regions will hold their own reference to @@ -6997,7 +7014,7 @@ bool hugetlb_reserve_pages(struct inode *inode,
out_put_pages: /* put back original number of pages, chg */ - (void)hugepage_subpool_put_pages(spool, chg); + (void)hugepage_subpool_put_pages(spool, chg, info); out_uncharge_cgroup: hugetlb_cgroup_uncharge_cgroup_rsvd(hstate_index(h), chg * pages_per_huge_page(h), h_cg); @@ -7023,6 +7040,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, struct resv_map *resv_map = inode_resv_map(inode); long chg = 0; struct hugepage_subpool *spool = subpool_inode(inode); + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); long gbl_reserve;
/* @@ -7051,8 +7069,8 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, * Note that !resv_map implies freed == 0. So (chg - freed) * won't go negative. */ - gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed)); - hugetlb_acct_memory(h, -gbl_reserve); + gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed), info); + hugetlb_acct_memory(h, -gbl_reserve, info);
return 0; }