From: Liu Shixin <liushixin2@huawei.com>
hulk inclusion
category: feature
bugzilla: 46904, https://gitee.com/openeuler/kernel/issues/I6BDME
CVE: NA
--------------------------------
This feature is already supported on x86_64, and this is the original description:
Dynamic hugetlb, which is based on HugeTLB, supports splitting hugepages dynamically within a specified cgroup. We add a hugetlb_pool to mem_cgroup to manage dynamic hugetlb for the corresponding cgroup. After dynamic hugepages are allocated for a cgroup, these hugepages can be used as 1G/2M/4K pages through split/merge operations.
It is now supported on arm64. The feature is limited to depend on ARM64_4K_PAGES and does not support cont-bit hugepages. We merge the previous patches into a single patch, which is patch[1]. While merging the code, we found some code that can be isolated under CONFIG_DYNAMIC_HUGETLB, so patch[2] re-isolates it. In patch[3], we restrict the feature to the limits mentioned above. Patch[4] adds skipping of hugepage dissolution, which may conflict with memory hotplug and memory failure. Patch[5] sets DYNAMIC_HUGETLB to y in hulk_defconfig to enable the feature by default.
This patch includes all previous patches; the patch list is recorded in the bugzilla.
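
For reference, a minimal user-space sketch of how the interface might be driven, assuming the kernel is booted with dynamic_1G_hugepage=on and built with CONFIG_DYNAMIC_HUGETLB=y. The control file name "dhugetlb.nr_pages" and the cgroup mount path are assumptions for illustration only (the cftype definitions are not visible in this excerpt); the value format "<nid> <size>", where size is the number of 1G hugepages moved from hugetlb into the cgroup's pool, follows the comment above memcg_write_dhugetlb().

  /*
   * Illustration only: the control file name and cgroup path below are
   * assumed, not taken from this patch.
   */
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  int main(void)
  {
  	/* nid 0, one 1G hugepage, per the "<nid> <size>" input format */
  	const char *val = "0 1";
  	int fd = open("/sys/fs/cgroup/memory/mygrp/dhugetlb.nr_pages",
  		      O_WRONLY);

  	if (fd < 0) {
  		perror("open");
  		return 1;
  	}
  	if (write(fd, val, strlen(val)) != (ssize_t)strlen(val))
  		perror("write");
  	close(fd);
  	return 0;
  }

Once the pool is populated, per-size reservations could then be adjusted through the 1G/2M reserve interfaces (handled by dhugetlb_1G_reserve_write()/dhugetlb_2M_reserve_write() below), after which tasks in that memcg prefer to allocate from the pool.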
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Yongqiang Liu <liuyongqiang13@huawei.com>
---
 fs/Kconfig                      |    9 +
 fs/hugetlbfs/inode.c            |    4 +
 include/linux/gfp.h             |    4 +-
 include/linux/hugetlb.h         |   97 +++
 include/linux/memcontrol.h      |   15 +
 include/linux/page-flags.h      |    3 +
 include/trace/events/dhugetlb.h |  123 ++++
 include/trace/events/mmflags.h  |    1 +
 kernel/cgroup/cgroup.c          |    6 +
 mm/huge_memory.c                |   16 +-
 mm/hugetlb.c                    | 1188 ++++++++++++++++++++++++++++++-
 mm/internal.h                   |    1 +
 mm/memcontrol.c                 |  391 ++++++++++
 mm/page_alloc.c                 |   33 +-
 14 files changed, 1862 insertions(+), 29 deletions(-)
 create mode 100644 include/trace/events/dhugetlb.h
diff --git a/fs/Kconfig b/fs/Kconfig index 5921bfbebee4..e8800d8a73b3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -211,6 +211,15 @@ config TMPFS_INODE64
If unsure, say N.
+config DYNAMIC_HUGETLB + bool "Dynamic HugeTLB" + depends on HUGETLB_PAGE + depends on MEMCG + depends on CGROUP_HUGETLB + help + Dynamic hugepage are used in memcg and can be splited into small pages + automatically. The tasks in the memcg prefer to alloc dynamic hugepage. + config HUGETLBFS bool "HugeTLB file system support" depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \ diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 005e05c442c5..30a29936372c 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -1164,6 +1164,8 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb) * private inode. This simplifies hugetlbfs_destroy_inode. */ mpol_shared_policy_init(&p->policy, NULL); + /* Initialize hpool here in case of a quick call to destroy */ + p->hpool = get_dhugetlb_pool_from_task(current);
return &p->vfs_inode; } @@ -1178,6 +1180,8 @@ static void hugetlbfs_destroy_inode(struct inode *inode) { hugetlbfs_inc_free_inodes(HUGETLBFS_SB(inode->i_sb)); mpol_free_shared_policy(&HUGETLBFS_I(inode)->policy); + dhugetlb_pool_put(HUGETLBFS_I(inode)->hpool); + HUGETLBFS_I(inode)->hpool = NULL; call_rcu(&inode->i_rcu, hugetlbfs_i_callback); }
diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 152cb9bdf436..74b0375d7d2b 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -501,7 +501,9 @@ static inline void arch_alloc_page(struct page *page, int order) { } struct page * __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, nodemask_t *nodemask); - +void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, + unsigned int alloc_flags); +bool free_pages_prepare(struct page *page, unsigned int order, bool check_free); static inline struct page * __alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) { diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2d2b06b36bd0..3a82ea9283ec 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -289,6 +289,7 @@ struct hugetlbfs_inode_info { struct shared_policy policy; struct inode vfs_inode; unsigned int seals; + struct dhugetlb_pool *hpool; };
static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) @@ -655,6 +656,102 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr
#endif /* CONFIG_HUGETLB_PAGE */
+#ifdef CONFIG_DYNAMIC_HUGETLB +/* The number of small_page_pool for a dhugetlb_pool */ +#define NR_SMPOOL num_possible_cpus() +/* The max page number in a small_page_pool */ +#define MAX_SMPOOL_PAGE 1024 +/* number to move between list */ +#define BATCH_SMPOOL_PAGE (MAX_SMPOOL_PAGE >> 2) +/* We don't need to try 5 times, or we can't migrate the pages. */ +#define HPOOL_RECLAIM_RETRIES 5 + +extern struct static_key_false dhugetlb_enabled_key; +#define dhugetlb_enabled (static_branch_unlikely(&dhugetlb_enabled_key)) + +#define DEFAULT_PAGESIZE 4096 +extern rwlock_t dhugetlb_pagelist_rwlock; +struct dhugetlb_pagelist { + unsigned long count; + struct dhugetlb_pool *hpool[0]; +}; +extern struct dhugetlb_pagelist *dhugetlb_pagelist_t; + +struct split_pages { + struct list_head list; + unsigned long start_pfn; + unsigned long free_pages; +}; + +struct small_page_pool { + spinlock_t lock; + unsigned long free_pages; + long used_pages; + struct list_head head_page; +}; + +struct dhugetlb_pool { + int nid; + spinlock_t lock; + spinlock_t reserved_lock; + atomic_t refcnt; + + struct mem_cgroup *attach_memcg; + + struct list_head dhugetlb_1G_freelists; + struct list_head dhugetlb_2M_freelists; + struct list_head dhugetlb_4K_freelists; + + struct list_head split_1G_freelists; + struct list_head split_2M_freelists; + + unsigned long total_nr_pages; + + unsigned long total_reserved_1G; + unsigned long free_reserved_1G; + unsigned long mmap_reserved_1G; + unsigned long used_1G; + unsigned long free_unreserved_1G; + unsigned long nr_split_1G; + + unsigned long total_reserved_2M; + unsigned long free_reserved_2M; + unsigned long mmap_reserved_2M; + unsigned long used_2M; + unsigned long free_unreserved_2M; + unsigned long nr_split_2M; + + unsigned long free_pages; + struct small_page_pool smpool[0]; +}; + +bool dhugetlb_pool_get(struct dhugetlb_pool *hpool); +void dhugetlb_pool_put(struct dhugetlb_pool *hpool); +struct dhugetlb_pool *hpool_alloc(unsigned long nid); +int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool, + unsigned long nid, unsigned long size); +bool free_dhugetlb_pool(struct dhugetlb_pool *hpool); +int update_dhugetlb_pagelist(unsigned long idx, struct dhugetlb_pool *hpool); +struct dhugetlb_pool *get_dhugetlb_pool_from_dhugetlb_pagelist( + struct page *page); +struct dhugetlb_pool *get_dhugetlb_pool_from_task(struct task_struct *tsk); +bool move_pages_from_hpool_to_smpool(struct dhugetlb_pool *hpool, + struct small_page_pool *smpool); +void move_pages_from_smpool_to_hpool(struct dhugetlb_pool *hpool, + struct small_page_pool *smpool); +void dhugetlb_reserve_hugepages(struct dhugetlb_pool *hpool, + unsigned long count, bool gigantic); +#else +#define dhugetlb_enabled 0 +struct dhugetlb_pool {}; +static inline struct dhugetlb_pool *get_dhugetlb_pool_from_task( + struct task_struct *tsk) +{ + return NULL; +} +static inline void dhugetlb_pool_put(struct dhugetlb_pool *hpool) { return; } +#endif /* CONFIG_DYNAMIC_HUGETLB */ + static inline spinlock_t *huge_pte_lock(struct hstate *h, struct mm_struct *mm, pte_t *pte) { diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4517d132d1e2..22f40d5e0e8b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -326,6 +326,7 @@ struct mem_cgroup { };
struct mem_cgroup_extension { + struct dhugetlb_pool *hpool; #ifdef CONFIG_MEMCG_QOS /* Currently support 0 and -1. * in the future it can expand to other value. @@ -1406,4 +1407,18 @@ static inline void memcg_put_cache_ids(void)
#endif /* CONFIG_MEMCG_KMEM */
+#ifdef CONFIG_DYNAMIC_HUGETLB +struct dhugetlb_pool *get_dhugetlb_pool_from_memcg(struct mem_cgroup *memcg); +struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp_mask); +void free_page_to_dhugetlb_pool(struct page *page); +int dhugetlb_pool_force_empty(struct mem_cgroup *memcg); +bool dhugetlb_pool_is_free(struct cgroup_subsys_state *css); +#else +static inline struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp_mask) +{ + return NULL; +} +static inline void free_page_to_dhugetlb_pool(struct page *page) {} +#endif + #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 0c5d1c4c71e6..fd6cd68e00a2 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -102,6 +102,7 @@ enum pageflags { PG_idle, #endif PG_percpu_ref, + PG_pool, __NR_PAGEFLAGS,
/* Filesystems */ @@ -284,6 +285,7 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD) __PAGEFLAG(Slab, slab, PF_NO_TAIL) __PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL) PAGEFLAG(Checked, checked, PF_NO_COMPOUND) /* Used by some filesystems */ +PAGEFLAG(Pool, pool, PF_NO_TAIL)
/* Xen */ PAGEFLAG(Pinned, pinned, PF_NO_COMPOUND) @@ -770,6 +772,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page) 1UL << PG_private | 1UL << PG_private_2 | \ 1UL << PG_writeback | 1UL << PG_reserved | \ 1UL << PG_slab | 1UL << PG_active | \ + 1UL << PG_pool | \ 1UL << PG_unevictable | __PG_MLOCKED)
/* diff --git a/include/trace/events/dhugetlb.h b/include/trace/events/dhugetlb.h new file mode 100644 index 000000000000..20b3a54589d1 --- /dev/null +++ b/include/trace/events/dhugetlb.h @@ -0,0 +1,123 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM dhugetlb + +#if !defined(_TRACE_DHUGETLB_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_DHUGETLB_H + +#include <linux/tracepoint.h> +#include <trace/events/mmflags.h> + +#define DHUGETLB_SPLIT_1G 0x01u +#define DHUGETLB_SPLIT_2M 0x02u +#define DHUGETLB_MERGE_4K 0x04u +#define DHUGETLB_MIGRATE_4K 0x08u +#define DHUGETLB_RESV_1G 0x10u +#define DHUGETLB_UNRESV_1G 0x20u +#define DHUGETLB_RESV_2M 0x40u +#define DHUGETLB_UNRESV_2M 0x80u +#define DHUGETLB_ALLOC_1G 0x100u +#define DHUGETLB_FREE_1G 0x200u +#define DHUGETLB_ALLOC_2M 0x400u +#define DHUGETLB_FREE_2M 0x800u + +#define __def_action_names \ + {(unsigned long)DHUGETLB_SPLIT_1G, "split_1G_to_2M"}, \ + {(unsigned long)DHUGETLB_SPLIT_2M, "split_2M_to_4K"}, \ + {(unsigned long)DHUGETLB_MERGE_4K, "merge_4K_to_2M"}, \ + {(unsigned long)DHUGETLB_MIGRATE_4K, "migrate_4K_to_2M"}, \ + {(unsigned long)DHUGETLB_RESV_1G, "resv_1G_page"}, \ + {(unsigned long)DHUGETLB_UNRESV_1G, "unresv_1G_page"}, \ + {(unsigned long)DHUGETLB_RESV_2M, "resv_2M_page"}, \ + {(unsigned long)DHUGETLB_UNRESV_2M, "unresv_2M_page"}, \ + {(unsigned long)DHUGETLB_ALLOC_1G, "alloc_1G_page"}, \ + {(unsigned long)DHUGETLB_FREE_1G, "free_1G_page"}, \ + {(unsigned long)DHUGETLB_ALLOC_2M, "alloc_2M_page"}, \ + {(unsigned long)DHUGETLB_FREE_2M, "free_2M_page"} + +#define show_action(action) \ + (action) ? __print_flags(action, "", \ + __def_action_names \ + ) : "none" + +TRACE_EVENT(dhugetlb_split_merge, + + TP_PROTO(const void *hpool, struct page *page, unsigned long action), + + TP_ARGS(hpool, page, action), + + TP_STRUCT__entry( + __field( const void *, hpool ) + __field( unsigned long, pfn ) + __field( unsigned long, action ) + ), + + TP_fast_assign( + __entry->hpool = hpool; + __entry->pfn = page ? page_to_pfn(page) : -1UL; + __entry->action = action; + ), + + TP_printk("hpool=%p page=%p pfn=%lu action=%s", + __entry->hpool, + __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, + __entry->pfn != -1UL ? __entry->pfn : 0, + show_action(__entry->action)) +); + +TRACE_EVENT(dhugetlb_acct_memory, + + TP_PROTO(const void *hpool, unsigned long count, unsigned long action), + + TP_ARGS(hpool, count, action), + + TP_STRUCT__entry( + __field( const void *, hpool ) + __field( unsigned long, count ) + __field( unsigned long, action ) + ), + + TP_fast_assign( + __entry->hpool = hpool; + __entry->count = count; + __entry->action = action; + ), + + TP_printk("hpool=%p action=%s, mmap_count=%lu", + __entry->hpool, + show_action(__entry->action), + __entry->count) +); + +TRACE_EVENT(dhugetlb_alloc_free, + + TP_PROTO(const void *hpool, struct page *page, unsigned long count, + unsigned long action), + + TP_ARGS(hpool, page, count, action), + + TP_STRUCT__entry( + __field( const void *, hpool ) + __field( unsigned long, pfn ) + __field( unsigned long, count ) + __field( unsigned long, action ) + ), + + TP_fast_assign( + __entry->hpool = hpool; + __entry->pfn = page ? page_to_pfn(page) : -1UL; + __entry->count = count; + __entry->action = action; + ), + + TP_printk("hpool=%p page=%p pfn=%lu action=%s free_count=%lu", + __entry->hpool, + __entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL, + __entry->pfn != -1UL ? 
__entry->pfn : 0, + show_action(__entry->action), + __entry->count) +); + +#endif /* _TRACE_DHUGETLB_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b817bf1885a0..4d06b47129f3 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -81,6 +81,7 @@
#define __def_pageflag_names \ {1UL << PG_locked, "locked" }, \ + {1UL << PG_pool, "pool" }, \ {1UL << PG_waiters, "waiters" }, \ {1UL << PG_error, "error" }, \ {1UL << PG_referenced, "referenced" }, \ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 7456882e1a0f..b01490b71f32 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -65,6 +65,7 @@ /* let's not notify more than 100 times per second */ #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
+bool dhugetlb_pool_is_free(struct cgroup_subsys_state *css); /* * cgroup_mutex is the master lock. Any modification to cgroup or its * hierarchy must be performed while holding it. @@ -5280,6 +5281,11 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) if (css_has_online_children(&cgrp->self)) return -EBUSY;
+#ifdef CONFIG_MEMCG + /* If we use dynamic hugetlb, make sure dhugtlb_pool is free */ + if (!dhugetlb_pool_is_free(cgrp->subsys[memory_cgrp_id])) + return -EBUSY; +#endif /* * Mark @cgrp and the associated csets dead. The former prevents * further task migration and child creation by disabling diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f8319265c1cf..484ffdbf5f45 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -396,6 +396,20 @@ static int __init hugepage_init(void) return -EINVAL; }
+ /* + * When we alloc some pages(order = 0), system may help us to alloc + * a page(order > 0) due to transparent hugepage. This result + * dynamic hugetlb to be skipped. Actually, using dynamic hugetlb + * means we have already optimized the program, so we should not + * use transparent hugepage in addition. + * (May result negative optimization) + */ + if (dhugetlb_enabled) { + transparent_hugepage_flags = 0; + pr_info("transparent hugepage is disabled due to confilct with dynamic hugetlb\n"); + return -EINVAL; + } + /* * hugepages can't be allocated by the buddy allocator */ @@ -2946,9 +2960,9 @@ static unsigned long deferred_split_count(struct shrinker *shrink, { struct pglist_data *pgdata = NODE_DATA(sc->nid); unsigned long *split_queue_len = &pgdata->split_queue_len; +#ifdef CONFIG_MEMCG struct mem_cgroup_extension *memcg_ext;
-#ifdef CONFIG_MEMCG if (sc->memcg) { memcg_ext = container_of(sc->memcg, struct mem_cgroup_extension, memcg); split_queue_len = &memcg_ext->split_queue_len; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 495d8b5b38fc..4c8c91acd6d5 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -27,6 +27,12 @@ #include <linux/jhash.h> #include <linux/mman.h> #include <linux/share_pool.h> +#include <linux/kthread.h> +#include <linux/cpuhotplug.h> +#include <linux/freezer.h> +#include <linux/delay.h> +#include <linux/migrate.h> +#include <linux/mm_inline.h>
#include <asm/page.h> #include <asm/pgtable.h> @@ -39,8 +45,14 @@ #include <linux/userfaultfd_k.h> #include <linux/page_owner.h> #include <linux/share_pool.h> +#include <linux/memblock.h> #include "internal.h"
+#if (defined CONFIG_DYNAMIC_HUGETLB) && (!defined __GENKSYMS__) +#define CREATE_TRACE_POINTS +#include <trace/events/dhugetlb.h> +#endif + int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; @@ -89,7 +101,8 @@ static inline void ClearPageHugeFreed(struct page *head) }
/* Forward declaration */ -static int hugetlb_acct_memory(struct hstate *h, long delta); +static int hugetlb_acct_memory(struct hstate *h, long delta, + struct dhugetlb_pool *hpool);
static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) { @@ -103,7 +116,7 @@ static inline void unlock_or_release_subpool(struct hugepage_subpool *spool) if (free) { if (spool->min_hpages != -1) hugetlb_acct_memory(spool->hstate, - -spool->min_hpages); + -spool->min_hpages, NULL); kfree(spool); } } @@ -123,7 +136,7 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, spool->hstate = h; spool->min_hpages = min_hpages;
- if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages)) { + if (min_hpages != -1 && hugetlb_acct_memory(h, min_hpages, NULL)) { kfree(spool); return NULL; } @@ -149,13 +162,17 @@ void hugepage_put_subpool(struct hugepage_subpool *spool) * a subpool minimum size must be manitained. */ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool, - long delta) + long delta, struct dhugetlb_pool *hpool) { long ret = delta;
if (!spool) return ret;
+ /* Skip subpool when hugetlb file belongs to a hugetlb_pool */ + if (dhugetlb_enabled && hpool) + return ret; + spin_lock(&spool->lock);
if (spool->max_hpages != -1) { /* maximum size accounting */ @@ -194,13 +211,17 @@ static long hugepage_subpool_get_pages(struct hugepage_subpool *spool, * in the case where a subpool minimum size must be maintained. */ static long hugepage_subpool_put_pages(struct hugepage_subpool *spool, - long delta) + long delta, struct dhugetlb_pool *hpool) { long ret = delta;
if (!spool) return delta;
+ /* Skip subpool when hugetlb file belongs to a hugetlb_pool */ + if (dhugetlb_enabled && hpool) + return ret; + spin_lock(&spool->lock);
if (spool->max_hpages != -1) /* maximum size accounting */ @@ -594,12 +615,13 @@ void hugetlb_fix_reserve_counts(struct inode *inode) struct hugepage_subpool *spool = subpool_inode(inode); long rsv_adjust; bool reserved = false; + struct dhugetlb_pool *hpool = HUGETLBFS_I(inode)->hpool;
- rsv_adjust = hugepage_subpool_get_pages(spool, 1); + rsv_adjust = hugepage_subpool_get_pages(spool, 1, hpool); if (rsv_adjust > 0) { struct hstate *h = hstate_inode(inode);
- if (!hugetlb_acct_memory(h, 1)) + if (!hugetlb_acct_memory(h, 1, hpool)) reserved = true; } else if (!rsv_adjust) { reserved = true; @@ -1300,6 +1322,56 @@ static inline void ClearPageHugeTemporary(struct page *page) page[2].mapping = NULL; }
+#ifdef CONFIG_DYNAMIC_HUGETLB +static void free_huge_page_to_dhugetlb_pool(struct page *page, + bool restore_reserve) +{ + struct hstate *h = page_hstate(page); + struct dhugetlb_pool *hpool; + + hpool = get_dhugetlb_pool_from_dhugetlb_pagelist(page); + if (unlikely(!hpool)) { + pr_err("dhugetlb: free error: get hpool failed\n"); + return; + } + + spin_lock(&hpool->lock); + ClearPagePool(page); + set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + if (!hstate_is_gigantic(h)) { + list_add(&page->lru, &hpool->dhugetlb_2M_freelists); + hpool->free_reserved_2M++; + hpool->used_2M--; + if (restore_reserve) { + hpool->mmap_reserved_2M++; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_2M, + DHUGETLB_RESV_2M); + } + trace_dhugetlb_alloc_free(hpool, page, hpool->free_reserved_2M, + DHUGETLB_FREE_2M); + } else { + list_add(&page->lru, &hpool->dhugetlb_1G_freelists); + hpool->free_reserved_1G++; + hpool->used_1G--; + if (restore_reserve) { + hpool->mmap_reserved_1G++; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_1G, + DHUGETLB_RESV_1G); + } + trace_dhugetlb_alloc_free(hpool, page, hpool->free_reserved_1G, + DHUGETLB_FREE_1G); + } + spin_unlock(&hpool->lock); + dhugetlb_pool_put(hpool); +} +#else +void free_huge_page_to_dhugetlb_pool(struct page *page, bool restore_reserve) +{ +} +#endif + void free_huge_page(struct page *page) { /* @@ -1320,6 +1392,17 @@ void free_huge_page(struct page *page) restore_reserve = PagePrivate(page); ClearPagePrivate(page);
+ if (dhugetlb_enabled && PagePool(page)) { + spin_lock(&hugetlb_lock); + clear_page_huge_active(page); + list_del(&page->lru); + hugetlb_cgroup_uncharge_page(hstate_index(h), + pages_per_huge_page(h), page); + spin_unlock(&hugetlb_lock); + free_huge_page_to_dhugetlb_pool(page, restore_reserve); + return; + } + /* * If PagePrivate() was set on page, page allocation consumed a * reservation. If the page was associated with a subpool, there @@ -1335,7 +1418,7 @@ void free_huge_page(struct page *page) * after page is free. Therefore, force restore_reserve * operation. */ - if (hugepage_subpool_put_pages(spool, 1) == 0) + if (hugepage_subpool_put_pages(spool, 1, NULL) == 0) restore_reserve = true; }
@@ -2211,6 +2294,81 @@ static void restore_reserve_on_error(struct hstate *h, } }
+#ifdef CONFIG_DYNAMIC_HUGETLB +static struct page *__alloc_huge_page_from_dhugetlb_pool( + struct dhugetlb_pool *hpool, int idx, bool need_unreserved) +{ + unsigned long flags; + struct page *page = NULL; + + spin_lock_irqsave(&hpool->lock, flags); + if (hstate_is_gigantic(&hstates[idx]) && hpool->free_reserved_1G) { + page = list_entry(hpool->dhugetlb_1G_freelists.next, + struct page, lru); + list_del(&page->lru); + hpool->free_reserved_1G--; + hpool->used_1G++; + if (need_unreserved) { + SetPagePrivate(page); + hpool->mmap_reserved_1G--; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_1G, + DHUGETLB_UNRESV_1G); + } + trace_dhugetlb_alloc_free(hpool, page, hpool->free_reserved_1G, + DHUGETLB_ALLOC_1G); + } else if (!hstate_is_gigantic(&hstates[idx]) && + hpool->free_reserved_2M) { + page = list_entry(hpool->dhugetlb_2M_freelists.next, + struct page, lru); + list_del(&page->lru); + hpool->free_reserved_2M--; + hpool->used_2M++; + if (need_unreserved) { + SetPagePrivate(page); + hpool->mmap_reserved_2M--; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_2M, + DHUGETLB_UNRESV_2M); + } + trace_dhugetlb_alloc_free(hpool, page, hpool->free_reserved_2M, + DHUGETLB_ALLOC_2M); + } + if (page) { + INIT_LIST_HEAD(&page->lru); + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); + set_page_refcounted(page); + SetPagePool(page); + } + spin_unlock_irqrestore(&hpool->lock, flags); + + return page; +} + +static struct page *alloc_huge_page_from_dhugetlb_pool( + struct vm_area_struct *vma, int idx, int avoid_reserve, + long gbl_chg, struct dhugetlb_pool *hpool) +{ + struct page *page; + bool need_unreserved = false; + + if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) + need_unreserved = true; + + page = __alloc_huge_page_from_dhugetlb_pool(hpool, idx, + need_unreserved); + + return page; +} +#else +static inline struct page *alloc_huge_page_from_dhugetlb_pool( + struct vm_area_struct *vma, int idx, int avoid_reserve, + long gbl_chg, struct dhugetlb_pool *hpool) +{ + return NULL; +} +#endif + struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve) { @@ -2221,6 +2379,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, long gbl_chg; int ret, idx; struct hugetlb_cgroup *h_cg; + struct dhugetlb_pool *hpool = + HUGETLBFS_I(file_inode(vma->vm_file))->hpool;
idx = hstate_index(h); /* @@ -2240,7 +2400,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, * checked against any subpool limit. */ if (map_chg || avoid_reserve) { - gbl_chg = hugepage_subpool_get_pages(spool, 1); + gbl_chg = hugepage_subpool_get_pages(spool, 1, hpool); if (gbl_chg < 0) { vma_end_reservation(h, vma, addr); return ERR_PTR(-ENOSPC); @@ -2262,6 +2422,26 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, if (ret) goto out_subpool_put;
+ if (dhugetlb_enabled && hpool) { + page = alloc_huge_page_from_dhugetlb_pool(vma, idx, + avoid_reserve, + gbl_chg, hpool); + if (page) { + /* + * Use hugetlb_lock to manage the account of + * hugetlb cgroup. + */ + spin_lock(&hugetlb_lock); + list_add(&page->lru, &h->hugepage_activelist); + hugetlb_cgroup_commit_charge(idx, + pages_per_huge_page(hstate_vma(vma)), + h_cg, page); + spin_unlock(&hugetlb_lock); + goto out; + } + goto out_uncharge_cgroup; + } + spin_lock(&hugetlb_lock); /* * glb_chg is passed to indicate whether or not a page must be taken @@ -2284,7 +2464,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, } hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page); spin_unlock(&hugetlb_lock); - +out: set_page_private(page, (unsigned long)spool);
map_commit = vma_commit_reservation(h, vma, addr); @@ -2300,8 +2480,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, */ long rsv_adjust;
- rsv_adjust = hugepage_subpool_put_pages(spool, 1); - hugetlb_acct_memory(h, -rsv_adjust); + rsv_adjust = hugepage_subpool_put_pages(spool, 1, hpool); + hugetlb_acct_memory(h, -rsv_adjust, hpool); } return page;
@@ -2309,7 +2489,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, hugetlb_cgroup_uncharge_cgroup(idx, pages_per_huge_page(h), h_cg); out_subpool_put: if (map_chg || avoid_reserve) - hugepage_subpool_put_pages(spool, 1); + hugepage_subpool_put_pages(spool, 1, hpool); vma_end_reservation(h, vma, addr); return ERR_PTR(-ENOSPC); } @@ -3098,6 +3278,932 @@ static void hugetlb_register_all_nodes(void) { }
#endif
+#ifdef CONFIG_DYNAMIC_HUGETLB +static bool enable_dhugetlb; +DEFINE_STATIC_KEY_FALSE(dhugetlb_enabled_key); +DEFINE_RWLOCK(dhugetlb_pagelist_rwlock); +struct dhugetlb_pagelist *dhugetlb_pagelist_t; + +bool dhugetlb_pool_get(struct dhugetlb_pool *hpool) +{ + if (!hpool) + return false; + + return atomic_inc_not_zero(&hpool->refcnt); +} + +void dhugetlb_pool_put(struct dhugetlb_pool *hpool) +{ + if (!dhugetlb_enabled || !hpool) + return; + + if (atomic_dec_and_test(&hpool->refcnt)) { + css_put(&hpool->attach_memcg->css); + kfree(hpool); + } +} + +struct dhugetlb_pool *hpool_alloc(unsigned long nid) +{ + int i; + struct dhugetlb_pool *hpool; + + hpool = kzalloc(sizeof(struct dhugetlb_pool) + + NR_SMPOOL * sizeof(struct small_page_pool), GFP_KERNEL); + if (!hpool) + return NULL; + + spin_lock_init(&hpool->lock); + spin_lock_init(&hpool->reserved_lock); + hpool->nid = nid; + atomic_set(&hpool->refcnt, 1); + INIT_LIST_HEAD(&hpool->dhugetlb_1G_freelists); + INIT_LIST_HEAD(&hpool->dhugetlb_2M_freelists); + INIT_LIST_HEAD(&hpool->dhugetlb_4K_freelists); + INIT_LIST_HEAD(&hpool->split_1G_freelists); + INIT_LIST_HEAD(&hpool->split_2M_freelists); + + for (i = 0; i < NR_SMPOOL; i++) { + spin_lock_init(&hpool->smpool[i].lock); + INIT_LIST_HEAD(&hpool->smpool[i].head_page); + } + + return hpool; +} + +int alloc_hugepage_from_hugetlb(struct dhugetlb_pool *hpool, + unsigned long nid, unsigned long size) +{ + int ret; + struct page *page, *next; + unsigned long idx; + unsigned long i = 0; + struct hstate *h = size_to_hstate(PUD_SIZE); + + if (!h) + return -ENOMEM; + + spin_lock(&hpool->lock); + spin_lock(&hugetlb_lock); + if (h->free_huge_pages_node[nid] < size) { + ret = -ENOMEM; + goto out_unlock; + } + + list_for_each_entry_safe(page, next, &h->hugepage_freelists[nid], lru) { + idx = page_to_pfn(page) >> (PUD_SHIFT - PAGE_SHIFT); + ret = update_dhugetlb_pagelist(idx, hpool); + if (ret) + continue; + ClearPageHugeFreed(page); + list_move_tail(&page->lru, &hpool->dhugetlb_1G_freelists); + h->free_huge_pages--; + h->free_huge_pages_node[nid]--; + hpool->total_nr_pages++; + hpool->free_unreserved_1G++; + if (++i == size) + break; + } + ret = 0; +out_unlock: + spin_unlock(&hugetlb_lock); + spin_unlock(&hpool->lock); + return ret; +} + +/* + * When we assign a hugepage to dhugetlb_pool, we need to record it in + * dhugetlb_pagelist_t. In this situation, we just need read_lock because + * there is not conflit when write to dhugetlb_pagelist_t->hpool. + * + * If page's pfn is greater than dhugetlb_pagelist_t->count (which may + * occurs due to memory hotplug), we need to realloc enough memory so that + * pfn = dhugetlb_pagelist_t->count - 1 and then record it. + * In this situation, we need write_lock because while we are reallocating, + * the read request should wait. 
+ */ +int update_dhugetlb_pagelist(unsigned long idx, struct dhugetlb_pool *hpool) +{ + read_lock(&dhugetlb_pagelist_rwlock); + if (idx >= dhugetlb_pagelist_t->count) { + unsigned long size; + struct dhugetlb_pagelist *tmp; + + read_unlock(&dhugetlb_pagelist_rwlock); + write_lock(&dhugetlb_pagelist_rwlock); + + size = sizeof(struct dhugetlb_pagelist) + + (idx + 1) * sizeof(struct dhugetlb_pool *); + tmp = krealloc(dhugetlb_pagelist_t, size, GFP_ATOMIC); + if (!tmp) { + write_unlock(&dhugetlb_pagelist_rwlock); + return -ENOMEM; + } + tmp->count = idx + 1; + dhugetlb_pagelist_t = tmp; + + write_unlock(&dhugetlb_pagelist_rwlock); + read_lock(&dhugetlb_pagelist_rwlock); + } + dhugetlb_pagelist_t->hpool[idx] = hpool; + read_unlock(&dhugetlb_pagelist_rwlock); + return 0; +} + +struct dhugetlb_pool *get_dhugetlb_pool_from_dhugetlb_pagelist( + struct page *page) +{ + struct dhugetlb_pool *hpool = NULL; + unsigned long idx = page_to_pfn(page) >> (PUD_SHIFT - PAGE_SHIFT); + + read_lock(&dhugetlb_pagelist_rwlock); + if (idx < dhugetlb_pagelist_t->count) + hpool = dhugetlb_pagelist_t->hpool[idx]; + read_unlock(&dhugetlb_pagelist_rwlock); + if (dhugetlb_pool_get(hpool)) + return hpool; + return NULL; +} + +struct dhugetlb_pool *get_dhugetlb_pool_from_task(struct task_struct *tsk) +{ + struct mem_cgroup *memcg; + struct dhugetlb_pool *hpool; + + if (!dhugetlb_enabled) + return NULL; + + rcu_read_lock(); + memcg = mem_cgroup_from_task(tsk); + rcu_read_unlock(); + + hpool = get_dhugetlb_pool_from_memcg(memcg); + + return hpool; +} + +static void add_new_huge_page_to_pool(struct dhugetlb_pool *hpool, + struct page *page, bool gigantic) +{ + lockdep_assert_held(&hpool->lock); + VM_BUG_ON_PAGE(page_mapcount(page), page); + INIT_LIST_HEAD(&page->lru); + + if (gigantic) { + prep_compound_gigantic_page(page, PUD_SHIFT - PAGE_SHIFT); + list_add_tail(&page->lru, &hpool->dhugetlb_1G_freelists); + hpool->free_unreserved_1G++; + } else { + prep_new_page(page, PMD_SHIFT - PAGE_SHIFT, __GFP_COMP, 0); + set_page_count(page, 0); + list_add_tail(&page->lru, &hpool->dhugetlb_2M_freelists); + hpool->free_unreserved_2M++; + } + set_page_private(page, 0); + page->mapping = NULL; + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); + set_hugetlb_cgroup(page, NULL); +} + +static void free_dhugetlb_pcpool(struct dhugetlb_pool *hpool) +{ + int i; + struct small_page_pool *smpool; + + for (i = 0; i < NR_SMPOOL; i++) { + smpool = &hpool->smpool[i]; + list_splice(&smpool->head_page, &hpool->dhugetlb_4K_freelists); + smpool->free_pages = 0; + smpool->used_pages = 0; + INIT_LIST_HEAD(&smpool->head_page); + } +} + +static void __free_dhugetlb_small_page(struct dhugetlb_pool *hpool) +{ + struct page *page, *next; + struct split_pages *split_huge, *split_next; + + if (list_empty(&hpool->dhugetlb_4K_freelists)) + return; + + list_for_each_entry_safe(page, next, + &hpool->dhugetlb_4K_freelists, lru) { + list_del(&page->lru); + add_new_huge_page_to_pool(hpool, page, false); + } + + list_for_each_entry_safe(split_huge, split_next, + &hpool->split_2M_freelists, list) { + list_del(&split_huge->list); + kfree(split_huge); + hpool->nr_split_2M--; + } + + hpool->free_pages = 0; + INIT_LIST_HEAD(&hpool->dhugetlb_4K_freelists); +} + +static void free_dhugetlb_small_page(struct dhugetlb_pool *hpool) +{ + struct page *page, *next; + unsigned long nr_pages = 1 << (PMD_SHIFT - PAGE_SHIFT); + + lockdep_assert_held(&hpool->lock); + if (list_empty(&hpool->dhugetlb_4K_freelists)) + return; + + list_for_each_entry_safe(page, next, + 
&hpool->dhugetlb_4K_freelists, lru) { + if (page_to_pfn(page) % nr_pages != 0) + list_del(&page->lru); + } + + __free_dhugetlb_small_page(hpool); +} + +static void __free_dhugetlb_huge_page(struct dhugetlb_pool *hpool) +{ + struct page *page, *next; + struct split_pages *split_giga, *split_next; + + if (list_empty(&hpool->dhugetlb_2M_freelists)) + return; + + list_for_each_entry_safe(page, next, + &hpool->dhugetlb_2M_freelists, lru) { + list_del(&page->lru); + add_new_huge_page_to_pool(hpool, page, true); + } + list_for_each_entry_safe(split_giga, split_next, + &hpool->split_1G_freelists, list) { + list_del(&split_giga->list); + kfree(split_giga); + hpool->nr_split_1G--; + } + + hpool->total_reserved_2M = 0; + hpool->free_reserved_2M = 0; + hpool->free_unreserved_2M = 0; + INIT_LIST_HEAD(&hpool->dhugetlb_2M_freelists); +} + +static void free_dhugetlb_huge_page(struct dhugetlb_pool *hpool) +{ + struct page *page, *next; + unsigned long nr_pages = 1 << (PUD_SHIFT - PAGE_SHIFT); + unsigned long block_size = 1 << (PMD_SHIFT - PAGE_SHIFT); + int i; + + lockdep_assert_held(&hpool->lock); + if (list_empty(&hpool->dhugetlb_2M_freelists)) + return; + + list_for_each_entry_safe(page, next, + &hpool->dhugetlb_2M_freelists, lru) { + set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + atomic_set(compound_mapcount_ptr(page), 0); + for (i = 1; i < block_size; i++) + clear_compound_head(&page[i]); + set_compound_order(page, 0); + __ClearPageHead(page); + if (page_to_pfn(page) % nr_pages != 0) + list_del(&page->lru); + } + __free_dhugetlb_huge_page(hpool); +} + +static int try_migrate_page(struct page *page, unsigned long nid) +{ + unsigned long pfn = page_to_pfn(page); + int ret = 0; + + LIST_HEAD(source); + + if (!pfn_valid(pfn)) + return 0; + BUG_ON(PageHuge(page) || PageTransHuge(page)); + /* + * HWPoison pages have elevated reference counts so the migration + * would fail on them. It also doesn't make any sense to migrate them + * in the first place. Still try to unmap such a page in case it is + * still mapped(e.g. current hwpoison implementation doesn't unmap + * KSM pages but keep the unmap as the catch all safety net). + */ + if (PageHWPoison(page)) { + if (WARN_ON(PageLRU(page))) + isolate_lru_page(page); + if (page_mapped(page)) + try_to_unmap(page, + TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS); + return 0; + } + + if (!get_page_unless_zero(page)) + return 0; + /* + * We can skip free pages. And we can deal with pages on + * LRU and non-lru movable pages. 
+ */ + if (PageLRU(page)) + ret = isolate_lru_page(page); + else + ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE); + put_page(page); + if (ret) { + if (page_count(page)) + ret = -EBUSY; + return ret; + } + list_add_tail(&page->lru, &source); + if (!__PageMovable(page)) + inc_node_page_state(page, + NR_ISOLATED_ANON + page_is_file_cache(page)); + + ret = migrate_pages(&source, alloc_new_node_page, NULL, nid, + MIGRATE_SYNC_LIGHT, MR_COMPACTION); + if (ret) + putback_movable_pages(&source); + return ret; +} + +static void try_migrate_pages(struct dhugetlb_pool *hpool) +{ + int i, j; + unsigned long nr_free_pages; + struct split_pages *split_giga, *next; + unsigned int nr_pages = 1 << (PMD_SHIFT - PAGE_SHIFT); + struct page *page; + int sleep_interval = 100; /* wait for the migration */ + + spin_unlock(&hpool->lock); + for (i = NR_SMPOOL - 1; i >= 0; i--) + spin_unlock(&hpool->smpool[i].lock); + + msleep(sleep_interval); + dhugetlb_pool_force_empty(hpool->attach_memcg); + + spin_lock(&hpool->lock); + nr_free_pages = hpool->free_pages; + spin_unlock(&hpool->lock); + for (i = 0; i < NR_SMPOOL; i++) { + spin_lock(&hpool->smpool[i].lock); + nr_free_pages += hpool->smpool[i].free_pages; + spin_unlock(&hpool->smpool[i].lock); + } + + if (nr_free_pages >> HUGETLB_PAGE_ORDER < hpool->nr_split_2M) { + list_for_each_entry_safe(split_giga, next, + &hpool->split_1G_freelists, list) { + for (i = 0; i < nr_pages; i++) { + if (PageCompound(pfn_to_page( + split_giga->start_pfn + i * nr_pages))) + continue; + page = pfn_to_page(split_giga->start_pfn + + i * nr_pages); + for (j = 0; j < nr_pages; j++) { + if (PagePool(page + j)) + try_migrate_page(page + j, + hpool->nid); + } + } + } + } + + for (i = 0; i < NR_SMPOOL; i++) + spin_lock(&hpool->smpool[i].lock); + spin_lock(&hpool->lock); +} + +/* + * If there are some pages are still in use. We will try to reclaim/migrate it. + * After trying at most HPOOL_RECLAIM_RETRIES times, we may success. + * Or we will print the failed information and return false. 
+ */ +static bool free_dhugetlb_pages(struct dhugetlb_pool *hpool) +{ + int i; + long used_pages; + int try_count = 0; + +retry: + used_pages = 0; + for (i = 0; i < NR_SMPOOL; i++) + used_pages += hpool->smpool[i].used_pages; + + if (try_count < HPOOL_RECLAIM_RETRIES && + (used_pages || hpool->used_2M || hpool->used_1G)) { + try_migrate_pages(hpool); + try_count++; + goto retry; + } + + if (used_pages) + pr_err("dhugetlb: some 4K pages not free, memcg: %s delete failed!\n", + hpool->attach_memcg->css.cgroup->kn->name); + else if (hpool->used_2M) + pr_err("dhugetlb: some 2M pages not free, memcg: %s delete failed!\n", + hpool->attach_memcg->css.cgroup->kn->name); + else if (hpool->used_1G) + pr_err("dhugetlb: some 1G pages not free, memcg: %s delete failed!\n", + hpool->attach_memcg->css.cgroup->kn->name); + else { + free_dhugetlb_pcpool(hpool); + free_dhugetlb_small_page(hpool); + free_dhugetlb_huge_page(hpool); + return true; + } + return false; +} + +static void free_back_hugetlb(struct dhugetlb_pool *hpool) +{ + int nid; + unsigned int nr_pages; + unsigned long pfn, idx; + struct page *page, *page_next, *p; + struct hstate *h = size_to_hstate(PUD_SIZE); + + if (!h) + return; + + spin_lock(&hugetlb_lock); + list_for_each_entry_safe(page, page_next, + &hpool->dhugetlb_1G_freelists, lru) { + nr_pages = 1 << huge_page_order(h); + pfn = page_to_pfn(page); + for (; nr_pages--; pfn++) { + p = pfn_to_page(pfn); + p->mapping = NULL; + } + SetPageHugeFreed(page); + set_compound_page_dtor(page, HUGETLB_PAGE_DTOR); + nid = page_to_nid(page); + BUG_ON(nid >= MAX_NUMNODES); + list_move(&page->lru, &h->hugepage_freelists[nid]); + h->free_huge_pages_node[nid]++; + read_lock(&dhugetlb_pagelist_rwlock); + idx = page_to_pfn(page) >> (PUD_SHIFT - PAGE_SHIFT); + if (idx < dhugetlb_pagelist_t->count) + dhugetlb_pagelist_t->hpool[idx] = NULL; + read_unlock(&dhugetlb_pagelist_rwlock); + } + h->free_huge_pages += hpool->total_nr_pages; + hpool->total_nr_pages = 0; + hpool->free_unreserved_1G = 0; + hpool->free_reserved_1G = 0; + hpool->total_reserved_1G = 0; + INIT_LIST_HEAD(&hpool->dhugetlb_1G_freelists); + spin_unlock(&hugetlb_lock); +} + +bool free_dhugetlb_pool(struct dhugetlb_pool *hpool) +{ + int i; + bool ret = false; + + for (i = 0; i < NR_SMPOOL; i++) + spin_lock(&hpool->smpool[i].lock); + spin_lock(&hpool->lock); + + ret = free_dhugetlb_pages(hpool); + if (!ret) + goto out_unlock; + + free_back_hugetlb(hpool); + +out_unlock: + spin_unlock(&hpool->lock); + for (i = NR_SMPOOL - 1; i >= 0; i--) + spin_unlock(&hpool->smpool[i].lock); + + if (ret) + dhugetlb_pool_put(hpool); + return ret; +} + +static void __split_free_huge_page(struct dhugetlb_pool *hpool, + struct page *page) +{ + int i; + int order_h = PUD_SHIFT - PAGE_SHIFT; + int order_m = PMD_SHIFT - PAGE_SHIFT; + int blocks = 1 << (order_h - order_m); + struct page *p = page + 1; + + lockdep_assert_held(&hpool->lock); + set_compound_page_dtor(page, NULL_COMPOUND_DTOR); + atomic_set(compound_mapcount_ptr(page), 0); + for (i = 1; i < (1 << order_h); i++, p = mem_map_next(p, page, i)) + clear_compound_head(p); + + set_compound_order(page, 0); + __ClearPageHead(page); + + /* make it be 2M huge pages and put it to huge pool */ + for (i = 0; i < blocks; i++, page += (1 << order_m)) + add_new_huge_page_to_pool(hpool, page, false); +} + +static void __split_free_small_page(struct dhugetlb_pool *hpool, + struct page *page) +{ + int i; + int nr_pages = 1 << (PMD_SHIFT - PAGE_SHIFT); + + lockdep_assert_held(&hpool->lock); + set_compound_page_dtor(page, 
NULL_COMPOUND_DTOR); + set_compound_order(page, 0); + for (i = 0; i < nr_pages; i++) { + if (i != 0) { + page[i].mapping = NULL; + clear_compound_head(&page[i]); + } else + __ClearPageHead(page); + + /* + * If a hugepage is mapped in private mode, the PG_uptodate bit + * will not be cleared when the hugepage freed. Clear the + * hugepage using free_pages_prepare() here. + */ + free_pages_prepare(&page[i], 0, false); + hpool->free_pages++; + list_add_tail(&page[i].lru, &hpool->dhugetlb_4K_freelists); + } +} + +static bool split_free_huge_page(struct dhugetlb_pool *hpool) +{ + struct page *page; + struct split_pages *split_page; + + lockdep_assert_held(&hpool->lock); + + if (!hpool->free_unreserved_1G) + return false; + + split_page = kzalloc(sizeof(struct split_pages), GFP_ATOMIC); + if (!split_page) + return false; + + page = list_entry(hpool->dhugetlb_1G_freelists.next, struct page, lru); + list_del(&page->lru); + hpool->free_unreserved_1G--; + + split_page->start_pfn = page_to_pfn(page); + list_add(&split_page->list, &hpool->split_1G_freelists); + hpool->nr_split_1G++; + + trace_dhugetlb_split_merge(hpool, page, DHUGETLB_SPLIT_1G); + + __split_free_huge_page(hpool, page); + return true; +} + +static bool split_free_small_page(struct dhugetlb_pool *hpool) +{ + struct page *page; + struct split_pages *split_page; + + lockdep_assert_held(&hpool->lock); + + if (!hpool->free_unreserved_2M && !split_free_huge_page(hpool)) + return false; + + split_page = kzalloc(sizeof(struct split_pages), GFP_ATOMIC); + if (!split_page) + return false; + + page = list_entry(hpool->dhugetlb_2M_freelists.next, struct page, lru); + list_del(&page->lru); + hpool->free_unreserved_2M--; + + split_page->start_pfn = page_to_pfn(page); + list_add(&split_page->list, &hpool->split_2M_freelists); + hpool->nr_split_2M++; + + trace_dhugetlb_split_merge(hpool, page, DHUGETLB_SPLIT_2M); + + __split_free_small_page(hpool, page); + return true; +} + +bool move_pages_from_hpool_to_smpool(struct dhugetlb_pool *hpool, + struct small_page_pool *smpool) +{ + int i = 0; + struct page *page, *next; + + if (!hpool->free_pages && !split_free_small_page(hpool)) + return false; + + list_for_each_entry_safe(page, next, + &hpool->dhugetlb_4K_freelists, lru) { + list_del(&page->lru); + hpool->free_pages--; + list_add_tail(&page->lru, &smpool->head_page); + smpool->free_pages++; + if (++i == BATCH_SMPOOL_PAGE) + break; + } + return true; +} + +void move_pages_from_smpool_to_hpool(struct dhugetlb_pool *hpool, + struct small_page_pool *smpool) +{ + int i = 0; + struct page *page, *next; + + list_for_each_entry_safe(page, next, &smpool->head_page, lru) { + list_del(&page->lru); + smpool->free_pages--; + list_add(&page->lru, &hpool->dhugetlb_4K_freelists); + hpool->free_pages++; + if (++i == BATCH_SMPOOL_PAGE) + break; + } +} + +static unsigned long list_len(struct list_head *head) +{ + unsigned long len = 0; + struct page *page; + + list_for_each_entry(page, head, lru) + len++; + + return len; +} + +static void hugetlb_migrate_pages(struct dhugetlb_pool *hpool, + unsigned long count) +{ + int i, try; + struct page *page; + struct split_pages *split_huge, *split_next; + unsigned long nr_pages = 1 << (PMD_SHIFT - PAGE_SHIFT); + LIST_HEAD(wait_page_list); + + list_for_each_entry_safe(split_huge, split_next, + &hpool->split_2M_freelists, list) { + /* + * Isolate free page first because we dont want them to be + * allocated. 
+ */ + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(split_huge->start_pfn + i); + if (!PagePool(page)) + list_move(&page->lru, &wait_page_list); + } + + for (try = 0; try < HPOOL_RECLAIM_RETRIES; try++) { + /* + * Unlock and try migration, after migration we need + * to lock back. + */ + for (i = 0; i < NR_SMPOOL; i++) + hpool->smpool[i].free_pages = + list_len(&hpool->smpool[i].head_page); + hpool->free_pages = + list_len(&hpool->dhugetlb_4K_freelists); + spin_unlock(&hpool->lock); + for (i = NR_SMPOOL - 1; i >= 0; i--) + spin_unlock(&hpool->smpool[i].lock); + + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(split_huge->start_pfn + i); + if (PagePool(page)) + try_migrate_page(page, hpool->nid); + } + for (i = 0; i < NR_SMPOOL; i++) + spin_lock(&hpool->smpool[i].lock); + spin_lock(&hpool->lock); + + /* + * Isolate free page. If all page in the split_huge + * is free, return it. + */ + split_huge->free_pages = 0; + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(split_huge->start_pfn + i); + if (!PagePool(page)) { + list_move(&page->lru, &wait_page_list); + split_huge->free_pages++; + } + } + if (split_huge->free_pages == nr_pages) + break; + } + if (split_huge->free_pages == nr_pages) { + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(split_huge->start_pfn + i); + list_del(&page->lru); + } + INIT_LIST_HEAD(&wait_page_list); + page = pfn_to_page(split_huge->start_pfn); + add_new_huge_page_to_pool(hpool, page, false); + list_del(&split_huge->list); + kfree(split_huge); + hpool->nr_split_2M--; + + trace_dhugetlb_split_merge(hpool, page, + DHUGETLB_MIGRATE_4K); + + if (--count == 0) + return; + } else { + /* Failed, put back the isolate pages */ + list_splice(&wait_page_list, + &hpool->dhugetlb_4K_freelists); + INIT_LIST_HEAD(&wait_page_list); + } + } +} + +static unsigned long merge_free_split_huge(struct dhugetlb_pool *hpool, + unsigned long count) +{ + int i; + struct page *page; + struct split_pages *split_huge, *split_next; + unsigned long nr_pages = 1 << (PMD_SHIFT - PAGE_SHIFT); + + list_for_each_entry_safe(split_huge, split_next, + &hpool->split_2M_freelists, list) { + split_huge->free_pages = 0; + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(split_huge->start_pfn + i); + if (!PagePool(page)) + split_huge->free_pages++; + } + if (split_huge->free_pages == nr_pages) { + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(split_huge->start_pfn + i); + list_del(&page->lru); + } + page = pfn_to_page(split_huge->start_pfn); + add_new_huge_page_to_pool(hpool, page, false); + list_del(&split_huge->list); + kfree(split_huge); + hpool->nr_split_2M--; + + trace_dhugetlb_split_merge(hpool, page, + DHUGETLB_MERGE_4K); + + if (--count == 0) + return 0; + } + } + return count; +} + +static void merge_free_small_page(struct dhugetlb_pool *hpool, + unsigned long count) +{ + int i; + unsigned long need_migrate; + + if (!hpool->nr_split_2M) + return; + + need_migrate = merge_free_split_huge(hpool, count); + if (need_migrate) + hugetlb_migrate_pages(hpool, need_migrate); + + for (i = 0; i < NR_SMPOOL; i++) + hpool->smpool[i].free_pages = + list_len(&hpool->smpool[i].head_page); + hpool->free_pages = list_len(&hpool->dhugetlb_4K_freelists); +} + +static void dhugetlb_collect_2M_pages(struct dhugetlb_pool *hpool, + unsigned long count) +{ + int i; + + while (hpool->free_unreserved_1G && + count > hpool->free_unreserved_2M) + split_free_huge_page(hpool); + + /* + * If we try to merge 4K pages to 2M, we need to unlock hpool->lock + * first, and then try to lock every 
lock in order to avoid deadlock. + */ + if (count > hpool->free_unreserved_2M) { + spin_unlock(&hpool->lock); + for (i = 0; i < NR_SMPOOL; i++) + spin_lock(&hpool->smpool[i].lock); + spin_lock(&hpool->lock); + merge_free_small_page(hpool, count - hpool->free_unreserved_2M); + for (i = NR_SMPOOL - 1; i >= 0; i--) + spin_unlock(&hpool->smpool[i].lock); + } +} + +/* + * Parameter gigantic: true means reserve 1G pages and false means reserve + * 2M pages. When we want to reserve 2M pages more than + * hpool->free_unreserved_2M, we have to try split/merge. Still, we can't + * guarantee success. + */ +void dhugetlb_reserve_hugepages(struct dhugetlb_pool *hpool, + unsigned long count, bool gigantic) +{ + unsigned long delta; + + spin_lock(&hpool->lock); + if (gigantic) { + if (count > hpool->total_reserved_1G) { + delta = min(count - hpool->total_reserved_1G, + hpool->free_unreserved_1G); + hpool->total_reserved_1G += delta; + hpool->free_reserved_1G += delta; + hpool->free_unreserved_1G -= delta; + } else { + delta = min(hpool->total_reserved_1G - count, + hpool->free_reserved_1G - + hpool->mmap_reserved_1G); + hpool->total_reserved_1G -= delta; + hpool->free_reserved_1G -= delta; + hpool->free_unreserved_1G += delta; + } + } else { + if (count > hpool->total_reserved_2M) { + delta = count - hpool->total_reserved_2M; + if (delta > hpool->free_unreserved_2M) + dhugetlb_collect_2M_pages(hpool, delta); + delta = min(count - hpool->total_reserved_2M, + hpool->free_unreserved_2M); + hpool->total_reserved_2M += delta; + hpool->free_reserved_2M += delta; + hpool->free_unreserved_2M -= delta; + } else { + delta = min(hpool->total_reserved_2M - count, + hpool->free_reserved_2M - + hpool->mmap_reserved_2M); + hpool->total_reserved_2M -= delta; + hpool->free_reserved_2M -= delta; + hpool->free_unreserved_2M += delta; + } + } + spin_unlock(&hpool->lock); +} + +static int dhugetlb_acct_memory(struct hstate *h, long delta, + struct dhugetlb_pool *hpool) +{ + int ret = -ENOMEM; + + if (delta == 0) + return 0; + + spin_lock(&hpool->lock); + if (hstate_is_gigantic(h)) { + if (delta > 0 && delta <= hpool->free_reserved_1G - + hpool->mmap_reserved_1G) { + hpool->mmap_reserved_1G += delta; + ret = 0; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_1G, + DHUGETLB_RESV_1G); + } else if (delta < 0) { + hpool->mmap_reserved_1G -= (unsigned long)(-delta); + WARN_ON(hpool->mmap_reserved_1G < 0); + ret = 0; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_1G, + DHUGETLB_UNRESV_1G); + } + } else { + if (delta > 0 && delta <= hpool->free_reserved_2M - + hpool->mmap_reserved_2M) { + hpool->mmap_reserved_2M += delta; + ret = 0; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_2M, + DHUGETLB_RESV_2M); + } else if (delta < 0) { + hpool->mmap_reserved_2M -= (unsigned long)(-delta); + WARN_ON(hpool->mmap_reserved_2M < 0); + ret = 0; + trace_dhugetlb_acct_memory(hpool, + hpool->mmap_reserved_2M, + DHUGETLB_UNRESV_2M); + } + } + spin_unlock(&hpool->lock); + + return ret; +} +#else +static int dhugetlb_acct_memory(struct hstate *h, long delta, + struct dhugetlb_pool *hpool) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_HUGETLB */ + static int __init hugetlb_init(void) { int i; @@ -3134,6 +4240,23 @@ static int __init hugetlb_init(void) hugetlb_register_all_nodes(); hugetlb_cgroup_file_init();
+#ifdef CONFIG_DYNAMIC_HUGETLB + if (enable_dhugetlb) { + unsigned long count = max(max_pfn >> (PUD_SHIFT - PAGE_SHIFT), + (unsigned long)DEFAULT_PAGESIZE); + unsigned long size = sizeof(struct dhugetlb_pagelist) + + count * sizeof(struct dhugetlb_pool *); + dhugetlb_pagelist_t = kzalloc(size, GFP_KERNEL); + if (dhugetlb_pagelist_t) { + dhugetlb_pagelist_t->count = count; + static_branch_enable(&dhugetlb_enabled_key); + pr_info("Dynamic 1G hugepage enabled\n"); + } else + pr_info("Dynamic 1G hugepage disabled due to out of memory, need %lu\n", + size); + } +#endif + #ifdef CONFIG_SMP num_fault_mutexes = roundup_pow_of_two(8 * num_possible_cpus()); #else @@ -3270,6 +4393,16 @@ static int __init hugetlb_nrpages_setup(char *s) } __setup("hugepages=", hugetlb_nrpages_setup);
+#ifdef CONFIG_DYNAMIC_HUGETLB +static int __init dhugetlb_setup(char *s) +{ + if (!strcmp(s, "on")) + enable_dhugetlb = true; + return 1; +} +__setup("dynamic_1G_hugepage=", dhugetlb_setup); +#endif + static int __init hugetlb_default_setup(char *s) { default_hstate_size = memparse(s, &s); @@ -3471,10 +4604,14 @@ unsigned long hugetlb_total_pages(void) return nr_total_pages; }
-static int hugetlb_acct_memory(struct hstate *h, long delta) +static int hugetlb_acct_memory(struct hstate *h, long delta, + struct dhugetlb_pool *hpool) { int ret = -ENOMEM;
+ if (dhugetlb_enabled && hpool) + return dhugetlb_acct_memory(h, delta, hpool); + spin_lock(&hugetlb_lock); /* * When cpuset is configured, it breaks the strict hugetlb page @@ -3535,6 +4672,8 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) struct hugepage_subpool *spool = subpool_vma(vma); unsigned long reserve, start, end; long gbl_reserve; + struct dhugetlb_pool *hpool = + HUGETLBFS_I(file_inode(vma->vm_file))->hpool;
if (!resv || !is_vma_resv_set(vma, HPAGE_RESV_OWNER)) return; @@ -3551,8 +4690,8 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) * Decrement reserve counts. The global reserve count may be * adjusted if the subpool has a minimum size. */ - gbl_reserve = hugepage_subpool_put_pages(spool, reserve); - hugetlb_acct_memory(h, -gbl_reserve); + gbl_reserve = hugepage_subpool_put_pages(spool, reserve, hpool); + hugetlb_acct_memory(h, -gbl_reserve, hpool); } }
@@ -4934,6 +6073,7 @@ int hugetlb_reserve_pages(struct inode *inode, struct hugepage_subpool *spool = subpool_inode(inode); struct resv_map *resv_map; long gbl_reserve; + struct dhugetlb_pool *hpool = HUGETLBFS_I(inode)->hpool;
/* This should never happen */ if (from > to) { @@ -4986,7 +6126,7 @@ int hugetlb_reserve_pages(struct inode *inode, * the subpool has a minimum size, there may be some global * reservations already in place (gbl_reserve). */ - gbl_reserve = hugepage_subpool_get_pages(spool, chg); + gbl_reserve = hugepage_subpool_get_pages(spool, chg, hpool); if (gbl_reserve < 0) { ret = -ENOSPC; goto out_err; @@ -4996,10 +6136,10 @@ int hugetlb_reserve_pages(struct inode *inode, * Check enough hugepages are available for the reservation. * Hand the pages back to the subpool if there are not */ - ret = hugetlb_acct_memory(h, gbl_reserve); + ret = hugetlb_acct_memory(h, gbl_reserve, hpool); if (ret < 0) { /* put back original number of pages, chg */ - (void)hugepage_subpool_put_pages(spool, chg); + (void)hugepage_subpool_put_pages(spool, chg, hpool); goto out_err; }
@@ -5028,8 +6168,9 @@ int hugetlb_reserve_pages(struct inode *inode, long rsv_adjust;
rsv_adjust = hugepage_subpool_put_pages(spool, - chg - add); - hugetlb_acct_memory(h, -rsv_adjust); + chg - add, + hpool); + hugetlb_acct_memory(h, -rsv_adjust, hpool); } } return 0; @@ -5051,6 +6192,7 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long chg = 0; struct hugepage_subpool *spool = subpool_inode(inode); long gbl_reserve; + struct dhugetlb_pool *hpool = HUGETLBFS_I(inode)->hpool;
/* * Since this routine can be called in the evict inode path for all @@ -5075,8 +6217,8 @@ long hugetlb_unreserve_pages(struct inode *inode, long start, long end, * If the subpool has a minimum size, the number of global * reservations to be released may be adjusted. */ - gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed)); - hugetlb_acct_memory(h, -gbl_reserve); + gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed), hpool); + hugetlb_acct_memory(h, -gbl_reserve, hpool);
return 0; } diff --git a/mm/internal.h b/mm/internal.h index 1b861446c751..deffd247b010 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -182,6 +182,7 @@ extern void __putback_isolated_page(struct page *page, unsigned int order, int mt); extern void __free_pages_core(struct page *page, unsigned int order); extern void prep_compound_page(struct page *page, unsigned int order); +extern int check_new_page(struct page *page); extern void post_alloc_hook(struct page *page, unsigned int order, gfp_t gfp_flags); extern int user_min_free_kbytes; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 63b91a030b02..bdc90e6fc082 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -997,6 +997,41 @@ static __always_inline struct mem_cgroup *get_mem_cgroup_from_current(void) return get_mem_cgroup_from_mm(current->mm); }
+#ifdef CONFIG_DYNAMIC_HUGETLB +void free_page_to_dhugetlb_pool(struct page *page) +{ + struct dhugetlb_pool *hpool; + struct small_page_pool *smpool; + unsigned long flags; + + hpool = get_dhugetlb_pool_from_dhugetlb_pagelist(page); + if (unlikely(!hpool)) { + pr_err("dhugetlb: free error: get hpool failed\n"); + return; + } + + smpool = &hpool->smpool[smp_processor_id()]; + spin_lock_irqsave(&smpool->lock, flags); + + ClearPagePool(page); + if (!free_pages_prepare(page, 0, false)) { + SetPagePool(page); + goto out; + } + list_add(&page->lru, &smpool->head_page); + smpool->free_pages++; + smpool->used_pages--; + if (smpool->free_pages > MAX_SMPOOL_PAGE) { + spin_lock(&hpool->lock); + move_pages_from_smpool_to_hpool(hpool, smpool); + spin_unlock(&hpool->lock); + } +out: + spin_unlock_irqrestore(&smpool->lock, flags); + dhugetlb_pool_put(hpool); +} +#endif /* CONFIG_DYNAMIC_HUGETLB */ + /** * mem_cgroup_iter - iterate over memory cgroup hierarchy * @root: hierarchy root @@ -3118,6 +3153,31 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return 0; } +#ifdef CONFIG_DYNAMIC_HUGETLB +int dhugetlb_pool_force_empty(struct mem_cgroup *memcg) +{ + lru_add_drain_all(); + + drain_all_stock(memcg); + + while (page_counter_read(&memcg->memory)) { + int progress; + + if (signal_pending(current)) + return -EINTR; + + progress = try_to_free_mem_cgroup_pages(memcg, 1, + GFP_HIGHUSER_MOVABLE, + false); + + if (!progress) { + congestion_wait(BLK_RW_ASYNC, HZ/10); + break; + } + } + return 0; +} +#endif
static ssize_t mem_cgroup_force_empty_write(struct kernfs_open_file *of, char *buf, size_t nbytes, @@ -4652,6 +4712,305 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, return ret; }
+#ifdef CONFIG_DYNAMIC_HUGETLB +struct dhugetlb_pool *get_dhugetlb_pool_from_memcg(struct mem_cgroup *memcg) +{ + struct mem_cgroup_extension *memcg_ext; + + if (!memcg) + return NULL; + + memcg_ext = container_of(memcg, struct mem_cgroup_extension, memcg); + if (dhugetlb_pool_get(memcg_ext->hpool)) + return memcg_ext->hpool; + return NULL; +} + +static void set_dhugetlb_pool_to_memcg(struct mem_cgroup *memcg, + struct dhugetlb_pool *hpool) +{ + struct mem_cgroup_extension *memcg_ext; + + memcg_ext = container_of(memcg, struct mem_cgroup_extension, memcg); + + memcg_ext->hpool = hpool; +} + +static bool should_allocate_from_dhugetlb_pool(gfp_t gfp_mask) +{ + gfp_t gfp = gfp_mask & GFP_HIGHUSER_MOVABLE; + + if (current->flags & PF_KTHREAD) + return false; + + /* + * The cgroup only charges anonymous and file pages from userspace. + * Some filesystems may have masked out __GFP_IO | __GFP_FS + * to avoid recursive memory requests, e.g. loop device and xfs. + */ + if ((gfp | __GFP_IO | __GFP_FS) != GFP_HIGHUSER_MOVABLE) + return false; + + return true; +} + +static struct page *__alloc_page_from_dhugetlb_pool(void) +{ + bool ret; + struct dhugetlb_pool *hpool; + struct small_page_pool *smpool; + struct page *page = NULL; + unsigned long flags; + + hpool = get_dhugetlb_pool_from_task(current); + if (unlikely(!hpool)) + goto out; + + smpool = &hpool->smpool[smp_processor_id()]; + spin_lock_irqsave(&smpool->lock, flags); + + if (smpool->free_pages == 0) { + spin_lock(&hpool->lock); + ret = move_pages_from_hpool_to_smpool(hpool, smpool); + spin_unlock(&hpool->lock); + if (!ret) + goto unlock; + } + + page = list_entry(smpool->head_page.next, struct page, lru); + list_del(&page->lru); + smpool->free_pages--; + smpool->used_pages++; + check_new_page(page); + SetPagePool(page); +unlock: + spin_unlock_irqrestore(&smpool->lock, flags); +out: + dhugetlb_pool_put(hpool); + return page; +} + +struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp_mask) +{ + struct page *page = NULL; + + if (should_allocate_from_dhugetlb_pool(gfp_mask)) + page = __alloc_page_from_dhugetlb_pool(); + + return page; +} + +static void assign_new_dhugetlb_pool(struct mem_cgroup *memcg, + unsigned long nid) +{ + struct dhugetlb_pool *hpool; + + hpool = hpool_alloc(nid); + if (!hpool) + return; + + hpool->attach_memcg = memcg; + css_get(&memcg->css); + set_dhugetlb_pool_to_memcg(memcg, hpool); +} + +static int update_dhugetlb_pool(struct mem_cgroup *memcg, + unsigned long nid, unsigned long size) +{ + int ret; + struct dhugetlb_pool *hpool = get_dhugetlb_pool_from_memcg(memcg); + + if (!hpool) { + if (memcg_has_children(memcg)) + return -EINVAL; + assign_new_dhugetlb_pool(memcg, nid); + hpool = get_dhugetlb_pool_from_memcg(memcg); + } + if (!hpool) + return -ENOMEM; + if (hpool->attach_memcg != memcg || hpool->nid != nid) { + dhugetlb_pool_put(hpool); + return -EINVAL; + } + + ret = alloc_hugepage_from_hugetlb(hpool, nid, size); + + dhugetlb_pool_put(hpool); + return ret; +} + +/* + * Test whether a process can allocate the specified memory size. + * + * Input must be in the format '<nid> <size>'. + * size is regarded as the number of 1G huge pages.
+ */ +static ssize_t memcg_write_dhugetlb(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + int ret; + unsigned long nid, size; + char *endp; + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + + if (!dhugetlb_enabled) + return -EINVAL; + + buf = strstrip(buf); + nid = memparse(buf, &endp); + if (*endp != ' ' || nid >= MAX_NUMNODES) + return -EINVAL; + + buf = endp + 1; + size = memparse(buf, &endp); + if (*endp != '\0' || size == 0) + return -EINVAL; + + ret = update_dhugetlb_pool(memcg, nid, size); + + return ret ?: nbytes; +} + +static int memcg_read_dhugetlb(struct seq_file *m, void *v) +{ + int i; + unsigned long free_pages; + long used_pages = 0; + struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m)); + struct dhugetlb_pool *hpool = get_dhugetlb_pool_from_memcg(memcg); + + if (!dhugetlb_enabled) + return 0; + if (!hpool) { + seq_printf(m, "Current hierarchy has no memory pool.\n"); + return 0; + } + + for (i = 0; i < NR_SMPOOL; i++) + spin_lock(&hpool->smpool[i].lock); + spin_lock(&hpool->lock); + + free_pages = hpool->free_pages; + for (i = 0; i < NR_SMPOOL; i++) { + free_pages += hpool->smpool[i].free_pages; + used_pages += hpool->smpool[i].used_pages; + } + + seq_printf(m, "dhugetlb_total_pages %ld\n" + "1G_total_reserved_pages %ld\n" + "1G_free_reserved_pages %ld\n" + "1G_mmap_reserved_pages %ld\n" + "1G_used_pages %ld\n" + "1G_free_unreserved_pages %ld\n" + "2M_total_reserved_pages %ld\n" + "2M_free_reserved_pages %ld\n" + "2M_mmap_reserved_pages %ld\n" + "2M_used_pages %ld\n" + "2M_free_unreserved_pages %ld\n" + "4K_free_pages %ld\n" + "4K_used_pages %ld\n", + hpool->total_nr_pages, + hpool->total_reserved_1G, + hpool->free_reserved_1G, + hpool->mmap_reserved_1G, + hpool->used_1G, + hpool->free_unreserved_1G, + hpool->total_reserved_2M, + hpool->free_reserved_2M, + hpool->mmap_reserved_2M, + hpool->used_2M, + hpool->free_unreserved_2M, + free_pages, + used_pages); + + spin_unlock(&hpool->lock); + for (i = NR_SMPOOL - 1; i >= 0; i--) + spin_unlock(&hpool->smpool[i].lock); + dhugetlb_pool_put(hpool); + return 0; +} + +static int update_reserve_pages(struct kernfs_open_file *of, + char *buf, bool gigantic) +{ + unsigned long size; + char *endp; + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + struct dhugetlb_pool *hpool; + + if (!dhugetlb_enabled) + return -EINVAL; + + buf = strstrip(buf); + size = memparse(buf, &endp); + if (*endp != '\0') + return -EINVAL; + + hpool = get_dhugetlb_pool_from_memcg(memcg); + if (!hpool) + return -EINVAL; + spin_lock(&hpool->reserved_lock); + dhugetlb_reserve_hugepages(hpool, size, gigantic); + spin_unlock(&hpool->reserved_lock); + dhugetlb_pool_put(hpool); + return 0; +} + +static ssize_t dhugetlb_1G_reserve_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + return update_reserve_pages(of, buf, true) ?: nbytes; +} + +static ssize_t dhugetlb_2M_reserve_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + return update_reserve_pages(of, buf, false) ?: nbytes; +} + +static void dhugetlb_pool_inherits(struct mem_cgroup *memcg, + struct mem_cgroup *parent) +{ + struct dhugetlb_pool *hpool; + + hpool = get_dhugetlb_pool_from_memcg(parent); + if (!hpool) + return; + + set_dhugetlb_pool_to_memcg(memcg, hpool); + dhugetlb_pool_put(hpool); +} + +static bool dhugetlb_pool_free(struct mem_cgroup *memcg) +{ + bool ret = true; + struct dhugetlb_pool *hpool; + + hpool = get_dhugetlb_pool_from_memcg(memcg); + if (hpool && hpool->attach_memcg == 
memcg) + ret = free_dhugetlb_pool(hpool); + dhugetlb_pool_put(hpool); + return ret; +} + +bool dhugetlb_pool_is_free(struct cgroup_subsys_state *css) +{ + if (dhugetlb_enabled) + return dhugetlb_pool_free(mem_cgroup_from_css(css)); + return true; +} +#else +static void dhugetlb_pool_inherits(struct mem_cgroup *memcg, + struct mem_cgroup *parent) +{ +} + +bool dhugetlb_pool_is_free(struct cgroup_subsys_state *css) +{ + return true; +} +#endif /* CONFIG_DYNAMIC_HUGETLB */ + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -4700,6 +5059,27 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = memcg_write_event_control, .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE, }, +#ifdef CONFIG_DYNAMIC_HUGETLB + { + .name = "dhugetlb.nr_pages", + .write = memcg_write_dhugetlb, + .seq_show = memcg_read_dhugetlb, + .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE | + CFTYPE_NOT_ON_ROOT, + }, + { + .name = "dhugetlb.1G.reserved_pages", + .write = dhugetlb_1G_reserve_write, + .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE | + CFTYPE_NOT_ON_ROOT, + }, + { + .name = "dhugetlb.2M.reserved_pages", + .write = dhugetlb_2M_reserve_write, + .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE | + CFTYPE_NOT_ON_ROOT, + }, +#endif { .name = "swappiness", .read_u64 = mem_cgroup_swappiness_read, @@ -5063,6 +5443,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) return &memcg->css; }
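The cftype entries above expose the pool through three memcg (cgroup v1) control files: dhugetlb.nr_pages accepts '<nid> <size>' and pulls 1G hugepages from node <nid> into the cgroup's pool, while dhugetlb.1G.reserved_pages and dhugetlb.2M.reserved_pages set how many pages are kept reserved at each size. A minimal userspace sketch of driving these files follows; the mount point /sys/fs/cgroup/memory, the cgroup name "demo" and the written values are assumptions for illustration only, not part of this patch.

```c
/*
 * Illustrative only: configure a dynamic hugetlb pool for an existing
 * memcg called "demo". The v1 memory controller mount point, the cgroup
 * name and the written values are assumptions, not part of this patch.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_cg_file(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, val, strlen(val)) < 0) {
		perror(path);
		if (fd >= 0)
			close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	const char *cg = "/sys/fs/cgroup/memory/demo";
	char path[256];

	/* '<nid> <size>': take four 1G hugepages from NUMA node 0. */
	snprintf(path, sizeof(path), "%s/dhugetlb.nr_pages", cg);
	write_cg_file(path, "0 4");

	/* Reserve pages at each size; the counts here are only examples. */
	snprintf(path, sizeof(path), "%s/dhugetlb.1G.reserved_pages", cg);
	write_cg_file(path, "1");
	snprintf(path, sizeof(path), "%s/dhugetlb.2M.reserved_pages", cg);
	write_cg_file(path, "512");

	return 0;
}
```

Reading dhugetlb.nr_pages afterwards returns the per-size counters produced by memcg_read_dhugetlb().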
+ if (dhugetlb_enabled) + dhugetlb_pool_inherits(memcg, parent); + error = memcg_online_kmem(memcg); if (error) goto fail; @@ -5681,6 +6064,14 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset) if (!p) return 0;
+ if (dhugetlb_enabled) { + struct dhugetlb_pool *hpool = get_dhugetlb_pool_from_task(p); + + if (hpool) { + dhugetlb_pool_put(hpool); + return -EPERM; + } + } /* * We are now commited to this value whatever it is. Changes in this * tunable will only affect upcoming migrations, not the current one. diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a6a2f254f61f..e722d73a3724 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1052,7 +1052,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page) return ret; }
-static __always_inline bool free_pages_prepare(struct page *page, +__always_inline bool free_pages_prepare(struct page *page, unsigned int order, bool check_free) { int bad = 0; @@ -2012,7 +2012,7 @@ static void check_new_page_bad(struct page *page) /* * This page is about to be returned from the page allocator */ -static inline int check_new_page(struct page *page) +inline int check_new_page(struct page *page) { if (likely(page_expected_state(page, PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON))) @@ -2075,8 +2075,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_owner(page, order, gfp_flags); }
-static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, - unsigned int alloc_flags) +void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, + unsigned int alloc_flags) { int i;
@@ -2955,6 +2955,12 @@ void free_unref_page(struct page *page) unsigned long flags; unsigned long pfn = page_to_pfn(page);
+ /* Free dynamic hugetlb page */ + if (dhugetlb_enabled && PagePool(page)) { + free_page_to_dhugetlb_pool(page); + return; + } + if (!free_unref_page_prepare(page, pfn)) return;
@@ -2972,6 +2978,16 @@ void free_unref_page_list(struct list_head *list) unsigned long flags, pfn; int batch_count = 0;
+ /* Free dynamic hugetlb pages */ + if (dhugetlb_enabled) { + list_for_each_entry_safe(page, next, list, lru) { + if (PagePool(page)) { + list_del(&page->lru); + free_page_to_dhugetlb_pool(page); + } + } + } + /* Prepare pages for freeing */ list_for_each_entry_safe(page, next, list, lru) { pfn = page_to_pfn(page); @@ -4785,6 +4801,15 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
finalise_ac(gfp_mask, &ac);
+ /* Dynamic hugetlb allocation attempt */ + if (dhugetlb_enabled && likely(order == 0)) { + page = alloc_page_from_dhugetlb_pool(gfp_mask); + if (page) { + prep_new_page(page, order, gfp_mask, alloc_flags); + goto out; + } + } + /* First allocation attempt */ page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac); if (likely(page))
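These page_alloc.c hooks give order-0 allocations and frees a fast path in front of the buddy allocator: alloc_page_from_dhugetlb_pool() takes a page from the current CPU's small_page_pool and refills it from the shared hpool when it runs dry, and free_page_to_dhugetlb_pool() (added in memcontrol.c above) returns PagePool pages to the same per-CPU pool, spilling back to the hpool once free_pages exceeds MAX_SMPOOL_PAGE. The standalone sketch below models only that per-CPU cache / shared pool pattern; all names, sizes and thresholds in it are invented for illustration and it is not the kernel implementation.

```c
/*
 * Toy model of the smpool/hpool split used by dynamic hugetlb: each
 * "CPU" keeps a small local count of free pages and only takes the
 * shared pool lock to refill when empty or to spill past a limit.
 * NR_CPUS, SPILL_LIMIT and BATCH are illustrative values.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS     4
#define SPILL_LIMIT 64	/* stand-in for MAX_SMPOOL_PAGE */
#define BATCH       32	/* pages moved per refill/spill */

struct pool {
	pthread_mutex_t lock;
	long free_pages;
};

static struct pool hpool = { .lock = PTHREAD_MUTEX_INITIALIZER, .free_pages = 1024 };
static struct pool smpool[NR_CPUS];

/* Take one page for this CPU, refilling from the shared pool if empty. */
static int alloc_page_fast(int cpu)
{
	struct pool *sm = &smpool[cpu];
	long move;
	int got = 0;

	pthread_mutex_lock(&sm->lock);
	if (sm->free_pages == 0) {
		pthread_mutex_lock(&hpool.lock);
		move = hpool.free_pages < BATCH ? hpool.free_pages : BATCH;
		hpool.free_pages -= move;
		sm->free_pages += move;
		pthread_mutex_unlock(&hpool.lock);
	}
	if (sm->free_pages > 0) {
		sm->free_pages--;
		got = 1;
	}
	pthread_mutex_unlock(&sm->lock);
	return got;
}

/* Give one page back, spilling a batch to the shared pool past the limit. */
static void free_page_fast(int cpu)
{
	struct pool *sm = &smpool[cpu];

	pthread_mutex_lock(&sm->lock);
	sm->free_pages++;
	if (sm->free_pages > SPILL_LIMIT) {
		pthread_mutex_lock(&hpool.lock);
		hpool.free_pages += BATCH;
		sm->free_pages -= BATCH;
		pthread_mutex_unlock(&hpool.lock);
	}
	pthread_mutex_unlock(&sm->lock);
}

int main(void)
{
	int cpu, i;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		pthread_mutex_init(&smpool[cpu].lock, NULL);

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		for (i = 0; i < 100; i++)
			if (alloc_page_fast(cpu))
				free_page_fast(cpu);

	printf("pages left in shared pool: %ld\n", hpool.free_pages);
	return 0;
}
```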