hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8S9BY CVE: NA
--------------------------------
This patch introduces a per-memcg memory pool named dynamic pool (dpool). A dpool is created by calling dpool_create(). A child memcg inherits the dpool of its parent. The dpool is destroyed when the memcg that owns it is removed.
Signed-off-by: Liu Shixin liushixin2@huawei.com --- include/linux/dynamic_pool.h | 55 ++++++++++++ include/linux/memcontrol.h | 5 ++ kernel/cgroup/cgroup.c | 10 +++ mm/Kconfig | 9 ++ mm/Makefile | 1 + mm/dynamic_pool.c | 163 +++++++++++++++++++++++++++++++++++ mm/memcontrol.c | 2 + 7 files changed, 245 insertions(+) create mode 100644 include/linux/dynamic_pool.h create mode 100644 mm/dynamic_pool.c
diff --git a/include/linux/dynamic_pool.h b/include/linux/dynamic_pool.h new file mode 100644 index 000000000000..aacd9f23cf41 --- /dev/null +++ b/include/linux/dynamic_pool.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef __LINUX_DYNAMIC_POOL_H +#define __LINUX_DYNAMIC_POOL_H + +#include <linux/memcontrol.h> +#include <linux/hugetlb.h> +#include <linux/kabi.h> + +#ifdef CONFIG_DYNAMIC_POOL + +DECLARE_STATIC_KEY_FALSE(dynamic_pool_key); +#define dpool_enabled (static_branch_unlikely(&dynamic_pool_key)) + +enum pages_pool_type { + PAGES_POOL_1G, + PAGES_POOL_2M, + PAGES_POOL_4K, + PAGES_POOL_MAX, +}; + +struct pages_pool { + unsigned long free_pages; + unsigned long used_pages; + struct list_head freelist; +}; + +struct dynamic_pool { + refcount_t refcnt; + bool online; + struct mem_cgroup *memcg; + + spinlock_t lock; + struct pages_pool pool[PAGES_POOL_MAX]; + + KABI_RESERVE(1) +}; + +void dynamic_pool_inherit(struct mem_cgroup *parent, struct mem_cgroup *memcg); +int dynamic_pool_destroy(struct cgroup *cgrp, bool *clear_css_online); + +#else +struct dynamic_pool {}; + +static inline void dynamic_pool_inherit(struct mem_cgroup *parent, + struct mem_cgroup *memcg) +{ +} + +static inline int dynamic_pool_destroy(struct cgroup *cgrp, + bool *clear_css_online) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_POOL */ +#endif /* __LINUX_DYNAMIC_POOL_H */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5f5bf2466808..72323733d95c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -28,6 +28,7 @@ struct page; struct mm_struct; struct kmem_cache; struct oom_control; +struct dynamic_pool;
/* Cgroup-specific page state, on top of universal node page state */ enum memcg_stat_item { @@ -364,6 +365,10 @@ struct mem_cgroup { struct swap_device *swap_dev; #endif
+#ifdef CONFIG_DYNAMIC_POOL + struct dynamic_pool *dpool; +#endif + struct mem_cgroup_per_node *nodeinfo[]; };
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e6e876fa4402..d411f1b77f74 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -59,6 +59,7 @@ #include <linux/sched/cputime.h> #include <linux/sched/deadline.h> #include <linux/psi.h> +#include <linux/dynamic_pool.h> #include <net/sock.h> #include <linux/backing-dev.h>
@@ -5965,6 +5966,7 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) struct cgroup_subsys_state *css; struct cgrp_cset_link *link; int ssid; + bool clear_css_online = false;
lockdep_assert_held(&cgroup_mutex);
@@ -5983,6 +5985,14 @@ static int cgroup_destroy_locked(struct cgroup *cgrp) if (css_has_online_children(&cgrp->self)) return -EBUSY;
+ /* + * If dynamic pool is enabled, make sure dpool is destroyed before + * removing the corresponding memory cgroup. If CSS_ONLINE is set, + * this function will clear it and set clear_css_online to true. + */ + if (dynamic_pool_destroy(cgrp, &clear_css_online)) + return -EBUSY; + /* * Mark @cgrp and the associated csets dead. The former prevents * further task migration and child creation by disabling diff --git a/mm/Kconfig b/mm/Kconfig index 2df11b146c84..82dbe6c28fcb 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -1365,6 +1365,15 @@ config MEMORY_RELIABLE To enable this function, mirrored memory is needed and "kernelcore=reliable" need to be added in kernel parameters.
+config DYNAMIC_POOL + bool "Dynamic Pool support" + depends on X86_64 || (ARM64 && ARM64_4K_PAGES) + depends on MEMCG && HUGETLB_PAGE + default n + help + A per-memcg page pool. Tasks in the memcg prefer to allocate + pages from the corresponding pool. + source "mm/damon/Kconfig"
endmenu diff --git a/mm/Makefile b/mm/Makefile index e1a853e31856..8d7d2aeda6ea 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -145,3 +145,4 @@ obj-$(CONFIG_MEMCG_MEMFS_INFO) += memcg_memfs_info.o obj-$(CONFIG_PAGE_CACHE_LIMIT) += page_cache_limit.o obj-$(CONFIG_CLEAR_FREELIST_PAGE) += clear_freelist_page.o obj-$(CONFIG_MEMORY_RELIABLE) += mem_reliable.o +obj-$(CONFIG_DYNAMIC_POOL) += dynamic_pool.o diff --git a/mm/dynamic_pool.c b/mm/dynamic_pool.c new file mode 100644 index 000000000000..296d019c20ac --- /dev/null +++ b/mm/dynamic_pool.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dynamic pool core file + * + * Copyright (C) 2024 Huawei Limited. + */ + +#define pr_fmt(fmt) "Dynamic pool: " fmt + +#include <linux/dynamic_pool.h> + +/* Indicate the enabled of dynamic pool */ +DEFINE_STATIC_KEY_FALSE(dynamic_pool_key); + +/* Protect the operation of dynamic pool */ +static DEFINE_MUTEX(dpool_mutex); + +/* === reference function ============================================= */ + +static bool dpool_get_unless_zero(struct dynamic_pool *dpool) +{ + if (!dpool) + return false; + + return refcount_inc_not_zero(&dpool->refcnt); +} + +static void dpool_put(struct dynamic_pool *dpool) +{ + if (!dpool) + return; + + if (refcount_dec_and_test(&dpool->refcnt)) { + dpool->memcg->dpool = NULL; + css_put(&dpool->memcg->css); + synchronize_rcu(); + kfree(dpool); + } +} + +static struct dynamic_pool *dpool_get_from_memcg(struct mem_cgroup *memcg) +{ + struct dynamic_pool *dpool; + + rcu_read_lock(); + dpool = memcg->dpool; + if (!dpool_get_unless_zero(dpool)) + dpool = NULL; + rcu_read_unlock(); + + return dpool; +} + +/* === dynamic pool function ========================================== */ + +static void dpool_dump_child_memcg(struct mem_cgroup *memcg, void *message) +{ + struct mem_cgroup *root = (struct mem_cgroup *)message; + struct cgroup *cgrp; + + if (root == memcg) + return; + + cgrp = memcg->css.cgroup; + pr_err("child memcg exists: "); + 
pr_cont_cgroup_name(cgrp); + pr_cont("\n"); +} + +static struct dynamic_pool *dpool_create(struct mem_cgroup *memcg) +{ + struct dynamic_pool *dpool; + int i; + + if (memcg_has_children(memcg)) { + pr_err("create failed, memcg has children\n"); + mem_cgroup_scan_cgroups(memcg, dpool_dump_child_memcg, memcg); + return NULL; + } + + dpool = kzalloc(sizeof(struct dynamic_pool), GFP_KERNEL); + if (!dpool) + return NULL; + + spin_lock_init(&dpool->lock); + refcount_set(&dpool->refcnt, 1); + dpool->memcg = memcg; + + for (i = 0; i < PAGES_POOL_MAX; i++) + INIT_LIST_HEAD(&dpool->pool[i].freelist); + + css_get(&memcg->css); + memcg->dpool = dpool; + dpool->online = true; + + return dpool; +} + +void dynamic_pool_inherit(struct mem_cgroup *parent, struct mem_cgroup *memcg) +{ + struct dynamic_pool *dpool; + + if (!dpool_enabled || !parent || !memcg) + return; + + dpool = dpool_get_from_memcg(parent); + memcg->dpool = dpool; + + /* Don't increase refcount for child memcg */ + dpool_put(dpool); +} + +int dynamic_pool_destroy(struct cgroup *cgrp, bool *clear_css_online) +{ + struct cgroup_subsys_state *css = cgrp->subsys[memory_cgrp_id]; + struct mem_cgroup *memcg = mem_cgroup_from_css(css); + struct dynamic_pool *dpool; + int ret = 0; + + if (!dpool_enabled || !memcg) + return 0; + + mutex_lock(&dpool_mutex); + dpool = dpool_get_from_memcg(memcg); + if (!dpool) + goto unlock; + + if (dpool->memcg != memcg) + goto put; + + /* A offline dpool is not allowed for allocation */ + dpool->online = false; + + memcg->dpool = NULL; + + /* Release the initial reference count */ + dpool_put(dpool); + + /* + * Since dpool is destroyed and the memcg will be freed then, + * clear CSS_ONLINE immediately to prevent race with create. 
+ */ + if (cgrp->self.flags & CSS_ONLINE) { + cgrp->self.flags &= ~CSS_ONLINE; + *clear_css_online = true; + } + +put: + dpool_put(dpool); +unlock: + mutex_unlock(&dpool_mutex); + + return ret; +} + +static int __init dynamic_pool_init(void) +{ + static_branch_enable(&dynamic_pool_key); + pr_info("enabled\n"); + + return 0; +} +subsys_initcall(dynamic_pool_init); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ad006adec9c7..3e903a6d6860 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -66,6 +66,7 @@ #include <linux/memcg_memfs_info.h> #include <linux/sched/isolation.h> #include <linux/parser.h> +#include <linux/dynamic_pool.h>
#ifdef CONFIG_MEMCG_SWAP_QOS #include <linux/blkdev.h> @@ -6353,6 +6354,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) page_counter_init(&memcg->kmem, &parent->kmem); page_counter_init(&memcg->tcpmem, &parent->tcpmem); memcg_swap_device_init(memcg, parent); + dynamic_pool_inherit(parent, memcg); } else { init_memcg_events(); page_counter_init(&memcg->memory, NULL);