Add support for configuring a memcg for a zram device. After configuration, zram usage will be charged to that memcg. This is intended for the case where the zram device is only available to the tasks in the memcg.
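For reference, a minimal userspace sketch of the intended workflow. This is illustrative only: the cgroup name /zram_test, the cgroup v1 mount point /sys/fs/cgroup/memory, and the device zram0 are assumptions; the mem_cgroup and memory.zram_usage_in_bytes files are the interfaces added by this series.

#include <stdio.h>
#include <stdlib.h>

/* Write a string to a sysfs/cgroupfs file, exiting on error. */
static void write_str(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) == EOF) {
		perror(path);
		exit(1);
	}
	fclose(f);
}

int main(void)
{
	char buf[64];
	FILE *f;

	/*
	 * Bind zram0 to the memcg before setting disksize; the binding is
	 * rejected once the device is initialized. Writing "none" unbinds.
	 */
	write_str("/sys/block/zram0/mem_cgroup", "/zram_test");
	write_str("/sys/block/zram0/disksize", "1073741824");

	/*
	 * After mkswap/swapon and some swap-out, the compressed data is
	 * reported here and charged to memory.usage_in_bytes of the memcg.
	 */
	f = fopen("/sys/fs/cgroup/memory/zram_test/memory.zram_usage_in_bytes", "r");
	if (f) {
		if (fgets(buf, sizeof(buf), f))
			printf("zram usage: %s", buf);
		fclose(f);
	}
	return 0;
}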
Liu Shixin (6):
  mm/dynamic_hugetlb: support to allocate page by specifying memcg
  zram: set memcg for zram device
  zram: charge zram usage in memory cgroup
  zram: support to allocate page from hpool for zram
  mm/swap: support to allocate page from hpool for swapcache
  openeuler_defconfig: enable memcg_zram for x86_64 and arm64
 arch/arm64/configs/openeuler_defconfig |   1 +
 arch/x86/configs/openeuler_defconfig   |   1 +
 drivers/block/zram/zram_drv.c          | 151 ++++++++++++++++++++++++-
 drivers/block/zram/zram_drv.h          |   4 +
 include/linux/dynamic_hugetlb.h        |   7 +-
 include/linux/memcontrol.h             |  18 +++
 include/linux/vmalloc.h                |   3 +
 include/linux/zsmalloc.h               |   3 +
 mm/Kconfig                             |   8 ++
 mm/dynamic_hugetlb.c                   |  28 +++--
 mm/memcontrol.c                        | 123 ++++++++++++++++++++
 mm/page_alloc.c                        |   2 +-
 mm/swap_state.c                        |   2 +-
 mm/vmalloc.c                           |  90 ++++++++++++++-
 mm/zsmalloc.c                          |  80 ++++++++++++-
 15 files changed, 502 insertions(+), 19 deletions(-)
FeedBack: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/8145 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/Z...
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y
CVE: NA
--------------------------------
A later patch will support allocating pages from an hpool for zram and zspool if they are bound to a memcg and the memcg is bound to an hpool. As a preparation, support allocating a page by specifying a memcg.
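For clarity, a hedged sketch of the two calling modes of the changed interface; example_alloc() is a hypothetical caller, not part of this patch:

/*
 * Passing NULL keeps the old behavior: the hpool is looked up from the
 * calling task's memcg (this is what __alloc_pages() does below).
 * Passing a memcg uses memcg->hpool directly and returns NULL if that
 * memcg has no hpool bound.
 */
static struct page *example_alloc(struct mem_cgroup *memcg, gfp_t gfp)
{
	return alloc_page_from_dhugetlb_pool(memcg, gfp, 0, 0);
}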
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/dynamic_hugetlb.h |  7 +++++--
 mm/dynamic_hugetlb.c            | 25 ++++++++++++++++++++-----
 mm/page_alloc.c                 |  2 +-
 3 files changed, 26 insertions(+), 8 deletions(-)
diff --git a/include/linux/dynamic_hugetlb.h b/include/linux/dynamic_hugetlb.h
index 06b20a24dfe6..10bff6f093e0 100644
--- a/include/linux/dynamic_hugetlb.h
+++ b/include/linux/dynamic_hugetlb.h
@@ -103,7 +103,8 @@ void hugetlb_pool_inherit(struct mem_cgroup *memcg, struct mem_cgroup *parent);
 int hugetlb_pool_destroy(struct cgroup *cgrp);
 void __init dynamic_hugetlb_init(void);
 
-struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order,
+struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg,
+					   gfp_t gfp, unsigned int order,
 					   unsigned int flags);
 bool free_page_to_dhugetlb_pool(struct page *page);
 void free_page_list_to_dhugetlb_pool(struct list_head *list);
@@ -142,7 +143,9 @@ static inline void __init dynamic_hugetlb_init(void)
 {
 }
 
-static inline struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order,
+static inline struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg,
+							 gfp_t gfp,
+							 unsigned int order,
 							 unsigned int flags)
 {
 	return NULL;
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c
index 72061fd0395e..95a2a82eb2b0 100644
--- a/mm/dynamic_hugetlb.c
+++ b/mm/dynamic_hugetlb.c
@@ -576,10 +576,9 @@ static bool should_allocate_from_dhugetlb_pool(gfp_t gfp_mask)
 	return true;
 }
 
-static struct page *__alloc_page_from_dhugetlb_pool(void)
+static struct page *__alloc_page_from_dpool(struct dhugetlb_pool *hpool)
 {
 	struct percpu_pages_pool *percpu_pool;
-	struct dhugetlb_pool *hpool;
 	struct page *page = NULL;
 	unsigned long flags;
 
@@ -630,9 +629,11 @@ static struct page *__alloc_page_from_dhugetlb_pool(void)
 	return page;
 }
 
-struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order,
+struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg,
+					   gfp_t gfp, unsigned int order,
 					   unsigned int flags)
 {
+	struct dhugetlb_pool *hpool;
 	struct page *page = NULL;
 
 	if (!dhugetlb_enabled)
@@ -641,11 +642,25 @@ struct page *alloc_page_from_dhugetlb_pool(gfp_t gfp, unsigned int order,
 	if (order != 0)
 		return NULL;
 
-	if (should_allocate_from_dhugetlb_pool(gfp))
-		page = __alloc_page_from_dhugetlb_pool();
+	if (memcg) {
+		hpool = memcg->hpool;
+		if (!hpool)
+			return NULL;
+		goto alloc_page;
+	}
+
+	if (!should_allocate_from_dhugetlb_pool(gfp))
+		return NULL;
+
+	hpool = find_hpool_by_task(current);
+	if (!hpool)
+		return NULL;
 
+alloc_page:
+	page = __alloc_page_from_dpool(hpool);
 	if (page)
 		prep_new_page(page, order, gfp, flags);
+
 	return page;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1ba392f11e6b..e5878707222f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5397,7 +5397,7 @@ struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid,
 		alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp);
 
 	/* Before alloc from buddy system, alloc from hpool firstly */
-	page = alloc_page_from_dhugetlb_pool(alloc_gfp, order, alloc_flags);
+	page = alloc_page_from_dhugetlb_pool(NULL, alloc_gfp, order, alloc_flags);
 	if (page)
 		goto out;
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y
CVE: NA
--------------------------------
Add a new zram file mem_cgroup to set the memcg for a zram device. The memcg will be recorded in the zs_pool too when the zs_pool is created.
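For illustration, a hedged sketch of the reference-counting contract, condensed from the show/store handlers below; example_bind() is a hypothetical helper and its error handling is abbreviated:

static int example_bind(struct zram *zram, char *path)
{
	/* memcg_get_from_path() returns the memcg with its css refcount raised */
	struct mem_cgroup *memcg = memcg_get_from_path(path, PATH_MAX);
	int ret;

	if (!memcg)
		return -EINVAL;

	down_write(&zram->init_lock);
	/*
	 * init_memcg() takes over the reference; reset_memcg() drops it
	 * again when the device is reset or rebound.
	 */
	ret = init_memcg(zram, memcg);
	if (ret)
		css_put(&memcg->css);
	up_write(&zram->init_lock);

	return ret;
}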
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 drivers/block/zram/zram_drv.c | 107 +++++++++++++++++++++++++++++++++-
 drivers/block/zram/zram_drv.h |   4 ++
 include/linux/memcontrol.h    |   3 +
 include/linux/zsmalloc.h      |   3 +
 mm/Kconfig                    |   8 +++
 mm/memcontrol.c               |  27 +++++++++
 mm/zsmalloc.c                 |  27 +++++++++
 7 files changed, 178 insertions(+), 1 deletion(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 955f0c4d358f..0c2c068b2374 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1265,6 +1265,106 @@ static DEVICE_ATTR_RO(bd_stat);
 #endif
 static DEVICE_ATTR_RO(debug_stat);
 
+#ifdef CONFIG_MEMCG_ZRAM
+static inline int init_memcg(struct zram *zram, struct mem_cgroup *memcg)
+{
+	if (init_done(zram))
+		return -EINVAL;
+
+	if (zram->memcg)
+		css_put(&zram->memcg->css);
+
+	zram->memcg = memcg;
+
+	return 0;
+}
+
+static inline void reset_memcg(struct zram *zram)
+{
+	struct mem_cgroup *memcg = zram->memcg;
+
+	if (!memcg)
+		return;
+
+	zram->memcg = NULL;
+	css_put(&memcg->css);
+}
+
+
+static ssize_t mem_cgroup_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	struct mem_cgroup *memcg = zram->memcg;
+
+	if (mem_cgroup_disabled() || !memcg)
+		return scnprintf(buf, PAGE_SIZE, "none\n");
+
+	if (!cgroup_path(memcg->css.cgroup, buf, PATH_MAX))
+		return scnprintf(buf, PAGE_SIZE, "none\n");
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", buf);
+}
+
+static ssize_t mem_cgroup_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	struct mem_cgroup *memcg;
+	char *kbuf;
+	size_t sz;
+	int ret = 0;
+
+	if (mem_cgroup_disabled())
+		return -EINVAL;
+
+	kbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+
+	strlcpy(kbuf, buf, PATH_MAX);
+	sz = strlen(kbuf);
+	if (sz > 0 && kbuf[sz - 1] == '\n')
+		kbuf[sz - 1] = 0x00;
+
+	if (!strcmp(kbuf, "none")) {
+		memcg = NULL;
+	} else {
+		memcg = memcg_get_from_path(kbuf, PATH_MAX);
+		if (!memcg) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
+	down_write(&zram->init_lock);
+	ret = init_memcg(zram, memcg);
+	if (ret && memcg)
+		css_put(&memcg->css);
+	up_write(&zram->init_lock);
+
+out:
+	kfree(kbuf);
+	return ret ? ret : len;
+}
+static DEVICE_ATTR_RW(mem_cgroup);
+
+static inline struct zs_pool *zram_create_pool(struct zram *zram)
+{
+	return zs_create_pool_with_memcg(zram->disk->disk_name,
+					 zram->memcg);
+}
+#else
+static inline void reset_memcg(struct zram *zram)
+{
+}
+
+static inline struct zs_pool *zram_create_pool(struct zram *zram)
+{
+	return zs_create_pool(zram->disk->disk_name);
+}
+#endif
+
 static void zram_meta_free(struct zram *zram, u64 disksize)
 {
 	size_t num_pages = disksize >> PAGE_SHIFT;
@@ -1287,7 +1387,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 	if (!zram->table)
 		return false;
 
-	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
+	zram->mem_pool = zram_create_pool(zram);
 	if (!zram->mem_pool) {
 		vfree(zram->table);
 		return false;
@@ -2141,6 +2241,7 @@ static void zram_reset_device(struct zram *zram)
 	zram->limit_pages = 0;
 
 	if (!init_done(zram)) {
+		reset_memcg(zram);
 		up_write(&zram->init_lock);
 		return;
 	}
@@ -2156,6 +2257,7 @@ static void zram_reset_device(struct zram *zram)
 	zram_destroy_comps(zram);
 	memset(&zram->stats, 0, sizeof(zram->stats));
 	reset_bdev(zram);
+	reset_memcg(zram);
 
 	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
 	up_write(&zram->init_lock);
@@ -2338,6 +2440,9 @@ static struct attribute *zram_disk_attrs[] = {
 #ifdef CONFIG_ZRAM_MULTI_COMP
 	&dev_attr_recomp_algorithm.attr,
 	&dev_attr_recompress.attr,
+#endif
+#ifdef CONFIG_MEMCG_ZRAM
+	&dev_attr_mem_cgroup.attr,
 #endif
 	NULL,
 };
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index eb13d0299f89..8987e77ac7ee 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -18,6 +18,7 @@
 #include <linux/rwsem.h>
 #include <linux/zsmalloc.h>
 #include <linux/crypto.h>
+#include <linux/memcontrol.h>
 
 #include "zcomp.h"
 
@@ -142,5 +143,8 @@ struct zram {
 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
 	struct dentry *debugfs_dir;
 #endif
+#ifdef CONFIG_MEMCG_ZRAM
+	struct mem_cgroup *memcg;
+#endif
 };
 #endif
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ef3a6a8e640f..2804701f75dd 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1325,6 +1325,9 @@ int mem_cgroup_force_empty(struct mem_cgroup *memcg);
 int memcg_get_swap_type(struct page *page);
 void memcg_remove_swapfile(int type);
 
+#ifdef CONFIG_MEMCG_ZRAM
+struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen);
+#endif
 #else /* CONFIG_MEMCG */
 
 #define MEM_CGROUP_ID_SHIFT	0
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index a48cd0ffe57d..2c09676f9178 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -41,6 +41,9 @@ struct zs_pool_stats {
 struct zs_pool;
 
 struct zs_pool *zs_create_pool(const char *name);
+#ifdef CONFIG_MEMCG_ZRAM
+struct zs_pool *zs_create_pool_with_memcg(const char *name, void *memcg);
+#endif
 void zs_destroy_pool(struct zs_pool *pool);
 
 unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t flags);
diff --git a/mm/Kconfig b/mm/Kconfig
index 0f9209cd969b..2642a05f89c1 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -521,6 +521,14 @@ config MEMCG_SWAP_QOS
 	  memcg swap control include memory force swapin, swapfile control
 	  and swap limit.
 
+config MEMCG_ZRAM
+	bool "Enable Memory Cgroup charge of zram usage"
+	depends on MEMCG_SWAP_QOS && ZRAM
+	depends on X86 || ARM64
+	default n
+	help
+	  Support to charge zram usage in memory cgroup.
+
 config ETMEM_SCAN
 	tristate "module: etmem page scan for etmem support"
 	depends on ETMEM
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9007c3554771..575f382358e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3628,6 +3628,33 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	}
 }
 
+#ifdef CONFIG_MEMCG_ZRAM
+struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen)
+{
+	struct mem_cgroup *memcg;
+	char *memcg_path;
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
+	memcg_path = kzalloc(buflen, GFP_KERNEL);
+	if (!memcg_path)
+		return NULL;
+
+	for_each_mem_cgroup(memcg) {
+		cgroup_path(memcg->css.cgroup, memcg_path, buflen);
+		if (!strcmp(path, memcg_path) && css_tryget_online(&memcg->css)) {
+			mem_cgroup_iter_break(NULL, memcg);
+			break;
+		}
+	}
+
+	kfree(memcg_path);
+	return memcg;
+}
+EXPORT_SYMBOL(memcg_get_from_path);
+#endif
+
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 6079f5625abb..85aba62d777d 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -57,6 +57,7 @@
 #include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
+#include <linux/memcontrol.h>
 
 #define ZSPAGE_MAGIC	0x58
 
@@ -274,6 +275,9 @@ struct zs_pool {
 	atomic_long_t isolated_pages;
 	bool destroying;
 #endif
+#ifdef CONFIG_MEMCG_ZRAM
+	struct mem_cgroup *memcg;
+#endif
 };
 
 struct zspage {
@@ -2527,10 +2531,33 @@ struct zs_pool *zs_create_pool(const char *name)
 }
 EXPORT_SYMBOL_GPL(zs_create_pool);
 
+#ifdef CONFIG_MEMCG_ZRAM
+static inline void zs_set_memcg(struct zs_pool *pool, void *memcg)
+{
+	if (pool)
+		pool->memcg = memcg;
+}
+
+struct zs_pool *zs_create_pool_with_memcg(const char *name, void *memcg)
+{
+	struct zs_pool *pool = zs_create_pool(name);
+
+	zs_set_memcg(pool, memcg);
+
+	return pool;
+}
+EXPORT_SYMBOL_GPL(zs_create_pool_with_memcg);
+#else
+static inline void zs_set_memcg(struct zs_pool *pool, void *memcg)
+{
+}
+#endif
+
 void zs_destroy_pool(struct zs_pool *pool)
 {
 	int i;
 
+	zs_set_memcg(pool, NULL);
 	zs_unregister_shrinker(pool);
 	zs_unregister_migration(pool);
 	zs_pool_stat_destroy(pool);
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y
CVE: NA
--------------------------------
With the previous patch, zram and zspool can be bound to a memcg. Now support charging the usage of zspage and zram->table to the memory cgroup. This helps limit memory usage in container environments and prevents zram resource contention between containers.
Add a new file memory.zram_usage_in_bytes to show zram usage. At the same time, the zram usage will also be added to memory.usage_in_bytes. Now the stats are as follows:
  memory.zram_usage_in_bytes  = zram_usage
  memory.usage_in_bytes       = program_usage + zram_usage
  memory.memsw.usage_in_bytes = program_usage + swap_usage
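For illustration (made-up numbers): if the tasks in a memcg use 300M of memory directly and 200M of their pages are swapped out to zram, compressing down to 50M, the files would read:

  memory.zram_usage_in_bytes  = 50M
  memory.usage_in_bytes       = 300M + 50M  = 350M
  memory.memsw.usage_in_bytes = 300M + 200M = 500M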
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 drivers/block/zram/zram_drv.c | 32 +++++++++++++++++++--
 include/linux/memcontrol.h   |  5 ++++
 mm/memcontrol.c              | 52 +++++++++++++++++++++++++++++++++++
 mm/zsmalloc.c                | 31 +++++++++++++++++++++
 4 files changed, 117 insertions(+), 3 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0c2c068b2374..a56a549ee92e 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1354,6 +1354,20 @@ static inline struct zs_pool *zram_create_pool(struct zram *zram)
 	return zs_create_pool_with_memcg(zram->disk->disk_name,
 					 zram->memcg);
 }
+
+static inline void zram_charge_memory(struct zram *zram, unsigned long size)
+{
+	unsigned long nr_pages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
+
+	memcg_charge_zram(zram->memcg, nr_pages);
+}
+
+static inline void zram_uncharge_memory(struct zram *zram, unsigned long size)
+{
+	unsigned long nr_pages = ALIGN(size, PAGE_SIZE) >> PAGE_SHIFT;
+
+	memcg_uncharge_zram(zram->memcg, nr_pages);
+}
 #else
 static inline void reset_memcg(struct zram *zram)
 {
@@ -1363,11 +1377,20 @@ static inline struct zs_pool *zram_create_pool(struct zram *zram)
 {
 	return zs_create_pool(zram->disk->disk_name);
 }
+
+static inline void zram_charge_memory(struct zram *zram, unsigned long size)
+{
+}
+
+static inline void zram_uncharge_memory(struct zram *zram, unsigned long size)
+{
+}
 #endif
 
 static void zram_meta_free(struct zram *zram, u64 disksize)
 {
 	size_t num_pages = disksize >> PAGE_SHIFT;
+	unsigned long size = array_size(num_pages, sizeof(*zram->table));
 	size_t index;
 
 	/* Free all pages that are still in this zram device */
@@ -1376,14 +1399,15 @@ static void zram_meta_free(struct zram *zram, u64 disksize)
 
 	zs_destroy_pool(zram->mem_pool);
 	vfree(zram->table);
+	zram_uncharge_memory(zram, size);
 }
 
 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 {
-	size_t num_pages;
+	size_t num_pages = disksize >> PAGE_SHIFT;
+	unsigned long size = array_size(num_pages, sizeof(*zram->table));
 
-	num_pages = disksize >> PAGE_SHIFT;
-	zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
+	zram->table = vzalloc(size);
 	if (!zram->table)
 		return false;
 
@@ -1393,6 +1417,8 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 		return false;
 	}
 
+	zram_charge_memory(zram, size);
+
 	if (!huge_class_size)
 		huge_class_size = zs_huge_class_size(zram->mem_pool);
 	return true;
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 2804701f75dd..450300805b21 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -248,6 +248,9 @@ struct obj_cgroup {
 struct swap_device {
 	unsigned long max;
 	int type;
+#ifdef CONFIG_MEMCG_ZRAM
+	atomic64_t zram_usage;
+#endif
 };
 
 /*
@@ -1327,6 +1330,8 @@ void memcg_remove_swapfile(int type);
 
 #ifdef CONFIG_MEMCG_ZRAM
 struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen);
+void memcg_charge_zram(struct mem_cgroup *memcg, unsigned int nr_pages);
+void memcg_uncharge_zram(struct mem_cgroup *memcg, unsigned int nr_pages);
 #endif
 #else /* CONFIG_MEMCG */
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 575f382358e9..f7cdcdfa81b1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3653,6 +3653,49 @@ struct mem_cgroup *memcg_get_from_path(char *path, size_t buflen)
 	return memcg;
 }
 EXPORT_SYMBOL(memcg_get_from_path);
+
+static inline void memcg_zram_usage_init(struct mem_cgroup *memcg)
+{
+	atomic64_set(&memcg->swap_dev->zram_usage, 0);
+}
+
+void memcg_charge_zram(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	if (mem_cgroup_disabled() || !memcg)
+		return;
+
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return;
+
+	page_counter_charge(&memcg->memory, nr_pages);
+	atomic_long_add(nr_pages, &memcg->swap_dev->zram_usage);
+}
+EXPORT_SYMBOL_GPL(memcg_charge_zram);
+
+void memcg_uncharge_zram(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	if (mem_cgroup_disabled() || !memcg)
+		return;
+
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return;
+
+	page_counter_uncharge(&memcg->memory, nr_pages);
+	atomic_long_sub(nr_pages, &memcg->swap_dev->zram_usage);
+}
+EXPORT_SYMBOL_GPL(memcg_uncharge_zram);
+
+static u64 mem_cgroup_zram_usage(struct cgroup_subsys_state *css,
+				 struct cftype *cft)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+	return (u64)atomic64_read(&memcg->swap_dev->zram_usage) * PAGE_SIZE;
+}
+#else
+static inline void memcg_zram_usage_init(struct mem_cgroup *memcg)
+{
+}
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
@@ -4251,6 +4294,8 @@ static void memcg_swap_device_init(struct mem_cgroup *memcg,
 		WRITE_ONCE(memcg->swap_dev->type,
 			   READ_ONCE(parent->swap_dev->type));
 	}
+
+	memcg_zram_usage_init(memcg);
 }
 
 u64 memcg_swapmax_read(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -6247,6 +6292,13 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.write = memcg_swapfile_write,
 		.seq_show = memcg_swapfile_read,
 	},
+#ifdef CONFIG_MEMCG_ZRAM
+	{
+		.name = "zram_usage_in_bytes",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.read_u64 = mem_cgroup_zram_usage,
+	},
+#endif
 #endif
 	{
 		.name = "high_async_ratio",
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 85aba62d777d..934101f9f09e 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -935,6 +935,35 @@ static int trylock_zspage(struct zspage *zspage)
 	return 0;
 }
 
+#ifdef CONFIG_MEMCG_ZRAM
+static inline void zs_charge_memory(struct zs_pool *pool,
+				    unsigned long nr_pages)
+{
+	/*
+	 * Since only zram configures memcg for zs_pool,
+	 * charge the memory in zram usage.
+	 */
+	memcg_charge_zram(pool->memcg, nr_pages);
+}
+
+static inline void zs_uncharge_memory(struct zs_pool *pool,
+				      unsigned long nr_pages)
+{
+	/* See zs_charge_memory() for detail */
+	memcg_uncharge_zram(pool->memcg, nr_pages);
+}
+#else
+static inline void zs_charge_memory(struct zs_pool *pool,
+				    unsigned long nr_pages)
+{
+}
+
+static inline void zs_uncharge_memory(struct zs_pool *pool,
+				      unsigned long nr_pages)
+{
+}
+#endif
+
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 			  struct zspage *zspage)
 {
@@ -965,6 +994,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 	zs_stat_dec(class, OBJ_ALLOCATED, class->objs_per_zspage);
 	atomic_long_sub(class->pages_per_zspage,
 			&pool->pages_allocated);
+	zs_uncharge_memory(pool, class->pages_per_zspage);
 }
 
 static void free_zspage(struct zs_pool *pool, struct size_class *class,
@@ -1484,6 +1514,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 	record_obj(handle, obj);
 	atomic_long_add(class->pages_per_zspage,
 			&pool->pages_allocated);
+	zs_charge_memory(pool, class->pages_per_zspage);
 	zs_stat_inc(class, OBJ_ALLOCATED, class->objs_per_zspage);
 
 	/* We completely set up zspage so mark them as movable */
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y
CVE: NA
--------------------------------
After the zram device is bound to a memcg which has an hpool as its memory provider, zram memory should preferentially be allocated from the hpool. zspool pages were previously allocated with alloc_page(); replace it with zs_alloc_page() to achieve this. zram->table is allocated with vzalloc(), which in turn calls alloc_pages_node(); add vzalloc_with_memcg() and __vmalloc_alloc_pages() to replace vzalloc() and alloc_pages_node() respectively.
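Both replacements follow the same allocation pattern; a minimal sketch, where prefer_hpool_alloc() is a hypothetical name used only for illustration:

static struct page *prefer_hpool_alloc(struct mem_cgroup *memcg, gfp_t gfp)
{
	struct page *page = NULL;

	if (memcg)
		page = alloc_page_from_dhugetlb_pool(memcg, gfp, 0, 0);
	if (!page)			/* no memcg, no hpool, or hpool exhausted */
		page = alloc_page(gfp);	/* fall back to the buddy allocator */

	return page;
}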
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 drivers/block/zram/zram_drv.c | 14 +++++-
 include/linux/vmalloc.h       |  3 ++
 mm/dynamic_hugetlb.c          |  3 +-
 mm/vmalloc.c                  | 90 +++++++++++++++++++++++++++++++++--
 mm/zsmalloc.c                 | 22 ++++++++-
 5 files changed, 125 insertions(+), 7 deletions(-)
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index a56a549ee92e..90e2c989a553 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1368,6 +1368,12 @@ static inline void zram_uncharge_memory(struct zram *zram, unsigned long size)
 
 	memcg_uncharge_zram(zram->memcg, nr_pages);
 }
+
+static inline struct zram_table_entry *zram_table_alloc(struct zram *zram,
+							unsigned long size)
+{
+	return vzalloc_with_memcg(size, zram->memcg);
+}
 #else
 static inline void reset_memcg(struct zram *zram)
 {
@@ -1385,6 +1391,12 @@ static inline void zram_charge_memory(struct zram *zram, unsigned long size)
 static inline void zram_uncharge_memory(struct zram *zram, unsigned long size)
 {
 }
+
+static inline struct zram_table_entry *zram_table_alloc(struct zram *zram,
+							unsigned long size)
+{
+	return vzalloc(size);
+}
 #endif
 
 static void zram_meta_free(struct zram *zram, u64 disksize)
@@ -1407,7 +1419,7 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize)
 	size_t num_pages = disksize >> PAGE_SHIFT;
 	unsigned long size = array_size(num_pages, sizeof(*zram->table));
 
-	zram->table = vzalloc(size);
+	zram->table = zram_table_alloc(zram, size);
 	if (!zram->table)
 		return false;
 
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 1ebe364ed29a..c0c9a3476700 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -131,6 +131,9 @@ static inline unsigned long vmalloc_nr_pages(void) { return 0; }
 
 extern void *vmalloc(unsigned long size);
 extern void *vzalloc(unsigned long size);
+#ifdef CONFIG_MEMCG_ZRAM
+extern void *vzalloc_with_memcg(unsigned long size, void *memcg);
+#endif
 extern void *vmalloc_user(unsigned long size);
 extern void *vmalloc_node(unsigned long size, int node);
 extern void *vzalloc_node(unsigned long size, int node);
diff --git a/mm/dynamic_hugetlb.c b/mm/dynamic_hugetlb.c
index 95a2a82eb2b0..149b5fa54d80 100644
--- a/mm/dynamic_hugetlb.c
+++ b/mm/dynamic_hugetlb.c
@@ -582,8 +582,6 @@ static struct page *__alloc_page_from_dpool(struct dhugetlb_pool *hpool)
 	struct page *page = NULL;
 	unsigned long flags;
 
-	hpool = find_hpool_by_task(current);
-
 	if (!get_hpool_unless_zero(hpool))
 		return NULL;
 
@@ -663,6 +661,7 @@ struct page *alloc_page_from_dhugetlb_pool(struct mem_cgroup *memcg,
 
 	return page;
 }
+EXPORT_SYMBOL_GPL(alloc_page_from_dhugetlb_pool);
 
 static void __free_page_to_dhugetlb_pool(struct page *page)
 {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 6d802924d9e8..f0aaae496ec4 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2889,9 +2889,36 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
 EXPORT_SYMBOL_GPL(vmap_pfn);
 #endif /* CONFIG_VMAP_PFN */
 
+#include <linux/dynamic_hugetlb.h>
+
+#ifdef CONFIG_MEMCG_ZRAM
+static inline struct page *__vmalloc_alloc_pages(int node, gfp_t gfp_mask,
+						 unsigned int order,
+						 struct mem_cgroup *memcg)
+{
+	struct page *page;
+
+	if (!memcg)
+		return alloc_pages_node(node, gfp_mask, order);
+
+	page = alloc_page_from_dhugetlb_pool(memcg, gfp_mask, order, 0);
+	if (!page)
+		page = alloc_pages_node(node, gfp_mask, order);
+
+	return page;
+}
+#else
+static inline struct page *__vmalloc_alloc_pages(int node, gfp_t gfp_mask,
+						 unsigned int order,
+						 struct mem_cgroup *memcg)
+{
+	return alloc_pages_node(node, gfp_mask, order);
+}
+#endif
+
 static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 				 pgprot_t prot, unsigned int page_shift,
-				 int node)
+				 int node, struct mem_cgroup *memcg)
 {
 	const gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
 	unsigned long addr = (unsigned long)area->addr;
@@ -2940,7 +2967,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 			int p;
 
 			/* Compound pages required for remap_vmalloc_page */
-			page = alloc_pages_node(node, gfp_mask | __GFP_COMP, page_order);
+			page = __vmalloc_alloc_pages(node, gfp_mask | __GFP_COMP,
+						     page_order, memcg);
 			if (unlikely(!page)) {
 				/* Successfully allocated i pages, free them in __vfree() */
 				area->nr_pages = i;
@@ -3050,7 +3078,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 		goto fail;
 	}
 
-	addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node);
+	addr = __vmalloc_area_node(area, gfp_mask, prot, shift, node, NULL);
 	if (!addr)
 		goto fail;
 
@@ -3180,6 +3208,62 @@ void *vzalloc(unsigned long size)
 }
 EXPORT_SYMBOL(vzalloc);
 
+#ifdef CONFIG_MEMCG_ZRAM
+static void *__vmalloc_with_memcg(unsigned long size, gfp_t gfp_mask,
+				  struct mem_cgroup *memcg, const void *caller)
+{
+	struct vm_struct *area;
+	void *addr;
+
+	if (WARN_ON_ONCE(!size))
+		return NULL;
+
+	if ((size >> PAGE_SHIFT) > totalram_pages()) {
+		warn_alloc(gfp_mask, NULL,
+			   "vmalloc size %lu: exceeds total pages", size);
+		return NULL;
+	}
+
+	area = __get_vm_area_node(size, 1, PAGE_SHIFT, VM_ALLOC |
+				  VM_UNINITIALIZED, VMALLOC_START,
+				  VMALLOC_END, NUMA_NO_NODE,
+				  gfp_mask, caller);
+	if (!area) {
+		warn_alloc(gfp_mask, NULL,
+			   "vmalloc size %lu: vm_struct allocation failed", size);
+		return NULL;
+	}
+
+	addr = __vmalloc_area_node(area, gfp_mask, PAGE_KERNEL, PAGE_SHIFT,
+				   NUMA_NO_NODE, memcg);
+	if (!addr)
+		return NULL;
+
+	/*
+	 * In this function, newly allocated vm_struct has VM_UNINITIALIZED
+	 * flag. It means that vm_struct is not fully initialized.
+	 * Now, it is fully initialized, so remove this flag here.
+	 */
+	clear_vm_uninitialized_flag(area);
+
+	size = PAGE_ALIGN(size);
+	kmemleak_vmalloc(area, size, gfp_mask);
+
+	return addr;
+}
+
+void *vzalloc_with_memcg(unsigned long size, void *memcg)
+{
+	if (!memcg)
+		return vzalloc(size);
+
+	return __vmalloc_with_memcg(size, GFP_KERNEL | __GFP_ZERO,
+				    (struct mem_cgroup *)memcg,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vzalloc_with_memcg);
+#endif
+
 /**
  * vmalloc_user - allocate zeroed virtually contiguous memory for userspace
  * @size: allocation size
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 934101f9f09e..55a1d9ea16ce 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -952,6 +952,21 @@ static inline void zs_uncharge_memory(struct zs_pool *pool,
 	/* See zs_charge_memory() for detail */
 	memcg_uncharge_zram(pool->memcg, nr_pages);
 }
+
+static inline struct page *zs_alloc_page(struct zs_pool *pool, gfp_t gfp)
+{
+	struct mem_cgroup *memcg = pool->memcg;
+	struct page *page;
+
+	if (!memcg)
+		return alloc_page(gfp);
+
+	page = alloc_page_from_dhugetlb_pool(memcg, gfp, 0, 0);
+	if (!page)
+		page = alloc_page(gfp);
+
+	return page;
+}
 #else
 static inline void zs_charge_memory(struct zs_pool *pool,
 				    unsigned long nr_pages)
@@ -962,6 +977,11 @@ static inline void zs_uncharge_memory(struct zs_pool *pool,
 				      unsigned long nr_pages)
 {
 }
+
+static inline struct page *zs_alloc_page(struct zs_pool *pool, gfp_t gfp)
+{
+	return alloc_page(gfp);
+}
 #endif
 
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
@@ -1111,7 +1131,7 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
 	for (i = 0; i < class->pages_per_zspage; i++) {
 		struct page *page;
 
-		page = alloc_page(gfp);
+		page = zs_alloc_page(pool, gfp);
 		if (!page) {
 			while (--i >= 0) {
 				dec_zone_page_state(pages[i], NR_ZSPAGES);
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y
CVE: NA
--------------------------------
When swapping in asynchronously, if the swap entry belongs to a memcg and the memcg is bound to an hpool, we should try to allocate the page from the hpool first. If the swapin is triggered by a page fault, there is no need to allocate from the hpool proactively: since the faulting task belongs to the memcg, the page will be allocated from the hpool naturally.
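A condensed sketch of the async swapin path added below (error handling abbreviated; this mirrors memcg_alloc_page_vma() and its helper):

static struct page *example_swapin_alloc(swp_entry_t entry, gfp_t gfp_mask,
					 struct vm_area_struct *vma,
					 unsigned long addr)
{
	unsigned short id = lookup_swap_cgroup_id(entry);
	struct mem_cgroup *memcg;
	struct page *page = NULL;

	/* Resolve the memcg that charged this swap entry. */
	rcu_read_lock();
	memcg = mem_cgroup_from_id(id);
	if (memcg && !css_tryget_online(&memcg->css))
		memcg = NULL;
	rcu_read_unlock();

	/* Prefer the memcg's hpool, then fall back to the normal path. */
	if (memcg) {
		page = alloc_page_from_dhugetlb_pool(memcg, gfp_mask, 0, 0);
		css_put(&memcg->css);
	}
	if (!page)
		page = alloc_page_vma(gfp_mask, vma, addr);

	return page;
}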
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 include/linux/memcontrol.h | 10 +++++++++
 mm/memcontrol.c            | 44 ++++++++++++++++++++++++++++++++++++
 mm/swap_state.c            |  2 +-
 3 files changed, 55 insertions(+), 1 deletion(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 450300805b21..bac4aade25b4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -808,6 +808,9 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
 		page_counter_read(&memcg->memory);
 }
 
+struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask,
+				  struct vm_area_struct *vma, unsigned long addr);
+
 int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
 
 void mem_cgroup_uncharge(struct page *page);
@@ -1411,6 +1414,13 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
 	return false;
 }
 
+static inline struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask,
+						struct vm_area_struct *vma,
+						unsigned long addr)
+{
+	return alloc_page_vma(gfp_mask, vma, addr);
+}
+
 static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
 				    gfp_t gfp_mask)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f7cdcdfa81b1..3195991c98ae 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7954,6 +7954,50 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 			atomic_long_read(&parent->memory.children_low_usage)));
 }
 
+#ifdef CONFIG_DYNAMIC_HUGETLB
+static struct mem_cgroup *get_mem_cgroup_from_swap(swp_entry_t entry)
+{
+	struct mem_cgroup *memcg;
+	unsigned short id;
+
+	if (mem_cgroup_disabled())
+		return NULL;
+
+	id = lookup_swap_cgroup_id(entry);
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_id(id);
+	if (memcg && !css_tryget_online(&memcg->css))
+		memcg = NULL;
+	rcu_read_unlock();
+
+	return memcg;
+}
+
+struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask,
+				  struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mem_cgroup *memcg;
+	struct page *page = NULL;
+
+	memcg = get_mem_cgroup_from_swap(entry);
+	if (memcg) {
+		page = alloc_page_from_dhugetlb_pool(memcg, gfp_mask, 0, 0);
+		css_put(&memcg->css);
+	}
+	if (!page)
+		page = alloc_page_vma(gfp_mask, vma, addr);
+
+	return page;
+}
+#else
+struct page *memcg_alloc_page_vma(swp_entry_t entry, gfp_t gfp_mask,
+				  struct vm_area_struct *vma, unsigned long addr)
+{
+	return alloc_page_vma(gfp_mask, vma, addr);
+}
+#endif
+
 /**
  * mem_cgroup_charge - charge a newly allocated page to a cgroup
  * @page: page to charge
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 69d71c4be7b8..f0929da6225a 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -491,7 +491,7 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	 * before marking swap_map SWAP_HAS_CACHE, when -EEXIST will
	 * cause any racers to loop around until we add it to cache.
	 */
-	page = alloc_page_vma(gfp_mask, vma, addr);
+	page = memcg_alloc_page_vma(entry, gfp_mask, vma, addr);
 	if (!page)
 		return NULL;
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9H66Y
CVE: NA
--------------------------------
Enable CONFIG_MEMCG_ZRAM to support the memcg_zram feature.
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 arch/x86/configs/openeuler_defconfig   | 1 +
 2 files changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 664b2a47aab5..a0c6402935ae 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -1122,6 +1122,7 @@ CONFIG_CLEANCACHE=y
 CONFIG_FRONTSWAP=y
 CONFIG_MEMCG_QOS=y
 CONFIG_MEMCG_SWAP_QOS=y
+CONFIG_MEMCG_ZRAM=y
 CONFIG_ETMEM_SCAN=m
 CONFIG_ETMEM_SWAP=m
 CONFIG_ETMEM=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index 07dab7199156..a0af83982bf3 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -1055,6 +1055,7 @@ CONFIG_CLEANCACHE=y
 CONFIG_FRONTSWAP=y
 CONFIG_MEMCG_QOS=y
 CONFIG_MEMCG_SWAP_QOS=y
+CONFIG_MEMCG_ZRAM=y
 CONFIG_ETMEM_SCAN=m
 CONFIG_ETMEM_SWAP=m
 CONFIG_ETMEM=y