[PATCH OLK-6.6 0/2] add percpu stock for kmem

add percpu stock for kmem

Chen Ridong (2):
  memcg: add CONFIG_MEMCG_KMEM_STOCK
  memcg: add stock for kmem [un]charging

 arch/arm64/configs/openeuler_defconfig |   1 +
 arch/x86/configs/openeuler_defconfig   |   1 +
 init/Kconfig                           |  14 +++
 mm/memcontrol.c                        | 122 ++++++++++++++++++++++++-
 4 files changed, 137 insertions(+), 1 deletion(-)

--
2.34.1

[PATCH OLK-6.6 1/2] memcg: add CONFIG_MEMCG_KMEM_STOCK

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICI3RV

----------------------------------------------------------------------

This config enables a per-CPU cache (stock) for kernel memory (kmem)
charge/uncharge operations. When multiple memory cgroups (memcgs)
frequently charge/uncharge kernel memory, they may contend on atomic
operations in shared parent memcgs, causing performance degradation.
The per-CPU stock reduces contention by batching charges locally
before flushing to the shared parent, improving scalability for
high-frequency kmem allocations.

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 init/Kconfig                           | 14 ++++++++++++++
 3 files changed, 16 insertions(+)

diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index f048a1b098d1f..3c659f7ffe6da 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -167,6 +167,7 @@ CONFIG_MEMCG_MEMFS_INFO=y
 CONFIG_MEMCG_OOM_PRIORITY=y
 CONFIG_MEMCG_SWAP_QOS=y
 CONFIG_MEMCG_KMEM=y
+CONFIG_MEMCG_KMEM_STOCK=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_V1_WRITEBACK=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index b46394fa0f144..3cbae4c5f3902 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -186,6 +186,7 @@ CONFIG_MEMCG_MEMFS_INFO=y
 CONFIG_MEMCG_OOM_PRIORITY=y
 CONFIG_MEMCG_SWAP_QOS=y
 CONFIG_MEMCG_KMEM=y
+CONFIG_MEMCG_KMEM_STOCK=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CGROUP_WRITEBACK=y
 CONFIG_CGROUP_V1_WRITEBACK=y
diff --git a/init/Kconfig b/init/Kconfig
index 486f3a333f95b..c08649062bb6d 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1028,6 +1028,20 @@ config MEMCG_KMEM
 	depends on MEMCG
 	default y
 
+config MEMCG_KMEM_STOCK
+	bool
+	depends on MEMCG_KMEM
+	default y
+	help
+	  This option enables a per-CPU cache (stock) for kernel memory (kmem)
+	  charge/uncharge operations. When multiple memory cgroups (memcgs)
+	  frequently charge/uncharge kernel memory, they may contend on atomic
+	  operations in shared parent memcgs, causing performance degradation.
+
+	  The per-CPU stock reduces contention by batching charges locally
+	  before flushing to the shared parent, improving scalability for
+	  high-frequency kmem allocations.
+
 config BLK_CGROUP
 	bool "IO controller"
 	depends on BLOCK
--
2.34.1
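The contention argument in the commit message can be reproduced with a
small user-space model. The sketch below is not kernel code and nothing
in it comes from this patch: an atomic long plays the shared parent
memcg counter, a per-thread long plays the per-CPU stock, and NTHREADS,
BATCH and CHARGES are invented values for illustration only.

/* stock_model.c - user-space model of the batching described above.
 * NOT kernel code: "parent" stands in for a shared parent memcg
 * counter, "stock" for the per-CPU stock; all names and constants
 * here are invented for illustration.
 * Build: gcc -O2 -pthread stock_model.c -o stock_model
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NTHREADS 8
#define BATCH    64		/* plays the role of MEMCG_CHARGE_BATCH */
#define CHARGES  1000000L	/* one-page charges per thread */

static atomic_long parent;	/* shared counter all threads contend on */

/* Unbatched: every one-page charge is an atomic RMW on the shared line. */
static void *charge_direct(void *arg)
{
	(void)arg;
	for (long i = 0; i < CHARGES; i++)
		atomic_fetch_add(&parent, 1);
	return NULL;
}

/* Batched: take BATCH pages from the shared counter at once, serve the
 * following charges locally, and flush the remainder when done. */
static void *charge_batched(void *arg)
{
	long stock = 0;

	(void)arg;
	for (long i = 0; i < CHARGES; i++) {
		if (!stock) {
			atomic_fetch_add(&parent, BATCH);
			stock = BATCH;
		}
		stock--;
	}
	if (stock)
		atomic_fetch_sub(&parent, stock);
	return NULL;
}

static void run(void *(*fn)(void *), const char *name)
{
	pthread_t t[NTHREADS];
	int i;

	atomic_store(&parent, 0);
	for (i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, fn, NULL);
	for (i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);
	printf("%-8s parent = %ld (expected %ld)\n",
	       name, atomic_load(&parent), (long)NTHREADS * CHARGES);
}

int main(void)
{
	run(charge_direct, "direct:");
	run(charge_batched, "batched:");
	return 0;
}

Both runs end with the same total, but the batched walker touches the
shared cacheline CHARGES/BATCH times per thread instead of CHARGES
times; that reduction is the entire point of the stock.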

[PATCH OLK-6.6 2/2] memcg: add stock for kmem [un]charging

hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/ICI3RV

----------------------------------------------------------------------

memcg: implement per-CPU stock caching for kmem accounting

The current percpu_stock mechanism optimizes memory charging through
try_charge_memcg(), but kmem charge/uncharge operations still directly
modify mem_cgroup.kmem atomic counters. This creates significant
contention in high-density container environments where multiple
processes concurrently access shared memory cgroups.

This patch introduces a per-CPU deferred stock cache for kmem
operations. Benchmark results with Redis instances show consistent
improvements:

Throughput (ops/sec):
| Containers | Before  | After   | Improvement |
|------------|---------|---------|-------------|
| 4          | 293,255 | 316,365 | 7.9%        |
| 40         | 265,818 | 270,873 | 1.9%        |

P99 Latency (ms):
| Containers | Before | After | Improvement |
|------------|--------|-------|-------------|
| 4          | 0.247  | 0.231 | 6.5%        |
| 40         | 0.279  | 0.266 | 4.7%        |

Signed-off-by: Chen Ridong <chenridong@huawei.com>
---
 mm/memcontrol.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 121 insertions(+), 1 deletion(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b3609b71cbe8f..d972e344b3d6c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2282,6 +2282,102 @@ static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock) = {
 static DEFINE_MUTEX(percpu_charge_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG_KMEM_STOCK
+struct kmem_stock_pcp {
+	local_lock_t stock_lock;
+	struct mem_cgroup *cached; /* this never be root cgroup */
+	unsigned int nr_pages;
+};
+static DEFINE_PER_CPU(struct kmem_stock_pcp, kmem_stock) = {
+	.stock_lock = INIT_LOCAL_LOCK(stock_lock),
+};
+
+static bool consume_kmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	struct kmem_stock_pcp *stock;
+	unsigned long flags;
+	bool ret = false;
+
+	if (nr_pages > MEMCG_CHARGE_BATCH)
+		return ret;
+
+	local_lock_irqsave(&kmem_stock.stock_lock, flags);
+
+	stock = this_cpu_ptr(&kmem_stock);
+	if (memcg == READ_ONCE(stock->cached) && stock->nr_pages >= nr_pages) {
+		stock->nr_pages -= nr_pages;
+		ret = true;
+	}
+
+	local_unlock_irqrestore(&kmem_stock.stock_lock, flags);
+
+	return ret;
+}
+
+static void drain_kmem(struct kmem_stock_pcp *stock)
+{
+	struct mem_cgroup *old = READ_ONCE(stock->cached);
+
+	if (!old)
+		return;
+
+	if (stock->nr_pages) {
+		page_counter_uncharge(&old->kmem, stock->nr_pages);
+		stock->nr_pages = 0;
+	}
+
+	css_put(&old->css);
+	WRITE_ONCE(stock->cached, NULL);
+}
+
+static void __refill_kmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	struct kmem_stock_pcp *stock;
+
+	stock = this_cpu_ptr(&kmem_stock);
+	if (READ_ONCE(stock->cached) != memcg) { /* reset if necessary */
+		drain_kmem(stock);
+		css_get(&memcg->css);
+		WRITE_ONCE(stock->cached, memcg);
+	}
+	stock->nr_pages += nr_pages;
+
+	if (stock->nr_pages > MEMCG_CHARGE_BATCH)
+		drain_kmem(stock);
+}
+
+static void refill_kmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	unsigned long flags;
+
+	local_lock_irqsave(&kmem_stock.stock_lock, flags);
+	__refill_kmem(memcg, nr_pages);
+	local_unlock_irqrestore(&kmem_stock.stock_lock, flags);
+}
+
+static bool uncharge_to_kmem_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
+{
+	struct kmem_stock_pcp *stock;
+	unsigned long flags;
+	bool ret = false;
+
+	if (nr_pages >= MEMCG_CHARGE_BATCH)
+		return ret;
+
+	local_irq_save(flags);
+
+	stock = this_cpu_ptr(&kmem_stock);
+	if (memcg == stock->cached && stock->nr_pages + nr_pages <= MEMCG_CHARGE_BATCH) {
+		stock->nr_pages += nr_pages;
+		ret = true;
+	}
+
+	local_irq_restore(flags);
+
+	return ret;
+}
+#endif
+
 static struct obj_cgroup *drain_obj_stock(struct memcg_stock_pcp *stock);
 static bool obj_stock_flush_required(struct memcg_stock_pcp *stock,
 				     struct mem_cgroup *root_memcg);
@@ -3295,6 +3391,30 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 	return objcg;
 }
 
+#ifdef CONFIG_MEMCG_KMEM_STOCK
+static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
+{
+	unsigned int batch = MEMCG_CHARGE_BATCH;
+
+	mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
+	if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+		return;
+
+	if (nr_pages > 0) {
+		if (consume_kmem(memcg, nr_pages))
+			return;
+		if (batch < nr_pages)
+			batch = nr_pages;
+		page_counter_charge(&memcg->kmem, batch);
+
+		if (batch > nr_pages)
+			refill_kmem(memcg, batch - nr_pages);
+	} else {
+		if (!uncharge_to_kmem_stock(memcg, -nr_pages))
+			page_counter_uncharge(&memcg->kmem, -nr_pages);
+	}
+}
+#else
 static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
 {
 	mod_memcg_state(memcg, MEMCG_KMEM, nr_pages);
@@ -3305,7 +3425,7 @@ static void memcg_account_kmem(struct mem_cgroup *memcg, int nr_pages)
 		page_counter_uncharge(&memcg->kmem, -nr_pages);
 	}
 }
-
+#endif
 
 /*
  * obj_cgroup_uncharge_pages: uncharge a number of kernel pages from a objcg
--
2.34.1
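For review, the arithmetic in the new memcg_account_kmem() can be
followed with a single-CPU, single-memcg model that strips out the
locking, READ_ONCE/WRITE_ONCE and css reference counting. This is a
simplified sketch, not the patch's code: plain longs stand in for the
page_counter and the per-CPU stock, and BATCH plus the sample charge
sequence are invented for the walk-through.

/* account_model.c - single-CPU walk-through of the arithmetic in the
 * new memcg_account_kmem().  Simplified sketch, not the patch's code:
 * locking and refcounting are stripped out, a single memcg is assumed,
 * and BATCH plus the sample sequence are invented for illustration.
 * Build: gcc account_model.c -o account_model
 */
#include <stdio.h>

#define BATCH 64	/* plays the role of MEMCG_CHARGE_BATCH */

static long counter;	/* pages charged into memcg->kmem */
static long stock;	/* pages cached in the per-CPU stock */

static void account(long nr_pages)
{
	if (nr_pages > 0) {			/* charge */
		if (nr_pages <= BATCH && nr_pages <= stock) {
			stock -= nr_pages;	/* consume_kmem() hit */
			return;
		}

		long batch = nr_pages > BATCH ? nr_pages : BATCH;

		counter += batch;		/* page_counter_charge() */
		if (batch > nr_pages)
			stock += batch - nr_pages; /* refill_kmem() */
	} else {				/* uncharge */
		long nr = -nr_pages;

		if (nr < BATCH && stock + nr <= BATCH)
			stock += nr;		/* uncharge_to_kmem_stock() */
		else
			counter -= nr;		/* page_counter_uncharge() */
	}
}

int main(void)
{
	long seq[] = { 4, 4, -4, 100, -100 };
	unsigned int i;

	for (i = 0; i < sizeof(seq) / sizeof(seq[0]); i++) {
		account(seq[i]);
		printf("charge %4ld -> counter %4ld, stock %2ld\n",
		       seq[i], counter, stock);
	}
	return 0;
}

With the sample sequence the counter ends at 64 pages with 60 of them
sitting in the stock, i.e. only 4 pages are really in use: the small
charges and uncharges were absorbed locally, while the page counter
moved only in batch-sized (or larger) steps.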

FeedBack: The patch(es) which you have sent to kernel@openeuler.org
mailing list has been converted to a pull request successfully!
Pull request link:
https://gitee.com/openeuler/kernel/pulls/16862
Mailing list address:
https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/FBY...