From: Yosry Ahmed <yosryahmed@google.com>
mainline inclusion
from mainline-v6.5-rc1
commit dddb44ffa0d59c8a3f2a5cb9690ccebe3150810c
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=dddb44ffa0d59c8a3f2a5cb9690ccebe3150810c
-------------------------------------------------
Patch series "memcg: OOM log improvements", v2.
This short patch series brings back some cgroup v1 stats in OOM logs that were unnecessarily changed before. It also makes memcg OOM logs less reliant on printk() internals.
This patch (of 2):
Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") made sure we dump all the stats in memory.stat during a cgroup OOM, but it also introduced a slight behavioral change. The code used to print the non-hierarchical v1 cgroup stats for the entire cgroup subtree; now it only prints the v2 cgroup stats for the cgroup under OOM.
For cgroup v1 users, this introduces a few problems:
(a) The non-hierarchical stats of the memcg under OOM are no longer shown.
(b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer shown.
(c) We show the list of cgroup v2 stats, even in cgroup v1. This list of stats is not tracked with v1 in mind. While most of the stats seem to be working on v1, there may be some stats that are not fully or correctly tracked.
Although the OOM log format is not set in stone, we should not change it for no reason. Users who upgrade to a kernel version that includes commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") will notice these behavioral changes in cgroup v1.
The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") separated stats formatting from stats display for v2, to reuse the stats formatting in the OOM logs. Do the same for v1.
Move the v2 specific formatting from memory_stat_format() to memcg_stat_format(), add memcg1_stat_format() for v1, and make memory_stat_format() select between them based on cgroup version. Since memory_stat_show() now works for both v1 & v2, drop memcg_stat_show().
Link: https://lkml.kernel.org/r/20230428132406.2540811-1-yosryahmed@google.com
Link: https://lkml.kernel.org/r/20230428132406.2540811-3-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
Cc: Michal Hocko <mhocko@suse.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Conflict: mm/memcontrol.c
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 mm/memcontrol.c | 46 ++++++++++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 6d2685e2839c..0e22bb3646a2 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1493,7 +1493,7 @@ static int __init memory_stats_init(void) } pure_initcall(memory_stats_init);
-static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { int i;
@@ -1557,6 +1557,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) WARN_ON_ONCE(seq_buf_has_overflowed(s)); }
+static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s); + +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) +{ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + memcg_stat_format(memcg, s); + else + memcg1_stat_format(memcg, s); + WARN_ON_ONCE(seq_buf_has_overflowed(s)); +} + #define K(x) ((x) << (PAGE_SHIFT-10)) /** * mem_cgroup_print_oom_context: Print OOM information relevant to @@ -4115,9 +4126,8 @@ static const unsigned int memcg1_events[] = { PGMAJFAULT, };
-static int memcg_stat_show(struct seq_file *m, void *v) +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { - struct mem_cgroup *memcg = mem_cgroup_from_seq(m); unsigned long memory, memsw; struct mem_cgroup *mi; unsigned int i; @@ -4136,15 +4146,15 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == NR_ANON_THPS) nr *= HPAGE_PMD_NR; #endif - seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); + seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); }
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]), + seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]), memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "%s %lu\n", lru_list_name(i), + seq_buf_printf(s, "%s %lu\n", lru_list_name(i), memcg_page_state_local(memcg, NR_LRU_BASE + i) * PAGE_SIZE);
@@ -4154,11 +4164,11 @@ static int memcg_stat_show(struct seq_file *m, void *v) memory = min(memory, READ_ONCE(mi->memory.max)); memsw = min(memsw, READ_ONCE(mi->memsw.max)); } - seq_printf(m, "hierarchical_memory_limit %llu\n", - (u64)memory * PAGE_SIZE); + seq_buf_printf(s, "hierarchical_memory_limit %llu\n", + (u64)memory * PAGE_SIZE); if (do_memsw_account()) - seq_printf(m, "hierarchical_memsw_limit %llu\n", - (u64)memsw * PAGE_SIZE); + seq_buf_printf(s, "hierarchical_memsw_limit %llu\n", + (u64)memsw * PAGE_SIZE);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { unsigned long nr; @@ -4170,17 +4180,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == NR_ANON_THPS) nr *= HPAGE_PMD_NR; #endif - seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], + seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i], (u64)nr * PAGE_SIZE); }
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "total_%s %llu\n", + seq_buf_printf(s, "total_%s %llu\n", vm_event_name(memcg1_events[i]), (u64)memcg_events(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "total_%s %llu\n", lru_list_name(i), + seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE);
@@ -4197,12 +4207,10 @@ static int memcg_stat_show(struct seq_file *m, void *v) anon_cost += mz->lruvec.anon_cost; file_cost += mz->lruvec.file_cost; } - seq_printf(m, "anon_cost %lu\n", anon_cost); - seq_printf(m, "file_cost %lu\n", file_cost); + seq_buf_printf(s, "anon_cost %lu\n", anon_cost); + seq_buf_printf(s, "file_cost %lu\n", file_cost); } #endif - - return 0; }
static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, @@ -5150,6 +5158,8 @@ static int memcg_events_local_show(struct seq_file *m, void *v) return 0; }
+static int memory_stat_show(struct seq_file *m, void *v); + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -5182,7 +5192,7 @@ static struct cftype mem_cgroup_legacy_files[] = { }, { .name = "stat", - .seq_show = memcg_stat_show, + .seq_show = memory_stat_show, }, { .name = "force_empty",