Sergey Senozhatsky (1): seq_buf: Add seq_buf_do_printk() helper
Steven Rostedt (VMware) (1): seq_buf: Add seq_buf_terminate() API
Yosry Ahmed (2): memcg: use seq_buf_do_printk() with mem_cgroup_print_oom_meminfo() memcg: dump memory.stat during cgroup OOM for v1
include/linux/seq_buf.h | 27 +++++++++++++ lib/seq_buf.c | 32 +++++++++++++++ mm/memcontrol.c | 87 +++++++++++++++++++++++------------------ 3 files changed, 108 insertions(+), 38 deletions(-)
From: "Steven Rostedt (VMware)" rostedt@goodmis.org
mainline inclusion from mainline-v5.13-rc1 commit f2616c772c768485de18e7fcb2816bcdcd098339 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
In the case that the seq_buf buffer needs to be printed directly, add a way to make sure that the buffer is safe to read by forcing a nul terminating character at the end of the string, or the last byte of the buffer if the string has overflowed.
Signed-off-by: Steven Rostedt (VMware) rostedt@goodmis.org Signed-off-by: Lu Jialin lujialin4@huawei.com --- include/linux/seq_buf.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+)
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 9d6c28cc4d8f..5b31c5147969 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -71,6 +71,31 @@ static inline unsigned int seq_buf_used(struct seq_buf *s) return min(s->len, s->size); }
+/** + * seq_buf_terminate - Make sure buffer is nul terminated + * @s: the seq_buf descriptor to terminate. + * + * This makes sure that the buffer in @s is nul terminated and + * safe to read as a string. + * + * Note, if this is called when the buffer has overflowed, then + * the last byte of the buffer is zeroed, and the len will still + * point passed it. + * + * After this function is called, s->buffer is safe to use + * in string operations. + */ +static inline void seq_buf_terminate(struct seq_buf *s) +{ + if (WARN_ON(s->size == 0)) + return; + + if (seq_buf_buffer_left(s)) + s->buffer[s->len] = 0; + else + s->buffer[s->size - 1] = 0; +} + /** * seq_buf_get_buf - get buffer to write arbitrary data to * @s: the seq_buf handle
From: Sergey Senozhatsky senozhatsky@chromium.org
mainline inclusion from mainline-v6.4-rc1 commit 96928d9032a7c34f12a88df879665562bcebf59a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
Sometimes we use seq_buf to format a string buffer, which we then pass to printk(). However, in certain situations the seq_buf string buffer can get too big, exceeding the PRINTKRB_RECORD_MAX bytes limit, and causing printk() to truncate the string.
Add a new seq_buf helper. This helper prints the seq_buf string buffer line by line, using \n as a delimiter, rather than passing the whole string buffer to printk() at once.
Link: https://lkml.kernel.org/r/20230415100110.1419872-1-senozhatsky@chromium.org
Cc: Andrew Morton akpm@linux-foundation.org Signed-off-by: Sergey Senozhatsky senozhatsky@chromium.org Reviewed-by: Petr Mladek pmladek@suse.com Tested-by: Yosry Ahmed yosryahmed@google.com Signed-off-by: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Lu Jialin lujialin4@huawei.com --- include/linux/seq_buf.h | 2 ++ lib/seq_buf.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+)
diff --git a/include/linux/seq_buf.h b/include/linux/seq_buf.h index 5b31c5147969..515d7fcb9634 100644 --- a/include/linux/seq_buf.h +++ b/include/linux/seq_buf.h @@ -159,4 +159,6 @@ extern int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary); #endif
+void seq_buf_do_printk(struct seq_buf *s, const char *lvl); + #endif /* _LINUX_SEQ_BUF_H */ diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 6dafde851333..5ffae44c1055 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -93,6 +93,38 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) } EXPORT_SYMBOL_GPL(seq_buf_printf);
+/** + * seq_buf_do_printk - printk seq_buf line by line + * @s: seq_buf descriptor + * @lvl: printk level + * + * printk()-s a multi-line sequential buffer line by line. The function + * makes sure that the buffer in @s is nul terminated and safe to read + * as a string. + */ +void seq_buf_do_printk(struct seq_buf *s, const char *lvl) +{ + const char *start, *lf; + + if (s->size == 0 || s->len == 0) + return; + + seq_buf_terminate(s); + + start = s->buffer; + while ((lf = strchr(start, '\n'))) { + int len = lf - start + 1; + + printk("%s%.*s", lvl, len, start); + start = ++lf; + } + + /* No trailing LF */ + if (start < s->buffer + s->len) + printk("%s%s\n", lvl, start); +} +EXPORT_SYMBOL_GPL(seq_buf_do_printk); + #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments
From: Yosry Ahmed yosryahmed@google.com
mainline inclusion from mainline-v6.5-rc1 commit 5b42360c73b0679505dac6ec44d234cfec61120c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
Currently, we format all the memcg stats into a buffer in mem_cgroup_print_oom_meminfo() and use pr_info() to dump it to the logs. However, this buffer is large in size. Although it is currently working as intended, ther is a dependency between the memcg stats buffer and the printk record size limit.
If we add more stats in the future and the buffer becomes larger than the printk record size limit, or if the prink record size limit is reduced, the logs may be truncated.
It is safer to use seq_buf_do_printk(), which will automatically break up the buffer at line breaks and issue small printk() calls.
Refactor the code to move the seq_buf from memory_stat_format() to its callers, and use seq_buf_do_printk() to print the seq_buf in mem_cgroup_print_oom_meminfo().
Link: https://lkml.kernel.org/r/20230428132406.2540811-2-yosryahmed@google.com Signed-off-by: Yosry Ahmed yosryahmed@google.com Acked-by: Michal Hocko mhocko@suse.com Reviewed-by: Sergey Senozhatsky senozhatsky@chromium.org Acked-by: Shakeel Butt shakeelb@google.com Reviewed-by: Muchun Song songmuchun@bytedance.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Michal Hocko mhocko@kernel.org Cc: Petr Mladek pmladek@suse.com Cc: Roman Gushchin roman.gushchin@linux.dev Cc: Steven Rostedt (Google) rostedt@goodmis.org Signed-off-by: Andrew Morton akpm@linux-foundation.org Conflict: mm/memcontrol.c Signed-off-by: Lu Jialin lujialin4@huawei.com --- mm/memcontrol.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9603b6dff9aa..2c9cf84b4fec 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1514,13 +1514,10 @@ static int __init memory_stats_init(void) } pure_initcall(memory_stats_init);
-static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize) +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { - struct seq_buf s; int i;
- seq_buf_init(&s, buf, bufsize); - /* * Provide statistics on the state of the memory subsystem as * well as cumulative event counters that show past behavior. @@ -1538,47 +1535,47 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
size = memcg_page_state(memcg, memory_stats[i].idx); size *= memory_stats[i].ratio; - seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size); + seq_buf_printf(s, "%s %llu\n", memory_stats[i].name, size);
if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) { size = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) + memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B); - seq_buf_printf(&s, "slab %llu\n", size); + seq_buf_printf(s, "slab %llu\n", size); } }
/* Accumulated memory events */
- seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGFAULT), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGFAULT), memcg_events(memcg, PGFAULT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGMAJFAULT), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGMAJFAULT), memcg_events(memcg, PGMAJFAULT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGREFILL), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGREFILL), memcg_events(memcg, PGREFILL)); - seq_buf_printf(&s, "pgscan %lu\n", + seq_buf_printf(s, "pgscan %lu\n", memcg_events(memcg, PGSCAN_KSWAPD) + memcg_events(memcg, PGSCAN_DIRECT)); - seq_buf_printf(&s, "pgsteal %lu\n", + seq_buf_printf(s, "pgsteal %lu\n", memcg_events(memcg, PGSTEAL_KSWAPD) + memcg_events(memcg, PGSTEAL_DIRECT)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGACTIVATE), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGACTIVATE), memcg_events(memcg, PGACTIVATE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGDEACTIVATE), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGDEACTIVATE), memcg_events(memcg, PGDEACTIVATE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREE), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGLAZYFREE), memcg_events(memcg, PGLAZYFREE)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(PGLAZYFREED), + seq_buf_printf(s, "%s %lu\n", vm_event_name(PGLAZYFREED), memcg_events(memcg, PGLAZYFREED));
#ifdef CONFIG_TRANSPARENT_HUGEPAGE - seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), + seq_buf_printf(s, "%s %lu\n", vm_event_name(THP_FAULT_ALLOC), memcg_events(memcg, THP_FAULT_ALLOC)); - seq_buf_printf(&s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), + seq_buf_printf(s, "%s %lu\n", vm_event_name(THP_COLLAPSE_ALLOC), memcg_events(memcg, THP_COLLAPSE_ALLOC)); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/* The above should easily fit into one page */ - WARN_ON_ONCE(seq_buf_has_overflowed(&s)); + WARN_ON_ONCE(seq_buf_has_overflowed(s)); }
#define K(x) ((x) << (PAGE_SHIFT-10)) @@ -1617,6 +1614,7 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) /* Use static buffer, for the caller is holding oom_lock. */ static char buf[PAGE_SIZE]; static char pathbuf[PATH_MAX]; + struct seq_buf s;
lockdep_assert_held(&oom_lock);
@@ -1639,8 +1637,9 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) pr_info("Memory cgroup stats for "); pr_cont_cgroup_path(memcg->css.cgroup); pr_cont(":"); - memory_stat_format(memcg, buf, sizeof(buf)); - pr_info("%s", buf); + seq_buf_init(&s, buf, sizeof(buf)); + memory_stat_format(memcg, &s); + seq_buf_do_printk(&s, KERN_INFO);
mem_cgroup_print_memfs_info(memcg, pathbuf, NULL); } @@ -7425,10 +7424,12 @@ static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + struct seq_buf s;
if (!buf) return -ENOMEM; - memory_stat_format(memcg, buf, PAGE_SIZE); + seq_buf_init(&s, buf, PAGE_SIZE); + memory_stat_format(memcg, &s); seq_puts(m, buf); kfree(buf); return 0;
From: Yosry Ahmed yosryahmed@google.com
mainline inclusion from mainline-v6.5-rc1 commit dddb44ffa0d59c8a3f2a5cb9690ccebe3150810c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8C7BS
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
-------------------------------------------------
Patch series "memcg: OOM log improvements", v2.
This short patch series brings back some cgroup v1 stats in OOM logs that were unnecessarily changed before. It also makes memcg OOM logs less reliant on printk() internals.
This patch (of 2):
Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") made sure we dump all the stats in memory.stat during a cgroup OOM, but it also introduced a slight behavioral change. The code used to print the non-hierarchical v1 cgroup stats for the entire cgroup subtree, now it only prints the v2 cgroup stats for the cgroup under OOM.
For cgroup v1 users, this introduces a few problems:
(a) The non-hierarchical stats of the memcg under OOM are no longer shown.
(b) A couple of v1-only stats (e.g. pgpgin, pgpgout) are no longer shown.
(c) We show the list of cgroup v2 stats, even in cgroup v1. This list of stats is not tracked with v1 in mind. While most of the stats seem to be working on v1, there may be some stats that are not fully or correctly tracked.
Although OOM log is not set in stone, we should not change it for no reason. When upgrading the kernel version to a version including commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM"), these behavioral changes are noticed in cgroup v1.
The fix is simple. Commit c8713d0b2312 ("mm: memcontrol: dump memory.stat during cgroup OOM") separated stats formatting from stats display for v2, to reuse the stats formatting in the OOM logs. Do the same for v1.
Move the v2 specific formatting from memory_stat_format() to memcg_stat_format(), add memcg1_stat_format() for v1, and make memory_stat_format() select between them based on cgroup version. Since memory_stat_show() now works for both v1 & v2, drop memcg_stat_show().
Link: https://lkml.kernel.org/r/20230428132406.2540811-1-yosryahmed@google.com Link: https://lkml.kernel.org/r/20230428132406.2540811-3-yosryahmed@google.com Signed-off-by: Yosry Ahmed yosryahmed@google.com Acked-by: Shakeel Butt shakeelb@google.com Acked-by: Michal Hocko mhocko@kernel.org Cc: Johannes Weiner hannes@cmpxchg.org Cc: Muchun Song muchun.song@linux.dev Cc: Petr Mladek pmladek@suse.com Cc: Roman Gushchin roman.gushchin@linux.dev Cc: Sergey Senozhatsky senozhatsky@chromium.org Cc: Steven Rostedt (Google) rostedt@goodmis.org Cc: Michal Hocko mhocko@suse.com Signed-off-by: Andrew Morton akpm@linux-foundation.org Conflict: mm/memcontrol.c Signed-off-by: Lu Jialin lujialin4@huawei.com --- mm/memcontrol.c | 46 ++++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2c9cf84b4fec..1b6884e50885 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1514,7 +1514,7 @@ static int __init memory_stats_init(void) } pure_initcall(memory_stats_init);
-static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) +static void memcg_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { int i;
@@ -1578,6 +1578,17 @@ static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) WARN_ON_ONCE(seq_buf_has_overflowed(s)); }
+static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s); + +static void memory_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) +{ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + memcg_stat_format(memcg, s); + else + memcg1_stat_format(memcg, s); + WARN_ON_ONCE(seq_buf_has_overflowed(s)); +} + #define K(x) ((x) << (PAGE_SHIFT-10)) /** * mem_cgroup_print_oom_context: Print OOM information relevant to @@ -4522,9 +4533,8 @@ static const unsigned int memcg1_events[] = { PGMAJFAULT, };
-static int memcg_stat_show(struct seq_file *m, void *v) +static void memcg1_stat_format(struct mem_cgroup *memcg, struct seq_buf *s) { - struct mem_cgroup *memcg = mem_cgroup_from_seq(m); unsigned long memory, memsw; struct mem_cgroup *mi; unsigned int i; @@ -4543,15 +4553,15 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == NR_ANON_THPS) nr *= HPAGE_PMD_NR; #endif - seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); + seq_buf_printf(s, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE); }
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "%s %lu\n", vm_event_name(memcg1_events[i]), + seq_buf_printf(s, "%s %lu\n", vm_event_name(memcg1_events[i]), memcg_events_local(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "%s %lu\n", lru_list_name(i), + seq_buf_printf(s, "%s %lu\n", lru_list_name(i), memcg_page_state_local(memcg, NR_LRU_BASE + i) * PAGE_SIZE);
@@ -4561,11 +4571,11 @@ static int memcg_stat_show(struct seq_file *m, void *v) memory = min(memory, READ_ONCE(mi->memory.max)); memsw = min(memsw, READ_ONCE(mi->memsw.max)); } - seq_printf(m, "hierarchical_memory_limit %llu\n", - (u64)memory * PAGE_SIZE); + seq_buf_printf(s, "hierarchical_memory_limit %llu\n", + (u64)memory * PAGE_SIZE); if (do_memsw_account()) - seq_printf(m, "hierarchical_memsw_limit %llu\n", - (u64)memsw * PAGE_SIZE); + seq_buf_printf(s, "hierarchical_memsw_limit %llu\n", + (u64)memsw * PAGE_SIZE);
for (i = 0; i < ARRAY_SIZE(memcg1_stats); i++) { unsigned long nr; @@ -4577,17 +4587,17 @@ static int memcg_stat_show(struct seq_file *m, void *v) if (memcg1_stats[i] == NR_ANON_THPS) nr *= HPAGE_PMD_NR; #endif - seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i], + seq_buf_printf(s, "total_%s %llu\n", memcg1_stat_names[i], (u64)nr * PAGE_SIZE); }
for (i = 0; i < ARRAY_SIZE(memcg1_events); i++) - seq_printf(m, "total_%s %llu\n", + seq_buf_printf(s, "total_%s %llu\n", vm_event_name(memcg1_events[i]), (u64)memcg_events(memcg, memcg1_events[i]));
for (i = 0; i < NR_LRU_LISTS; i++) - seq_printf(m, "total_%s %llu\n", lru_list_name(i), + seq_buf_printf(s, "total_%s %llu\n", lru_list_name(i), (u64)memcg_page_state(memcg, NR_LRU_BASE + i) * PAGE_SIZE);
@@ -4604,12 +4614,10 @@ static int memcg_stat_show(struct seq_file *m, void *v) anon_cost += mz->lruvec.anon_cost; file_cost += mz->lruvec.file_cost; } - seq_printf(m, "anon_cost %lu\n", anon_cost); - seq_printf(m, "file_cost %lu\n", file_cost); + seq_buf_printf(s, "anon_cost %lu\n", anon_cost); + seq_buf_printf(s, "file_cost %lu\n", file_cost); } #endif - - return 0; }
static u64 mem_cgroup_swappiness_read(struct cgroup_subsys_state *css, @@ -5883,6 +5891,8 @@ static ssize_t wb_blkio_write(struct kernfs_open_file *of, char *buf, } #endif
+static int memory_stat_show(struct seq_file *m, void *v); + static struct cftype mem_cgroup_legacy_files[] = { { .name = "usage_in_bytes", @@ -5915,7 +5925,7 @@ static struct cftype mem_cgroup_legacy_files[] = { }, { .name = "stat", - .seq_show = memcg_stat_show, + .seq_show = memory_stat_show, }, { .name = "force_empty",
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/2683 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/P...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/2683 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/P...