From: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp
mainline inclusion from mainline-v6.0-rc1 commit 68aaee147e597b495622b7c9038e5922c7c61f57 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I6ADCF CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
--------------------------------
syzbot is reporting GFP_KERNEL allocation with oom_lock held when reporting memcg OOM [1]. If this allocation triggers the global OOM situation then the system can livelock because the GFP_KERNEL allocation with oom_lock held cannot trigger the global OOM killer because __alloc_pages_may_oom() fails to hold oom_lock.
Fix this problem by removing the allocation from memory_stat_format() completely, and pass static buffer when calling from memcg OOM path.
Note that the caller holding filesystem lock was the trigger for syzbot to report this locking dependency. Doing GFP_KERNEL allocation with filesystem lock held can deadlock the system even without involving OOM situation.
Link: https://syzkaller.appspot.com/bug?extid=2d2aeadc6ce1e1f11d45 [1] Link: https://lkml.kernel.org/r/86afb39f-8c65-bec2-6cfc-c5e3cd600c0b@I-love.SAKURA... Fixes: c8713d0b23123759 ("mm: memcontrol: dump memory.stat during cgroup OOM") Signed-off-by: Tetsuo Handa penguin-kernel@I-love.SAKURA.ne.jp Reported-by: syzbot syzbot+2d2aeadc6ce1e1f11d45@syzkaller.appspotmail.com Suggested-by: Michal Hocko mhocko@suse.com Acked-by: Michal Hocko mhocko@suse.com Cc: Johannes Weiner hannes@cmpxchg.org Cc: Roman Gushchin roman.gushchin@linux.dev Cc: Shakeel Butt shakeelb@google.com Signed-off-by: Andrew Morton akpm@linux-foundation.org
conflicts: mm/memcontrol.c
Signed-off-by: Cai Xinchen caixinchen1@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Jialin Zhang zhangjialin11@huawei.com --- mm/memcontrol.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index fc69ceceeea9..4a069f9112e8 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1496,14 +1496,12 @@ static int __init memory_stats_init(void) } pure_initcall(memory_stats_init);
-static char *memory_stat_format(struct mem_cgroup *memcg) +static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize) { struct seq_buf s; int i;
- seq_buf_init(&s, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE); - if (!s.buffer) - return NULL; + seq_buf_init(&s, buf, bufsize);
/* * Provide statistics on the state of the memory subsystem as @@ -1563,8 +1561,6 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
/* The above should easily fit into one page */ WARN_ON_ONCE(seq_buf_has_overflowed(&s)); - - return s.buffer; }
#define K(x) ((x) << (PAGE_SHIFT-10)) @@ -1600,7 +1596,10 @@ void mem_cgroup_print_oom_context(struct mem_cgroup *memcg, struct task_struct * */ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) { - char *buf; + /* Use static buffer, for the caller is holding oom_lock. */ + static char buf[PAGE_SIZE]; + + lockdep_assert_held(&oom_lock);
pr_info("memory: usage %llukB, limit %llukB, failcnt %lu\n", K((u64)page_counter_read(&memcg->memory)), @@ -1621,11 +1620,8 @@ void mem_cgroup_print_oom_meminfo(struct mem_cgroup *memcg) pr_info("Memory cgroup stats for "); pr_cont_cgroup_path(memcg->css.cgroup); pr_cont(":"); - buf = memory_stat_format(memcg); - if (!buf) - return; + memory_stat_format(memcg, buf, sizeof(buf)); pr_info("%s", buf); - kfree(buf);
mem_cgroup_print_memfs_info(memcg, NULL); } @@ -6639,11 +6635,11 @@ static int memory_events_local_show(struct seq_file *m, void *v) static int memory_stat_show(struct seq_file *m, void *v) { struct mem_cgroup *memcg = mem_cgroup_from_seq(m); - char *buf; + char *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
- buf = memory_stat_format(memcg); if (!buf) return -ENOMEM; + memory_stat_format(memcg, buf, PAGE_SIZE); seq_puts(m, buf); kfree(buf); return 0;