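This series fixes two problems in cgroup v1 writeback: a deadlock between cgroup_mutex and kn->active when memory.wb_blkio_ino is read or written while the memcg is being deleted, and a soft lockup caused by unprotected concurrent updates of blkcg->memcg_list.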
Chen Ridong (1): cgroup_writeback: fix softlockup for blkcg->memcg_list
Lu Jialin (1): cgroup_writeback: fix deadlock in cgroup1_writeback
mm/backing-dev.c | 17 ++++++++++------- mm/memcontrol.c | 22 +++++++++++++++------- 2 files changed, 25 insertions(+), 14 deletions(-)
From: Lu Jialin lujialin4@huawei.com
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8Y0RW
--------------------------------
When memory.wb_blkio_ino of a memcg is read or written while that memcg is being deleted, a deadlock can occur as shown below:

  CPU0                          CPU1
  rlock(kn->active#4);
                                lock(cgroup_mutex);
                                lock(kn->active#4);
  lock(cgroup_mutex);
Therefore, stop taking cgroup_mutex when reading/writing memory.wb_blkio_ino, and instead use the refcnt and RCU to make sure that the blkcg still exists.
Fixes: 13b8199f0a47 ("cgroup: support cgroup writeback on cgroupv1") Signed-off-by: Lu Jialin lujialin4@huawei.com Signed-off-by: Chen Ridong chenridong@huawei.com --- mm/backing-dev.c | 2 -- mm/memcontrol.c | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 45ac57fd6e7a..1ae2b74cbad8 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -1121,8 +1121,6 @@ void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, if (!cgroup1_writeback) return;
- lockdep_assert_held(&cgroup_mutex); - css_get(blkcg_css); memcg->wb_blk_css = blkcg_css; if (pre_blkcss == NULL) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3e9e93fcb715..ffb6b3e37733 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5358,15 +5358,22 @@ static int wb_blkio_show(struct seq_file *m, void *v) if (!path) return -ENOMEM;
- mutex_lock(&cgroup_mutex); + rcu_read_lock(); blkcg_css = memcg->wb_blk_css; + if (!css_tryget_online(blkcg_css)) { + kfree(path); + rcu_read_unlock(); + + return -EINVAL; + } blkcg_cgroup = blkcg_css->cgroup; blkcg_id = cgroup_ino(blkcg_cgroup); cgroup_path(blkcg_cgroup, path, PATH_MAX); - mutex_unlock(&cgroup_mutex); seq_printf(m, "wb_blkio_path:%s\n", path); seq_printf(m, "wb_blkio_ino:%lu\n", blkcg_id); kfree(path); + css_put(blkcg_css); + rcu_read_unlock();
return 0; } @@ -5389,22 +5396,24 @@ static ssize_t wb_blkio_write(struct kernfs_open_file *of, char *buf, if (ret) return ret;
- mutex_lock(&cgroup_mutex); + rcu_read_lock(); root = blkcg_root_css->cgroup->root; blk_cgroup = cgroup1_get_from_id(root, cgrp_id); if (IS_ERR(blk_cgroup)) { - mutex_unlock(&cgroup_mutex); + rcu_read_unlock(); return -EINVAL; } blkcg_css = cgroup_tryget_css(blk_cgroup, &io_cgrp_subsys); - if (!blkcg_css) + if (!blkcg_css) { + ret = -EINVAL; goto out_unlock; + } wb_attach_memcg_to_blkcg(memcg_css, blkcg_css); css_put(blkcg_css);
out_unlock: cgroup_put(blk_cgroup); - mutex_unlock(&cgroup_mutex); + rcu_read_unlock();
return ret < 0 ? ret : nbytes; }
hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I99KM6
--------------------------------
When multiple tasks write memory.wb_blkio_ino of a memcg at the same time, the same node may be inserted into blkcg->memcg_list more than once, which leads to a soft lockup. This happens because writes to blkcg->memcg_list are not protected; it may also lead to a NULL pointer dereference when a write and an rmdir of the same memcg happen at the same time. So add a spinlock to protect blkcg->memcg_list.
The error logs:

watchdog: BUG: soft lockup - CPU#4 stuck for 23s! [rmdir:938142]
[ 5716.668802] ? irq_work_claim+0x25/0x60
[ 5716.668812] ? __list_add_valid+0x9c/0xe0
[ 5716.668820] wb_kill_blkcg+0x19f/0x310
[ 5716.668829] wb_kill_memcg_blkcg+0x8d/0xa0
[ 5716.668837] kill_css+0x89/0xd0
[ 5716.668846] cgroup_destroy_locked+0x1c6/0x380
[ 5716.668855] ? css_has_online_children+0x110/0x110
[ 5716.668864] ? selinux_inode_setxattr+0x4e0/0x4e0
[ 5716.668876] cgroup_rmdir+0x37/0x140
[ 5716.668888] kernfs_iop_rmdir+0xbb/0xf0
[ 5716.668898] vfs_rmdir.part.0+0xa5/0x230
[ 5716.668909] do_rmdir+0x2e0/0x320
[ 5716.668926] ? do_file_open_root+0x330/0x330
[ 5716.668933] ? __check_object_size+0x38/0x50
[ 5716.668942] ? getname_flags+0x14d/0x320
[ 5716.668952] do_syscall_64+0x33/0x40
[ 5716.668961] entry_SYSCALL_64_after_hwframe+0x62/0xc7
[ 5716.668968] RIP: 0033:0x7f742706cafb
Fixes: 404067a201b1 ("cgroup_writeback: fix deadlock in cgroup1_writeback") Signed-off-by: Chen Ridong chenridong@huawei.com --- mm/backing-dev.c | 15 ++++++++++----- mm/memcontrol.c | 5 ++--- 2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1ae2b74cbad8..fa2e26826150 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -438,8 +438,8 @@ static struct cgroup_subsys_state *cgwbv1_get_blkcss(struct mem_cgroup *memcg) struct cgroup_subsys_state *blkcg_css;
rcu_read_lock(); - blkcg_css = memcg->wb_blk_css; - if (!css_tryget_online(blkcg_css)) { + blkcg_css = READ_ONCE(memcg->wb_blk_css); + if (!blkcg_css || !css_tryget_online(blkcg_css)) { blkcg_css = blkcg_root_css; css_get(blkcg_css); } @@ -1067,6 +1067,7 @@ EXPORT_SYMBOL(wait_iff_congested); #include "../kernel/cgroup/cgroup-internal.h"
static bool cgroup1_writeback __read_mostly; +DEFINE_SPINLOCK(wb_blk_memlist_lock);
bool cgroup1_writeback_enabled(void) { @@ -1080,6 +1081,7 @@ static void wb_kill_memcg(struct cgroup_subsys_state *memcg_css)
list_del_init(&memcg->memcg_node); css_put(memcg->wb_blk_css); + memcg->wb_blk_css = NULL; }
static void wb_kill_blkcg(struct cgroup_subsys_state *blkcg_css) @@ -1103,24 +1105,26 @@ void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css) if (!cgroup1_writeback) return;
- lockdep_assert_held(&cgroup_mutex); - + spin_lock(&wb_blk_memlist_lock); if (ss->id == io_cgrp_id) wb_kill_blkcg(css); else if (ss->id == memory_cgrp_id) wb_kill_memcg(css); + spin_unlock(&wb_blk_memlist_lock); }
void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, struct cgroup_subsys_state *blkcg_css) { struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css); - struct cgroup_subsys_state *pre_blkcss = memcg->wb_blk_css; + struct cgroup_subsys_state *pre_blkcss = NULL; struct blkcg *blkcg = css_to_blkcg(blkcg_css);
if (!cgroup1_writeback) return;
+ spin_lock(&wb_blk_memlist_lock); + pre_blkcss = memcg->wb_blk_css; css_get(blkcg_css); memcg->wb_blk_css = blkcg_css; if (pre_blkcss == NULL) @@ -1129,6 +1133,7 @@ void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, list_move(&memcg->memcg_node, &blkcg->memcg_list); css_put(pre_blkcss); } + spin_unlock(&wb_blk_memlist_lock); }
static int __init enable_cgroup1_writeback(char *s) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ffb6b3e37733..2384de0c4374 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5359,11 +5359,10 @@ static int wb_blkio_show(struct seq_file *m, void *v) return -ENOMEM;
rcu_read_lock(); - blkcg_css = memcg->wb_blk_css; - if (!css_tryget_online(blkcg_css)) { + blkcg_css = READ_ONCE(memcg->wb_blk_css); + if (!blkcg_css || !css_tryget_online(blkcg_css)) { kfree(path); rcu_read_unlock(); - return -EINVAL; } blkcg_cgroup = blkcg_css->cgroup;
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/6299 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/H...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/6299 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/H...