hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8LY4S
-------------------------------
In cgroupv1, cgroup writeback is not supported because of two problems: 1) blkcg_css and memcg_css are mounted on different cgroup trees, so the blkcg_css cannot be found from a given memcg_css. 2) Buffered I/O is written back by a kthread, which belongs to the root blkcg, so blkcg cannot limit the wbps and wiops of buffered I/O.
We solve these two problems to support cgroup writeback on cgroupv1: 1) A memcg is attached to the blkcg_root css when the memcg is created. 2) We add a file "wb_blkio_ino" to mem_cgroup_legacy_files. A user can attach a memcg to a certain blkcg by echoing the inode number of the blkcg into the wb_blkio_ino file of the memcg. 3) inode_cgwb_enabled() returns true when memcg and io are both mounted on cgroupv2, or when both are mounted on cgroupv1 and the "cgroup1_writeback" boot parameter is set. 4) Buffered I/O can find a blkcg according to its memcg.
Thus, a memcg can find a certain blkcg, and cgroup writeback can be supported on cgroupv1.
Signed-off-by: Lu Jialin lujialin4@huawei.com Signed-off-by: chenridong chenridong@huawei.com --- block/blk-cgroup.c | 3 + block/blk-cgroup.h | 3 + include/linux/backing-dev.h | 29 +++++++-- include/linux/cgroup.h | 2 +- include/linux/memcontrol.h | 5 ++ init/Kconfig | 5 ++ kernel/cgroup/cgroup.c | 5 +- mm/backing-dev.c | 116 +++++++++++++++++++++++++++++++++++- mm/memcontrol.c | 83 +++++++++++++++++++++++++- 9 files changed, 242 insertions(+), 9 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 4a42ea2972ad..a1460948f663 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1363,6 +1363,9 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css) INIT_HLIST_HEAD(&blkcg->blkg_list); #ifdef CONFIG_CGROUP_WRITEBACK INIT_LIST_HEAD(&blkcg->cgwb_list); +#endif +#ifdef CONFIG_CGROUP_V1_WRITEBACK + INIT_LIST_HEAD(&blkcg->memcg_list); #endif list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 624c03c8fe64..ff2544e574c7 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -114,6 +114,9 @@ struct blkcg { #ifdef CONFIG_CGROUP_WRITEBACK struct list_head cgwb_list; #endif +#ifdef CONFIG_CGROUP_V1_WRITEBACK + struct list_head memcg_list; +#endif };
static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 1a97277f99b1..600a5178b40c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -149,6 +149,26 @@ static inline bool mapping_can_writeback(struct address_space *mapping) return inode_to_bdi(mapping->host)->capabilities & BDI_CAP_WRITEBACK; }
+#ifdef CONFIG_CGROUP_V1_WRITEBACK +void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css); +void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, + struct cgroup_subsys_state *blkcg_css); +bool cgroup1_writeback_enabled(void); +#else +static inline void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css) +{ +} +static inline void +wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, + struct cgroup_subsys_state *blkcg_css) +{ +} +static inline bool cgroup1_writeback_enabled(void) +{ + return false; +} +#endif /* CONFIG_CGROUP_V1_WRITEBACK */ + #ifdef CONFIG_CGROUP_WRITEBACK
struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, @@ -174,10 +194,11 @@ static inline bool inode_cgwb_enabled(struct inode *inode) { struct backing_dev_info *bdi = inode_to_bdi(inode);
- return cgroup_subsys_on_dfl(memory_cgrp_subsys) && - cgroup_subsys_on_dfl(io_cgrp_subsys) && - (bdi->capabilities & BDI_CAP_WRITEBACK) && - (inode->i_sb->s_iflags & SB_I_CGROUPWB); + return ((cgroup_subsys_on_dfl(memory_cgrp_subsys) && + cgroup_subsys_on_dfl(io_cgrp_subsys)) || + cgroup1_writeback_enabled()) && + (bdi->capabilities & BDI_CAP_WRITEBACK) && + (inode->i_sb->s_iflags & SB_I_CGROUPWB); }
/** diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 3823642a66bd..62cea15eb6df 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -633,7 +633,7 @@ static inline void cgroup_kthread_ready(void) }
void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen); -struct cgroup __cgroup_get_from_id(struct cgroup_root *root, u64 id); +struct cgroup *__cgroup_get_from_id(struct cgroup_root *root, u64 id); struct cgroup *cgroup_get_from_id(u64 id); #else /* !CONFIG_CGROUPS */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index fb8db0885bdf..5bfaa77e2b82 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -320,6 +320,11 @@ struct mem_cgroup { struct memcg_cgwb_frn cgwb_frn[MEMCG_CGWB_FRN_CNT]; #endif
+#ifdef CONFIG_CGROUP_V1_WRITEBACK + struct cgroup_subsys_state *wb_blk_css; + struct list_head memcg_node; +#endif + /* List of events which userspace want to receive */ struct list_head event_list; spinlock_t event_list_lock; diff --git a/init/Kconfig b/init/Kconfig index 2ee1384c4f81..6ec688e72242 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -981,6 +981,11 @@ config CGROUP_WRITEBACK depends on MEMCG && BLK_CGROUP default y
+config CGROUP_V1_WRITEBACK + bool "Support Cgroup Writeback On Cgroupv1" + depends on CGROUP_WRITEBACK + default n + menuconfig CGROUP_SCHED bool "CPU controller" default n diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0279af218528..88b1af7045d5 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -60,6 +60,7 @@ #include <linux/sched/deadline.h> #include <linux/psi.h> #include <net/sock.h> +#include <linux/backing-dev.h>
#define CREATE_TRACE_POINTS #include <trace/events/cgroup.h> @@ -5580,6 +5581,7 @@ static struct cgroup_subsys_state *css_create(struct cgroup *cgrp, list_del_rcu(&css->sibling); err_free_css: list_del_rcu(&css->rstat_css_node); + wb_kill_memcg_blkcg(css); INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn); queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork); return ERR_PTR(err); @@ -5848,6 +5850,7 @@ static void kill_css(struct cgroup_subsys_state *css) */ css_get(css);
+ wb_kill_memcg_blkcg(css); /* * cgroup core guarantees that, by the time ->css_offline() is * invoked, no new css reference will be given out via @@ -6203,7 +6206,7 @@ void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) }
/* - * cgroup_get_from_id : get the cgroup associated with cgroup id + * __cgroup_get_from_id : get the cgroup associated with cgroup id * @id: cgroup id * On success return the cgrp or ERR_PTR on failure * Only cgroups within current task's cgroup NS are valid. diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1e3447bccdb1..d70f8420c805 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -573,6 +573,29 @@ static void cgwb_remove_from_bdi_list(struct bdi_writeback *wb) spin_unlock_irq(&cgwb_lock); }
+#ifdef CONFIG_CGROUP_V1_WRITEBACK +static struct cgroup_subsys_state *cgwbv1_get_blkcss(struct mem_cgroup *memcg) +{ + struct cgroup_subsys_state *blkcg_css; + + rcu_read_lock(); + blkcg_css = memcg->wb_blk_css; + if (!css_tryget_online(blkcg_css)) { + blkcg_css = blkcg_root_css; + css_get(blkcg_css); + } + rcu_read_unlock(); + + return blkcg_css; +} +#else +static inline struct cgroup_subsys_state * +cgwbv1_get_blkcss(struct mem_cgroup *memcg) +{ + return NULL; +} +#endif + static int cgwb_create(struct backing_dev_info *bdi, struct cgroup_subsys_state *memcg_css, gfp_t gfp) { @@ -584,7 +607,11 @@ static int cgwb_create(struct backing_dev_info *bdi, int ret = 0;
memcg = mem_cgroup_from_css(memcg_css); - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + if (cgroup1_writeback_enabled()) + blkcg_css = cgwbv1_get_blkcss(memcg); + else + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, + &io_cgrp_subsys); memcg_cgwb_list = &memcg->cgwb_list; blkcg_cgwb_list = blkcg_get_cgwb_list(blkcg_css);
@@ -703,9 +730,14 @@ struct bdi_writeback *wb_get_lookup(struct backing_dev_info *bdi, wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id); if (wb) { struct cgroup_subsys_state *blkcg_css; + struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css);
/* see whether the blkcg association has changed */ - blkcg_css = cgroup_get_e_css(memcg_css->cgroup, &io_cgrp_subsys); + if (cgroup1_writeback_enabled()) + blkcg_css = cgwbv1_get_blkcss(memcg); + else + blkcg_css = cgroup_get_e_css(memcg_css->cgroup, + &io_cgrp_subsys); if (unlikely(wb->blkcg_css != blkcg_css || !wb_tryget(wb))) wb = NULL; css_put(blkcg_css); @@ -1129,3 +1161,83 @@ const char *bdi_dev_name(struct backing_dev_info *bdi) return bdi->dev_name; } EXPORT_SYMBOL_GPL(bdi_dev_name); + +#ifdef CONFIG_CGROUP_V1_WRITEBACK +#include "../block/blk-cgroup.h" +#include "../kernel/cgroup/cgroup-internal.h" + +static bool cgroup1_writeback __read_mostly; + +bool cgroup1_writeback_enabled(void) +{ + return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && + !cgroup_subsys_on_dfl(io_cgrp_subsys) && cgroup1_writeback; +} + +static void wb_kill_memcg(struct cgroup_subsys_state *memcg_css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css); + + list_del_init(&memcg->memcg_node); + css_put(memcg->wb_blk_css); +} + +static void wb_kill_blkcg(struct cgroup_subsys_state *blkcg_css) +{ + struct mem_cgroup *memcg, *tmp; + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + struct blkcg *root_blkcg = css_to_blkcg(blkcg_root_css); + + list_for_each_entry_safe(memcg, tmp, &blkcg->memcg_list, memcg_node) { + css_get(blkcg_root_css); + memcg->wb_blk_css = blkcg_root_css; + list_move(&memcg->memcg_node, &root_blkcg->memcg_list); + css_put(blkcg_css); + } +} + +void wb_kill_memcg_blkcg(struct cgroup_subsys_state *css) +{ + struct cgroup_subsys *ss = css->ss; + + if (!cgroup1_writeback) + return; + + lockdep_assert_held(&cgroup_mutex); + + if (ss->id == io_cgrp_id) + wb_kill_blkcg(css); + else if (ss->id == memory_cgrp_id) + wb_kill_memcg(css); +} + +void wb_attach_memcg_to_blkcg(struct cgroup_subsys_state *memcg_css, + struct cgroup_subsys_state *blkcg_css) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(memcg_css); + struct cgroup_subsys_state *pre_blkcss = 
memcg->wb_blk_css; + struct blkcg *blkcg = css_to_blkcg(blkcg_css); + + if (!cgroup1_writeback) + return; + + lockdep_assert_held(&cgroup_mutex); + + css_get(blkcg_css); + memcg->wb_blk_css = blkcg_css; + if (pre_blkcss == NULL) + list_add(&memcg->memcg_node, &blkcg->memcg_list); + else { + list_move(&memcg->memcg_node, &blkcg->memcg_list); + css_put(pre_blkcss); + } +} + +static int __init enable_cgroup1_writeback(char *s) +{ + cgroup1_writeback = true; + + return 1; +} +__setup("cgroup1_writeback", enable_cgroup1_writeback); +#endif diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2489f59ddd5a..624b7018eede 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -383,7 +383,8 @@ struct cgroup_subsys_state *mem_cgroup_css_from_folio(struct folio *folio) { struct mem_cgroup *memcg = folio_memcg(folio);
- if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + if (!memcg || !(cgroup_subsys_on_dfl(memory_cgrp_subsys) || + cgroup1_writeback_enabled())) memcg = root_mem_cgroup;
return &memcg->css; @@ -5150,6 +5151,77 @@ static ssize_t memcg_high_async_ratio_write(struct kernfs_open_file *of, } #endif
+#ifdef CONFIG_CGROUP_V1_WRITEBACK
+#include "../kernel/cgroup/cgroup-internal.h"
+
+/* Show the path and inode number of the blkcg this memcg is attached to. */
+static int wb_blkio_show(struct seq_file *m, void *v)
+{
+	char *path;
+	ino_t blkcg_id;
+	struct cgroup *blkcg_cgroup;
+	struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
+
+	if (!cgroup1_writeback_enabled())
+		return -EOPNOTSUPP;
+
+	path = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!path)
+		return -ENOMEM;
+
+	mutex_lock(&cgroup_mutex);
+	blkcg_cgroup = memcg->wb_blk_css->cgroup;
+	blkcg_id = cgroup_ino(blkcg_cgroup);
+	cgroup_path(blkcg_cgroup, path, PATH_MAX);
+	mutex_unlock(&cgroup_mutex);
+	seq_printf(m, "wb_blkio_path:%s\n", path);
+	seq_printf(m, "wb_blkio_ino:%lu\n", blkcg_id);
+	kfree(path);
+
+	return 0;
+}
+
+/*
+ * Attach this memcg to the blkcg whose cgroup id is written to the file.
+ * Returns nbytes on success or a negative errno on failure.
+ */
+static ssize_t wb_blkio_write(struct kernfs_open_file *of, char *buf,
+			      size_t nbytes, loff_t off)
+{
+	int ret;
+	u64 cgrp_id;
+	struct cgroup *blk_cgroup;
+	struct cgroup_subsys_state *blkcg_css;
+
+	if (!cgroup1_writeback_enabled())
+		return -EOPNOTSUPP;
+
+	ret = kstrtou64(strstrip(buf), 0, &cgrp_id);
+	if (ret)
+		return ret;
+
+	mutex_lock(&cgroup_mutex);
+	blk_cgroup = __cgroup_get_from_id(blkcg_root_css->cgroup->root, cgrp_id);
+	if (IS_ERR(blk_cgroup)) {
+		mutex_unlock(&cgroup_mutex);
+		return -EINVAL;
+	}
+	blkcg_css = cgroup_tryget_css(blk_cgroup, &io_cgrp_subsys);
+	if (!blkcg_css) {
+		/* no io css on that cgroup: fail instead of reporting success */
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	wb_attach_memcg_to_blkcg(of_css(of), blkcg_css);
+	css_put(blkcg_css);
+out_unlock:
+	cgroup_put(blk_cgroup);
+	mutex_unlock(&cgroup_mutex);
+
+	return ret < 0 ? ret : nbytes;
+}
+#endif
+
 static struct cftype mem_cgroup_legacy_files[] = {
 	{
 		.name = "usage_in_bytes",
@@ -5317,6 +5389,14 @@ static struct cftype mem_cgroup_legacy_files[] = {
 		.name = "reclaim",
 		.write = memory_reclaim,
 	},
+#endif
+#ifdef CONFIG_CGROUP_V1_WRITEBACK
+	{
+		.name = "wb_blkio_ino",
+		.flags = CFTYPE_NOT_ON_ROOT,
+		.seq_show = wb_blkio_show,
+		.write = wb_blkio_write,
+	},
 #endif
 	{ }, /* terminate */
 };
@@ -5576,6 +5656,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		static_branch_inc(&memcg_bpf_enabled_key);
 #endif
+ wb_attach_memcg_to_blkcg(&memcg->css, blkcg_root_css); return &memcg->css; }