Chen Wandun (1): memcg: introduce per-memcg reclaim interface for cgroup v1
Lu Jialin (7): cgroup: Export cgroup.kill from cgroupv2 to cgroupv1 memcg: Export memcg.{min/low} from cgroupv2 to cgroupv1 memcg: Export memcg.high from cgroupv2 to cgroupv1 memcg: Export memory.events and memory.events.local from cgroupv2 to cgroupv1 memcg: enable memcg async reclaim memcg: export high_async_ratio to userland config: enable CONFIG_MEMCG_V1_RECLAIM and CONFIG_CGROUP_V1_KILL
arch/arm64/configs/openeuler_defconfig | 2 + arch/x86/configs/openeuler_defconfig | 2 + include/linux/memcontrol.h | 24 +- init/Kconfig | 10 + kernel/cgroup/cgroup-internal.h | 3 + kernel/cgroup/cgroup-v1.c | 7 + kernel/cgroup/cgroup.c | 4 +- mm/memcontrol.c | 471 +++++++++++++++++-------- 8 files changed, 369 insertions(+), 154 deletions(-)
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
--------------------------------
Export cgroup.kill feature from cgroupv2 to cgroupv1. Therefore, user can kill all process in one cgroup and its subcgroups instead of kill them one by one.
Signed-off-by: Lu Jialin lujialin4@huawei.com --- init/Kconfig | 5 +++++ kernel/cgroup/cgroup-internal.h | 3 +++ kernel/cgroup/cgroup-v1.c | 7 +++++++ kernel/cgroup/cgroup.c | 4 ++-- 4 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig index 6d35728b94b2..f37422327249 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1175,6 +1175,11 @@ config SOCK_CGROUP_DATA bool default n
+config CGROUP_V1_KILL + bool "Kill All Tasks In Cgroup V1" + default n + depends on CGROUPS + endif # CGROUPS
menuconfig NAMESPACES diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h index c56071f150f2..d5a197d4b0ec 100644 --- a/kernel/cgroup/cgroup-internal.h +++ b/kernel/cgroup/cgroup-internal.h @@ -264,6 +264,9 @@ int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node, int __cgroup_task_count(const struct cgroup *cgrp); int cgroup_task_count(const struct cgroup *cgrp);
+ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off); + /* * rstat.c */ diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index 76db6c67e39a..134a15e1d83a 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -660,6 +660,13 @@ struct cftype cgroup1_base_files[] = { .write = cgroup_release_agent_write, .max_write_len = PATH_MAX - 1, }, +#ifdef CONFIG_CGROUP_V1_KILL + { + .name = "cgroup.kill", + .flags = CFTYPE_NOT_ON_ROOT, + .write = cgroup_kill_write, + }, +#endif { } /* terminate */ };
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 518725b57200..8db5e7eb242b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -3965,8 +3965,8 @@ static void cgroup_kill(struct cgroup *cgrp) __cgroup_kill(dsct); }
-static ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) +ssize_t cgroup_kill_write(struct kernfs_open_file *of, char *buf, size_t nbytes, + loff_t off) { ssize_t ret = 0; int kill;
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
--------------------------------
Export memcg.min and memcg.low from cgroupv2 to cgroupv1, in order to reduce the negtive impact between cgroups when the system memory is insufficient.
Only export memory.{min/low} numbers in mem_cgroup_legacy_files and move related functions in front of mem_cgroup_legacy_files. There is no need to other changes.
Signed-off-by: Lu Jialin lujialin4@huawei.com --- init/Kconfig | 5 ++ mm/memcontrol.c | 126 +++++++++++++++++++++++++++--------------------- 2 files changed, 75 insertions(+), 56 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig index f37422327249..d4d9ba7e1b48 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -944,6 +944,11 @@ config MEMCG help Provides control over the memory footprint of tasks in a cgroup.
+config MEMCG_V1_RECLAIM + bool "Enable Per Memory Cgroup Qos Reclaim in Cgroup V1" + depends on MEMCG + default n + config MEMCG_KMEM bool depends on MEMCG diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8a881ab21f6c..d4152e1ec2e9 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5010,6 +5010,62 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p) } #endif
+static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) +{ + if (value == PAGE_COUNTER_MAX) + seq_puts(m, "max\n"); + else + seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE); + + return 0; +} + +static int memory_min_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + READ_ONCE(mem_cgroup_from_seq(m)->memory.min)); +} + +static ssize_t memory_min_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long min; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "max", &min); + if (err) + return err; + + page_counter_set_min(&memcg->memory, min); + + return nbytes; +} + +static int memory_low_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + READ_ONCE(mem_cgroup_from_seq(m)->memory.low)); +} + +static ssize_t memory_low_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned long low; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "max", &low); + if (err) + return err; + + page_counter_set_low(&memcg->memory, low); + + return nbytes; +} + static int memory_stat_show(struct seq_file *m, void *v);
static struct cftype mem_cgroup_legacy_files[] = { @@ -5138,6 +5194,20 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, +#ifdef CONFIG_MEMCG_V1_RECLAIM + { + .name = "min", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_min_show, + .write = memory_min_write, + }, + { + .name = "low", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_low_show, + .write = memory_low_write, + }, +#endif { }, /* terminate */ };
@@ -6402,16 +6472,6 @@ static void mem_cgroup_attach(struct cgroup_taskset *tset) } #endif /* CONFIG_LRU_GEN */
-static int seq_puts_memcg_tunable(struct seq_file *m, unsigned long value) -{ - if (value == PAGE_COUNTER_MAX) - seq_puts(m, "max\n"); - else - seq_printf(m, "%llu\n", (u64)value * PAGE_SIZE); - - return 0; -} - static u64 memory_current_read(struct cgroup_subsys_state *css, struct cftype *cft) { @@ -6428,52 +6488,6 @@ static u64 memory_peak_read(struct cgroup_subsys_state *css, return (u64)memcg->memory.watermark * PAGE_SIZE; }
-static int memory_min_show(struct seq_file *m, void *v) -{ - return seq_puts_memcg_tunable(m, - READ_ONCE(mem_cgroup_from_seq(m)->memory.min)); -} - -static ssize_t memory_min_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long min; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "max", &min); - if (err) - return err; - - page_counter_set_min(&memcg->memory, min); - - return nbytes; -} - -static int memory_low_show(struct seq_file *m, void *v) -{ - return seq_puts_memcg_tunable(m, - READ_ONCE(mem_cgroup_from_seq(m)->memory.low)); -} - -static ssize_t memory_low_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned long low; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "max", &low); - if (err) - return err; - - page_counter_set_low(&memcg->memory, low); - - return nbytes; -} - static int memory_high_show(struct seq_file *m, void *v) { return seq_puts_memcg_tunable(m,
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
--------------------------------
Export memory.high from cgroupv2 to cgroupv1. Therefore, when the usage of the memcg is larger than memory.high, some pages will be reclaimed before return to userland, which will throttle the process.
Only export memory.high number in mem_cgroup_legacy_files and move related functions in front of mem_cgroup_legacy_files. There is no need to other changes.
Signed-off-by: Lu Jialin lujialin4@huawei.com --- mm/memcontrol.c | 104 +++++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 49 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d4152e1ec2e9..26a4238f618c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5066,6 +5066,55 @@ static ssize_t memory_low_write(struct kernfs_open_file *of, return nbytes; }
+static int memory_high_show(struct seq_file *m, void *v) +{ + return seq_puts_memcg_tunable(m, + READ_ONCE(mem_cgroup_from_seq(m)->memory.high)); +} + +static ssize_t memory_high_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_retries = MAX_RECLAIM_RETRIES; + bool drained = false; + unsigned long high; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "max", &high); + if (err) + return err; + + page_counter_set_high(&memcg->memory, high); + + for (;;) { + unsigned long nr_pages = page_counter_read(&memcg->memory); + unsigned long reclaimed; + + if (nr_pages <= high) + break; + + if (signal_pending(current)) + break; + + if (!drained) { + drain_all_stock(memcg); + drained = true; + continue; + } + + reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, + GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); + + if (!reclaimed && !nr_retries--) + break; + } + + memcg_wb_domain_size_changed(memcg); + return nbytes; +} + static int memory_stat_show(struct seq_file *m, void *v);
static struct cftype mem_cgroup_legacy_files[] = { @@ -5207,6 +5256,12 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memory_low_show, .write = memory_low_write, }, + { + .name = "high", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memory_high_show, + .write = memory_high_write, + }, #endif { }, /* terminate */ }; @@ -6488,55 +6543,6 @@ static u64 memory_peak_read(struct cgroup_subsys_state *css, return (u64)memcg->memory.watermark * PAGE_SIZE; }
-static int memory_high_show(struct seq_file *m, void *v) -{ - return seq_puts_memcg_tunable(m, - READ_ONCE(mem_cgroup_from_seq(m)->memory.high)); -} - -static ssize_t memory_high_write(struct kernfs_open_file *of, - char *buf, size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned int nr_retries = MAX_RECLAIM_RETRIES; - bool drained = false; - unsigned long high; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "max", &high); - if (err) - return err; - - page_counter_set_high(&memcg->memory, high); - - for (;;) { - unsigned long nr_pages = page_counter_read(&memcg->memory); - unsigned long reclaimed; - - if (nr_pages <= high) - break; - - if (signal_pending(current)) - break; - - if (!drained) { - drain_all_stock(memcg); - drained = true; - continue; - } - - reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high, - GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); - - if (!reclaimed && !nr_retries--) - break; - } - - memcg_wb_domain_size_changed(memcg); - return nbytes; -} - static int memory_max_show(struct seq_file *m, void *v) { return seq_puts_memcg_tunable(m,
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
--------------------------------
Export "memory.events" and "memory.events.local" from cgroupv2 to cgroupv1.
There are some differences between v2 and v1:
1)events of MEMCG_OOM_GROUP_KILL is not included in cgroupv1. Because, there is no member of memory.oom.group.
2)events of MEMCG_MAX is represented with "limit_in_bytes" in cgroupv1 instead of memory.max
3)event of oom_kill is include in memory.oom_control. make oom_kill include its descendants' events and add oom_kill_local include its oom_kill event only.
Signed-off-by: Lu Jialin lujialin4@huawei.com --- include/linux/memcontrol.h | 19 ++++++++++++++-- mm/memcontrol.c | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+), 2 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index e4e24da16d2c..3f4684a38725 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1113,6 +1113,20 @@ static inline void count_memcg_event_mm(struct mm_struct *mm, rcu_read_unlock(); }
+static bool memcg_event_add(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + if (!mem_cgroup_is_root(memcg)) + return true; + +#ifdef CONFIG_MEMCG_V1_RECLAIM + if (event == MEMCG_OOM_KILL && !cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return true; +#endif + + return false; +} + static inline void memcg_memory_event(struct mem_cgroup *memcg, enum memcg_memory_event event) { @@ -1129,13 +1143,14 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, cgroup_file_notify(&memcg->swap_events_file); else cgroup_file_notify(&memcg->events_file); - +#ifndef CONFIG_MEMCG_V1_RECLAIM if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; +#endif if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && - !mem_cgroup_is_root(memcg)); + memcg_event_add(memcg, event)); }
static inline void memcg_memory_event_mm(struct mm_struct *mm, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 26a4238f618c..a8db10c1b5d3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4530,6 +4530,11 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); seq_printf(sf, "oom_kill %lu\n", atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); +#ifdef CONFIG_MEMCG_V1_RECLAIM + seq_printf(sf, "oom_kill_local %lu\n", + atomic_long_read(&memcg->memory_events_local[MEMCG_OOM_KILL])); +#endif + return 0; }
@@ -5115,6 +5120,33 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, return nbytes; }
+#ifdef CONFIG_MEMCG_V1_RECLAIM +static void __memcg_events_show(struct seq_file *m, atomic_long_t *events) +{ + seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW])); + seq_printf(m, "high %lu\n", atomic_long_read(&events[MEMCG_HIGH])); + seq_printf(m, "limit_in_bytes %lu\n", + atomic_long_read(&events[MEMCG_MAX])); + seq_printf(m, "oom %lu\n", atomic_long_read(&events[MEMCG_OOM])); +} + +static int memcg_events_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + __memcg_events_show(m, memcg->memory_events); + return 0; +} + +static int memcg_events_local_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + __memcg_events_show(m, memcg->memory_events_local); + return 0; +} +#endif + static int memory_stat_show(struct seq_file *m, void *v);
static struct cftype mem_cgroup_legacy_files[] = { @@ -5262,6 +5294,18 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memory_high_show, .write = memory_high_write, }, + { + .name = "events", + .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct mem_cgroup, events_file), + .seq_show = memcg_events_show, + }, + { + .name = "events.local", + .flags = CFTYPE_NOT_ON_ROOT, + .file_offset = offsetof(struct mem_cgroup, events_local_file), + .seq_show = memcg_events_local_show, + }, #endif { }, /* terminate */ };
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
-------------------------------
Introduce two memcg watermarks: warning watermark and safe watermark. warning watermark = memory.high * memory.high_async_ratio / 100; safe watermark = memory.high * (memory.high_async_ratio - 10) / 100; Start memcg async reclaim when memcg usage is larger than warning watermark but smaller than memory.high; the aim reclaim pages is the diff of memcg usage and safe watermark. The default memory.high_async_ratio is 100; when memory.high_async_ratio is 100, memcg async reclaim is disabled;
Signed-off-by: Lu Jialin lujialin4@huawei.com --- include/linux/memcontrol.h | 5 +++ mm/memcontrol.c | 68 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 70 insertions(+), 3 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3f4684a38725..133e2e5b0ae0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -333,6 +333,11 @@ struct mem_cgroup { struct lru_gen_mm_list mm_list; #endif
+#ifdef CONFIG_MEMCG_V1_RECLAIM + int high_async_ratio; + bool high_async_reclaim; +#endif + struct mem_cgroup_per_node *nodeinfo[]; };
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a8db10c1b5d3..76c3e493f83b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -105,6 +105,18 @@ static bool do_memsw_account(void) #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024
+/* + * memcg warning watermark = memory.high * memcg->high_async_ratio / + * HIGH_ASYNC_RATIO_BASE. + * when memcg usage is larger than warning watermark, but smaller than + * memory.high, start memcg async reclaim; + * when memcg->high_async_ratio is HIGH_ASYNC_RATIO_BASE, memcg async + * relcaim is disabled; + */ + +#define HIGH_ASYNC_RATIO_BASE 100 +#define HIGH_ASYNC_RATIO_GAP 10 + /* * Cgroups above their limits are maintained in a RB-Tree, independent of * their hierarchy representation @@ -2406,12 +2418,52 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg, return nr_reclaimed; }
+#ifdef CONFIG_MEMCG_V1_RECLAIM +static bool is_high_async_reclaim(struct mem_cgroup *memcg) +{ + int ratio = READ_ONCE(memcg->high_async_ratio); + unsigned long memcg_high = READ_ONCE(memcg->memory.high); + + if (ratio == HIGH_ASYNC_RATIO_BASE || memcg_high == PAGE_COUNTER_MAX) + return false; + + return page_counter_read(&memcg->memory) > + memcg_high * ratio / HIGH_ASYNC_RATIO_BASE; +} + +static void async_reclaim_high(struct mem_cgroup *memcg) +{ + unsigned long nr_pages, pflags; + unsigned long memcg_high = READ_ONCE(memcg->memory.high); + unsigned long memcg_usage = page_counter_read(&memcg->memory); + int ratio = READ_ONCE(memcg->high_async_ratio) - HIGH_ASYNC_RATIO_GAP; + unsigned long safe_pages = memcg_high * ratio / HIGH_ASYNC_RATIO_BASE; + + if (!is_high_async_reclaim(memcg)) { + WRITE_ONCE(memcg->high_async_reclaim, false); + return; + } + + psi_memstall_enter(&pflags); + nr_pages = memcg_usage > safe_pages ? memcg_usage - safe_pages : + MEMCG_CHARGE_BATCH; + try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); + psi_memstall_leave(&pflags); + WRITE_ONCE(memcg->high_async_reclaim, false); +} +#endif + static void high_work_func(struct work_struct *work) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = container_of(work, struct mem_cgroup, + high_work);
- memcg = container_of(work, struct mem_cgroup, high_work); - reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); +#ifdef CONFIG_MEMCG_V1_RECLAIM + if (READ_ONCE(memcg->high_async_reclaim)) + async_reclaim_high(memcg); + else +#endif + reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); }
/* @@ -2799,6 +2851,13 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, } continue; } +#ifdef CONFIG_MEMCG_V1_RECLAIM + if (is_high_async_reclaim(memcg) && !mem_high) { + WRITE_ONCE(memcg->high_async_reclaim, true); + schedule_work(&memcg->high_work); + break; + } +#endif
if (mem_high || swap_high) { /* @@ -5533,6 +5592,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) WRITE_ONCE(memcg->soft_limit, PAGE_COUNTER_MAX); #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) memcg->zswap_max = PAGE_COUNTER_MAX; +#endif +#ifdef CONFIG_MEMCG_V1_RECLAIM + memcg->high_async_ratio = HIGH_ASYNC_RATIO_BASE; #endif page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) {
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
-------------------------------
User can set high_async_ratio from 10 to 100; start memcg high async when memcg_usage is larger than memory.high * high_async_ratio / 100;
Signed-off-by: Lu Jialin lujialin4@huawei.com --- mm/memcontrol.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 76c3e493f83b..cebd070d7757 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5204,6 +5204,37 @@ static int memcg_events_local_show(struct seq_file *m, void *v) __memcg_events_show(m, memcg->memory_events_local); return 0; } + +static int memcg_high_async_ratio_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", + READ_ONCE(mem_cgroup_from_seq(m)->high_async_ratio)); + return 0; +} + +static ssize_t memcg_high_async_ratio_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, + loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + int ret, high_async_ratio; + + buf = strstrip(buf); + if (!buf) + return -EINVAL; + + ret = kstrtoint(buf, 0, &high_async_ratio); + if (ret) + return ret; + + if (high_async_ratio > HIGH_ASYNC_RATIO_BASE || + high_async_ratio <= HIGH_ASYNC_RATIO_GAP) + return -EINVAL; + + WRITE_ONCE(memcg->high_async_ratio, high_async_ratio); + + return nbytes; +} #endif
static int memory_stat_show(struct seq_file *m, void *v); @@ -5365,6 +5396,12 @@ static struct cftype mem_cgroup_legacy_files[] = { .file_offset = offsetof(struct mem_cgroup, events_local_file), .seq_show = memcg_events_local_show, }, + { + .name = "high_async_ratio", + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = memcg_high_async_ratio_show, + .write = memcg_high_async_ratio_write, + }, #endif { }, /* terminate */ };
From: Chen Wandun chenwandun@huawei.com
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
--------------------------------
introduce per-memcg reclaim interface for cgroup v1, and disable memory reclaim for root memcg.
Signed-off-by: Chen Wandun chenwandun@huawei.com Link: https://gitee.com/openeuler/kernel/commit/f698ccf83bfded5033bea769cd663c7d94... Conflict: mm/memcontrol.c Signed-off-by: Lu Jialin lujialin4@huawei.com --- mm/memcontrol.c | 92 +++++++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 42 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index cebd070d7757..ec4160b7fbbb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5237,6 +5237,52 @@ static ssize_t memcg_high_async_ratio_write(struct kernfs_open_file *of, } #endif
+static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_retries = MAX_RECLAIM_RETRIES; + unsigned long nr_to_reclaim, nr_reclaimed = 0; + unsigned int reclaim_options; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "", &nr_to_reclaim); + if (err) + return err; + + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && + mem_cgroup_is_root(memcg)) + return -EINVAL; + + reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; + while (nr_reclaimed < nr_to_reclaim) { + unsigned long reclaimed; + + if (signal_pending(current)) + return -EINTR; + + /* + * This is the final attempt, drain percpu lru caches in the + * hope of introducing more evictable pages for + * try_to_free_mem_cgroup_pages(). + */ + if (!nr_retries) + lru_add_drain_all(); + + reclaimed = try_to_free_mem_cgroup_pages(memcg, + min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX), + GFP_KERNEL, reclaim_options); + + if (!reclaimed && !nr_retries--) + return -EAGAIN; + + nr_reclaimed += reclaimed; + } + + return nbytes; +} + static int memory_stat_show(struct seq_file *m, void *v);
static struct cftype mem_cgroup_legacy_files[] = { @@ -5402,6 +5448,10 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memcg_high_async_ratio_show, .write = memcg_high_async_ratio_write, }, + { + .name = "reclaim", + .write = memory_reclaim, + }, #endif { }, /* terminate */ }; @@ -6850,48 +6900,6 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; }
-static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, - size_t nbytes, loff_t off) -{ - struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); - unsigned int nr_retries = MAX_RECLAIM_RETRIES; - unsigned long nr_to_reclaim, nr_reclaimed = 0; - unsigned int reclaim_options; - int err; - - buf = strstrip(buf); - err = page_counter_memparse(buf, "", &nr_to_reclaim); - if (err) - return err; - - reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; - while (nr_reclaimed < nr_to_reclaim) { - unsigned long reclaimed; - - if (signal_pending(current)) - return -EINTR; - - /* - * This is the final attempt, drain percpu lru caches in the - * hope of introducing more evictable pages for - * try_to_free_mem_cgroup_pages(). - */ - if (!nr_retries) - lru_add_drain_all(); - - reclaimed = try_to_free_mem_cgroup_pages(memcg, - min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX), - GFP_KERNEL, reclaim_options); - - if (!reclaimed && !nr_retries--) - return -EAGAIN; - - nr_reclaimed += reclaimed; - } - - return nbytes; -} - static struct cftype memory_files[] = { { .name = "current",
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
--------------------------------
enable CONFIG_MEMCG_V1_RECLAIM and CONFIG_CGROUP_V1_KILL in openeuler_defconfig by default. --- arch/arm64/configs/openeuler_defconfig | 2 ++ arch/x86/configs/openeuler_defconfig | 2 ++ 2 files changed, 4 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 9d2f717c1f7c..faeabaab5a9c 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -154,9 +154,11 @@ CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y +CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y # CONFIG_CGROUP_FAVOR_DYNMODS is not set CONFIG_MEMCG=y +CONFIG_MEMCG_V1_RECLAIM=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index b40100e16683..54932bcd6615 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -176,9 +176,11 @@ CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y +CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y # CONFIG_CGROUP_FAVOR_DYNMODS is not set CONFIG_MEMCG=y +CONFIG_MEMCG_V1_RECLAIM=y CONFIG_MEMCG_KMEM=y CONFIG_BLK_CGROUP=y CONFIG_CGROUP_WRITEBACK=y
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/3316 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/3316 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/B...