Lu Jialin (3):
  cgroup: add config isolation for cgroup_kill in cgroupv1
  kabi: use CONFIG_KABI_RESERVE to isolate bpf cgroup reserve flags
  memcg: Introduce CONFIG_MEMCG_V1_THRESHOLD_QOS
 arch/arm64/configs/openeuler_defconfig |  2 ++
 arch/x86/configs/openeuler_defconfig   |  2 ++
 include/linux/bpf-cgroup.h             |  2 ++
 include/linux/bpf.h                    |  2 ++
 include/linux/memcontrol.h             | 12 ++++++++----
 init/Kconfig                           | 10 ++++++++++
 kernel/cgroup/cgroup-v1.c              |  2 ++
 mm/memcontrol.c                        | 18 ++++++++++++++++++
 8 files changed, 46 insertions(+), 4 deletions(-)
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9K8D1
--------------------------------
Introduce CONFIG_CGROUP_V1_KILL to isolate the cgroup_kill feature in cgroup v1.
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig | 1 +
 arch/x86/configs/openeuler_defconfig   | 1 +
 init/Kconfig                           | 5 +++++
 kernel/cgroup/cgroup-v1.c              | 2 ++
 4 files changed, 9 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index a8f83a4e2a4f..791ddf6922e8 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -145,6 +145,7 @@ CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y +CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 86a2e4ea72da..61c889d4520f 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -150,6 +150,7 @@ CONFIG_ARCH_SUPPORTS_INT128=y CONFIG_NUMA_BALANCING=y CONFIG_NUMA_BALANCING_DEFAULT_ENABLED=y CONFIG_CGROUPS=y +CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y CONFIG_MEMCG_SWAP=y diff --git a/init/Kconfig b/init/Kconfig index 6e8801382618..17e42d22d8ac 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1271,6 +1271,11 @@ config CGROUP_FILES This supports catching misbehaving processes and return EMFILE instead of ENOMEM for kernel memory limits.
+config CGROUP_V1_KILL + bool "Kill All Tasks In Cgroup" + default n + depends on CGROUPS + endif # CGROUPS
menuconfig NAMESPACES diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c index d26b1be9e602..19e175710b7f 100644 --- a/kernel/cgroup/cgroup-v1.c +++ b/kernel/cgroup/cgroup-v1.c @@ -659,11 +659,13 @@ struct cftype cgroup1_base_files[] = { .write = cgroup_release_agent_write, .max_write_len = PATH_MAX - 1, }, +#ifdef CONFIG_CGROUP_V1_KILL { .name = "cgroup.kill", .flags = CFTYPE_NOT_ON_ROOT, .write = cgroup_kill_write, }, +#endif { } /* terminate */ };
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9K8D1
--------------------------------
Limit the bpf cgroup reserve flags so that they are compiled only when CONFIG_KABI_RESERVE is enabled.
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 include/linux/bpf-cgroup.h | 2 ++
 include/linux/bpf.h        | 2 ++
 2 files changed, 4 insertions(+)
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index bf74485b6567..0d61a7e4778b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -49,6 +49,7 @@ enum cgroup_bpf_attach_type { CGROUP_INET4_GETSOCKNAME, CGROUP_INET6_GETSOCKNAME, CGROUP_INET_SOCK_RELEASE, +#ifdef CONFIG_KABI_RESERVE CGROUP_ATTACH_TYPE_KABI_RESERVE_1, CGROUP_ATTACH_TYPE_KABI_RESERVE_2, CGROUP_ATTACH_TYPE_KABI_RESERVE_3, @@ -57,6 +58,7 @@ enum cgroup_bpf_attach_type { CGROUP_ATTACH_TYPE_KABI_RESERVE_6, CGROUP_ATTACH_TYPE_KABI_RESERVE_7, CGROUP_ATTACH_TYPE_KABI_RESERVE_8, +#endif MAX_CGROUP_BPF_ATTACH_TYPE };
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 84d4e4849e3c..fc6754747781 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -562,6 +562,7 @@ struct bpf_prog_offload { enum bpf_cgroup_storage_type { BPF_CGROUP_STORAGE_SHARED, BPF_CGROUP_STORAGE_PERCPU, +#ifdef CONFIG_KABI_RESERVE BPF_CGROUP_STORAGE_KABI_RESERVE_1, BPF_CGROUP_STORAGE_KABI_RESERVE_2, BPF_CGROUP_STORAGE_KABI_RESERVE_3, @@ -570,6 +571,7 @@ enum bpf_cgroup_storage_type { BPF_CGROUP_STORAGE_KABI_RESERVE_6, BPF_CGROUP_STORAGE_KABI_RESERVE_7, BPF_CGROUP_STORAGE_KABI_RESERVE_8, +#endif __BPF_CGROUP_STORAGE_MAX };
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I9K8D1
--------------------------------
Introduce CONFIG_MEMCG_V1_THRESHOLD_QOS to isolate the memcg QoS management feature from the baseline.
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
---
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 include/linux/memcontrol.h             | 12 ++++++++----
 init/Kconfig                           |  5 +++++
 mm/memcontrol.c                        | 18 ++++++++++++++++++
 5 files changed, 33 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 791ddf6922e8..5b8089f8c4a1 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -148,6 +148,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y +CONFIG_MEMCG_V1_THRESHOLD_QOS=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y CONFIG_MEMCG_MEMFS_INFO=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index 61c889d4520f..82b673aa6498 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -153,6 +153,7 @@ CONFIG_CGROUPS=y CONFIG_CGROUP_V1_KILL=y CONFIG_PAGE_COUNTER=y CONFIG_MEMCG=y +CONFIG_MEMCG_V1_THRESHOLD_QOS=y CONFIG_MEMCG_SWAP=y CONFIG_MEMCG_KMEM=y CONFIG_MEMCG_MEMFS_INFO=y diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ef3a6a8e640f..d6b7d478e39b 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -385,9 +385,8 @@ struct mem_cgroup { #if defined(CONFIG_DYNAMIC_HUGETLB) && defined(CONFIG_X86_64) struct dhugetlb_pool *hpool; #endif -#ifndef __GENKSYMS__ - int high_async_ratio; - bool high_async_reclaim; +#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS + KABI_USE2(1, int high_async_ratio, bool high_async_reclaim) #else KABI_RESERVE(1) #endif @@ -1255,8 +1254,10 @@ static bool memcg_event_add(struct mem_cgroup *memcg, if (!mem_cgroup_is_root(memcg)) return true;
+#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS if (event == MEMCG_OOM_KILL && !cgroup_subsys_on_dfl(memory_cgrp_subsys)) return true; +#endif
return false; } @@ -1277,7 +1278,10 @@ static inline void memcg_memory_event(struct mem_cgroup *memcg, cgroup_file_notify(&memcg->swap_events_file); else cgroup_file_notify(&memcg->events_file); - +#ifndef CONFIG_MEMCG_V1_THRESHOLD_QOS + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + break; +#endif if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break; } while ((memcg = parent_mem_cgroup(memcg)) && diff --git a/init/Kconfig b/init/Kconfig index 17e42d22d8ac..93e34c55f041 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -941,6 +941,11 @@ config MEMCG help Provides control over the memory footprint of tasks in a cgroup.
+config MEMCG_V1_THRESHOLD_QOS + bool "Qos memcg threshold in v1" + depends on MEMCG + default n + config MEMCG_SWAP bool depends on MEMCG && SWAP diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9007c3554771..b8b07b46d9fc 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2392,6 +2392,7 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg, return nr_reclaimed; }
+#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS static bool is_high_async_reclaim(struct mem_cgroup *memcg) { int ratio = READ_ONCE(memcg->high_async_ratio); @@ -2427,15 +2428,18 @@ static void async_reclaim_high(struct mem_cgroup *memcg) psi_memstall_leave(&pflags); WRITE_ONCE(memcg->high_async_reclaim, false); } +#endif
static void high_work_func(struct work_struct *work) { struct mem_cgroup *memcg = container_of(work, struct mem_cgroup, high_work);
+#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS if (READ_ONCE(memcg->high_async_reclaim)) async_reclaim_high(memcg); else +#endif reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); }
@@ -2828,11 +2832,13 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, continue; }
+#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS if (is_high_async_reclaim(memcg) && !mem_high) { WRITE_ONCE(memcg->high_async_reclaim, true); schedule_work(&memcg->high_work); break; } +#endif
if (mem_high || swap_high) { /* @@ -5063,8 +5069,10 @@ static int mem_cgroup_oom_control_read(struct seq_file *sf, void *v) seq_printf(sf, "under_oom %d\n", (bool)memcg->under_oom); seq_printf(sf, "oom_kill %lu\n", atomic_long_read(&memcg->memory_events[MEMCG_OOM_KILL])); +#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS seq_printf(sf, "oom_kill_local %lu\n", atomic_long_read(&memcg->memory_events_local[MEMCG_OOM_KILL])); +#endif
return 0; } @@ -5637,6 +5645,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of, return nbytes; }
+#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS static void __memcg_events_show(struct seq_file *m, atomic_long_t *events) { seq_printf(m, "low %lu\n", atomic_long_read(&events[MEMCG_LOW])); @@ -5661,6 +5670,7 @@ static int memcg_events_local_show(struct seq_file *m, void *v) __memcg_events_show(m, memcg->memory_events_local); return 0; } +#endif
static int reclaim_param_parse(char *buf, unsigned long *nr_pages, unsigned int *reclaim_options) @@ -5741,6 +5751,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, return nbytes; }
+#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS static int memcg_high_async_ratio_show(struct seq_file *m, void *v) { seq_printf(m, "%d\n", @@ -5770,6 +5781,7 @@ static ssize_t memcg_high_async_ratio_write(struct kernfs_open_file *of,
return nbytes; } +#endif
#ifdef CONFIG_KSM static int __memcg_set_ksm_for_tasks(struct mem_cgroup *memcg, bool enable) @@ -6168,6 +6180,7 @@ static struct cftype mem_cgroup_legacy_files[] = { .write = mem_cgroup_reset, .read_u64 = mem_cgroup_read_u64, }, +#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS { .name = "min", .flags = CFTYPE_NOT_ON_ROOT, @@ -6198,6 +6211,7 @@ static struct cftype mem_cgroup_legacy_files[] = { .file_offset = offsetof(struct mem_cgroup, events_local_file), .seq_show = memcg_events_local_show, }, +#endif { .name = "reclaim", .write = memory_reclaim, @@ -6221,12 +6235,14 @@ static struct cftype mem_cgroup_legacy_files[] = { .seq_show = memcg_swapfile_read, }, #endif +#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS { .name = "high_async_ratio", .flags = CFTYPE_NOT_ON_ROOT, .seq_show = memcg_high_async_ratio_show, .write = memcg_high_async_ratio_write, }, +#endif #ifdef CONFIG_CGROUP_V1_WRITEBACK { .name = "wb_blkio_ino", @@ -6448,7 +6464,9 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX); memcg->soft_limit = PAGE_COUNTER_MAX; +#ifdef CONFIG_MEMCG_V1_THRESHOLD_QOS memcg->high_async_ratio = HIGH_ASYNC_RATIO_BASE; +#endif page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) { memcg->swappiness = mem_cgroup_swappiness(parent);
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/8143 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/P...
FeedBack: The patch(es) which you have sent to kernel@openeuler.org mailing list has been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/8143 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/P...