From: Jing Xiangfeng <jingxiangfeng@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I8PXX8
CVE: NA
--------------------------------------
Add a sysctl interface, memcg_qos_enable, to enable/disable the memcg OOM
priority feature. It is disabled by default and can be enabled by writing 1:
echo 1 > /proc/sys/vm/memcg_qos_enable
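
It can be disabled again by writing 0; as handled in
sysctl_memcg_oom_prio_handler() below, disabling also resets the oom_prio
of every memcg back to 0:

echo 0 > /proc/sys/vm/memcg_qos_enable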

Signed-off-by: Jing Xiangfeng <jingxiangfeng@huawei.com>
Signed-off-by: Jinjiang Tu <tujinjiang@huawei.com>
---
 include/linux/memcontrol.h |  3 ++
 mm/memcontrol.c            | 79 ++++++++++++++++++++++++++++++++++++++
 mm/oom_kill.c              |  3 ++
 3 files changed, 85 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index a3a20aef438d..c42b78163e65 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -356,9 +356,12 @@ struct mem_cgroup {
 #ifdef CONFIG_MEMCG_OOM_PRIORITY
 #define MEMCG_LOW_OOM_PRIORITY	-1
 #define MEMCG_HIGH_OOM_PRIORITY	0
+extern int sysctl_memcg_oom_prio;
+
 bool memcg_oom_prio_scan_tasks(int (*)(struct task_struct *, void *), void *);
 void memcg_print_bad_task(struct oom_control *oc);
+bool memcg_oom_prio_disabled(void);
 #else
 static inline void memcg_print_bad_task(struct oom_control *oc)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 940022380b53..95df92ed8e39 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4056,6 +4056,15 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
 #endif
 
 #ifdef CONFIG_MEMCG_OOM_PRIORITY
+#define ENABLE_MEMCG_OOM_PRIORITY	1
+#define DISABLE_MEMCG_OOM_PRIORITY	0
+int sysctl_memcg_oom_prio = DISABLE_MEMCG_OOM_PRIORITY;
+
+bool memcg_oom_prio_disabled(void)
+{
+	return READ_ONCE(sysctl_memcg_oom_prio) == DISABLE_MEMCG_OOM_PRIORITY;
+}
+
 static void memcg_oom_prio_init(struct mem_cgroup *memcg)
 {
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
@@ -4087,6 +4096,9 @@ static s64 memcg_oom_prio_read(struct cgroup_subsys_state *css,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
+	if (memcg_oom_prio_disabled())
+		return 0;
+
 	return READ_ONCE(memcg->oom_prio);
 }
@@ -4095,6 +4107,9 @@ static int memcg_oom_prio_write(struct cgroup_subsys_state *css,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 
+	if (memcg_oom_prio_disabled())
+		return -EACCES;
+
 	if (mem_cgroup_is_root(memcg))
 		return -EINVAL;
@@ -4145,6 +4160,8 @@ bool memcg_oom_prio_scan_tasks(int (*fn)(struct task_struct *, void *),
 	int ret = 0;
 	bool retry = true;
 
+	if (memcg_oom_prio_disabled())
+		return false;
 retry:
 	max = memcg_find_max_usage(last);
 	if (!max)
@@ -4177,6 +4194,9 @@ bool memcg_oom_prio_scan_tasks(int (*fn)(struct task_struct *, void *),
 
 void memcg_print_bad_task(struct oom_control *oc)
 {
+	if (memcg_oom_prio_disabled())
+		return;
+
 	if (oc->chosen) {
 		struct mem_cgroup *memcg;
@@ -4188,6 +4208,64 @@ void memcg_print_bad_task(struct oom_control *oc)
 		rcu_read_unlock();
 	}
 }
+
+static void memcg_oom_prio_reset(void)
+{
+	struct mem_cgroup *iter;
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+	css_for_each_descendant_pre(css, &root_mem_cgroup->css) {
+		iter = mem_cgroup_from_css(css);
+		WRITE_ONCE(iter->oom_prio, 0);
+	}
+	rcu_read_unlock();
+}
+
+static int sysctl_memcg_oom_prio_handler(struct ctl_table *table, int write,
+		void __user *buffer, size_t *length, loff_t *ppos)
+{
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
+	if (ret)
+		return ret;
+
+	if (write) {
+		if (READ_ONCE(sysctl_memcg_oom_prio) == DISABLE_MEMCG_OOM_PRIORITY)
+			memcg_oom_prio_reset();
+	}
+
+	return ret;
+}
+
+static struct ctl_table memcg_oom_prio_sysctls[] = {
+	{
+		/*
+		 * This sysctl controls the memcg OOM priority feature;
+		 * the name memcg_qos_enable is kept for compatibility.
+		 */
+		.procname = "memcg_qos_enable",
+		.data = &sysctl_memcg_oom_prio,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = sysctl_memcg_oom_prio_handler,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	},
+};
+
+static __init int memcg_oom_prio_sysctls_init(void)
+{
+	register_sysctl_init("vm", memcg_oom_prio_sysctls);
+	return 0;
+}
+#else
+static inline int memcg_oom_prio_sysctls_init(void)
+{
+	return 0;
+}
+
 #endif
 
 #ifdef CONFIG_NUMA
@@ -7958,6 +8036,7 @@ static int __init mem_cgroup_init(void)
 	}
 
 	mem_cgroup_memfs_info_init();
+	memcg_oom_prio_sysctls_init();
 
 	return 0;
 }
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 38ed03c344f3..3da29ddfea1a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -318,6 +318,9 @@ static bool oom_next_task(struct task_struct *task, struct oom_control *oc,
 	struct mem_cgroup *oc_memcg;
 	int cur_memcg_prio, oc_memcg_prio;
 
+	if (memcg_oom_prio_disabled())
+		return points == LONG_MIN || points < oc->chosen_points;
+
 	if (points == LONG_MIN)
 		return true;