From: Jing Xiangfeng <jingxiangfeng@huawei.com>
hulk inclusion category: feature bugzilla: 51827 CVE: NA
--------------------------------------
This patch adds a default-false static key that gates the memcg priority feature, so the feature is disabled by default. To enable it, write 1 to the sysctl:
echo 1 > /proc/sys/vm/memcg_qos_enable
Signed-off-by: Jing Xiangfeng jingxiangfeng@huawei.com Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- include/linux/memcontrol.h | 7 +++++ kernel/sysctl.c | 11 ++++++++ mm/memcontrol.c | 56 ++++++++++++++++++++++++++++++++++++++ mm/oom_kill.c | 7 ++--- 4 files changed, 77 insertions(+), 4 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index dce7aa54f2698..52604319712b9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -331,9 +331,16 @@ struct mem_cgroup_extension { container_of(cgroup, struct mem_cgroup_extension, memcg)
#ifdef CONFIG_MEMCG_QOS +#define ENABLE_MEMCG_QOS 1 +#define DISABLE_MEMCG_QOS 0 +extern int sysctl_memcg_qos_stat; +DECLARE_STATIC_KEY_FALSE(memcg_qos_stat_key); + bool memcg_low_priority_scan_tasks(int (*)(struct task_struct *, void *), void *); void memcg_print_bad_task(void *arg, int ret); +extern int sysctl_memcg_qos_handler(struct ctl_table *table, + int write, void __user *buffer, size_t *length, loff_t *ppos); #endif
/* diff --git a/kernel/sysctl.c b/kernel/sysctl.c index d111d02042eb7..af4d97b9dfce6 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1449,6 +1449,17 @@ static struct ctl_table vm_table[] = { .mode = 0644, .proc_handler = hugetlb_overcommit_handler, }, +#endif +#ifdef CONFIG_MEMCG_QOS + { + .procname = "memcg_qos_enable", + .data = &sysctl_memcg_qos_stat, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = sysctl_memcg_qos_handler, + .extra1 = &zero, + .extra2 = &one, + }, #endif { .procname = "lowmem_reserve_ratio", diff --git a/mm/memcontrol.c b/mm/memcontrol.c index bb6e7a0af502d..c1871d7b134cf 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3404,12 +3404,18 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css, #endif
#ifdef CONFIG_MEMCG_QOS +int sysctl_memcg_qos_stat = DISABLE_MEMCG_QOS; +DEFINE_STATIC_KEY_FALSE(memcg_qos_stat_key); + static void memcg_qos_init(struct mem_cgroup *memcg) { struct mem_cgroup *parent = parent_mem_cgroup(memcg); struct mem_cgroup_extension *memcg_ext; struct mem_cgroup_extension *parent_ext;
+ if (!static_branch_likely(&memcg_qos_stat_key)) + return; + if (!parent) return;
@@ -3426,6 +3432,9 @@ static s64 memcg_qos_read(struct cgroup_subsys_state *css, struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_extension *memcg_ext;
+ if (!static_branch_likely(&memcg_qos_stat_key)) + return 0; + memcg_ext = to_memcg_ext(memcg);
return memcg_ext->memcg_priority; @@ -3437,6 +3446,9 @@ static int memcg_qos_write(struct cgroup_subsys_state *css, struct mem_cgroup *memcg = mem_cgroup_from_css(css); struct mem_cgroup_extension *memcg_ext;
+ if (!static_branch_likely(&memcg_qos_stat_key)) + return -EACCES; + memcg_ext = to_memcg_ext(memcg);
if (val >= 0) @@ -3484,6 +3496,8 @@ bool memcg_low_priority_scan_tasks(int (*fn)(struct task_struct *, void *), int ret = 0; bool retry = true;
+ if (!static_branch_likely(&memcg_qos_stat_key)) + return false; retry: max = memcg_find_max_usage(last); if (!max) @@ -3522,6 +3536,9 @@ void memcg_print_bad_task(void *arg, int ret) struct mem_cgroup *memcg; struct mem_cgroup_extension *memcg_ext;
+ if (!static_branch_likely(&memcg_qos_stat_key)) + return; + if (!ret && oc->chosen) { memcg = mem_cgroup_from_task(oc->chosen); memcg_ext = to_memcg_ext(memcg); @@ -3530,6 +3547,45 @@ void memcg_print_bad_task(void *arg, int ret) oc->chosen->pid, oc->chosen->comm); } } + +static void memcg_qos_reset(void) +{ + struct mem_cgroup *iter; + struct cgroup_subsys_state *css; + struct mem_cgroup_extension *memcg_ext; + + rcu_read_lock(); + css_for_each_descendant_pre(css, &root_mem_cgroup->css) { + iter = mem_cgroup_from_css(css); + memcg_ext = to_memcg_ext(iter); + + if (memcg_ext->memcg_priority) + memcg_ext->memcg_priority = 0; + } + rcu_read_unlock(); +} + +int sysctl_memcg_qos_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, length, ppos); + if (ret) + return ret; + if (write) { + if (sysctl_memcg_qos_stat == ENABLE_MEMCG_QOS) { + static_branch_enable(&memcg_qos_stat_key); + pr_info("enable memcg priority.\n"); + } else { + static_branch_disable(&memcg_qos_stat_key); + memcg_qos_reset(); + pr_info("disable memcg priority.\n"); + } + } + + return ret; +} #endif
#ifdef CONFIG_NUMA diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 38710c51bb40a..2e09b03432c08 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -330,6 +330,8 @@ static bool oom_next_task(struct task_struct *task, struct oom_control *oc, struct mem_cgroup_extension *cur_ext; struct mem_cgroup_extension *oc_ext;
+ if (!static_branch_likely(&memcg_qos_stat_key)) + return !points || points < oc->chosen_points;
if (!points) return true; @@ -357,10 +359,7 @@ static bool oom_next_task(struct task_struct *task, struct oom_control *oc, static inline bool oom_next_task(struct task_struct *task, struct oom_control *oc, unsigned long points) { - if (!points || points < oc->chosen_points) - return true; - - return false; + return !points || points < oc->chosen_points; } #endif