From: Lu Jialin <lujialin4@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IMAK
CVE: NA
-------------------------------
Enable memcg async reclaim when memcg usage exceeds
memory_high * memcg->high_async_ratio / 10. If memcg usage also exceeds
memory_high * (memcg->high_async_ratio - 1) / 10, the number of pages to
reclaim is the difference between memcg usage and
memory_high * (memcg->high_async_ratio - 1) / 10; otherwise
MEMCG_CHARGE_BATCH pages are reclaimed. The default memcg->high_async_ratio
is 0, which disables memcg async reclaim.
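For illustration (hypothetical numbers, not from the patch): with
memory_high = 1000 pages and high_async_ratio = 8, async reclaim starts once
usage exceeds 1000 * 8 / 10 = 800 pages, and at a usage of 900 pages the
reclaim target is 900 - 1000 * 7 / 10 = 200 pages.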
Async reclaim is triggered in two places: 1) try_charge; 2) when memory_high
is rewritten. A sketch of the threshold arithmetic follows below.
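The following user-space sketch only illustrates the threshold arithmetic
described above, reusing the hypothetical numbers from the example; it is not
part of the patch, and the MEMCG_CHARGE_BATCH value used here is a
placeholder.

/* Illustrative sketch of the async reclaim thresholds; not kernel code. */
#include <stdio.h>

#define HIGH_ASYNC_RATIO_BASE	10
#define HIGH_ASYNC_RATIO_GAP	1
#define MEMCG_CHARGE_BATCH	32	/* placeholder; kernel value may differ */

/* Pages to reclaim once async reclaim has been triggered. */
static unsigned long reclaim_target(unsigned long usage, unsigned long high,
				    int ratio)
{
	unsigned long safe = high * (ratio - HIGH_ASYNC_RATIO_GAP) /
			     HIGH_ASYNC_RATIO_BASE;

	return usage > safe ? usage - safe : MEMCG_CHARGE_BATCH;
}

int main(void)
{
	unsigned long high = 1000, usage = 900;	/* pages, example values */
	int ratio = 8;				/* memcg->high_async_ratio */

	/* ratio == 0 means async reclaim is disabled. */
	if (ratio && usage > high * ratio / HIGH_ASYNC_RATIO_BASE)
		printf("start async reclaim, target %lu pages\n",
		       reclaim_target(usage, high, ratio));
	else
		printf("no async reclaim\n");
	return 0;
}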
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 include/linux/memcontrol.h |  5 +++
 mm/memcontrol.c            | 72 +++++++++++++++++++++++++++++++++++---
 2 files changed, 73 insertions(+), 4 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 6959aca37bc2..631a3bfeb066 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -375,7 +375,12 @@ struct mem_cgroup {
 #ifdef CONFIG_DYNAMIC_HUGETLB
 	struct dhugetlb_pool *hpool;
 #endif
+#ifndef __GENKSYMS__
+	int high_async_ratio;
+	bool high_async_reclaim;
+#else
 	KABI_RESERVE(1)
+#endif
 	KABI_RESERVE(2)
 	KABI_RESERVE(3)
 	KABI_RESERVE(4)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 16decaf4844d..a6ee6091afb2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -106,6 +106,22 @@ static bool do_memsw_account(void)
 #define THRESHOLDS_EVENTS_TARGET 128
 #define SOFTLIMIT_EVENTS_TARGET 1024
 
+/*
+ * When memcg->high_async_ratio is HIGH_ASYNC_RATIO_DEFAULT, memcg async
+ * reclaim is disabled;
+ * when mem_usage is larger than memory.high * memcg->high_async_ratio /
+ * HIGH_ASYNC_RATIO_BASE, start async reclaim;
+ * if mem_usage is larger than memory.high * (memcg->high_async_ratio -
+ * HIGH_ASYNC_RATIO_GAP) / HIGH_ASYNC_RATIO_BASE, the reclaim target is
+ * the diff of mem_usage and memory.high * (memcg->high_async_ratio -
+ * HIGH_ASYNC_RATIO_GAP) / HIGH_ASYNC_RATIO_BASE, else the reclaim
+ * target is MEMCG_CHARGE_BATCH;
+ */
+
+#define HIGH_ASYNC_RATIO_DEFAULT 0
+#define HIGH_ASYNC_RATIO_BASE 10
+#define HIGH_ASYNC_RATIO_GAP 1
+
 /*
  * Cgroups above their limits are maintained in a RB-Tree, independent of
  * their hierarchy representation
@@ -2338,18 +2354,41 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 	return 0;
 }
 
+static bool is_high_async_reclaim(struct mem_cgroup *memcg)
+{
+	int ratio = READ_ONCE(memcg->high_async_ratio);
+
+	if (ratio == HIGH_ASYNC_RATIO_DEFAULT)
+		return false;
+
+	if (READ_ONCE(memcg->memory.high) == PAGE_COUNTER_MAX)
+		return false;
+
+	return page_counter_read(&memcg->memory) >
+	       (READ_ONCE(memcg->memory.high) * ratio / HIGH_ASYNC_RATIO_BASE);
+}
+
 static unsigned long reclaim_high(struct mem_cgroup *memcg,
 				  unsigned int nr_pages,
 				  gfp_t gfp_mask)
 {
 	unsigned long nr_reclaimed = 0;
+	bool high_async_reclaim = READ_ONCE(memcg->high_async_reclaim);
+
+	if (high_async_reclaim)
+		WRITE_ONCE(memcg->high_async_reclaim, false);
 
 	do {
 		unsigned long pflags;
 
-		if (page_counter_read(&memcg->memory) <=
-		    READ_ONCE(memcg->memory.high))
-			continue;
+		if (high_async_reclaim) {
+			if (!is_high_async_reclaim(memcg))
+				continue;
+		} else {
+			if (page_counter_read(&memcg->memory) <=
+			    READ_ONCE(memcg->memory.high))
+				continue;
+		}
 
 		memcg_memory_event(memcg, MEMCG_HIGH);
@@ -2363,12 +2402,26 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 	return nr_reclaimed;
 }
 
+static unsigned long get_reclaim_pages(struct mem_cgroup *memcg)
+{
+	unsigned long nr_pages = page_counter_read(&memcg->memory);
+	int ratio = READ_ONCE(memcg->high_async_ratio) - HIGH_ASYNC_RATIO_GAP;
+	unsigned long safe_pages = READ_ONCE(memcg->memory.high) * ratio /
+				   HIGH_ASYNC_RATIO_BASE;
+
+	return (nr_pages > safe_pages) ? (nr_pages - safe_pages) :
+					 MEMCG_CHARGE_BATCH;
+}
+
 static void high_work_func(struct work_struct *work)
 {
 	struct mem_cgroup *memcg;
 
 	memcg = container_of(work, struct mem_cgroup, high_work);
-	reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
+	if (memcg->high_async_reclaim)
+		reclaim_high(memcg, get_reclaim_pages(memcg), GFP_KERNEL);
+	else
+		reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
 }
 
 /*
@@ -2755,6 +2808,11 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			continue;
 		}
 
+		if (is_high_async_reclaim(memcg)) {
+			WRITE_ONCE(memcg->high_async_reclaim, true);
+			schedule_work(&memcg->high_work);
+		}
+
 		if (mem_high || swap_high) {
 			/*
 			 * The allocating tasks in this cgroup will need to do
@@ -5128,6 +5186,11 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 	page_counter_set_high(&memcg->memory, high);
 
+	if (is_high_async_reclaim(memcg)) {
+		WRITE_ONCE(memcg->high_async_reclaim, true);
+		schedule_work(&memcg->high_work);
+	}
+
 	for (;;) {
 		unsigned long nr_pages = page_counter_read(&memcg->memory);
 		unsigned long reclaimed;
@@ -5636,6 +5699,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	page_counter_set_high(&memcg->memory, PAGE_COUNTER_MAX);
 	memcg->soft_limit = PAGE_COUNTER_MAX;
+	memcg->high_async_ratio = HIGH_ASYNC_RATIO_DEFAULT;
 	page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX);
 	if (parent) {
 		memcg->swappiness = mem_cgroup_swappiness(parent);