hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8JVN0
-------------------------------
Introduce two memcg watermarks: a warning watermark and a safe watermark. warning watermark = memory.high * memory.high_async_ratio / 100; safe watermark = memory.high * (memory.high_async_ratio - 10) / 100. Start memcg async reclaim when the memcg usage is larger than the warning watermark but smaller than memory.high; the number of pages to reclaim is the difference between the memcg usage and the safe watermark. The default memory.high_async_ratio is 100; when memory.high_async_ratio is 100, memcg async reclaim is disabled.
Signed-off-by: Lu Jialin lujialin4@huawei.com --- include/linux/memcontrol.h | 2 ++ mm/memcontrol.c | 61 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 3 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0d4e7f1945dd..8e1ae2d252c6 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -332,6 +332,8 @@ struct mem_cgroup { /* per-memcg mm_struct list */ struct lru_gen_mm_list mm_list; #endif + int high_async_ratio; + bool high_async_reclaim;
struct mem_cgroup_per_node *nodeinfo[]; }; diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1b9dff25a03f..bf225648fcb4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -105,6 +105,18 @@ static bool do_memsw_account(void) #define THRESHOLDS_EVENTS_TARGET 128 #define SOFTLIMIT_EVENTS_TARGET 1024
+/* + * memcg warning watermark = memory.high * memcg->high_async_ratio / + * HIGH_ASYNC_RATIO_BASE. + * when memcg usage is larger than warning watermark, but smaller than + * memory.high, start memcg async reclaim; + * when memcg->high_async_ratio is HIGH_ASYNC_RATIO_BASE, memcg async + * reclaim is disabled; + */ + +#define HIGH_ASYNC_RATIO_BASE 100 +#define HIGH_ASYNC_RATIO_GAP 10 + /* * Cgroups above their limits are maintained in a RB-Tree, independent of * their hierarchy representation @@ -2406,12 +2418,48 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg, return nr_reclaimed; }
+static bool is_high_async_reclaim(struct mem_cgroup *memcg) +{ + int ratio = READ_ONCE(memcg->high_async_ratio); + unsigned long memcg_high = READ_ONCE(memcg->memory.high); + + if (ratio == HIGH_ASYNC_RATIO_BASE || memcg_high == PAGE_COUNTER_MAX) + return false; + + return page_counter_read(&memcg->memory) > + memcg_high * ratio / HIGH_ASYNC_RATIO_BASE; +} + +static void async_reclaim_high(struct mem_cgroup *memcg) +{ + unsigned long nr_pages, pflags; + unsigned long memcg_high = READ_ONCE(memcg->memory.high); + unsigned long memcg_usage = page_counter_read(&memcg->memory); + int ratio = READ_ONCE(memcg->high_async_ratio) - HIGH_ASYNC_RATIO_GAP; + unsigned long safe_pages = memcg_high * ratio / HIGH_ASYNC_RATIO_BASE; + + if (!is_high_async_reclaim(memcg)) { + WRITE_ONCE(memcg->high_async_reclaim, false); + return; + } + + psi_memstall_enter(&pflags); + nr_pages = memcg_usage > safe_pages ? memcg_usage - safe_pages : + MEMCG_CHARGE_BATCH; + try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP); + psi_memstall_leave(&pflags); + WRITE_ONCE(memcg->high_async_reclaim, false); +} + static void high_work_func(struct work_struct *work) { - struct mem_cgroup *memcg; + struct mem_cgroup *memcg = container_of(work, struct mem_cgroup, + high_work);
- memcg = container_of(work, struct mem_cgroup, high_work); - reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); + if (READ_ONCE(memcg->high_async_reclaim)) + async_reclaim_high(memcg); + else + reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); }
/* @@ -2800,6 +2848,12 @@ static int try_charge_memcg(struct mem_cgroup *memcg, gfp_t gfp_mask, continue; }
+ if (is_high_async_reclaim(memcg) && !mem_high) { + WRITE_ONCE(memcg->high_async_reclaim, true); + schedule_work(&memcg->high_work); + break; + } + if (mem_high || swap_high) { /* * The allocating tasks in this cgroup will need to do @@ -5528,6 +5582,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP) memcg->zswap_max = PAGE_COUNTER_MAX; #endif + memcg->high_async_ratio = HIGH_ASYNC_RATIO_BASE; page_counter_set_high(&memcg->swap, PAGE_COUNTER_MAX); if (parent) { WRITE_ONCE(memcg->swappiness, mem_cgroup_swappiness(parent));