
From: Lu Jialin <lujialin4@huawei.com> hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4IMAK?from=project-issue CVE: NA -------- Since memory.high reclaim is synchronous regardless of whether it runs in interrupt context, it could do more work than direct reclaim, i.e. write out dirty pages, etc. So, add the PF_KSWAPD flag, so that current_is_kswapd() would return true for memcg kswapd. Memcg kswapd should stop when the memcg usage meets the memcg kswapd stop flag. When userland sets memcg->memory.max, the stop_flag is (memcg->memory.high - memcg->memory.max * 10 / 1000), which is similar to global kswapd. Otherwise, the stop_flag is (memcg->memory.high - memcg->memory.high / 6), which is similar to the largest difference between watermark_low and watermark_high. Also, memcg kswapd should not break memory.low protection for now. Signed-off-by: Lu Jialin <lujialin4@huawei.com> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> Reviewed-by: weiyang wang <wangweiyang2@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> --- mm/memcontrol.c | 4 ++++ mm/vmscan.c | 40 ++++++++++++++++++++++++++++++++++++++-- 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f93bfe1659a3..a3cf9c074cfa 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2364,8 +2364,10 @@ static void high_work_func(struct work_struct *work) { struct mem_cgroup *memcg; + current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD; memcg = container_of(work, struct mem_cgroup, high_work); reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL); + current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD); } /* @@ -2535,9 +2537,11 @@ void mem_cgroup_handle_over_high(void) * memory.high is currently batched, whereas memory.max and the page * allocator run every time an allocation is made. */ + current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD; nr_reclaimed = reclaim_high(memcg, in_retry ? 
SWAP_CLUSTER_MAX : nr_pages, GFP_KERNEL); + current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD); /* * memory.high is breached and reclaim is unable to keep up. Throttle diff --git a/mm/vmscan.c b/mm/vmscan.c index c851e5f91842..e1e44f0c486d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -61,6 +61,8 @@ #include "internal.h" +#define MEMCG_KSWAPD_SCATOR 10 + #define CREATE_TRACE_POINTS #include <trace/events/vmscan.h> @@ -2834,6 +2836,24 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat, return inactive_lru_pages > pages_for_compaction; } +static bool is_memcg_kswapd_stopped(struct scan_control *sc) +{ + struct mem_cgroup *memcg = sc->target_mem_cgroup; + bool is_stop = false; + unsigned long stop_flag = 0; + + if (!cgroup_reclaim(sc)) + return false; + if (memcg->memory.max == PAGE_COUNTER_MAX) + stop_flag = memcg->memory.high / 6; + else + stop_flag = memcg->memory.high - memcg->memory.max * + MEMCG_KSWAPD_SCATOR / 1000; + is_stop = page_counter_read(&memcg->memory) < stop_flag; + + return (current_is_kswapd() && is_stop); +} + static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) { struct mem_cgroup *target_memcg = sc->target_mem_cgroup; @@ -2889,6 +2909,14 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) sc->nr_scanned - scanned, sc->nr_reclaimed - reclaimed); + /* + * Memcg background reclaim breaks out of the iteration once the + * memcg kswapd stop condition is satisfied. 
+ */ + if (is_memcg_kswapd_stopped(sc)) { + mem_cgroup_iter_break(target_memcg, memcg); + break; + } } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL))); } @@ -3257,6 +3285,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1); do { + if (is_memcg_kswapd_stopped(sc)) + break; + vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup, sc->priority); sc->nr_scanned = 0; @@ -3319,8 +3350,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, goto retry; } - /* Untapped cgroup reserves? Don't OOM, retry. */ - if (sc->memcg_low_skipped) { + /* + * Untapped cgroup reserves? Don't OOM, retry. + * If memcg usage is lower than memory.high / 2, memcg kswapd will + * stop memcg reclaim, but it should not break low protection. + */ + if (sc->memcg_low_skipped && + !(current_is_kswapd() && cgroup_reclaim(sc))) { sc->priority = initial_priority; sc->force_deactivate = 0; sc->memcg_low_reclaim = 1; -- 2.20.1