From: Lu Jialin <lujialin4@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4IMAK?from=project-issue
CVE: NA
--------
Since memory.high reclaim is synchronous and never runs in interrupt context, it can do more work than direct reclaim, e.g. write out dirty pages.
So, add the PF_KSWAPD flag to the reclaiming task so that current_is_kswapd() returns true for memcg kswapd.
Memcg kswapd should stop once the usage of the memcg falls below the memcg kswapd stop flag. When userland has set memcg->memory.max, the stop flag is (memcg->memory.high - memcg->memory.max * 10 / 1000), which mirrors global kswapd. Otherwise, the stop flag is (memcg->memory.high - memcg->memory.high / 6), a gap similar to the largest difference between watermark_low and watermark_high. A worked example is sketched below.

Also, memcg kswapd should not break memory.low protection for now.
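To make the thresholds concrete, here is a minimal user-space sketch of the stop-flag computation; the stop_threshold() helper, the PAGE_COUNTER_MAX stand-in, and all values are made up for illustration, while the kernel-side logic lives in is_memcg_kswapd_stopped() in the diff below:

/*
 * Minimal sketch, assuming 4KiB pages and counters measured in pages
 * as in struct page_counter; everything here is hypothetical and for
 * illustration only.
 */
#include <stdio.h>

#define PAGE_COUNTER_MAX    (~0UL)  /* stand-in for "no limit" */
#define MEMCG_KSWAPD_SCATOR 10

static unsigned long stop_threshold(unsigned long high, unsigned long max)
{
	if (max == PAGE_COUNTER_MAX)
		/* no memory.max: stop memory.high / 6 below memory.high */
		return high - high / 6;
	/* memory.max set: stop 1% of memory.max below memory.high */
	return high - max * MEMCG_KSWAPD_SCATOR / 1000;
}

int main(void)
{
	unsigned long high = 262144;	/* memory.high = 1GiB of 4KiB pages */

	/* prints 218454: stop once usage drops below ~5/6 of memory.high */
	printf("max unset:  %lu pages\n",
	       stop_threshold(high, PAGE_COUNTER_MAX));
	/* prints 256902: with memory.max = 2GiB (524288 pages) */
	printf("max = 2GiB: %lu pages\n",
	       stop_threshold(high, 524288));
	return 0;
}

Memcg kswapd then stops reclaiming once page_counter_read(&memcg->memory) falls below this threshold.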
Signed-off-by: Lu Jialin <lujialin4@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
---
 mm/memcontrol.c |  4 ++++
 mm/vmscan.c     | 40 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 2 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f93bfe1659a3..a3cf9c074cfa 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2364,8 +2364,10 @@ static void high_work_func(struct work_struct *work)
 {
 	struct mem_cgroup *memcg;
 
+	current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
 	memcg = container_of(work, struct mem_cgroup, high_work);
 	reclaim_high(memcg, MEMCG_CHARGE_BATCH, GFP_KERNEL);
+	current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
 }
 
 /*
@@ -2535,9 +2537,11 @@ void mem_cgroup_handle_over_high(void)
 	 * memory.high is currently batched, whereas memory.max and the page
 	 * allocator run every time an allocation is made.
 	 */
+	current->flags |= PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD;
 	nr_reclaimed = reclaim_high(memcg,
 				    in_retry ? SWAP_CLUSTER_MAX : nr_pages,
 				    GFP_KERNEL);
+	current->flags &= ~(PF_SWAPWRITE | PF_MEMALLOC | PF_KSWAPD);
 
 	/*
 	 * memory.high is breached and reclaim is unable to keep up. Throttle
diff --git a/mm/vmscan.c b/mm/vmscan.c
index c851e5f91842..e1e44f0c486d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -61,6 +61,8 @@
 
#include "internal.h"
+#define MEMCG_KSWAPD_SCATOR 10 + #define CREATE_TRACE_POINTS #include <trace/events/vmscan.h>
@@ -2834,6 +2836,24 @@ static inline bool should_continue_reclaim(struct pglist_data *pgdat,
 	return inactive_lru_pages > pages_for_compaction;
 }
 
+static bool is_memcg_kswapd_stopped(struct scan_control *sc)
+{
+	struct mem_cgroup *memcg = sc->target_mem_cgroup;
+	bool is_stop = false;
+	unsigned long stop_flag = 0;
+
+	if (!cgroup_reclaim(sc))
+		return false;
+	if (memcg->memory.max == PAGE_COUNTER_MAX)
+		stop_flag = memcg->memory.high - memcg->memory.high / 6;
+	else
+		stop_flag = memcg->memory.high - memcg->memory.max *
+			    MEMCG_KSWAPD_SCATOR / 1000;
+	is_stop = page_counter_read(&memcg->memory) < stop_flag;
+
+	return (current_is_kswapd() && is_stop);
+}
+
 static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 {
 	struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
@@ -2889,6 +2909,14 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 			   sc->nr_scanned - scanned,
 			   sc->nr_reclaimed - reclaimed);
 
+		/*
+		 * Memcg background reclaim breaks the iteration once the
+		 * memcg kswapd stop condition is satisfied.
+		 */
+		if (is_memcg_kswapd_stopped(sc)) {
+			mem_cgroup_iter_break(target_memcg, memcg);
+			break;
+		}
 	} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
 }
 
@@ -3257,6 +3285,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
 
 	do {
+		if (is_memcg_kswapd_stopped(sc))
+			break;
+
 		vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
 				sc->priority);
 		sc->nr_scanned = 0;
@@ -3319,8 +3350,13 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 		goto retry;
 	}
 
-	/* Untapped cgroup reserves? Don't OOM, retry. */
-	if (sc->memcg_low_skipped) {
+	/*
+	 * Untapped cgroup reserves? Don't OOM, retry.
+	 * When memcg usage is lower than memory.high / 2, memcg kswapd stops
+	 * memcg reclaim, but it should not break memory.low protection.
+	 */
+	if (sc->memcg_low_skipped &&
+	    !(current_is_kswapd() && cgroup_reclaim(sc))) {
 		sc->priority = initial_priority;
 		sc->force_deactivate = 0;
 		sc->memcg_low_reclaim = 1;