[PATCH] memcg: introduce memcg early oom feature
 
            From: Zhao Xuedong <zhaoxuedong@meituan.com> meituan inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/ICTXDJ CVE: NA -------------------------------- Introduce memcg early oom feature to trigger OOM killer earlier; this feature is disabled by default. Signed-off-by: Zhao Xuedong <zhaoxuedong@meituan.com> --- arch/arm64/configs/openeuler_defconfig | 1 + arch/x86/configs/openeuler_defconfig | 1 + mm/Kconfig | 14 +++++ mm/vmscan.c | 75 ++++++++++++++++++++++++++ 4 files changed, 91 insertions(+) diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 7027f5382554..95356ef40f7f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -1147,6 +1147,7 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEMCG_QOS=y CONFIG_MEMCG_SWAP_QOS=y +# CONFIG_MEMCG_EARLY_OOM is not set CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig index f76767da2c93..b67431413b58 100644 --- a/arch/x86/configs/openeuler_defconfig +++ b/arch/x86/configs/openeuler_defconfig @@ -1069,6 +1069,7 @@ CONFIG_CLEANCACHE=y CONFIG_FRONTSWAP=y CONFIG_MEMCG_QOS=y CONFIG_MEMCG_SWAP_QOS=y +# CONFIG_MEMCG_EARLY_OOM is not set CONFIG_ETMEM_SCAN=m CONFIG_ETMEM_SWAP=m CONFIG_ETMEM=y diff --git a/mm/Kconfig b/mm/Kconfig index cc43f5124cb3..89bcb73b6a5b 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -521,6 +521,20 @@ config MEMCG_SWAP_QOS memcg swap control include memory force swapin, swapfile control and swap limit. +config MEMCG_EARLY_OOM + bool "Enable aggressive memcg OOM killing under memory pressure" + depends on MEMCG + depends on X86 || ARM64 + default n + help + MEMCG_EARLY_OOM makes memory cgroups trigger OOM killer earlier + and more aggressively when under memory pressure, rather than + attempting to reclaim very small amounts of file pages through + prolonged reclaim attempts. 
+ + Say "y" if you prefer fast OOM kills over prolonged reclaim + attempts. + config ETMEM_SCAN tristate "module: etmem page scan for etmem support" depends on ETMEM diff --git a/mm/vmscan.c b/mm/vmscan.c index e82d7995b548..4b1d8dbc5e3c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2470,11 +2470,62 @@ static bool inactive_is_low(struct lruvec *lruvec, enum lru_list inactive_lru) return inactive * inactive_ratio < active; } +#ifdef CONFIG_MEMCG_EARLY_OOM +/* Check if swap usage is over the limit for cgroupv1. */ +static bool is_swap_over_limit(struct mem_cgroup *memcg) +{ + unsigned long mem_limit = READ_ONCE(memcg->memory.max); + unsigned long memsw_limit = READ_ONCE(memcg->memsw.max); + + if (memsw_limit <= mem_limit) + return false; + + return (page_counter_read(&memcg->memsw) - + page_counter_read(&memcg->memory)) > + (memsw_limit - mem_limit); +} + +/* + * Check if file cache is too small to reclaim and anonymous pages are reclaimable. + * Only applies to cgroup reclaim. Returns true if all of the following hold: + * 1. File cache (+ free space) is at or below pages_min, which is a quarter of (memory.max - memory.high) or 0 when max <= high, AND + * 2. DEACTIVATE_ANON is not set in sc->may_deactivate, AND + * 3. The inactive anon LRU size shifted right by sc->priority is non-zero + */ +static bool memcg_should_skip_file_reclaim(struct mem_cgroup *memcg, + struct scan_control *sc, + struct lruvec *lruvec) +{ + unsigned long file, anon, free; + unsigned long mem_limit, memsw_usage, mem_high; + unsigned long pages_min; + + if (!cgroup_reclaim(sc)) + return false; + + file = lruvec_lru_size(lruvec, LRU_INACTIVE_FILE, sc->reclaim_idx) + + lruvec_lru_size(lruvec, LRU_ACTIVE_FILE, sc->reclaim_idx); + mem_limit = READ_ONCE(memcg->memory.max); + memsw_usage = page_counter_read(&memcg->memsw); + mem_high = READ_ONCE(memcg->memory.high); + anon = lruvec_lru_size(lruvec, LRU_INACTIVE_ANON, sc->reclaim_idx); + free = mem_limit > memsw_usage ? mem_limit - memsw_usage : 0; + pages_min = mem_limit > mem_high ? 
(mem_limit - mem_high) >> 2 : 0; + + return (file + free <= pages_min) && + !(sc->may_deactivate & DEACTIVATE_ANON) && + (anon >> sc->priority); +} +#endif + enum scan_balance { SCAN_EQUAL, SCAN_FRACT, SCAN_ANON, SCAN_FILE, +#ifdef CONFIG_MEMCG_EARLY_OOM + SCAN_NONE, +#endif }; /* @@ -2498,6 +2549,20 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, unsigned long ap, fp; enum lru_list lru; +#ifdef CONFIG_MEMCG_EARLY_OOM + /* + * If swap usage is over its limit and the file cache is too small + * to be worth reclaiming, scan nothing (SCAN_NONE) so that the OOM + * killer is triggered sooner. + */ + if (cgroup_reclaim(sc) && + is_swap_over_limit(memcg) && + memcg_should_skip_file_reclaim(memcg, sc, lruvec)) { + scan_balance = SCAN_NONE; + goto out; + } +#endif + if (sc->not_file) { scan_balance = SCAN_ANON; goto out; @@ -2543,7 +2608,12 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, /* * If the system is almost out of file pages, force-scan anon. */ +#ifdef CONFIG_MEMCG_EARLY_OOM + if (sc->file_is_tiny || + memcg_should_skip_file_reclaim(memcg, sc, lruvec)) { +#else if (sc->file_is_tiny) { +#endif scan_balance = SCAN_ANON; goto out; } @@ -2687,6 +2757,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, if ((scan_balance == SCAN_FILE) != file) scan = 0; break; +#ifdef CONFIG_MEMCG_EARLY_OOM + case SCAN_NONE: + scan = 0; + break; +#endif default: /* Look ma, no brain */ BUG(); -- 2.33.0
participants (1)
- 
                 zhaoxuedong zhaoxuedong