hulk inclusion
category: bugfix
bugzilla: https://atomgit.com/openeuler/kernel/issues/8456

------------------------

During the conversion of a struct percpu_counter from atomic mode to
percpu mode, the entire percpu counter initialization must be an atomic
operation, which means the mm counters may only use atomic mode on a
non-preemptible kernel.

Add the MM_COUNTER_ATOMIC configuration option to enable atomic mode
for the mm counters; atomic mode requires PREEMPT_NONE and is
unavailable with PREEMPT_DYNAMIC. Additionally, add the
'rss_atomic_disable' kernel command-line option to explicitly disable
atomic mode.

Fixes: c333c4444953 ("mm: convert mm's rss stats to use atomic mode")
Signed-off-by: Quanmin Yan <yanquanmin1@huawei.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
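With CONFIG_MM_COUNTER_ATOMIC=y, atomic mode is enabled by default and
can be turned off at boot by appending the new 'rss_atomic_disable'
parameter to the kernel command line, for example (illustrative
bootloader entry; the image path and root device are placeholders):

    linux /boot/vmlinuz root=/dev/sda1 rss_atomic_disable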
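For review convenience, the mode gate added in include/linux/mm.h
condenses to the following (excerpted from the hunks below; the
comments are annotations, not part of the patch):

    /*
     * A counter is treated as percpu once it has been initialized, or
     * unconditionally when atomic mode is compiled out or disabled via
     * rss_atomic_disable.
     */
    static inline bool mm_counter_is_pcpu(struct percpu_counter *fbc)
    {
            return !mm_counter_is_atomic() || percpu_counter_initialized(fbc);
    }

    /* Every reader and updater picks its path through the same gate. */
    static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
    {
            struct percpu_counter *fbc = &mm->rss_stat[member];

            if (mm_counter_is_pcpu(fbc))
                    return percpu_counter_read_positive(fbc);

            return percpu_counter_atomic_read(fbc);
    }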
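The resulting counter lifecycle, sketched from the kernel/fork.c hunks
(comments are annotations):

    /*
     * mm_init(): in atomic mode mm_counter_init() is a no-op and the
     * counters start out atomic; otherwise they are percpu from the start.
     */
    if (mm_counter_init(mm))
            goto fail_pcpu;

    /*
     * copy_mm(): the first CLONE_THREAD clone attempts the percpu upgrade;
     * on failure the mm keeps running in atomic mode instead of failing
     * the clone with -ENOMEM.
     */
    if (clone_flags & CLONE_THREAD)
            mm_counter_try_switch_to_pcpu(oldmm);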
 arch/arm64/configs/openeuler_defconfig |  1 +
 arch/x86/configs/openeuler_defconfig   |  1 +
 include/linux/mm.h                     | 46 +++++++++++++++++++++++---
 kernel/fork.c                          | 26 ++++++++++++---
 mm/Kconfig                             | 12 +++++++
 5 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 425616aa8422..daef0d8504d7 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -1278,6 +1278,7 @@ CONFIG_DAMON_LRU_SORT=y
 # end of Data Access Monitoring
 
 CONFIG_THP_CONTROL=y
+CONFIG_MM_COUNTER_ATOMIC=y
 # end of Memory Management options
 
 CONFIG_NET=y
diff --git a/arch/x86/configs/openeuler_defconfig b/arch/x86/configs/openeuler_defconfig
index e6c7a62045d9..43c0b2751024 100644
--- a/arch/x86/configs/openeuler_defconfig
+++ b/arch/x86/configs/openeuler_defconfig
@@ -1240,6 +1240,7 @@ CONFIG_DAMON_LRU_SORT=y
 # end of Data Access Monitoring
 
 # CONFIG_THP_CONTROL is not set
+CONFIG_MM_COUNTER_ATOMIC=y
 # end of Memory Management options
 
 CONFIG_NET=y
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 036822cb1b9d..2e5500e215c6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2643,11 +2643,35 @@ static inline bool get_user_page_fast_only(unsigned long addr,
 /*
  * per-process(per-mm_struct) statistics.
  */
+#ifdef CONFIG_MM_COUNTER_ATOMIC
+extern bool mm_counter_atomic;
+
+static inline bool mm_counter_is_atomic(void)
+{
+	return mm_counter_atomic;
+}
+
+static inline bool mm_counter_is_pcpu(struct percpu_counter *fbc)
+{
+	return !mm_counter_is_atomic() || percpu_counter_initialized(fbc);
+}
+#else
+static inline bool mm_counter_is_atomic(void)
+{
+	return false;
+}
+
+static inline bool mm_counter_is_pcpu(struct percpu_counter *fbc)
+{
+	return true;
+}
+#endif
+
 static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
 {
 	struct percpu_counter *fbc = &mm->rss_stat[member];
 
-	if (percpu_counter_initialized(fbc))
+	if (mm_counter_is_pcpu(fbc))
 		return percpu_counter_read_positive(fbc);
 
 	return percpu_counter_atomic_read(fbc);
@@ -2659,7 +2683,7 @@ static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
 	struct percpu_counter *fbc = &mm->rss_stat[member];
 
-	if (percpu_counter_initialized(fbc))
+	if (mm_counter_is_pcpu(fbc))
 		percpu_counter_add(fbc, value);
 	else
 		percpu_counter_atomic_add(fbc, value);
@@ -2681,7 +2705,7 @@ static inline s64 mm_counter_sum(struct mm_struct *mm, int member)
 {
 	struct percpu_counter *fbc = &mm->rss_stat[member];
 
-	if (percpu_counter_initialized(fbc))
+	if (mm_counter_is_pcpu(fbc))
 		return percpu_counter_sum(fbc);
 
 	return percpu_counter_atomic_read(fbc);
@@ -2691,17 +2715,29 @@ static inline s64 mm_counter_sum_positive(struct mm_struct *mm, int member)
 {
 	struct percpu_counter *fbc = &mm->rss_stat[member];
 
-	if (percpu_counter_initialized(fbc))
+	if (mm_counter_is_pcpu(fbc))
 		return percpu_counter_sum_positive(fbc);
 
 	return percpu_counter_atomic_read(fbc);
 }
 
-static inline int mm_counter_switch_to_pcpu(struct mm_struct *mm)
+static inline int mm_counter_try_switch_to_pcpu(struct mm_struct *mm)
 {
+	if (!mm_counter_is_atomic())
+		return 0;
+
 	return percpu_counter_switch_to_pcpu_many(mm->rss_stat, NR_MM_COUNTERS);
 }
 
+static inline int mm_counter_init(struct mm_struct *mm)
+{
+	if (mm_counter_is_atomic())
+		return 0;
+
+	return percpu_counter_init_many(mm->rss_stat, 0, GFP_KERNEL_ACCOUNT,
+					NR_MM_COUNTERS);
+}
+
 static inline void mm_counter_destroy(struct mm_struct *mm)
 {
 	percpu_counter_destroy_many(mm->rss_stat, NR_MM_COUNTERS);
diff --git a/kernel/fork.c b/kernel/fork.c
index 021fbacde947..9bffb74d26d5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1343,6 +1343,18 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
 #endif
 }
 
+#ifdef CONFIG_MM_COUNTER_ATOMIC
+bool mm_counter_atomic __ro_after_init = true;
+
+static int __init disable_rss_atomic_mode(char *str)
+{
+	mm_counter_atomic = false;
+
+	return 1;
+}
+__setup("rss_atomic_disable", disable_rss_atomic_mode);
+#endif /* CONFIG_MM_COUNTER_ATOMIC */
+
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	struct user_namespace *user_ns)
 {
@@ -1398,11 +1410,16 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	if (mm_alloc_cid(mm))
 		goto fail_cid;
 
+	if (mm_counter_init(mm))
+		goto fail_pcpu;
+
 	sp_init_mm(mm);
 	mm->user_ns = get_user_ns(user_ns);
 	lru_gen_init_mm(mm);
 	return mm;
 
+fail_pcpu:
+	mm_destroy_cid(mm);
 fail_cid:
 	destroy_context(mm);
 fail_nocontext:
@@ -1824,14 +1841,13 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
 		return 0;
 
 	/*
-	 * For single-thread processes, rss_stat is in atomic mode, which
+	 * For single-thread processes, the mm counters use atomic mode, which
 	 * reduces the memory consumption and performance regression caused by
-	 * using percpu. For multiple-thread processes, rss_stat is switched to
-	 * the percpu mode to reduce the error margin.
+	 * using percpu mode. For multi-threaded processes, try to switch to
+	 * percpu mode; on failure, keep using atomic mode.
 	 */
 	if (clone_flags & CLONE_THREAD)
-		if (mm_counter_switch_to_pcpu(oldmm))
-			return -ENOMEM;
+		mm_counter_try_switch_to_pcpu(oldmm);
 
 	if (clone_flags & CLONE_VM) {
 		mmget(oldmm);
diff --git a/mm/Kconfig b/mm/Kconfig
index 12438e8dff88..ded89ba92a6c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1537,4 +1537,16 @@ config THP_CONTROL
 	help
 	  This provides interface to control thp policy.
 
+config MM_COUNTER_ATOMIC
+	bool "use atomic mode for mm counters of single-threaded processes"
+	depends on PREEMPT_NONE && !PREEMPT_DYNAMIC
+	depends on SMP
+	default n
+	help
+	  For single-threaded processes, the mm counters use atomic
+	  operations, which avoids the memory consumption and performance
+	  regression caused by percpu counters. For multi-threaded
+	  processes, the counters are switched to percpu mode to reduce
+	  the error margin.
+
 endmenu
-- 
2.43.0