From: Chen Wandun <chenwandun@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
----------------------------------------------
Accumulating the per-CPU variables without a lock is inaccurate: the sum of pagecache_reliable_pages can occasionally go negative, which prevents the page cache from using reliable memory.

For more accurate statistics, replace the per-CPU variables with percpu_counter.
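For reference, the percpu_counter pattern the statistics move to looks roughly like the sketch below. It is illustrative only ("reliable_pages" and the helper names are made up, not part of this patch): updates go to per-CPU counters and are periodically folded into one shared count, and the *_positive() readers clamp the result at zero, so callers never see the transient negative totals that a lockless per-CPU sum can produce.

  /* Illustrative sketch only; "reliable_pages" is a made-up example. */
  #include <linux/percpu_counter.h>

  /* set up once with percpu_counter_init(&reliable_pages, 0, GFP_KERNEL) */
  static struct percpu_counter reliable_pages;

  static void reliable_pages_account(long nr)
  {
          /* per-CPU fast path; batched deltas are folded into a shared count */
          percpu_counter_add(&reliable_pages, nr);
  }

  static s64 reliable_pages_read(void)
  {
          /* cheap approximate read of the shared count, clamped at zero */
          return percpu_counter_read_positive(&reliable_pages);
  }

  static s64 reliable_pages_read_exact(void)
  {
          /* slower exact sum across all CPUs, also clamped at zero */
          return percpu_counter_sum_positive(&reliable_pages);
  }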
The new percpu_counters are accessed in alloc_pages(), and initializing them in late_initcall() is too late, so allocations that go through alloc_pages() have to check whether the two counters have been initialized, which introduces latency. To solve this, initialize the two percpu_counters in advance, at arch_initcall time.
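The init-ordering part amounts to the pattern sketched below (again with illustrative names only): do the percpu_counter_init() calls from an arch_initcall, which runs well before late_initcall, and keep a cheap percpu_counter_initialized() guard for anything that allocates even earlier.

  /* Illustrative sketch only; the names are not from this patch. */
  #include <linux/init.h>
  #include <linux/percpu_counter.h>

  static struct percpu_counter pagecache_pages;
  static struct percpu_counter anon_pages;

  static bool counters_ready(void)
  {
          /* true once both counters have allocated their per-CPU storage */
          return percpu_counter_initialized(&pagecache_pages) &&
                 percpu_counter_initialized(&anon_pages);
  }

  static int __init counters_init(void)
  {
          percpu_counter_init(&pagecache_pages, 0, GFP_KERNEL);
          percpu_counter_init(&anon_pages, 0, GFP_KERNEL);
          return 0;
  }
  /* arch_initcall runs before late_initcall, so the counters are usable by
   * the time most allocations reach alloc_pages().
   */
  arch_initcall(counters_init);

In the patch itself that guard is mem_reliable_counter_initialized(), checked in prepare_before_alloc() so that allocations from the init task made before the counters exist simply get ___GFP_RELIABILITY without consulting the limits.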
Signed-off-by: Chen Wandun <chenwandun@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 include/linux/mem_reliable.h | 17 +++++-----
 mm/mem_reliable.c            | 62 ++++++++++++++++++------------------
 mm/page_alloc.c              |  5 +++
 3 files changed, 45 insertions(+), 39 deletions(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index ba5d41edbc44..c4f954340ea7 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -21,8 +21,9 @@ extern bool shmem_reliable;
 extern struct percpu_counter reliable_shmem_used_nr_page;
 extern bool pagecache_use_reliable_mem;
 DECLARE_PER_CPU(long, nr_reliable_buddy_pages);
-DECLARE_PER_CPU(long, pagecache_reliable_pages);
-DECLARE_PER_CPU(long, anon_reliable_pages);
+
+extern struct percpu_counter pagecache_reliable_pages;
+extern struct percpu_counter anon_reliable_pages;
 extern unsigned long nr_reliable_reserve_pages __read_mostly;
 extern long shmem_reliable_nr_page __read_mostly;
 
@@ -43,6 +44,7 @@ extern void reliable_lru_add(enum lru_list lru, struct page *page,
                              int val);
 extern void page_cache_prepare_alloc(gfp_t *gfp);
 extern void reliable_lru_add_batch(int zid, enum lru_list lru, int val);
+extern bool mem_reliable_counter_initialized(void);
 
 static inline bool mem_reliable_is_enabled(void)
 {
@@ -81,13 +83,10 @@ static inline void reliable_page_counter(struct page *page,
 
 static inline bool reliable_mem_limit_check(unsigned long nr_page)
 {
-        int cpu;
-        long num = 0;
+        s64 num;
 
-        for_each_possible_cpu(cpu) {
-                num += per_cpu(pagecache_reliable_pages, cpu);
-                num += per_cpu(anon_reliable_pages, cpu);
-        }
+        num = percpu_counter_read_positive(&pagecache_reliable_pages);
+        num += percpu_counter_read_positive(&anon_reliable_pages);
 
         return num + nr_page <= task_reliable_limit / PAGE_SIZE;
 }
@@ -184,6 +183,8 @@ static inline void reliable_lru_add(enum lru_list lru,
 static inline void page_cache_prepare_alloc(gfp_t *gfp) {}
 static inline void reliable_lru_add_batch(int zid, enum lru_list lru,
                                           int val) {}
+
+static inline bool mem_reliable_counter_initialized(void) { return false; }
 #endif
 
 #endif
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index 62df78657ff9..033af716610f 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -34,12 +34,18 @@ unsigned long nr_reliable_reserve_pages = MEM_RELIABLE_RESERVE_MIN / PAGE_SIZE;
 long shmem_reliable_nr_page = LONG_MAX;
 
 bool pagecache_use_reliable_mem __read_mostly = true;
-DEFINE_PER_CPU(long, pagecache_reliable_pages);
-DEFINE_PER_CPU(long, anon_reliable_pages);
+struct percpu_counter pagecache_reliable_pages;
+struct percpu_counter anon_reliable_pages;
 
 static unsigned long zero;
 static unsigned long reliable_pagecache_max_bytes = ULONG_MAX;
 
+bool mem_reliable_counter_initialized(void)
+{
+        return likely(percpu_counter_initialized(&pagecache_reliable_pages)) &&
+                likely((percpu_counter_initialized(&anon_reliable_pages)));
+}
+
 bool mem_reliable_status(void)
 {
         return mem_reliable_is_enabled();
@@ -66,9 +72,9 @@ void reliable_lru_add_batch(int zid, enum lru_list lru, int val)
 
         if (zid < ZONE_MOVABLE && zid >= 0) {
                 if (is_file_lru(lru))
-                        this_cpu_add(pagecache_reliable_pages, val);
+                        percpu_counter_add(&pagecache_reliable_pages, val);
                 else if (is_anon_lru(lru))
-                        this_cpu_add(anon_reliable_pages, val);
+                        percpu_counter_add(&anon_reliable_pages, val);
         }
 }
 
@@ -78,14 +84,14 @@ void reliable_lru_add(enum lru_list lru, struct page *page, int val)
                 return;
 
         if (is_file_lru(lru))
-                this_cpu_add(pagecache_reliable_pages, val);
+                percpu_counter_add(&pagecache_reliable_pages, val);
         else if (is_anon_lru(lru))
-                this_cpu_add(anon_reliable_pages, val);
+                percpu_counter_add(&anon_reliable_pages, val);
         else if (lru == LRU_UNEVICTABLE) {
                 if (PageAnon(page))
-                        this_cpu_add(anon_reliable_pages, val);
+                        percpu_counter_add(&anon_reliable_pages, val);
                 else
-                        this_cpu_add(pagecache_reliable_pages, val);
+                        percpu_counter_add(&pagecache_reliable_pages, val);
         }
 }
 
@@ -188,21 +194,20 @@ static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
 void reliable_report_meminfo(struct seq_file *m)
 {
         bool pagecache_enabled = pagecache_reliable_is_enabled();
-        long nr_pagecache_pages = 0;
-        long nr_anon_pages = 0;
+        s64 nr_pagecache_pages = 0;
+        s64 nr_anon_pages = 0;
         long nr_buddy_pages = 0;
         int cpu;
 
         if (!mem_reliable_is_enabled())
                 return;
 
-        for_each_possible_cpu(cpu) {
+        for_each_possible_cpu(cpu)
                 nr_buddy_pages += per_cpu(nr_reliable_buddy_pages, cpu);
-                nr_anon_pages += per_cpu(anon_reliable_pages, cpu);
-                if (pagecache_enabled)
-                        nr_pagecache_pages +=
-                                per_cpu(pagecache_reliable_pages, cpu);
-        }
+
+        nr_anon_pages = percpu_counter_sum_positive(&anon_reliable_pages);
+        if (pagecache_enabled)
+                nr_pagecache_pages = percpu_counter_sum_positive(&pagecache_reliable_pages);
 
         show_val_kb(m, "ReliableTotal: ",
                     total_reliable_mem_sz() >> PAGE_SHIFT);
@@ -445,8 +450,7 @@ static struct ctl_table reliable_dir_table[] = {
 
 void page_cache_prepare_alloc(gfp_t *gfp)
 {
-        long nr_reliable = 0;
-        int cpu;
+        s64 nr_reliable = 0;
 
         if (!mem_reliable_is_enabled())
                 return;
@@ -454,11 +458,7 @@ void page_cache_prepare_alloc(gfp_t *gfp)
         if (!pagecache_reliable_is_enabled())
                 goto no_reliable;
 
-        for_each_possible_cpu(cpu)
-                nr_reliable += this_cpu_read(pagecache_reliable_pages);
-
-        if (nr_reliable < 0)
-                goto no_reliable;
+        nr_reliable = percpu_counter_read_positive(&pagecache_reliable_pages);
 
         if (nr_reliable > reliable_pagecache_max_bytes >> PAGE_SHIFT)
                 goto no_reliable;
@@ -480,9 +480,12 @@ static int __init reliable_sysctl_init(void)
                 return -1;
         }
 
+        percpu_counter_init(&pagecache_reliable_pages, 0, GFP_KERNEL);
+        percpu_counter_init(&anon_reliable_pages, 0, GFP_KERNEL);
+
         return 0;
 }
-late_initcall(reliable_sysctl_init);
+arch_initcall(reliable_sysctl_init);
 #else
 static void mem_reliable_ctrl_bit_disabled(int idx) {}
 #endif
@@ -515,21 +518,18 @@ static void mem_reliable_feature_disable(int idx)
 
 void reliable_show_mem_info(void)
 {
-        int cpu;
-        long num = 0;
+        s64 num = 0;
 
         if (!mem_reliable_is_enabled())
                 return;
 
-        for_each_possible_cpu(cpu) {
-                num += per_cpu(anon_reliable_pages, cpu);
-                num += per_cpu(pagecache_reliable_pages, cpu);
-        }
+        num += percpu_counter_sum_positive(&anon_reliable_pages);
+        num += percpu_counter_sum_positive(&pagecache_reliable_pages);
 
         pr_info("ReliableTotal: %lu kB", total_reliable_mem_sz() >> 10);
         pr_info("ReliableUsed: %lu kB", used_reliable_mem_sz() >> 10);
         pr_info("task_reliable_limit: %lu kB", task_reliable_limit >> 10);
-        pr_info("reliable_user_used: %ld kB", num << (PAGE_SHIFT - 10));
+        pr_info("reliable_user_used: %lld kB", num << (PAGE_SHIFT - 10));
 }
 
 void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fd4354d9ebad..dab62d1d3a6e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4674,6 +4674,11 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order)
          * allocation trigger task_reliable_limit
          */
         if (is_global_init(current)) {
+                if (!mem_reliable_counter_initialized()) {
+                        *gfp_mask |= ___GFP_RELIABILITY;
+                        return true;
+                }
+
                 if (reliable_mem_limit_check(1 << order) &&
                     mem_reliable_watermark_ok(1 << order))
                         *gfp_mask |= ___GFP_RELIABILITY;