From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
If the system has mirrored memory, memblock tries to allocate mirrored memory first and falls back to non-mirrored memory when that fails. However, with limited mirrored memory, or with some NUMA nodes that have no mirrored memory, lots of warning messages about memblock allocation will be printed.

Ratelimit the warning message to avoid a very long print during bootup.
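For illustration only (not part of the patch): pr_warn_ratelimited() is the stock ratelimited variant of pr_warn(), so a burst of mirrored-allocation failures prints only a bounded number of messages per ratelimit interval. The helper name below is hypothetical.

#include <linux/printk.h>
#include <linux/types.h>

/* Hypothetical helper used only to show the effect of the change: repeated
 * calls during boot are throttled by the printk ratelimit instead of
 * emitting one warning per failed mirrored allocation. */
static void report_mirror_alloc_failure(phys_addr_t size)
{
	pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n",
			    &size);
}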
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/memblock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/memblock.c b/mm/memblock.c index 13be610a381f4..80c6975ace6f2 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -291,7 +291,7 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start, NUMA_NO_NODE, flags);
if (!ret && (flags & MEMBLOCK_MIRROR)) { - pr_warn("Could not allocate %pap bytes of mirrored memory\n", + pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", &size); flags &= ~MEMBLOCK_MIRROR; goto again; @@ -1425,7 +1425,7 @@ static void * __init memblock_virt_alloc_internal(
if (flags & MEMBLOCK_MIRROR) { flags &= ~MEMBLOCK_MIRROR; - pr_warn("Could not allocate %pap bytes of mirrored memory\n", + pr_warn_ratelimited("Could not allocate %pap bytes of mirrored memory\n", &size); goto again; }
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
On a system with only limited mirrored memory, or with some NUMA nodes that have no mirrored memory, the per-node vmemmap page structs prefer to be allocated from the mirrored region, which leads to vmemmap_verify() reporting lots of warning messages.

Demote the "potential offnode page_structs" warning messages to debug level to avoid a very long print during bootup.
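For reference, the check being demoted looks like this inside vmemmap_verify() (condensed from the diff); with pr_debug() the message is compiled out unless DEBUG or dynamic debug is enabled, so it can still be turned back on when investigating NUMA placement.

/* inside vmemmap_verify(): only report at debug level when the page structs
 * for this pfn range ended up farther away than LOCAL_DISTANCE */
int actual_node = early_pfn_to_nid(pfn);

if (node_distance(actual_node, node) > LOCAL_DISTANCE)
	pr_debug("[%lx-%lx] potential offnode page_structs\n",
		 start, end - 1);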
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/sparse-vmemmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 8301293331a27..f312d80d4e61d 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -136,7 +136,7 @@ void __meminit vmemmap_verify(pte_t *pte, int node, int actual_node = early_pfn_to_nid(pfn);
if (node_distance(actual_node, node) > LOCAL_DISTANCE) - pr_warn("[%lx-%lx] potential offnode page_structs\n", + pr_debug("[%lx-%lx] potential offnode page_structs\n", start, end - 1); }
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Introduce a proc interface to control the memory reliable features. They can be controlled via /proc/sys/vm/reliable_debug.

Four bits are used to represent the following features:

- bit 0: memory reliable feature
- bit 1: reliable fallback feature
- bit 2: tmpfs use reliable memory feature
- bit 3: pagecache use reliable memory feature

Bits 1~3 are valid if and only if bit 0 is 1. If bit 0 is 0, all the other features are disabled regardless of their bits.

For example, you can enable the reliable fallback feature with:
$ echo 3 > /proc/sys/vm/reliable_debug
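As an illustration of the bit layout, the enum below mirrors mem_reliable_types added by this patch; the ctrl_bit_set() helper is hypothetical and only decodes a value written to the interface.

enum mem_reliable_types {
	MEM_RELIABLE_ALL,	/* bit 0: memory reliable feature */
	MEM_RELIABLE_FALLBACK,	/* bit 1: reliable fallback feature */
	MEM_RELIABLE_SHM,	/* bit 2: tmpfs use reliable memory feature */
	MEM_RELIABLE_PAGECACHE,	/* bit 3: pagecache use reliable memory feature */
	MEM_RELIABLE_MAX
};

/* e.g. "echo 3" sets bit 0 and bit 1: memory reliable and fallback enabled,
 * shmem and pagecache use of reliable memory disabled */
static inline bool ctrl_bit_set(unsigned long val, enum mem_reliable_types idx)
{
	return val & (1UL << idx);
}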
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 Documentation/sysctl/vm.txt | 20 ++++++
 mm/mem_reliable.c | 136 ++++++++++++++++++++++++++++++++++--
 2 files changed, 150 insertions(+), 6 deletions(-)
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt index 8d824892d00d6..b2a1e9ac4fb27 100644 --- a/Documentation/sysctl/vm.txt +++ b/Documentation/sysctl/vm.txt @@ -66,6 +66,7 @@ Currently, these files are in /proc/sys/vm: - vfs_cache_pressure - watermark_scale_factor - zone_reclaim_mode +- reliable_debug
==============================================================
@@ -923,4 +924,23 @@ Allowing regular swap effectively restricts allocations to the local node unless explicitly overridden by memory policies or cpuset configurations.
+============================================================== + +reliable_debug: + +reliable_debug is used to control memory reliable features. + +Four bits are used to represent the following features +- bit 0: memory reliable feature +- bit 1: reliable fallback feature +- bit 2: tmpfs use reliable memory feature +- bit 3: pagecache use reliable memory feature + +Bit 1~3 are valid if and only if the bit 0 is 1. If the first bit is 0, all +other features will be closed no matter other bits's status. + +For example, you can enable reliable fallback feature by + + $ echo 3 > /proc/sys/vm/reliable_debug + ============ End of Document ================================= diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index d4458d7401945..cf53be97be5df 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -10,6 +10,14 @@ #include <linux/mmzone.h> #include <linux/oom.h>
+enum mem_reliable_types { + MEM_RELIABLE_ALL, + MEM_RELIABLE_FALLBACK, + MEM_RELIABLE_SHM, + MEM_RELIABLE_PAGECACHE, + MEM_RELIABLE_MAX +}; + DEFINE_STATIC_KEY_FALSE(mem_reliable);
bool reliable_enabled; @@ -205,6 +213,93 @@ int reliable_limit_handler(struct ctl_table *table, int write, return ret; }
+static void mem_reliable_feature_set(int idx, bool enable); + +#define CTRL_BITS_SHIFT MEM_RELIABLE_MAX +#define CTRL_BITS_MASK ((1 << CTRL_BITS_SHIFT) - 1) + +static unsigned long mem_reliable_ctrl_bits = CTRL_BITS_MASK; + +static void mem_reliable_ctrl_bit_set(int idx, bool enable) +{ + if (enable) + set_bit(idx, &mem_reliable_ctrl_bits); + else + clear_bit(idx, &mem_reliable_ctrl_bits); +} + +static bool mem_reliable_ctrl_bit_is_enabled(int idx) +{ + return !!test_bit(idx, &mem_reliable_ctrl_bits); +} + +static void mem_reliable_parse_ctrl_bits(unsigned long ctrl_bits) +{ + bool status; + int i; + + if (!mem_reliable_is_enabled()) { + static_branch_enable(&mem_reliable); + mem_reliable_ctrl_bit_set(MEM_RELIABLE_ALL, 1); + pr_info("memory reliable feature enabled.\n"); + } + + for (i = MEM_RELIABLE_FALLBACK; i < MEM_RELIABLE_MAX; i++) { + status = !!test_bit(i, &ctrl_bits); + + if (mem_reliable_ctrl_bit_is_enabled(i) ^ status) { + mem_reliable_ctrl_bit_set(i, status); + mem_reliable_feature_set(i, status); + } + } +} + +static void mem_reliable_disable_all(void) +{ + if (!mem_reliable_is_enabled()) + return; + + mem_reliable_ctrl_bits = 0; + + reliable_allow_fallback = false; + shmem_reliable = false; + pagecache_use_reliable_mem = false; + static_branch_disable(&mem_reliable); + + pr_info("memory reliable feature disabled.\n"); +} + +int reliable_debug_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + unsigned long old_ctrl_bits, new_ctrl_bits; + static DEFINE_MUTEX(reliable_debug_mutex); + int ret; + + mutex_lock(&reliable_debug_mutex); + old_ctrl_bits = mem_reliable_ctrl_bits; + ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); + if (ret == 0 && write) { + if (mem_reliable_ctrl_bits > (1 << CTRL_BITS_SHIFT) - 1) { + mem_reliable_ctrl_bits = old_ctrl_bits; + mutex_unlock(&reliable_debug_mutex); + + return -EINVAL; + } + + new_ctrl_bits = mem_reliable_ctrl_bits; + mem_reliable_ctrl_bits = old_ctrl_bits; + if (!!test_bit(MEM_RELIABLE_ALL, &new_ctrl_bits)) + mem_reliable_parse_ctrl_bits(new_ctrl_bits); + else + mem_reliable_disable_all(); + } + + mutex_unlock(&reliable_debug_mutex); + + return ret; +} + static struct ctl_table reliable_ctl_table[] = { { .procname = "task_reliable_limit", @@ -213,6 +308,13 @@ static struct ctl_table reliable_ctl_table[] = { .mode = 0644, .proc_handler = reliable_limit_handler, }, + { + .procname = "reliable_debug", + .data = &mem_reliable_ctrl_bits, + .maxlen = sizeof(mem_reliable_ctrl_bits), + .mode = 0600, + .proc_handler = reliable_debug_handler, + }, {} };
@@ -241,6 +343,31 @@ static int __init reliable_sysctl_init(void) late_initcall(reliable_sysctl_init); #endif
+static void mem_reliable_feature_set(int idx, bool enable) +{ + char *str = NULL; + + switch (idx) { + case MEM_RELIABLE_FALLBACK: + reliable_allow_fallback = enable; + str = "fallback"; + break; + case MEM_RELIABLE_SHM: + shmem_reliable = enable; + str = "shmem"; + break; + case MEM_RELIABLE_PAGECACHE: + pagecache_use_reliable_mem = enable; + str = "pagecache"; + break; + default: + pr_err("unknown index: %d", idx); + return; + } + + pr_info("%s is %s\n", str, enable ? "enabled" : "disabled"); +} + void reliable_show_mem_info(void) { if (mem_reliable_is_enabled()) { @@ -284,16 +411,13 @@ static int __init setup_reliable_debug(char *str) for (; *str && *str != ','; str++) { switch (*str) { case 'F': - reliable_allow_fallback = false; - pr_info("fallback disabled."); + mem_reliable_feature_set(MEM_RELIABLE_FALLBACK, false); break; case 'S': - shmem_reliable = false; - pr_info("shmem reliable disabled."); + mem_reliable_feature_set(MEM_RELIABLE_SHM, false); break; case 'P': - pagecache_use_reliable_mem = false; - pr_info("disable page cache use reliable memory\n"); + mem_reliable_feature_set(MEM_RELIABLE_PAGECACHE, false); break; default: pr_err("reliable_debug option '%c' unknown. skipped\n",
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
With this patch, the kernel checks mirrored_kernelcore before efi_find_mirror() does its work, since efi_find_mirror() is what enables the basic mirrored feature.

Without this check, if the system has some mirrored memory but the mirrored feature is not specified in the boot parameters, the basic mirrored feature is still enabled, which leads to the following situations:

- memblock memory allocation prefers the mirrored region. This may have some unexpected influence on NUMA affinity.

- contiguous memory is split into several parts if part of it is mirrored memory, via memblock_mark_mirror().
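A condensed view of the resulting efi_find_mirror() (only the early return is new; the rest paraphrases the existing body):

void __init efi_find_mirror(void)
{
	/* new guard: without kernelcore=mirror, mirrored_kernelcore stays
	 * false and no region is marked as mirror in memblock */
	if (!mirrored_kernelcore)
		return;

	/* existing behaviour: walk the EFI memory map and mark ranges that
	 * carry EFI_MEMORY_MORE_RELIABLE via memblock_mark_mirror() */
}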
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 drivers/firmware/efi/efi.c | 3 +++
 include/linux/mm.h | 2 ++
 mm/page_alloc.c | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index a159ae07d66f8..184ad34b9c582 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -409,6 +409,9 @@ void __init efi_find_mirror(void) efi_memory_desc_t *md; u64 mirror_size = 0, total_size = 0;
+ if (!mirrored_kernelcore) + return; + for_each_efi_memory_desc(md) { unsigned long long start = md->phys_addr; unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; diff --git a/include/linux/mm.h b/include/linux/mm.h index b318e9c6cc43d..460b8d4cae877 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2270,6 +2270,8 @@ extern void free_bootmem_with_active_regions(int nid, unsigned long max_low_pfn); extern void sparse_memory_present_with_active_regions(int nid);
+extern bool mirrored_kernelcore; + #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 200e19fe216ae..14b4debd998e9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -319,7 +319,7 @@ static unsigned long required_kernelcore_percent __initdata; static unsigned long required_movablecore __initdata; static unsigned long required_movablecore_percent __initdata; static unsigned long zone_movable_pfn[MAX_NUMNODES] __meminitdata; -static bool mirrored_kernelcore __meminitdata; +bool mirrored_kernelcore __meminitdata;
/* movable_zone is the "real" zone pages in ZONE_MOVABLE are taken from */ int movable_zone;
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Memory reliable is based on the memory mirror feature (which has no independent config option), and that feature is only available when HAVE_MEMBLOCK_NODE_MAP is enabled, so add this dependency to MEMORY_RELIABLE.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/Kconfig | 1 +
 1 file changed, 1 insertion(+)
diff --git a/mm/Kconfig b/mm/Kconfig index 3a38eb4a6f020..e607d15761401 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -834,6 +834,7 @@ config PID_RESERVE
config MEMORY_RELIABLE bool "Support for memory reliable" + depends on HAVE_MEMBLOCK_NODE_MAP depends on ARM64 default n help
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Export mem_reliable_status() so that other code can check whether memory reliable is enabled.
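A hypothetical caller, for illustration only (the module, message, and decision are made up; the patch merely exports the helper):

#include <linux/module.h>
#include <linux/mem_reliable.h>

static int __init example_init(void)
{
	/* mem_reliable_status() returns true when memory reliable is enabled */
	if (mem_reliable_status())
		pr_info("example: memory reliable is enabled\n");

	return 0;
}
module_init(example_init);
MODULE_LICENSE("GPL");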
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 2 ++
 mm/mem_reliable.c | 6 ++++++
 2 files changed, 8 insertions(+)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 8dbebabc8b289..3fd4364d8ae4b 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -34,6 +34,7 @@ extern void reliable_report_usage(struct seq_file *m, struct mm_struct *mm); extern void reliable_show_mem_info(void); extern void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order, int preferred_nid, nodemask_t *nodemask); +extern bool mem_reliable_status(void);
static inline bool mem_reliable_is_enabled(void) { @@ -137,6 +138,7 @@ static inline void shmem_reliable_page_counter(struct page *page, int nr_page) static inline void page_cache_fallback_inc(gfp_t gfp, struct page *page) {}
static inline bool pagecache_reliable_is_enabled(void) { return false; } +static inline bool mem_reliable_status(void) { return false; } #endif
#endif diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index cf53be97be5df..2501da40abf4a 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -32,6 +32,12 @@ struct percpu_counter reliable_shmem_used_nr_page __read_mostly; bool pagecache_use_reliable_mem __read_mostly = true; atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0);
+bool mem_reliable_status(void) +{ + return mem_reliable_is_enabled(); +} +EXPORT_SYMBOL_GPL(mem_reliable_status); + void add_reliable_mem_size(long sz) { atomic_long_add(sz, &total_reliable_mem);
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Add a counter for mirrored pages in the buddy system.
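Sketch of how the counter is consumed (condensed from the reliable_report_meminfo() hunk in the diff; the helper name here is made up): pages freed into the buddy system add to the per-CPU counter, pages allocated from it subtract, and /proc/meminfo sums the per-CPU values.

static void report_reliable_buddy_mem(struct seq_file *m)
{
	long buddy_pages_sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		buddy_pages_sum += per_cpu(nr_reliable_buddy_pages, cpu);

	seq_printf(m, "ReliableBuddyMem: %8lu kB\n",
		   buddy_pages_sum << (PAGE_SHIFT - 10));
}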
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 Documentation/filesystems/proc.txt | 2 ++
 include/linux/mem_reliable.h | 8 ++++++++
 mm/mem_reliable.c | 10 ++++++++++
 mm/page_alloc.c | 4 ++++
 4 files changed, 24 insertions(+)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 78c76d24f9f7d..3c0e7e5f78a96 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -884,6 +884,7 @@ ShmemHugePages: 0 kB ShmemPmdMapped: 0 kB ReliableTotal: 7340032 kB ReliableUsed: 418824 kB +ReliableBuddyMem: 418824 kB ReliableShmem: 96 kB
@@ -977,6 +978,7 @@ VmallocChunk: largest contiguous block of vmalloc area which is free allocations. This stat excludes the cost of metadata. ReliableTotal: Total reliable memory size ReliableUsed: The used amount of reliable memory +ReliableBuddyMem: Total mirrored memory size in buddy system ReliableShmem: Reliable memory used by shmem
.............................................................................. diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 3fd4364d8ae4b..3e0d2a002aa1f 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -22,6 +22,7 @@ extern bool shmem_reliable; extern struct percpu_counter reliable_shmem_used_nr_page; extern bool pagecache_use_reliable_mem; extern atomic_long_t page_cache_fallback; +DECLARE_PER_CPU(long, nr_reliable_buddy_pages); extern void page_cache_fallback_inc(gfp_t gfp, struct page *page);
extern void add_reliable_mem_size(long sz); @@ -100,6 +101,12 @@ static inline void shmem_reliable_page_counter(struct page *page, int nr_page) percpu_counter_add(&reliable_shmem_used_nr_page, nr_page); }
+static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) +{ + if (page && page_reliable(page)) + this_cpu_add(nr_reliable_buddy_pages, nr_page); +} + #else #define reliable_enabled 0 #define reliable_allow_fb_enabled() false @@ -139,6 +146,7 @@ static inline void page_cache_fallback_inc(gfp_t gfp, struct page *page) {}
static inline bool pagecache_reliable_is_enabled(void) { return false; } static inline bool mem_reliable_status(void) { return false; } +static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) {} #endif
#endif diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 2501da40abf4a..5491a1bafe02c 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -28,6 +28,7 @@ unsigned long task_reliable_limit = ULONG_MAX; bool reliable_allow_fallback __read_mostly = true; bool shmem_reliable __read_mostly = true; struct percpu_counter reliable_shmem_used_nr_page __read_mostly; +DEFINE_PER_CPU(long, nr_reliable_buddy_pages);
bool pagecache_use_reliable_mem __read_mostly = true; atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0); @@ -168,11 +169,20 @@ static unsigned long used_reliable_mem_sz(void)
void reliable_report_meminfo(struct seq_file *m) { + long buddy_pages_sum = 0; + int cpu; + if (mem_reliable_is_enabled()) { + for_each_possible_cpu(cpu) + buddy_pages_sum += + per_cpu(nr_reliable_buddy_pages, cpu); + seq_printf(m, "ReliableTotal: %8lu kB\n", total_reliable_mem_sz() >> 10); seq_printf(m, "ReliableUsed: %8lu kB\n", used_reliable_mem_sz() >> 10); + seq_printf(m, "ReliableBuddyMem: %8lu kB\n", + buddy_pages_sum << (PAGE_SHIFT - 10));
if (shmem_reliable_is_enabled()) { unsigned long shmem = (unsigned long)percpu_counter_sum( diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 14b4debd998e9..e139605f1dbb4 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1340,6 +1340,7 @@ static void __free_pages_ok(struct page *page, unsigned int order, migratetype = get_pfnblock_migratetype(page, pfn); local_irq_save(flags); __count_vm_events(PGFREE, 1 << order); + mem_reliable_buddy_counter(page, 1 << order); free_one_page(page_zone(page), page, pfn, order, migratetype, fpi_flags); local_irq_restore(flags); @@ -2919,6 +2920,7 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
migratetype = get_pcppage_migratetype(page); __count_vm_event(PGFREE); + mem_reliable_buddy_counter(page, 1);
/* * We only track unmovable, reclaimable and movable on pcp lists. @@ -3156,6 +3158,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone, page = __rmqueue_pcplist(zone, migratetype, pcp, list); if (page) { __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + mem_reliable_buddy_counter(page, -(1 << order)); zone_statistics(preferred_zone, zone); } local_irq_restore(flags); @@ -3204,6 +3207,7 @@ struct page *rmqueue(struct zone *preferred_zone, get_pcppage_migratetype(page));
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); + mem_reliable_buddy_counter(page, -(1 << order)); zone_statistics(preferred_zone, zone); local_irq_restore(flags);
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
----------------------------------------------
Introduce a minimum watermark to ensure the kernel can use reserved mirrored memory. This watermark prevents special user tasks, page cache, and tmpfs from consuming too much mirrored memory, which would lead to a kernel OOM.

Memory allocations with ___GFP_RELIABILITY, or from special user tasks, are blocked once this watermark is reached. The allocation then falls back to the non-mirrored region or triggers OOM, depending on the status of memory reliable fallback.

This limit can be set or read via /proc/sys/vm/reliable_reserve_size. The default value is 256M, and it can be set to anything from 256M up to the total size of mirrored memory.
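Condensed from the prepare_before_alloc() change in the diff, the decision for a ___GFP_RELIABILITY allocation becomes:

/* inside prepare_before_alloc() */
if (gfp_ori & ___GFP_RELIABILITY) {
	if (mem_reliable_watermark_ok(1 << order)) {
		/* enough mirrored memory above the reserve: allocate reliable */
		*gfp_mask |= ___GFP_RELIABILITY;
		return true;
	}

	/* below the watermark: fall back to non-mirrored memory if allowed,
	 * otherwise fail the reliable allocation (which may end in OOM) */
	if (reliable_allow_fb_enabled())
		return true;

	return false;
}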
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 14 ++++++++++++++
 mm/mem_reliable.c | 33 +++++++++++++++++++++++++++++++++
 mm/page_alloc.c | 17 +++++++++++++----
 3 files changed, 60 insertions(+), 4 deletions(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 3e0d2a002aa1f..f8af64474f227 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -23,6 +23,7 @@ extern struct percpu_counter reliable_shmem_used_nr_page; extern bool pagecache_use_reliable_mem; extern atomic_long_t page_cache_fallback; DECLARE_PER_CPU(long, nr_reliable_buddy_pages); +extern unsigned long nr_reliable_reserve_pages __read_mostly; extern void page_cache_fallback_inc(gfp_t gfp, struct page *page);
extern void add_reliable_mem_size(long sz); @@ -107,6 +108,18 @@ static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) this_cpu_add(nr_reliable_buddy_pages, nr_page); }
+/* reserve mirrored memory for kernel usage */ +static inline bool mem_reliable_watermark_ok(int nr_page) +{ + long sum = 0; + int cpu; + + for_each_possible_cpu(cpu) + sum += per_cpu(nr_reliable_buddy_pages, cpu); + + return sum > nr_reliable_reserve_pages; +} + #else #define reliable_enabled 0 #define reliable_allow_fb_enabled() false @@ -147,6 +160,7 @@ static inline void page_cache_fallback_inc(gfp_t gfp, struct page *page) {} static inline bool pagecache_reliable_is_enabled(void) { return false; } static inline bool mem_reliable_status(void) { return false; } static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) {} +static inline bool mem_reliable_watermark_ok(int nr_page) { return true; } #endif
#endif diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 5491a1bafe02c..eb55b7f40a885 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -10,6 +10,8 @@ #include <linux/mmzone.h> #include <linux/oom.h>
+#define MEM_RELIABLE_RESERVE_MIN (256UL << 20) + enum mem_reliable_types { MEM_RELIABLE_ALL, MEM_RELIABLE_FALLBACK, @@ -29,6 +31,7 @@ bool reliable_allow_fallback __read_mostly = true; bool shmem_reliable __read_mostly = true; struct percpu_counter reliable_shmem_used_nr_page __read_mostly; DEFINE_PER_CPU(long, nr_reliable_buddy_pages); +unsigned long nr_reliable_reserve_pages = MEM_RELIABLE_RESERVE_MIN / PAGE_SIZE;
bool pagecache_use_reliable_mem __read_mostly = true; atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0); @@ -316,6 +319,29 @@ int reliable_debug_handler(struct ctl_table *table, int write, return ret; }
+static unsigned long sysctl_reliable_reserve_size = MEM_RELIABLE_RESERVE_MIN; + +int reliable_reserve_size_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + unsigned long *data_ptr = (unsigned long *)(table->data); + unsigned long old = *data_ptr; + int ret; + + ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); + if (ret == 0 && write) { + if (*data_ptr > total_reliable_mem_sz() || + *data_ptr < MEM_RELIABLE_RESERVE_MIN) { + *data_ptr = old; + return -EINVAL; + } + + nr_reliable_reserve_pages = *data_ptr / PAGE_SIZE; + } + + return ret; +} + static struct ctl_table reliable_ctl_table[] = { { .procname = "task_reliable_limit", @@ -331,6 +357,13 @@ static struct ctl_table reliable_ctl_table[] = { .mode = 0600, .proc_handler = reliable_debug_handler, }, + { + .procname = "reliable_reserve_size", + .data = &sysctl_reliable_reserve_size, + .maxlen = sizeof(sysctl_reliable_reserve_size), + .mode = 0644, + .proc_handler = reliable_reserve_size_handler, + }, {} };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e139605f1dbb4..571bfdecd5a0c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4652,8 +4652,15 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order) return true;
if (gfp_ori & ___GFP_RELIABILITY) { - *gfp_mask |= ___GFP_RELIABILITY; - return true; + if (mem_reliable_watermark_ok(1 << order)) { + *gfp_mask |= ___GFP_RELIABILITY; + return true; + } + + if (reliable_allow_fb_enabled()) + return true; + + return false; }
/* @@ -4661,7 +4668,8 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order) * allocation trigger task_reliable_limit */ if (is_global_init(current)) { - if (reliable_mem_limit_check(1 << order)) + if (reliable_mem_limit_check(1 << order) && + mem_reliable_watermark_ok(1 << order)) *gfp_mask |= ___GFP_RELIABILITY; return true; } @@ -4675,7 +4683,8 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order) */ if ((current->flags & PF_RELIABLE) && (gfp_ori & __GFP_HIGHMEM) && (gfp_ori & __GFP_MOVABLE)) { - if (reliable_mem_limit_check(1 << order)) { + if (reliable_mem_limit_check(1 << order) && + mem_reliable_watermark_ok(1 << order)) { *gfp_mask |= ___GFP_RELIABILITY; return true; }
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
------------------------------------------
This limit restricts the amount of mirrored memory used by shmem. Once it is exceeded, the allocation either returns no memory (if reliable fallback is off) or falls back to the non-mirrored region (if reliable fallback is on).

This limit can be set or read via /proc/sys/vm/shmem_reliable_bytes_limit. The default value is LONG_MAX, and it can be set to anything from 0 up to the total size of mirrored memory.
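The resulting shmem allocation policy, condensed from the shmem_prepare_alloc() change in the diff:

static inline bool shmem_prepare_alloc(gfp_t *gfp_mask)
{
	if (!shmem_reliable_is_enabled())
		return true;

	/* below the limit: tag the allocation so it is served from mirrored memory */
	if (mem_reliable_shmem_limit_check()) {
		*gfp_mask |= ___GFP_RELIABILITY;
		return true;
	}

	/* over the limit: use non-mirrored memory if fallback is on,
	 * otherwise the caller fails the allocation with -ENOMEM */
	return reliable_allow_fb_enabled();
}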
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 8 ++++++++
 mm/mem_reliable.c | 34 ++++++++++++++++++++++++++++++++++
 mm/shmem.c | 18 ++++++++++++++----
 3 files changed, 56 insertions(+), 4 deletions(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index f8af64474f227..a9d8e6780ec1b 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -24,6 +24,7 @@ extern bool pagecache_use_reliable_mem; extern atomic_long_t page_cache_fallback; DECLARE_PER_CPU(long, nr_reliable_buddy_pages); extern unsigned long nr_reliable_reserve_pages __read_mostly; +extern long shmem_reliable_nr_page __read_mostly; extern void page_cache_fallback_inc(gfp_t gfp, struct page *page);
extern void add_reliable_mem_size(long sz); @@ -120,6 +121,12 @@ static inline bool mem_reliable_watermark_ok(int nr_page) return sum > nr_reliable_reserve_pages; }
+static inline bool mem_reliable_shmem_limit_check(void) +{ + return percpu_counter_read_positive(&reliable_shmem_used_nr_page) < + shmem_reliable_nr_page; +} + #else #define reliable_enabled 0 #define reliable_allow_fb_enabled() false @@ -161,6 +168,7 @@ static inline bool pagecache_reliable_is_enabled(void) { return false; } static inline bool mem_reliable_status(void) { return false; } static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) {} static inline bool mem_reliable_watermark_ok(int nr_page) { return true; } +static inline bool mem_reliable_shmem_limit_check(void) { return true; } #endif
#endif diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index eb55b7f40a885..03fb350858ede 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -32,6 +32,7 @@ bool shmem_reliable __read_mostly = true; struct percpu_counter reliable_shmem_used_nr_page __read_mostly; DEFINE_PER_CPU(long, nr_reliable_buddy_pages); unsigned long nr_reliable_reserve_pages = MEM_RELIABLE_RESERVE_MIN / PAGE_SIZE; +long shmem_reliable_nr_page = LONG_MAX;
bool pagecache_use_reliable_mem __read_mostly = true; atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0); @@ -342,6 +343,30 @@ int reliable_reserve_size_handler(struct ctl_table *table, int write, return ret; }
+#ifdef CONFIG_SHMEM +static unsigned long sysctl_shmem_reliable_bytes_limit = ULONG_MAX; + +int reliable_shmem_bytes_limit_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + unsigned long *data_ptr = (unsigned long *)(table->data); + unsigned long old = *data_ptr; + int ret; + + ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); + if (ret == 0 && write) { + if (*data_ptr > total_reliable_mem_sz()) { + *data_ptr = old; + return -EINVAL; + } + + shmem_reliable_nr_page = *data_ptr >> PAGE_SHIFT; + } + + return ret; +} +#endif + static struct ctl_table reliable_ctl_table[] = { { .procname = "task_reliable_limit", @@ -364,6 +389,15 @@ static struct ctl_table reliable_ctl_table[] = { .mode = 0644, .proc_handler = reliable_reserve_size_handler, }, +#ifdef CONFIG_SHMEM + { + .procname = "shmem_reliable_bytes_limit", + .data = &sysctl_shmem_reliable_bytes_limit, + .maxlen = sizeof(sysctl_shmem_reliable_bytes_limit), + .mode = 0644, + .proc_handler = reliable_shmem_bytes_limit_handler, + }, +#endif {} };
diff --git a/mm/shmem.c b/mm/shmem.c index aabf0dc626da5..bc62dc7327812 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1597,12 +1597,20 @@ static struct page *shmem_alloc_page(gfp_t gfp, return page; }
-static inline void shmem_prepare_alloc(gfp_t *gfp_mask) +static inline bool shmem_prepare_alloc(gfp_t *gfp_mask) { if (!shmem_reliable_is_enabled()) - return; + return true; + + if (mem_reliable_shmem_limit_check()) { + *gfp_mask |= ___GFP_RELIABILITY; + return true; + } + + if (reliable_allow_fb_enabled()) + return true;
- *gfp_mask |= ___GFP_RELIABILITY; + return false; }
static struct page *shmem_alloc_and_acct_page(gfp_t gfp, @@ -1621,7 +1629,8 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, if (!shmem_inode_acct_block(inode, nr)) goto failed;
- shmem_prepare_alloc(&gfp); + if (!shmem_prepare_alloc(&gfp)) + goto no_mem;
if (huge) page = shmem_alloc_hugepage(gfp, info, index, node_id); @@ -1633,6 +1642,7 @@ static struct page *shmem_alloc_and_acct_page(gfp_t gfp, return page; }
+no_mem: err = -ENOMEM; shmem_inode_unacct_blocks(inode, nr); failed:
From: Chen Wandun chenwandun@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Add statistics for the usage of the reliable page cache. The item "ReliableFileCache" in /proc/meminfo shows the usage of the reliable page cache.
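Condensed from this patch: every insertion or removal on a file LRU list adjusts a per-CPU counter when the page comes from a reliable (non-movable) zone, and /proc/meminfo sums that counter for ReliableFileCache.

void page_cache_reliable_lru_add(enum lru_list lru, struct page *page, int val)
{
	if (!page || !page_reliable(page))
		return;

	if (lru != LRU_ACTIVE_FILE && lru != LRU_INACTIVE_FILE)
		return;

	this_cpu_add(pagecache_reliable_pages, val);
}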
Signed-off-by: Chen Wandun chenwandun@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 6 ++++++
 include/linux/mm_inline.h | 5 +++++
 mm/mem_reliable.c | 29 ++++++++++++++++++++++++++++-
 mm/vmscan.c | 1 +
 4 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index a9d8e6780ec1b..2d017de08a679 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -39,6 +39,9 @@ extern void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order, int preferred_nid, nodemask_t *nodemask); extern bool mem_reliable_status(void);
+extern void page_cache_reliable_lru_add(enum lru_list lru, struct page *page, + int val); + static inline bool mem_reliable_is_enabled(void) { return static_branch_likely(&mem_reliable); @@ -169,6 +172,9 @@ static inline bool mem_reliable_status(void) { return false; } static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) {} static inline bool mem_reliable_watermark_ok(int nr_page) { return true; } static inline bool mem_reliable_shmem_limit_check(void) { return true; } +static inline void page_cache_reliable_lru_add(enum lru_list lru, + struct page *page, + int val) {} #endif
#endif diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index b0e3b4473ff2e..704a93c8f4506 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -4,6 +4,7 @@
#include <linux/huge_mm.h> #include <linux/swap.h> +#include <linux/mem_reliable.h>
/** * page_is_file_cache - should the page be on a file LRU or anon LRU? @@ -49,6 +50,8 @@ static __always_inline void add_page_to_lru_list(struct page *page, { update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); list_add(&page->lru, &lruvec->lists[lru]); + page_cache_reliable_lru_add(lru, page, hpage_nr_pages(page)); + }
static __always_inline void add_page_to_lru_list_tail(struct page *page, @@ -56,6 +59,7 @@ static __always_inline void add_page_to_lru_list_tail(struct page *page, { update_lru_size(lruvec, lru, page_zonenum(page), hpage_nr_pages(page)); list_add_tail(&page->lru, &lruvec->lists[lru]); + page_cache_reliable_lru_add(lru, page, hpage_nr_pages(page)); }
static __always_inline void del_page_from_lru_list(struct page *page, @@ -63,6 +67,7 @@ static __always_inline void del_page_from_lru_list(struct page *page, { list_del(&page->lru); update_lru_size(lruvec, lru, page_zonenum(page), -hpage_nr_pages(page)); + page_cache_reliable_lru_add(lru, page, -hpage_nr_pages(page)); }
/** diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 03fb350858ede..17776f387031d 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -36,7 +36,7 @@ long shmem_reliable_nr_page = LONG_MAX;
bool pagecache_use_reliable_mem __read_mostly = true; atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0); - +DEFINE_PER_CPU(long, pagecache_reliable_pages); bool mem_reliable_status(void) { return mem_reliable_is_enabled(); @@ -66,6 +66,25 @@ static bool is_fallback_page(gfp_t gfp, struct page *page) return ret; }
+static bool reliable_and_lru_check(enum lru_list lru, struct page *page) +{ + if (!page || !page_reliable(page)) + return false; + + if (lru != LRU_ACTIVE_FILE && lru != LRU_INACTIVE_FILE) + return false; + + return true; +} + +void page_cache_reliable_lru_add(enum lru_list lru, struct page *page, int val) +{ + if (!reliable_and_lru_check(lru, page)) + return; + + this_cpu_add(pagecache_reliable_pages, val); +} + void page_cache_fallback_inc(gfp_t gfp, struct page *page) { long num; @@ -196,6 +215,7 @@ void reliable_report_meminfo(struct seq_file *m)
if (pagecache_reliable_is_enabled()) { unsigned long num = 0; + int cpu;
num += global_node_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); @@ -203,6 +223,13 @@ void reliable_report_meminfo(struct seq_file *m) LRU_INACTIVE_FILE); seq_printf(m, "FileCache: %8lu kB\n", num << (PAGE_SHIFT - 10)); + + num = 0; + for_each_possible_cpu(cpu) + num += per_cpu(pagecache_reliable_pages, cpu); + + seq_printf(m, "ReliableFileCache:%8lu kB\n", + num << (PAGE_SHIFT - 10)); } } } diff --git a/mm/vmscan.c b/mm/vmscan.c index efe572fd090c0..15e5864c51050 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -4596,6 +4596,7 @@ static int add_page_for_reclaim_swapcache(struct page *page, case 0: list_move(&head->lru, pagelist); update_lru_size(lruvec, lru, page_zonenum(head), -hpage_nr_pages(head)); + page_cache_reliable_lru_add(lru, head, -hpage_nr_pages(head)); break; case -EBUSY: list_move(&head->lru, src);
From: Chen Wandun chenwandun@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Add the interface /proc/sys/vm/reliable_pagecache_max_bytes to set the maximum size of the reliable page cache; the maximum size cannot exceed the total reliable RAM.

The whole memory reliable feature depends on kernelcore=mirror, which in turn depends on NUMA, so remove the redundant code from the UMA (!CONFIG_NUMA) version of __page_cache_alloc().
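Condensed from the new page_cache_prepare_alloc() in the diff (the per-CPU walk is written with per_cpu() here for clarity):

void page_cache_prepare_alloc(gfp_t *gfp)
{
	long nr_reliable = 0;
	int cpu;

	if (!mem_reliable_is_enabled())
		return;

	for_each_possible_cpu(cpu)
		nr_reliable += per_cpu(pagecache_reliable_pages, cpu);

	/* over the configured cap (or counter underflow): strip the flag so
	 * the page cache falls back to non-mirrored memory */
	if (nr_reliable < 0 ||
	    nr_reliable > reliable_pagecache_max_bytes >> PAGE_SHIFT) {
		*gfp &= ~___GFP_RELIABILITY;
		return;
	}

	*gfp |= ___GFP_RELIABILITY;
}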
Signed-off-by: Chen Wandun chenwandun@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 2 ++
 include/linux/pagemap.h | 10 +------
 mm/filemap.c | 5 +---
 mm/mem_reliable.c | 53 ++++++++++++++++++++++++++++++++++++
 4 files changed, 57 insertions(+), 13 deletions(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 2d017de08a679..2d3577ce71134 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -41,6 +41,7 @@ extern bool mem_reliable_status(void);
extern void page_cache_reliable_lru_add(enum lru_list lru, struct page *page, int val); +extern void page_cache_prepare_alloc(gfp_t *gfp);
static inline bool mem_reliable_is_enabled(void) { @@ -175,6 +176,7 @@ static inline bool mem_reliable_shmem_limit_check(void) { return true; } static inline void page_cache_reliable_lru_add(enum lru_list lru, struct page *page, int val) {} +static inline void page_cache_prepare_alloc(gfp_t *gfp) {} #endif
#endif diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a7d83fed0601d..085aed892ce58 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -249,15 +249,7 @@ extern struct page *__page_cache_alloc(gfp_t gfp); #else static inline struct page *__page_cache_alloc(gfp_t gfp) { - struct page *page; - - if (pagecache_reliable_is_enabled()) - gfp |= ___GFP_RELIABILITY; - - page = alloc_pages(gfp, 0); - page_cache_fallback_inc(gfp, page); - - return page; + return alloc_pages(gfp, 0); } #endif
diff --git a/mm/filemap.c b/mm/filemap.c index 2827e2b670e02..2ac6ddf630d80 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1040,10 +1040,7 @@ struct page *__page_cache_alloc(gfp_t gfp) int n; struct page *page;
- if (pagecache_reliable_is_enabled()) - gfp |= ___GFP_RELIABILITY; - else - WARN_ON_ONCE(gfp & ___GFP_RELIABILITY); + page_cache_prepare_alloc(&gfp);
if (cpuset_do_page_mem_spread()) { unsigned int cpuset_mems_cookie; diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 17776f387031d..dcdd937148b60 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -37,6 +37,10 @@ long shmem_reliable_nr_page = LONG_MAX; bool pagecache_use_reliable_mem __read_mostly = true; atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0); DEFINE_PER_CPU(long, pagecache_reliable_pages); + +static unsigned long zero; +static unsigned long reliable_pagecache_max_bytes = ULONG_MAX; + bool mem_reliable_status(void) { return mem_reliable_is_enabled(); @@ -394,6 +398,23 @@ int reliable_shmem_bytes_limit_handler(struct ctl_table *table, int write, } #endif
+int reliable_pagecache_max_bytes_write(struct ctl_table *table, int write, + void __user *buffer, size_t *length, loff_t *ppos) +{ + unsigned long old_value = reliable_pagecache_max_bytes; + int ret; + + ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); + if (!ret && write) { + if (reliable_pagecache_max_bytes > total_reliable_mem_sz()) { + reliable_pagecache_max_bytes = old_value; + return -EINVAL; + } + } + + return ret; +} + static struct ctl_table reliable_ctl_table[] = { { .procname = "task_reliable_limit", @@ -425,6 +446,14 @@ static struct ctl_table reliable_ctl_table[] = { .proc_handler = reliable_shmem_bytes_limit_handler, }, #endif + { + .procname = "reliable_pagecache_max_bytes", + .data = &reliable_pagecache_max_bytes, + .maxlen = sizeof(reliable_pagecache_max_bytes), + .mode = 0644, + .proc_handler = reliable_pagecache_max_bytes_write, + .extra1 = &zero, + }, {} };
@@ -438,6 +467,30 @@ static struct ctl_table reliable_dir_table[] = { {} };
+void page_cache_prepare_alloc(gfp_t *gfp) +{ + long nr_reliable = 0; + int cpu; + + if (!mem_reliable_is_enabled()) + return; + + for_each_possible_cpu(cpu) + nr_reliable += this_cpu_read(pagecache_reliable_pages); + + if (nr_reliable < 0) + goto no_reliable; + + if (nr_reliable > reliable_pagecache_max_bytes >> PAGE_SHIFT) + goto no_reliable; + + *gfp |= ___GFP_RELIABILITY; + return; + +no_reliable: + *gfp &= ~___GFP_RELIABILITY; +} + static int __init reliable_sysctl_init(void) { if (!mem_reliable_is_enabled())
From: Chen Wandun chenwandun@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
If reliable_debug=P, page cache should not use reliable memory.
Fixes: f5c691902452 ("mm: add cmdline for the reliable memory usage of page cache")
Signed-off-by: Chen Wandun chenwandun@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/mem_reliable.c | 3 +++
 1 file changed, 3 insertions(+)
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index dcdd937148b60..f27c79bbb0f4c 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -475,6 +475,9 @@ void page_cache_prepare_alloc(gfp_t *gfp) if (!mem_reliable_is_enabled()) return;
+ if (!pagecache_reliable_is_enabled()) + goto no_reliable; + for_each_possible_cpu(cpu) nr_reliable += this_cpu_read(pagecache_reliable_pages);
From: Chen Wandun chenwandun@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
This reverts commit 925368d878b5c446d9f871796ca27bc0d29102fb. The page cache fallback statistic will be replaced by another method.
Signed-off-by: Chen Wandun chenwandun@huawei.com
Reviewed-by: Xie XiuQi xiexiuqi@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 3 ---
 mm/filemap.c | 6 +-----
 mm/mem_reliable.c | 29 -----------------------------
 3 files changed, 1 insertion(+), 37 deletions(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 2d3577ce71134..4c15db9c19443 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -21,11 +21,9 @@ extern bool reliable_allow_fallback; extern bool shmem_reliable; extern struct percpu_counter reliable_shmem_used_nr_page; extern bool pagecache_use_reliable_mem; -extern atomic_long_t page_cache_fallback; DECLARE_PER_CPU(long, nr_reliable_buddy_pages); extern unsigned long nr_reliable_reserve_pages __read_mostly; extern long shmem_reliable_nr_page __read_mostly; -extern void page_cache_fallback_inc(gfp_t gfp, struct page *page);
extern void add_reliable_mem_size(long sz); extern void mem_reliable_init(bool has_unmirrored_mem, @@ -166,7 +164,6 @@ static inline bool shmem_reliable_is_enabled(void) { return false; } static inline void shmem_reliable_page_counter(struct page *page, int nr_page) { } -static inline void page_cache_fallback_inc(gfp_t gfp, struct page *page) {}
static inline bool pagecache_reliable_is_enabled(void) { return false; } static inline bool mem_reliable_status(void) { return false; } diff --git a/mm/filemap.c b/mm/filemap.c index 2ac6ddf630d80..a89d70097e686 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1050,13 +1050,9 @@ struct page *__page_cache_alloc(gfp_t gfp) page = __alloc_pages_node(n, gfp, 0); } while (!page && read_mems_allowed_retry(cpuset_mems_cookie));
- page_cache_fallback_inc(gfp, page); return page; } - page = alloc_pages(gfp, 0); - page_cache_fallback_inc(gfp, page); - - return page; + return alloc_pages(gfp, 0); } EXPORT_SYMBOL(__page_cache_alloc); #endif diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index f27c79bbb0f4c..dcbe3f58f6927 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -35,7 +35,6 @@ unsigned long nr_reliable_reserve_pages = MEM_RELIABLE_RESERVE_MIN / PAGE_SIZE; long shmem_reliable_nr_page = LONG_MAX;
bool pagecache_use_reliable_mem __read_mostly = true; -atomic_long_t page_cache_fallback = ATOMIC_LONG_INIT(0); DEFINE_PER_CPU(long, pagecache_reliable_pages);
static unsigned long zero; @@ -57,19 +56,6 @@ bool page_reliable(struct page *page) return mem_reliable_is_enabled() && page_zonenum(page) < ZONE_MOVABLE; }
-static bool is_fallback_page(gfp_t gfp, struct page *page) -{ - bool ret = false; - - if (!page) - return ret; - - if ((gfp & ___GFP_RELIABILITY) && !page_reliable(page)) - ret = true; - - return ret; -} - static bool reliable_and_lru_check(enum lru_list lru, struct page *page) { if (!page || !page_reliable(page)) @@ -89,21 +75,6 @@ void page_cache_reliable_lru_add(enum lru_list lru, struct page *page, int val) this_cpu_add(pagecache_reliable_pages, val); }
-void page_cache_fallback_inc(gfp_t gfp, struct page *page) -{ - long num; - - if (!pagecache_reliable_is_enabled()) - return; - - if (!is_fallback_page(gfp, page)) - return; - - num = atomic_long_inc_return(&page_cache_fallback); - if (num < 0) - atomic_long_set(&page_cache_fallback, 0); -} - static int reliable_mem_notifier(struct notifier_block *nb, unsigned long action, void *arg) {
From: Peng Wu wupeng58@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
----------------------------------------------------
The reliable memory usage shown in /proc/pid/status is incorrect because the underlying variable is a page count. Shift it by (PAGE_SHIFT - 10) to get the correct size in kB.
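A quick worked example of the conversion (a fixed factor would only be correct for one page size):

/* inside reliable_report_usage(): pages -> kB.  With 4 KiB pages PAGE_SHIFT
 * is 12, so the shift is 2 (multiply by 4); with 64 KiB pages the shift is 6
 * (multiply by 64). */
seq_printf(m, "Reliable:\t%8lu kB\n",
	   atomic_long_read(&mm->reliable_nr_page) << (PAGE_SHIFT - 10));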
Fixes: 094eaabb3fe8 ("proc: Count reliable memory usage of reliable tasks")
Signed-off-by: Peng Wu wupeng58@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/mem_reliable.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index dcbe3f58f6927..01b0af708bc63 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -211,10 +211,11 @@ void reliable_report_meminfo(struct seq_file *m)
void reliable_report_usage(struct seq_file *m, struct mm_struct *mm) { - if (mem_reliable_is_enabled()) { - seq_printf(m, "Reliable:\t%8lu kB\n", - atomic_long_read(&mm->reliable_nr_page)); - } + if (!mem_reliable_is_enabled()) + return; + + seq_printf(m, "Reliable:\t%8lu kB\n", + atomic_long_read(&mm->reliable_nr_page) << (PAGE_SHIFT - 10)); }
#ifdef CONFIG_SYSCTL
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Previously, reliable_user_used was wrong when PAGE_SIZE is not 4K because the value was calculated as 4 * reliable_user_used_nr_page.

Now reliable_user_used is calculated as reliable_user_used_nr_page << (PAGE_SHIFT - 10).
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/mem_reliable.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 01b0af708bc63..f6988f4df533d 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -514,7 +514,8 @@ void reliable_show_mem_info(void) pr_info("task_reliable_limit: %lu kB", task_reliable_limit >> 10); pr_info("reliable_user_used: %ld kB", - atomic_long_read(&reliable_user_used_nr_page) * 4); + atomic_long_read(&reliable_user_used_nr_page) << + (PAGE_SHIFT - 10)); } }
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Now reliable_report_meminfo() returns early if memory reliable is not enabled.

The per-CPU counter nr_pagecache_pages is summed together with nr_buddy_pages in the same loop.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 mm/mem_reliable.c | 66 +++++++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 33 deletions(-)
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index f6988f4df533d..5c56b2c6f4b52 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -165,47 +165,47 @@ static unsigned long used_reliable_mem_sz(void) return total_reliable_mem_sz() - nr_page * PAGE_SIZE; }
+static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) +{ + seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8); + seq_write(m, " kB\n", 4); +} + void reliable_report_meminfo(struct seq_file *m) { - long buddy_pages_sum = 0; + bool pagecache_enabled = pagecache_reliable_is_enabled(); + unsigned long nr_pagecache_pages = 0; + long nr_buddy_pages = 0; int cpu;
- if (mem_reliable_is_enabled()) { - for_each_possible_cpu(cpu) - buddy_pages_sum += - per_cpu(nr_reliable_buddy_pages, cpu); - - seq_printf(m, "ReliableTotal: %8lu kB\n", - total_reliable_mem_sz() >> 10); - seq_printf(m, "ReliableUsed: %8lu kB\n", - used_reliable_mem_sz() >> 10); - seq_printf(m, "ReliableBuddyMem: %8lu kB\n", - buddy_pages_sum << (PAGE_SHIFT - 10)); - - if (shmem_reliable_is_enabled()) { - unsigned long shmem = (unsigned long)percpu_counter_sum( - &reliable_shmem_used_nr_page) << (PAGE_SHIFT - 10); - seq_printf(m, "ReliableShmem: %8lu kB\n", shmem); - } + if (!mem_reliable_is_enabled()) + return;
- if (pagecache_reliable_is_enabled()) { - unsigned long num = 0; - int cpu; + for_each_possible_cpu(cpu) { + nr_buddy_pages += per_cpu(nr_reliable_buddy_pages, cpu); + if (pagecache_enabled) + nr_pagecache_pages += + per_cpu(pagecache_reliable_pages, cpu); + }
- num += global_node_page_state(NR_LRU_BASE + - LRU_ACTIVE_FILE); - num += global_node_page_state(NR_LRU_BASE + - LRU_INACTIVE_FILE); - seq_printf(m, "FileCache: %8lu kB\n", - num << (PAGE_SHIFT - 10)); + show_val_kb(m, "ReliableTotal: ", + total_reliable_mem_sz() >> PAGE_SHIFT); + show_val_kb(m, "ReliableUsed: ", + used_reliable_mem_sz() >> PAGE_SHIFT); + show_val_kb(m, "ReliableBuddyMem: ", nr_buddy_pages);
- num = 0; - for_each_possible_cpu(cpu) - num += per_cpu(pagecache_reliable_pages, cpu); + if (shmem_reliable_is_enabled()) { + show_val_kb(m, "ReliableShmem: ", + percpu_counter_sum(&reliable_shmem_used_nr_page)); + }
- seq_printf(m, "ReliableFileCache:%8lu kB\n", - num << (PAGE_SHIFT - 10)); - } + if (pagecache_enabled) { + unsigned long num = 0; + + num += global_node_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); + num += global_node_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); + show_val_kb(m, "FileCache: ", num); + show_val_kb(m, "ReliableFileCache:", nr_pagecache_pages); } }
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Add ReliableTaskUsed to /proc/meminfo if memory reliable is enabled.

- ReliableTaskUsed: reliable memory used by special user tasks and the global init process

Rename the variable reliable_user_used_nr_page to the more accurate reliable_task_used_nr_page.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 Documentation/filesystems/proc.txt | 3 +++
 include/linux/mem_reliable.h | 6 +++---
 mm/mem_reliable.c | 6 ++++--
 3 files changed, 10 insertions(+), 5 deletions(-)
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index 3c0e7e5f78a96..7329e8a4281f7 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -884,6 +884,7 @@ ShmemHugePages: 0 kB ShmemPmdMapped: 0 kB ReliableTotal: 7340032 kB ReliableUsed: 418824 kB +ReliableTaskUsed: 418824 kB ReliableBuddyMem: 418824 kB ReliableShmem: 96 kB
@@ -978,6 +979,8 @@ VmallocChunk: largest contiguous block of vmalloc area which is free allocations. This stat excludes the cost of metadata. ReliableTotal: Total reliable memory size ReliableUsed: The used amount of reliable memory +ReliableTaskUsed: Reliable memory used by special user tasks and global + init process ReliableBuddyMem: Total mirrored memory size in buddy system ReliableShmem: Reliable memory used by shmem
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 4c15db9c19443..c9c9b9aa8dfbb 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -15,7 +15,7 @@ extern struct static_key_false mem_reliable;
extern bool reliable_enabled; -extern atomic_long_t reliable_user_used_nr_page; +extern atomic_long_t reliable_task_used_nr_page; extern unsigned long task_reliable_limit __read_mostly; extern bool reliable_allow_fallback; extern bool shmem_reliable; @@ -74,13 +74,13 @@ static inline void reliable_page_counter(struct page *page, { if (page_reliable(page)) { atomic_long_add(val, &mm->reliable_nr_page); - atomic_long_add(val, &reliable_user_used_nr_page); + atomic_long_add(val, &reliable_task_used_nr_page); } }
static inline bool reliable_mem_limit_check(unsigned long nr_page) { - return atomic_long_read(&reliable_user_used_nr_page) + nr_page <= + return atomic_long_read(&reliable_task_used_nr_page) + nr_page <= task_reliable_limit / PAGE_SIZE; }
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 5c56b2c6f4b52..4aa6b83ca4a6b 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -24,7 +24,7 @@ DEFINE_STATIC_KEY_FALSE(mem_reliable);
bool reliable_enabled; static atomic_long_t total_reliable_mem; -atomic_long_t reliable_user_used_nr_page; +atomic_long_t reliable_task_used_nr_page; /* reliable user limit for user tasks with reliable flag */ unsigned long task_reliable_limit = ULONG_MAX; bool reliable_allow_fallback __read_mostly = true; @@ -192,6 +192,8 @@ void reliable_report_meminfo(struct seq_file *m) total_reliable_mem_sz() >> PAGE_SHIFT); show_val_kb(m, "ReliableUsed: ", used_reliable_mem_sz() >> PAGE_SHIFT); + show_val_kb(m, "ReliableTaskUsed: ", + atomic_long_read(&reliable_task_used_nr_page)); show_val_kb(m, "ReliableBuddyMem: ", nr_buddy_pages);
if (shmem_reliable_is_enabled()) { @@ -514,7 +516,7 @@ void reliable_show_mem_info(void) pr_info("task_reliable_limit: %lu kB", task_reliable_limit >> 10); pr_info("reliable_user_used: %ld kB", - atomic_long_read(&reliable_user_used_nr_page) << + atomic_long_read(&reliable_task_used_nr_page) << (PAGE_SHIFT - 10)); } }
From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Check whether page is NULL or not in page_reliable(). Return false if page is NULL.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com
Reviewed-by: Kefeng Wang wangkefeng.wang@huawei.com
Signed-off-by: Yang Yingliang yangyingliang@huawei.com
---
 include/linux/mem_reliable.h | 2 +-
 mm/mem_reliable.c | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index c9c9b9aa8dfbb..00c915f583699 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -107,7 +107,7 @@ static inline void shmem_reliable_page_counter(struct page *page, int nr_page)
static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) { - if (page && page_reliable(page)) + if (page_reliable(page)) this_cpu_add(nr_reliable_buddy_pages, nr_page); }
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 4aa6b83ca4a6b..ae4e9609f43cf 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -53,12 +53,15 @@ void add_reliable_mem_size(long sz)
bool page_reliable(struct page *page) { - return mem_reliable_is_enabled() && page_zonenum(page) < ZONE_MOVABLE; + if (!mem_reliable_is_enabled() || !page) + return false; + + return page_zonenum(page) < ZONE_MOVABLE; }
static bool reliable_and_lru_check(enum lru_list lru, struct page *page) { - if (!page || !page_reliable(page)) + if (!page_reliable(page)) return false;
if (lru != LRU_ACTIVE_FILE && lru != LRU_INACTIVE_FILE)