From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
Introduce a fallback mechanism for memory reliable. Memory allocation falls back to the non-mirrored region when the zone's low watermark is reached, and kswapd is awakened at that time.
This mechanism is enabled by default and can be disabled by adding "reliable_debug=F" to the kernel parameters. It relies on CONFIG_MEMORY_RELIABLE and requires "kernelcore=reliable" in the kernel parameters.
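For example, booting with the following parameters (illustrative command line, not part of this patch) enables memory reliable but disables the fallback to the non-mirrored region:

    kernelcore=reliable reliable_debug=F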
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
---
 .../admin-guide/kernel-parameters.txt |  4 +-
 include/linux/mem_reliable.h          | 10 ++++
 mm/mem_reliable.c                     |  5 ++
 mm/page_alloc.c                       | 56 ++++++++++++++++++-
 4 files changed, 71 insertions(+), 4 deletions(-)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 53b820d51822..53c15bb5b977 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5519,11 +5519,13 @@
 			See Documentation/admin-guide/cgroup-v1/cpusets.rst.
 
 	reliable_debug=	[ARM64]
-			Format: [P][,S]
+			Format: [P][,S][,F]
 			Only works with CONFIG_MEMORY_RELIABLE and
 			"kernelcore=reliable" is configured.
 			P: Page cache does not use the reliable memory.
 			S: The shmem does not use the reliable memory.
+			F: User memory allocation(special user task, tmpfs) will
+			   not allocate memory from non-mirrored region if failed.
 
 	reserve=	[KNL,BUGS] Force kernel to ignore I/O ports or memory
 			Format: <base1>,<size1>[,<base2>,<size2>,...]
diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h
index 22a62deb8274..09621a4d5ae5 100644
--- a/include/linux/mem_reliable.h
+++ b/include/linux/mem_reliable.h
@@ -17,6 +17,7 @@ DECLARE_STATIC_KEY_FALSE(mem_reliable);
 extern bool reliable_enabled;
 extern struct file_operations proc_reliable_operations;
 extern bool shmem_reliable;
+extern bool reliable_allow_fallback;
 extern bool pagecache_reliable;
 extern struct percpu_counter pagecache_reliable_pages;
 extern struct percpu_counter anon_reliable_pages;
@@ -91,6 +92,11 @@ static inline bool skip_non_mirrored_zone(gfp_t gfp, struct zoneref *z)
 	return false;
 }
 
+static inline bool reliable_allow_fb_enabled(void)
+{
+	return reliable_allow_fallback;
+}
+
 static inline bool mem_reliable_shmem_limit_check(void)
 {
 	return percpu_counter_read_positive(&shmem_reliable_pages) <
@@ -116,6 +122,9 @@ static inline bool shmem_prepare_alloc(gfp_t *gfp_mask)
 		return true;
 	}
 
+	if (reliable_allow_fb_enabled())
+		return true;
+
 	return false;
 }
 
@@ -207,6 +216,7 @@ static inline void mem_reliable_out_of_memory(gfp_t gfp_mask,
 						    unsigned int order,
 						    int preferred_nid,
 						    nodemask_t *nodemask) {}
+static inline bool reliable_allow_fb_enabled(void) { return false; }
 #endif
 
 #endif
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index ae654d6bb047..a77d0cc4100d 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -24,6 +24,7 @@ unsigned long pagecache_reliable_limit = ULONG_MAX;
 /* reliable user limit for user tasks with reliable flag */
 unsigned long task_reliable_limit = ULONG_MAX;
 unsigned long shmem_reliable_limit = ULONG_MAX;
+bool reliable_allow_fallback __read_mostly = true;
 
 bool mem_reliable_counter_initialized(void)
 {
@@ -301,6 +302,10 @@ static int __init setup_reliable_debug(char *str)
 			shmem_reliable = false;
 			pr_info("disable shmem use reliable memory\n");
 			break;
+		case 'F':
+			reliable_allow_fallback = false;
+			pr_info("disable memory reliable fallback\n");
+			break;
 		default:
 			pr_err("reliable_debug option '%c' unknown. skipped\n",
 			       *str);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 503ce164a1e3..8118695b959b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4041,6 +4041,52 @@ check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac)
 	return false;
 }
 
+#ifdef CONFIG_MEMORY_RELIABLE
+/*
+ * if fallback is enabled, fallback to movable zone if no dma/normal zone
+ * found
+ */
+static inline struct zone *mem_reliable_fallback_zone(gfp_t gfp_mask,
+						      struct alloc_context *ac)
+{
+	if (!reliable_allow_fb_enabled())
+		return NULL;
+
+	if (!(gfp_mask & GFP_RELIABLE))
+		return NULL;
+
+	ac->highest_zoneidx = gfp_zone(gfp_mask & ~GFP_RELIABLE);
+	ac->preferred_zoneref = first_zones_zonelist(
+		ac->zonelist, ac->highest_zoneidx, ac->nodemask);
+	return ac->preferred_zoneref->zone;
+}
+
+static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
+						  struct alloc_context *ac)
+{
+	if (!reliable_allow_fb_enabled())
+		return;
+
+	if (gfp_mask & __GFP_NOFAIL)
+		return;
+
+	if ((ac->highest_zoneidx == ZONE_NORMAL) && (gfp_mask & GFP_RELIABLE)) {
+		ac->highest_zoneidx = gfp_zone(gfp_mask & ~GFP_RELIABLE);
+		ac->preferred_zoneref = first_zones_zonelist(
+			ac->zonelist, ac->highest_zoneidx, ac->nodemask);
+		return;
+	}
+}
+#else
+static inline struct zone *mem_reliable_fallback_zone(gfp_t gfp_mask,
+						      struct alloc_context *ac)
+{
+	return NULL;
+}
+static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask,
+						  struct alloc_context *ac) {}
+#endif
+
 static inline struct page *
 __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 						struct alloc_context *ac)
@@ -4080,8 +4126,10 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 */
 	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
 					ac->highest_zoneidx, ac->nodemask);
-	if (!ac->preferred_zoneref->zone)
-		goto nopage;
+	if (!ac->preferred_zoneref->zone) {
+		if (!mem_reliable_fallback_zone(gfp_mask, ac))
+			goto nopage;
+	}
 
 	/*
 	 * Check for insane configurations where the cpuset doesn't contain
@@ -4099,6 +4147,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	if (alloc_flags & ALLOC_KSWAPD)
 		wake_all_kswapds(order, gfp_mask, ac);
 
+	mem_reliable_fallback_slowpath(gfp_mask, ac);
+
 	/*
 	 * The adjusted alloc_flags might result in immediate success, so try
 	 * that first
@@ -4619,7 +4669,7 @@ static inline bool check_after_alloc(gfp_t *gfp, unsigned int order,
 	*_page = NULL;
 
 out_retry:
-	if (is_global_init(current)) {
+	if (reliable_allow_fb_enabled() || is_global_init(current)) {
 		*gfp &= ~GFP_RELIABLE;
 		return true;
 	}
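For reviewers, a minimal caller-side sketch of the intended behavior, assuming the GFP_RELIABLE flag and the reliable_allow_fb_enabled() helper introduced by this series; the function reliable_alloc_example() is hypothetical and not part of the patch:

#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/printk.h>
#include <linux/mem_reliable.h>

/* Hypothetical illustration only, not part of this patch. */
static struct page *reliable_alloc_example(void)
{
	/* Prefer mirrored memory for this user allocation. */
	gfp_t gfp = GFP_HIGHUSER_MOVABLE | GFP_RELIABLE;
	struct page *page = alloc_pages(gfp, 0);

	/*
	 * With the fallback enabled (default), __alloc_pages_slowpath()
	 * widens the allowed zones to the non-mirrored (movable) region
	 * and retries before failing.  With reliable_debug=F the fallback
	 * is off, so the caller must handle failure once mirrored memory
	 * reaches its low watermark.
	 */
	if (!page && !reliable_allow_fb_enabled())
		pr_warn("mirrored memory low and fallback disabled\n");

	return page;
}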