Fix the zoneref mapping problem and disable memory reliable when kdump is in progress. Reliable memory used by shmem is now counted accurately even when swap is enabled.
Changelog since v1:
- update ac->preferred_zoneref in reliable_fb_find_zone
- add patch on shmem
Ma Wupeng (3):
  mm: disable memory reliable when kdump is in progress
  mm: fix zoneref mapping problem in memory reliable
  mm: Count reliable shmem used based on NR_SHMEM
 mm/filemap.c      |  5 ++++-
 mm/khugepaged.c   |  2 ++
 mm/mem_reliable.c |  6 ++++++
 mm/migrate.c      |  5 +++++
 mm/page_alloc.c   | 25 ++++++++++++-------------
 mm/shmem.c        |  7 ++-----
 6 files changed, 31 insertions(+), 19 deletions(-)
From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
The kdump kernel has only limited memory, which leads to buggy behavior of the memory reliable feature if it is enabled there. So disable memory reliable if kdump is in progress.
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 mm/mem_reliable.c | 6 ++++++
 1 file changed, 6 insertions(+)
diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c
index 033af716610f..5505577d3784 100644
--- a/mm/mem_reliable.c
+++ b/mm/mem_reliable.c
@@ -9,6 +9,7 @@
 #include <linux/seq_file.h>
 #include <linux/mmzone.h>
 #include <linux/oom.h>
+#include <linux/crash_dump.h>
 
 #define MEM_RELIABLE_RESERVE_MIN	(256UL << 20)
 
@@ -128,6 +129,11 @@ void mem_reliable_init(bool has_unmirrored_mem, unsigned long *zone_movable_pfn)
 	if (!reliable_enabled)
 		return;
 
+	if (is_kdump_kernel()) {
+		pr_err("init failed, the kdump is in progress\n");
+		return;
+	}
+
 	if (atomic_long_read(&total_reliable_mem) == 0) {
 		memset(zone_movable_pfn, 0,
 		       sizeof(unsigned long) * MAX_NUMNODES);
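For reference, the entry of mem_reliable_init() after this patch roughly reads as follows (a simplified sketch; the rest of the function is unchanged and elided here):

void mem_reliable_init(bool has_unmirrored_mem, unsigned long *zone_movable_pfn)
{
	if (!reliable_enabled)
		return;

	/* A kdump (crash) kernel boots with only a small reserved memory
	 * range, so memory reliable cannot work sanely there: bail out early.
	 */
	if (is_kdump_kernel()) {
		pr_err("init failed, the kdump is in progress\n");
		return;
	}

	/* ... original initialization continues unchanged ... */
}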
From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
--------------------------------
The mapping between zoneref and zone is updated if __GFP_THISNODE is set and memory reliable fallback is enabled. This puts ZONE_MOVABLE into the wrong zonerefs slot and makes the original zone unselectable.
With this patch, high_zoneidx is recomputed via gfp_zone() with ___GFP_RELIABILITY cleared from the original gfp_mask, and the preferred zoneref is recalculated afterwards.
Fixes: 3023a4b35d41 ("mm: Introduce fallback mechanism for memory reliable")
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 mm/page_alloc.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index dab62d1d3a6e..f4d4716b9049 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3672,21 +3672,21 @@ __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order,
 }
 
 #ifdef CONFIG_MEMORY_RELIABLE
-static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask,
-						 struct alloc_context *ac)
+static inline void reliable_fb_find_zone(gfp_t gfp_mask,
+					 struct alloc_context *ac)
 {
 	if (!reliable_allow_fb_enabled())
-		return NULL;
+		return;
 
-	/* dst nodemask may don't have zone we want, fallback here */
+	/* dst node doesn't have the zone we want, fall back here */
 	if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) &&
 	    (gfp_mask & ___GFP_RELIABILITY)) {
-		struct zoneref *ref = first_zones_zonelist(
-			ac->zonelist, ZONE_MOVABLE, ac->nodemask);
-		return ref->zone;
+		ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY);
+		ac->preferred_zoneref = first_zones_zonelist(
+			ac->zonelist, ac->high_zoneidx, ac->nodemask);
 	}
 
-	return NULL;
+	return;
 }
 
 static inline struct page *
@@ -3712,10 +3712,10 @@ reliable_fb_before_oom(gfp_t gfp_mask, int order,
 	return NULL;
 }
 #else
-static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask,
-						 struct alloc_context *ac)
+static inline void reliable_fb_find_zone(gfp_t gfp_mask,
+					 struct alloc_context *ac)
 {
-	return NULL;
+	return;
 }
 
 static inline struct page *reliable_fb_before_oom(gfp_t gfp_mask, int order,
@@ -4375,8 +4375,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
 					ac->high_zoneidx, ac->nodemask);
 	if (!ac->preferred_zoneref->zone) {
-		ac->preferred_zoneref->zone =
-			reliable_fb_find_zone(gfp_mask, ac);
+		reliable_fb_find_zone(gfp_mask, ac);
 
 		if (!ac->preferred_zoneref->zone)
 			goto nopage;
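Put together, the fallback helper after this patch behaves as sketched below: instead of returning a zone pointer for the caller to patch into the zoneref, it fixes up the allocation context in place (sketch assembled from the hunks above; the surrounding page_alloc.c context is elided):

static inline void reliable_fb_find_zone(gfp_t gfp_mask,
					 struct alloc_context *ac)
{
	if (!reliable_allow_fb_enabled())
		return;

	/* dst node doesn't have the zone we want, fall back here */
	if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) &&
	    (gfp_mask & ___GFP_RELIABILITY)) {
		/* Recompute the zone index without ___GFP_RELIABILITY so the
		 * zonelist walk starts from the zone the caller originally
		 * asked for, then refresh the preferred zoneref to match.
		 */
		ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY);
		ac->preferred_zoneref = first_zones_zonelist(
			ac->zonelist, ac->high_zoneidx, ac->nodemask);
	}
}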
From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion
category: bugfix
bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S
CVE: NA
------------------------------------------
With this patch, the reliable memory counter is updated whenever NR_SHMEM is updated. Previously, the shmem reliable memory counter was not accurate when swap was enabled.
NR_SHMEM updates in the memcg scenario are ignored because they have nothing to do with the global counter. If shmem pages are migrated or collapsed from one region to another, the reliable memory counter needs to be updated because the reliable status of the source and destination pages may differ.
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Signed-off-by: Laibin Qiu <qiulaibin@huawei.com>
---
 mm/filemap.c    | 5 ++++-
 mm/khugepaged.c | 2 ++
 mm/migrate.c    | 5 +++++
 mm/shmem.c      | 7 ++-----
 4 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index a89d70097e68..9b6e72e14a04 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -291,6 +291,7 @@ static void unaccount_page_cache_page(struct address_space *mapping,
 	__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
 	if (PageSwapBacked(page)) {
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr);
+		shmem_reliable_page_counter(page, -nr);
 		if (PageTransHuge(page))
 			__dec_node_page_state(page, NR_SHMEM_THPS);
 	} else {
@@ -895,8 +896,10 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		 */
 		if (!PageHuge(new))
 			__inc_node_page_state(new, NR_FILE_PAGES);
-		if (PageSwapBacked(new))
+		if (PageSwapBacked(new)) {
 			__inc_node_page_state(new, NR_SHMEM);
+			shmem_reliable_page_counter(new, 1);
+		}
 		xa_unlock_irqrestore(&mapping->i_pages, flags);
 		mem_cgroup_migrate(old, new);
 		radix_tree_preload_end();
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 301d6aa079d7..2975fc124cb6 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1559,6 +1559,7 @@ static void collapse_shmem(struct mm_struct *mm,
 			ClearPageActive(page);
 			ClearPageUnevictable(page);
 			unlock_page(page);
+			shmem_reliable_page_counter(page, -1);
 			put_page(page);
 			index++;
 		}
@@ -1573,6 +1574,7 @@ static void collapse_shmem(struct mm_struct *mm,
 		mem_cgroup_commit_charge(new_page, memcg, false, true);
 		count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1);
 		lru_cache_add_anon(new_page);
+		shmem_reliable_page_counter(new_page, 1 << HPAGE_PMD_ORDER);
 
 		/*
 		 * Remove pte page tables, so we can re-fault the page as huge.
diff --git a/mm/migrate.c b/mm/migrate.c
index f7721d0aece5..ecfa8829acfd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -548,6 +548,11 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	xa_unlock(&mapping->i_pages);
 	/* Leave irq disabled to prevent preemption while updating stats */
 
+	if (PageSwapBacked(page) && !PageSwapCache(page)) {
+		shmem_reliable_page_counter(page, -nr);
+		shmem_reliable_page_counter(newpage, nr);
+	}
+
 	/*
 	 * If moved to a different zone then also account
 	 * the page for that zone. Other VM counters will be
diff --git a/mm/shmem.c b/mm/shmem.c
index 4363dbc8d57e..8915a5b9ad0a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -733,6 +733,7 @@ static int shmem_add_to_page_cache(struct page *page,
 			__inc_node_page_state(page, NR_SHMEM_THPS);
 		__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
 		__mod_node_page_state(page_pgdat(page), NR_SHMEM, nr);
+		shmem_reliable_page_counter(page, nr);
 		xa_unlock_irq(&mapping->i_pages);
 	} else {
 		page->mapping = NULL;
@@ -758,6 +759,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap)
 	mapping->nrpages--;
 	__dec_node_page_state(page, NR_FILE_PAGES);
 	__dec_node_page_state(page, NR_SHMEM);
+	shmem_reliable_page_counter(page, -1);
 	xa_unlock_irq(&mapping->i_pages);
 	put_page(page);
 	BUG_ON(error);
@@ -962,8 +964,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 					truncate_inode_page(mapping, page);
 				}
 			}
-			shmem_reliable_page_counter(
-				page, -(1 << compound_order(page)));
 			unlock_page(page);
 		}
 		pagevec_remove_exceptionals(&pvec);
@@ -1074,8 +1074,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 					break;
 				}
 			}
-			shmem_reliable_page_counter(
-				page, -(1 << compound_order(page)));
 			unlock_page(page);
 		}
 		pagevec_remove_exceptionals(&pvec);
@@ -1981,7 +1979,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
 		inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page);
 		shmem_recalc_inode(inode);
 		spin_unlock_irq(&info->lock);
-		shmem_reliable_page_counter(page, 1 << compound_order(page));
 		alloced = true;
 
 		if (PageTransHuge(page) &&
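The shmem_reliable_page_counter() hook itself is not part of this series. Purely as an illustration of the intended semantics, a helper of roughly the following shape could back the accounting; the names shmem_reliable_is_enabled(), page_reliable() and reliable_shmem_used_nr_page below are assumptions for this sketch, not the actual mem_reliable implementation:

static inline void shmem_reliable_page_counter(struct page *page, int nr_page)
{
	/* Assumed feature gate: do nothing if shmem reliable accounting is off. */
	if (!shmem_reliable_is_enabled())
		return;

	/* Assumed per-page test: only pages in reliable (mirrored) regions count. */
	if (page_reliable(page))
		percpu_counter_add(&reliable_shmem_used_nr_page, nr_page);
}

With the counter keyed to every NR_SHMEM update as done above, the swap-in and swap-out paths that go through the page cache accounting keep the reliable shmem figure consistent.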