From: Ma Wupeng mawupeng1@huawei.com
hulk inclusion
category: feature
bugzilla: https://gitee.com/openeuler/kernel/issues/I90COS
--------------------------------
Commit 67f22ba7750f ("mm/memory-failure: disable unpoison once hw error happens") disables unpoison_memory() once a real memory failure has happened, because the kpte is cleared on x86 and unpoisoning the page afterwards would lead to a kernel panic.
This problem does not exist on arm64, so introduce soft_online_page() to bypass this check. It is only used to bypass the check; real failure pages on x86 should call this to online.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- include/linux/mm.h | 1 + mm/memory-failure.c | 39 +++++++++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ed628b136a05..67a5205b02c67 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3880,6 +3880,7 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index, extern int memory_failure(unsigned long pfn, int flags); extern void memory_failure_queue_kick(int cpu); extern int unpoison_memory(unsigned long pfn); +extern int soft_online_page(unsigned long pfn); extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 367e303ba565e..d67cc40693678 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2496,19 +2496,7 @@ core_initcall(memory_failure_init); pr_info(fmt, pfn); \ })
-/** - * unpoison_memory - Unpoison a previously poisoned page - * @pfn: Page number of the to be unpoisoned page - * - * Software-unpoison a page that has been poisoned by - * memory_failure() earlier. - * - * This is only done on the software-level, so it only works - * for linux injected failures, not real hardware failures - * - * Returns 0 for success, otherwise -errno. - */ -int unpoison_memory(unsigned long pfn) +static int __unpoison_memory(unsigned long pfn, bool hw_mf_check) { struct folio *folio; struct page *p; @@ -2526,7 +2514,7 @@ int unpoison_memory(unsigned long pfn)
mutex_lock(&mf_mutex);
- if (hw_memory_failure) { + if (hw_mf_check && hw_memory_failure) { unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n", pfn, &unpoison_rs); ret = -EOPNOTSUPP; @@ -2609,8 +2597,31 @@ int unpoison_memory(unsigned long pfn) } return ret; } + +/** + * unpoison_memory - Unpoison a previously poisoned page + * @pfn: Page number of the to be unpoisoned page + * + * Software-unpoison a page that has been poisoned by + * memory_failure() earlier. + * + * This is only done on the software-level, so it only works + * for linux injected failures, not real hardware failures + * + * Returns 0 for success, otherwise -errno. + */ +int unpoison_memory(unsigned long pfn) +{ + return __unpoison_memory(pfn, true); +} EXPORT_SYMBOL(unpoison_memory);
+int soft_online_page(unsigned long pfn) +{ + return __unpoison_memory(pfn, false); +} +EXPORT_SYMBOL_GPL(soft_online_page); + static bool isolate_page(struct page *page, struct list_head *pagelist) { bool isolated = false;