From: Ma Wupeng <mawupeng1@huawei.com>
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I90COS
--------------------------------
Commit 67f22ba7750f ("mm/memory-failure: disable unpoison once hw error happens") disables unpoison_memory() once a real memory failure has happened, since the KPTE is cleared on x86 and unpoisoning afterwards leads to a kernel panic.
This problem does not exist on arm64, so introduce soft_online_page() to bypass this check there. It is only meant for bypassing the check; pages with real failures on x86 must not be brought back online through it.
Signed-off-by: Ma Wupeng mawupeng1@huawei.com --- include/linux/mm.h | 1 + mm/memory-failure.c | 54 +++++++++++++++++++++++++++++++++------------ 2 files changed, 41 insertions(+), 14 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h index 0ed628b136a05..55dbab37adeaf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3883,6 +3883,7 @@ extern int unpoison_memory(unsigned long pfn); extern void shake_page(struct page *p); extern atomic_long_t num_poisoned_pages __read_mostly; extern int soft_offline_page(unsigned long pfn, int flags); +extern int soft_online_page(unsigned long pfn); #ifdef CONFIG_MEMORY_FAILURE /* * Sysfs entries for memory failure handling statistics. diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 367e303ba565e..8ab4f0abee6ee 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2496,19 +2496,7 @@ core_initcall(memory_failure_init); pr_info(fmt, pfn); \ })
-/** - * unpoison_memory - Unpoison a previously poisoned page - * @pfn: Page number of the to be unpoisoned page - * - * Software-unpoison a page that has been poisoned by - * memory_failure() earlier. - * - * This is only done on the software-level, so it only works - * for linux injected failures, not real hardware failures - * - * Returns 0 for success, otherwise -errno. - */ -int unpoison_memory(unsigned long pfn) +static int __unpoison_memory(unsigned long pfn, bool hw_mf_check) { struct folio *folio; struct page *p; @@ -2526,7 +2514,7 @@ int unpoison_memory(unsigned long pfn)
mutex_lock(&mf_mutex);
- if (hw_memory_failure) { + if (hw_mf_check && hw_memory_failure) { unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n", pfn, &unpoison_rs); ret = -EOPNOTSUPP; @@ -2609,8 +2597,46 @@ int unpoison_memory(unsigned long pfn) } return ret; } + +/** + * unpoison_memory - Unpoison a previously poisoned page + * @pfn: Page number of the to be unpoisoned page + * + * Software-unpoison a page that has been poisoned by + * memory_failure() earlier. + * + * This is only done on the software-level, so it only works + * for linux injected failures, not real hardware failures + * + * Returns 0 for success, otherwise -errno. + */ +int unpoison_memory(unsigned long pfn) +{ + return __unpoison_memory(pfn, true); +} EXPORT_SYMBOL(unpoison_memory);
+/** + * soft_online_page - Unpoison a previously poisoned page + * @pfn: Page number of the to be unpoisoned page + * + * Software-unpoison a page that has been poisoned by + * memory_failure() earlier. + * + * This is only done on the software-level, so it only works + * for linux injected failures, not real hardware failures + * + * Since KPTE will be clear once hardware memory corrupts + * happens in x86, hw_mf_check can not be ignored for x86. + * + * Returns 0 for success, otherwise -errno. + */ +int soft_online_page(unsigned long pfn) +{ + return __unpoison_memory(pfn, IS_ENABLED(CONFIG_X86)); +} +EXPORT_SYMBOL_GPL(soft_online_page); + static bool isolate_page(struct page *page, struct list_head *pagelist) { bool isolated = false;