
From: Jane Chu <jane.chu@oracle.com> mainline inclusion from mainline-v6.11-rc1 commit b8b9488d50b7150bd4830dfff487e8d4ef6589ba category: feature bugzilla: https://gitee.com/src-openeuler/kernel/issues/ICTDP9 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Added two explicit MF_MSG messages describing failure in get_hwpoison_page. Attemped to document the definition of various action names, and made a few adjustment to the action_result() calls. Link: https://lkml.kernel.org/r/20240524215306.2705454-4-jane.chu@oracle.com Signed-off-by: Jane Chu <jane.chu@oracle.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Acked-by: Miaohe Lin <linmiaohe@huawei.com> Cc: Naoya Horiguchi <nao.horiguchi@gmail.com> Cc: Oscar Salvador <oalvador@suse.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Conflicts: include/linux/mm.h include/ras/ras_event.h mm/memory-failure.c [context conflict] Signed-off-by: Tong Tiangen <tongtiangen@huawei.com> --- include/linux/mm.h | 2 ++ include/ras/ras_event.h | 2 ++ mm/memory-failure.c | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 36 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 886569d56066..1f36bf9ee02f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4063,6 +4063,7 @@ enum mf_action_page_type { MF_MSG_DIFFERENT_COMPOUND, MF_MSG_HUGE, MF_MSG_FREE_HUGE, + MF_MSG_GET_HWPOISON, MF_MSG_UNMAP_FAILED, MF_MSG_DIRTY_SWAPCACHE, MF_MSG_CLEAN_SWAPCACHE, @@ -4077,6 +4078,7 @@ enum mf_action_page_type { MF_MSG_DAX, MF_MSG_UNSPLIT_THP, MF_MSG_FREE_DPOOL, + MF_MSG_ALREADY_POISONED, MF_MSG_UNKNOWN, }; diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 9dbca0e03b3d..77a7f2a80a97 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -414,6 +414,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_DIFFERENT_COMPOUND, "different compound page after locking" ) \ EM ( MF_MSG_HUGE, "huge page" ) \ EM ( MF_MSG_FREE_HUGE, "free huge page" ) \ + EM ( MF_MSG_GET_HWPOISON, "get hwpoison page" ) \ EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \ EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \ EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \ @@ -428,6 +429,7 @@ TRACE_EVENT(aer_event, EM ( MF_MSG_DAX, "dax page" ) \ EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \ EM ( MF_MSG_FREE_DPOOL, "free dynamic pool page" ) \ + EM ( MF_MSG_ALREADY_POISONED, "already poisoned" ) \ EMe ( MF_MSG_UNKNOWN, "unknown page" ) /* diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b05946e0776e..22556ef8aadd 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -871,6 +871,28 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn, return ret > 0 ? -EHWPOISON : -EFAULT; } +/* + * MF_IGNORED - The m-f() handler marks the page as PG_hwpoisoned'ed. + * But it could not do more to isolate the page from being accessed again, + * nor does it kill the process. This is extremely rare and one of the + * potential causes is that the page state has been changed due to + * underlying race condition. This is the most severe outcomes. + * + * MF_FAILED - The m-f() handler marks the page as PG_hwpoisoned'ed. + * It should have killed the process, but it can't isolate the page, + * due to conditions such as extra pin, unmap failure, etc. Accessing + * the page again may trigger another MCE and the process will be killed + * by the m-f() handler immediately. + * + * MF_DELAYED - The m-f() handler marks the page as PG_hwpoisoned'ed. + * The page is unmapped, and is removed from the LRU or file mapping. + * An attempt to access the page again will trigger page fault and the + * PF handler will kill the process. + * + * MF_RECOVERED - The m-f() handler marks the page as PG_hwpoisoned'ed. + * The page has been completely isolated, that is, unmapped, taken out of + * the buddy system, or hole-punnched out of the file mapping. + */ static const char *action_name[] = { [MF_IGNORED] = "Ignored", [MF_FAILED] = "Failed", @@ -885,6 +907,7 @@ static const char * const action_page_types[] = { [MF_MSG_DIFFERENT_COMPOUND] = "different compound page after locking", [MF_MSG_HUGE] = "huge page", [MF_MSG_FREE_HUGE] = "free huge page", + [MF_MSG_GET_HWPOISON] = "get hwpoison page", [MF_MSG_UNMAP_FAILED] = "unmapping failed page", [MF_MSG_DIRTY_SWAPCACHE] = "dirty swapcache page", [MF_MSG_CLEAN_SWAPCACHE] = "clean swapcache page", @@ -899,6 +922,7 @@ static const char * const action_page_types[] = { [MF_MSG_DAX] = "dax page", [MF_MSG_UNSPLIT_THP] = "unsplit thp", [MF_MSG_FREE_DPOOL] = "free dynamic pool page", + [MF_MSG_ALREADY_POISONED] = "already poisoned", [MF_MSG_UNKNOWN] = "unknown page", }; @@ -1007,12 +1031,13 @@ static int me_kernel(struct page_state *ps, struct page *p) /* * Page in unknown state. Do nothing. + * This is a catch-all in case we fail to make sense of the page state. */ static int me_unknown(struct page_state *ps, struct page *p) { pr_err("%#lx: Unknown page state\n", page_to_pfn(p)); unlock_page(p); - return MF_FAILED; + return MF_IGNORED; } /* @@ -2052,6 +2077,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb if (flags & MF_ACTION_REQUIRED) { folio = page_folio(p); res = kill_accessing_process(current, folio_pfn(folio), flags); + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); } return res; } else if (res == -EBUSY) { @@ -2059,7 +2085,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb flags |= MF_NO_RETRY; goto retry; } - return action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); + return action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED); } folio = page_folio(p); @@ -2096,7 +2122,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb if (!hwpoison_user_mappings(p, pfn, flags, &folio->page)) { folio_unlock(folio); - return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); + return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_FAILED); } return identify_page_state(pfn, p, page_flags); @@ -2233,6 +2259,7 @@ int memory_failure(unsigned long pfn, int flags) res = kill_accessing_process(current, pfn, flags); if (flags & MF_COUNT_INCREASED) put_page(p); + action_result(pfn, MF_MSG_ALREADY_POISONED, MF_FAILED); goto unlock_mutex; } @@ -2273,7 +2300,7 @@ int memory_failure(unsigned long pfn, int flags) } goto unlock_mutex; } else if (res < 0) { - res = action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED); + res = action_result(pfn, MF_MSG_GET_HWPOISON, MF_IGNORED); goto unlock_mutex; } } @@ -2368,7 +2395,7 @@ int memory_failure(unsigned long pfn, int flags) * Abort on fail: __filemap_remove_folio() assumes unmapped page. */ if (!hwpoison_user_mappings(p, pfn, flags, p)) { - res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); + res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_FAILED); goto unlock_page; } -- 2.25.1