hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8787 ------------------------------------------ When a critical RAS error occurs on a remote node, ghes_handle_critical_ras() is called with MF_ACTION_REQUIRED. This function: 1. Identifies the node via page_to_nid() 2. Sets the PGDAT_CRITICAL_ERR flag via set_node_critical_err() 3. Sends SIGBUS to the current process The PGDAT_CRITICAL_ERR flag prevents further memory operations on the corrupted node. In folio_migrate_mc_copy(), when migrating pages: if (node_is_critical_err(folio_nid(src))) return -EHWPOISON; This avoids migrating pages from a corrupted node, which would trigger additional SEA notifications and cause more hardware errors. Signed-off-by: Wupeng Ma <mawupeng1@huawei.com> --- drivers/acpi/apei/ghes.c | 14 +++++++++----- include/linux/mm.h | 19 +++++++++++++++++++ include/linux/mmzone.h | 4 ++++ mm/migrate.c | 4 ++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 38ff2219faf52..4c3dee0937452 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -478,13 +478,13 @@ static void ghes_clear_estatus(struct ghes *ghes, * terminating the corresponding host process (e.g., the VMM/QEMU task) is the * expected behavior to ensure system stability. 
*/ -static void ghes_handle_critical_ras(unsigned long pfn) +static void ghes_handle_critical_ras(unsigned long pfn, unsigned long flags) { struct mm_struct *mm = current->mm; struct page *p; int nid; - if (!IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL)) + if (!IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) || !(flags & MF_ACTION_REQUIRED)) return; p = pfn_to_online_page(pfn); @@ -495,6 +495,10 @@ static void ghes_handle_critical_ras(unsigned long pfn) if (!numa_is_remote_node(nid)) return; + set_node_critical_err(nid); + if (!mm) + return; + if (test_bit(MMF_CRITICAL_ERR, &mm->flags)) return; @@ -554,10 +558,10 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags, bool critical) return false; } - if (flags == MF_ACTION_REQUIRED && current->mm) { - if (critical) - ghes_handle_critical_ras(pfn); + if (critical) + ghes_handle_critical_ras(pfn, flags); + if (flags == MF_ACTION_REQUIRED && current->mm) { twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb)); if (!twcb) return false; diff --git a/include/linux/mm.h b/include/linux/mm.h index 92000bf98c037..75d32b512cb41 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4461,11 +4461,30 @@ static inline bool mm_is_critical_error(struct mm_struct *mm) { return mm && test_bit(MMF_CRITICAL_ERR, &mm->flags); } + +static inline void set_node_critical_err(int nid) +{ + set_bit(PGDAT_CRITICAL_ERR, &NODE_DATA(nid)->flags); +} + +static inline void clear_node_critical_err(int nid) +{ + clear_bit(PGDAT_CRITICAL_ERR, &NODE_DATA(nid)->flags); +} + +static inline bool node_is_critical_err(int nid) +{ + return test_bit(PGDAT_CRITICAL_ERR, &NODE_DATA(nid)->flags); +} #else static inline bool mm_is_critical_error(struct mm_struct *mm) { return false; } + +static inline void set_node_critical_err(int nid) { return; } +static inline void clear_node_critical_err(int nid) { return; } +static inline bool node_is_critical_err(int nid) { return false; } #endif #endif /* _LINUX_MM_H */ diff --git 
a/include/linux/mmzone.h b/include/linux/mmzone.h index b831463c2e6ec..a6ed47ed840ba 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1053,6 +1053,10 @@ enum pgdat_flags { * many pages under writeback */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ + +#ifdef CONFIG_ACPI_APEI_RAS_CRITICAL + PGDAT_CRITICAL_ERR = 31, /* the whole node is unusable */ +#endif }; enum zone_flags { diff --git a/mm/migrate.c b/mm/migrate.c index 2ca451efaaadd..3d393c3df9840 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -583,6 +583,10 @@ static int folio_migrate_mc_copy(struct folio *dst, struct folio *src, if (mode == MIGRATE_SYNC_NO_COPY) return 0; + if (IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) && + node_is_critical_err(folio_nid(src))) + return -EHWPOISON; + if (mode == MIGRATE_ASYNC_DMA_OFFLOADING) { if (folio_test_hugetlb(src) || folio_test_pmd_mappable(src)) { -- 2.43.0