[PATCH OLK-6.6 0/2] critical err extend
1. mark node as critical_err on remote RAS error 2. add sysfs interface to set/clear node's critical err status Wupeng Ma (2): mm: mark node as critical_err on remote RAS error numa: add critical_err sysfs interface for remote node recovery drivers/acpi/apei/ghes.c | 14 +++++++---- drivers/base/numa_remote.c | 48 ++++++++++++++++++++++++++++++++++---- include/linux/mm.h | 19 +++++++++++++++ include/linux/mmzone.h | 4 ++++ mm/migrate.c | 4 ++++ 5 files changed, 80 insertions(+), 9 deletions(-) -- 2.43.0
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8787 ------------------------------------------ When a critical RAS error occurs on a remote node, ghes_handle_critical_ras() is called with MF_ACTION_REQUIRED. This function: 1. Identifies the node via page_to_nid() 2. Sets the PGDAT_CRITICAL_ERR flag via set_node_critical_err() 3. Sends SIGBUS to the current process The PGDAT_CRITICAL_ERR flag prevents further memory operations on the corrupted node. In folio_migrate_mc_copy(), when migrating pages: if (node_is_critical_err(folio_nid(src))) return -EHWPOISON; This avoids migrating pages from the corrupted node, which would trigger additional SEA notifications and cause more hardware errors. Signed-off-by: Wupeng Ma <mawupeng1@huawei.com> --- drivers/acpi/apei/ghes.c | 14 +++++++++----- include/linux/mm.h | 19 +++++++++++++++++++ include/linux/mmzone.h | 4 ++++ mm/migrate.c | 4 ++++ 4 files changed, 36 insertions(+), 5 deletions(-) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 38ff2219faf52..4c3dee0937452 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -478,13 +478,13 @@ static void ghes_clear_estatus(struct ghes *ghes, * terminating the corresponding host process (e.g., the VMM/QEMU task) is the * expected behavior to ensure system stability.
*/ -static void ghes_handle_critical_ras(unsigned long pfn) +static void ghes_handle_critical_ras(unsigned long pfn, unsigned long flags) { struct mm_struct *mm = current->mm; struct page *p; int nid; - if (!IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL)) + if (!IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) || !(flags & MF_ACTION_REQUIRED)) return; p = pfn_to_online_page(pfn); @@ -495,6 +495,10 @@ static void ghes_handle_critical_ras(unsigned long pfn) if (!numa_is_remote_node(nid)) return; + set_node_critical_err(nid); + if (!mm) + return; + if (test_bit(MMF_CRITICAL_ERR, &mm->flags)) return; @@ -554,10 +558,10 @@ static bool ghes_do_memory_failure(u64 physical_addr, int flags, bool critical) return false; } - if (flags == MF_ACTION_REQUIRED && current->mm) { - if (critical) - ghes_handle_critical_ras(pfn); + if (critical) + ghes_handle_critical_ras(pfn, flags); + if (flags == MF_ACTION_REQUIRED && current->mm) { twcb = (void *)gen_pool_alloc(ghes_estatus_pool, sizeof(*twcb)); if (!twcb) return false; diff --git a/include/linux/mm.h b/include/linux/mm.h index 92000bf98c037..75d32b512cb41 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4461,11 +4461,30 @@ static inline bool mm_is_critical_error(struct mm_struct *mm) { return mm && test_bit(MMF_CRITICAL_ERR, &mm->flags); } + +static inline void set_node_critical_err(int nid) +{ + set_bit(PGDAT_CRITICAL_ERR, &NODE_DATA(nid)->flags); +} + +static inline void clear_node_critical_err(int nid) +{ + clear_bit(PGDAT_CRITICAL_ERR, &NODE_DATA(nid)->flags); +} + +static inline bool node_is_critical_err(int nid) +{ + return test_bit(PGDAT_CRITICAL_ERR, &NODE_DATA(nid)->flags); +} #else static inline bool mm_is_critical_error(struct mm_struct *mm) { return false; } + +static inline void set_node_critical_err(int nid) { return; } +static inline void clear_node_critical_err(int nid) { return; } +static inline bool node_is_critical_err(int nid) { return false; } #endif #endif /* _LINUX_MM_H */ diff --git 
a/include/linux/mmzone.h b/include/linux/mmzone.h index b831463c2e6ec..a6ed47ed840ba 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1053,6 +1053,10 @@ enum pgdat_flags { * many pages under writeback */ PGDAT_RECLAIM_LOCKED, /* prevents concurrent reclaim */ + +#ifdef CONFIG_ACPI_APEI_RAS_CRITICAL + PGDAT_CRITICAL_ERR = 31, /* the whole node is unusable */ +#endif }; enum zone_flags { diff --git a/mm/migrate.c b/mm/migrate.c index 2ca451efaaadd..3d393c3df9840 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -583,6 +583,10 @@ static int folio_migrate_mc_copy(struct folio *dst, struct folio *src, if (mode == MIGRATE_SYNC_NO_COPY) return 0; + if (IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) && + node_is_critical_err(folio_nid(src))) + return -EHWPOISON; + if (mode == MIGRATE_ASYNC_DMA_OFFLOADING) { if (folio_test_hugetlb(src) || folio_test_pmd_mappable(src)) { -- 2.43.0
hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8787 ------------------------------------------ Add a new sysfs interface /sys/devices/system/node/nodeX/critical_err to query, set, and clear the critical error state of a NUMA node. When a critical RAS error occurs on a remote node, ghes.c marks the node with set_node_critical_err(). This prevents further page migration from the corrupted node (migrate.c returns -EHWPOISON). Once the node is recovered (e.g., after a hardware reset or replacement), administrators can use this interface to clear the error state: echo 0 > /sys/devices/system/node/nodeX/critical_err The interface is only available for remote nodes. Note that the store handler performs no explicit CAP_SYS_ADMIN check; write access is restricted by the file mode assigned by DEVICE_ATTR_RW (0644, i.e. writable by root only), preventing unauthorized access. The critical_err flag (PGDAT_CRITICAL_ERR) is set in node_data when ACPI APEI reports a fatal error with MF_ACTION_REQUIRED on a remote node. This ensures that memory operations avoid the corrupted node until explicit recovery by the system administrator. Signed-off-by: Wupeng Ma <mawupeng1@huawei.com> --- drivers/base/numa_remote.c | 48 ++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/base/numa_remote.c b/drivers/base/numa_remote.c index 74eb87ac8b260..f53d1dcab2ce8 100644 --- a/drivers/base/numa_remote.c +++ b/drivers/base/numa_remote.c @@ -602,16 +602,56 @@ static ssize_t remote_show(struct device *dev, } static DEVICE_ATTR_RO(remote); +static ssize_t critical_err_show(struct device *dev, + struct device_attribute *dev_attr, char *buf) +{ + return sprintf(buf, "%d\n", node_is_critical_err(dev->id) ? 
1 : 0); +} + +static ssize_t critical_err_store(struct device *dev, + struct device_attribute *dev_attr, + const char *buf, size_t count) +{ + int nid = dev->id; + unsigned long val; + int ret; + + ret = kstrtoul(buf, 10, &val); + if (ret) + return ret; + + if (val > 1) + return -EINVAL; + + if (val == 0) + clear_node_critical_err(nid); + else + set_node_critical_err(nid); + + return count; +} +static DEVICE_ATTR_RW(critical_err); + void numa_remote_register_node(struct node *node) { - if (numa_remote_enabled) - device_create_file(&node->dev, &dev_attr_remote); + if (!numa_remote_enabled) + return; + + device_create_file(&node->dev, &dev_attr_remote); + if (IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) && + numa_is_remote_node(node->dev.id)) + device_create_file(&node->dev, &dev_attr_critical_err); } void numa_remote_unregister_node(struct node *node) { - if (numa_remote_enabled) - device_remove_file(&node->dev, &dev_attr_remote); + if (!numa_remote_enabled) + return; + + device_remove_file(&node->dev, &dev_attr_remote); + if (IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) && + numa_is_remote_node(node->dev.id)) + device_remove_file(&node->dev, &dev_attr_critical_err); } void numa_remote_report_meminfo(struct seq_file *m) -- 2.43.0
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://atomgit.com/openeuler/kernel/merge_requests/21991 邮件列表地址:https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/JTA... FeedBack: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://atomgit.com/openeuler/kernel/merge_requests/21991 Mailing list address: https://mailweb.openeuler.org/archives/list/kernel@openeuler.org/message/JTA...
participants (2)
-
patchwork bot -
Wupeng Ma