hulk inclusion
category: feature
bugzilla: https://atomgit.com/openeuler/kernel/issues/8787

------------------------------------------

Add a new sysfs interface /sys/devices/system/node/nodeX/critical_err to
query and clear the critical error state of a NUMA node.

When a critical RAS error occurs on a remote node, ghes.c marks the node
with set_node_critical_err(). This prevents further page migration from
the corrupted node (migrate.c returns -EHWPOISON).

Once the node is recovered (e.g., after hardware reset or replacement),
administrators can use this interface to clear the error state:

  echo 0 > /sys/devices/system/node/nodeX/critical_err

Writing 1 sets the error state manually; any other value is rejected
with -EINVAL.

The interface is only available for remote nodes.

The critical_err flag (PGDAT_CRITICAL_ERR) is set in node_data when ACPI
APEI reports a fatal error with MF_ACTION_REQUIRED on a remote node. This
ensures that memory operations avoid the corrupted node until explicit
recovery by the system administrator.

Signed-off-by: Wupeng Ma <mawupeng1@huawei.com>
---
 drivers/base/numa_remote.c | 59 +++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 4 deletions(-)

diff --git a/drivers/base/numa_remote.c b/drivers/base/numa_remote.c
index 74eb87ac8b260..f53d1dcab2ce8 100644
--- a/drivers/base/numa_remote.c
+++ b/drivers/base/numa_remote.c
@@ -602,16 +602,67 @@ static ssize_t remote_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(remote);
 
+/*
+ * Report whether this node is flagged with a critical error: emits "1\n"
+ * when node_is_critical_err() is true for the node id, "0\n" otherwise.
+ */
+static ssize_t critical_err_show(struct device *dev,
+		struct device_attribute *dev_attr, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", node_is_critical_err(dev->id) ? 1 : 0);
+}
+
+/*
+ * Update the critical error flag of this node from user space.
+ * "0" clears the flag and "1" sets it; any other number yields -EINVAL,
+ * and a kstrtoul() parse failure is returned to the caller unchanged.
+ */
+static ssize_t critical_err_store(struct device *dev,
+		struct device_attribute *dev_attr,
+		const char *buf, size_t count)
+{
+	int nid = dev->id;
+	unsigned long val;
+	int ret;
+
+	ret = kstrtoul(buf, 10, &val);
+	if (ret)
+		return ret;
+
+	if (val > 1)
+		return -EINVAL;
+
+	if (val == 0)
+		clear_node_critical_err(nid);
+	else
+		set_node_critical_err(nid);
+
+	return count;
+}
+static DEVICE_ATTR_RW(critical_err);
+
 void numa_remote_register_node(struct node *node)
 {
-	if (numa_remote_enabled)
-		device_create_file(&node->dev, &dev_attr_remote);
+	if (!numa_remote_enabled)
+		return;
+
+	device_create_file(&node->dev, &dev_attr_remote);
+	/* Expose critical_err on remote nodes only. */
+	if (IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) &&
+	    numa_is_remote_node(node->dev.id))
+		device_create_file(&node->dev, &dev_attr_critical_err);
 }
 
 void numa_remote_unregister_node(struct node *node)
 {
-	if (numa_remote_enabled)
-		device_remove_file(&node->dev, &dev_attr_remote);
+	if (!numa_remote_enabled)
+		return;
+
+	device_remove_file(&node->dev, &dev_attr_remote);
+	/* Same condition as in numa_remote_register_node(). */
+	if (IS_ENABLED(CONFIG_ACPI_APEI_RAS_CRITICAL) &&
+	    numa_is_remote_node(node->dev.id))
+		device_remove_file(&node->dev, &dev_attr_critical_err);
 }
 
 void numa_remote_report_meminfo(struct seq_file *m)
-- 
2.43.0