hulk inclusion category: feature bugzilla: https://atomgit.com/openeuler/kernel/issues/8918 ------------------------------------------ HiSilicon platforms store SEI (SError Interrupt) error information in vendor-specific RAS registers. Accessing these registers incurs overhead and may have side effects on some platforms. In emergency scenarios where ghes_hisi_handle_sei exhibits defects, a runtime switch is needed to provide an escape mechanism without requiring a system reboot. Add a runtime switch via /proc/sys/kernel/ghes_hisi_sei to allow dynamic enable/disable of vendor SEI handling: Design: ------- 1. Runtime Control Mechanism - Feature is enabled by default on supported HiSilicon platforms - Sysctl interface allows immediate toggle without kernel rebuild - Early exit check in SEI handler: when disabled, returns -ENOENT immediately without accessing vendor RAS registers 2. Emergency Escape Path - When ghes_hisi_handle_sei encounters issues, operators can immediately disable the feature via: echo 0 > /proc/sys/kernel/ghes_hisi_sei - This bypasses all vendor-specific SEI processing, returning control to the standard error handling path - No memory allocation or resource cleanup required on toggle 3. 
Resource Lifecycle - Memory pool (hisi_sei_pool) allocated at init time - Simple lifecycle: allocated once, never freed during runtime - sysctl toggle only affects the hisi_sei_enabled flag - No locking needed: the SEI handler only reads the flag (in NMI context), and the flag is a single int updated via sysctl Benefits: --------- - Emergency recovery: Quickly disable problematic SEI handling - Performance testing: Measure overhead without reboot - Platform flexibility: Adapt to different firmware behaviors - Zero downtime: Enable/disable without system restart Usage: ------ # Disable vendor SEI handling (emergency escape) echo 0 > /proc/sys/kernel/ghes_hisi_sei # Re-enable vendor SEI handling echo 1 > /proc/sys/kernel/ghes_hisi_sei Signed-off-by: Wupeng Ma <mawupeng1@huawei.com> --- Documentation/admin-guide/sysctl/kernel.rst | 27 ++++++++++++++++++ drivers/acpi/apei/ghes-vendor-info.c | 31 ++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 3b8953c49183..86b2100d46f4 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -555,6 +555,33 @@ synchronous exception from memory copy. safely fail when accessing to hwpoison. = =================================================================== + +ghes_hisi_sei (arm64 only) +========================== + +This parameter controls whether HiSilicon vendor-specific SEI (SError +Interrupt) handling is enabled. On HiSilicon platforms, SEI error information +is stored in vendor-specific RAS registers. Accessing these registers incurs +overhead and may have side effects on some platforms. + +This runtime switch allows dynamic enable/disable of vendor SEI handling +without reboot. + += =============================================================== +0 Disable HiSilicon vendor SEI handling (reduces overhead). +1 Enable HiSilicon vendor SEI handling (default on supported platforms). 
+= =============================================================== + +Usage:: + + # Disable vendor SEI handling + echo 0 > /proc/sys/kernel/ghes_hisi_sei + + # Re-enable vendor SEI handling + echo 1 > /proc/sys/kernel/ghes_hisi_sei + +See Documentation/admin-guide/ras/arm64.rst for more information. + modprobe ======== diff --git a/drivers/acpi/apei/ghes-vendor-info.c b/drivers/acpi/apei/ghes-vendor-info.c index facfb5d8696f..8d09cdef9d9e 100644 --- a/drivers/acpi/apei/ghes-vendor-info.c +++ b/drivers/acpi/apei/ghes-vendor-info.c @@ -11,6 +11,7 @@ #include <linux/signal.h> #include <linux/task_work.h> #include <linux/genalloc.h> +#include <linux/sysctl.h> #include <acpi/ghes.h> #include <acpi/apei.h> @@ -22,6 +23,7 @@ #define HISI_OEM BIT(0) static int vender_oem __ro_after_init; +static int hisi_sei_enabled; #ifdef CONFIG_ARCH_HISI @@ -74,8 +76,22 @@ struct sei_task_work { static struct gen_pool *hisi_sei_pool; +static struct ctl_table hisi_sei_sysctl_table[] = { + { + .procname = "ghes_hisi_sei", + .data = &hisi_sei_enabled, + .maxlen = sizeof(hisi_sei_enabled), + .mode = 0600, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, + {} +}; + static int ghes_hisi_sei_init(void) { + struct ctl_table_header *sysctl_header; unsigned long addr, len = PAGE_SIZE; int rc; @@ -85,9 +101,15 @@ static int ghes_hisi_sei_init(void) if (!arm64_sync_sei_enabled()) return 0; + sysctl_header = register_sysctl("kernel", hisi_sei_sysctl_table); + if (!sysctl_header) { + pr_warn("failed to register sysctl\n"); + return -EINVAL; + } + hisi_sei_pool = gen_pool_create(ilog2(sizeof(struct sei_task_work)), -1); if (!hisi_sei_pool) - return -ENOMEM; + goto err_sysctl; addr = (unsigned long)kzalloc(PAGE_ALIGN(len), GFP_KERNEL); if (!addr) @@ -97,6 +119,7 @@ static int ghes_hisi_sei_init(void) if (rc) goto err_pool_add; + hisi_sei_enabled = 1; return 0; err_pool_add: @@ -106,6 +129,9 @@ static int ghes_hisi_sei_init(void) 
gen_pool_destroy(hisi_sei_pool); hisi_sei_pool = NULL; +err_sysctl: + unregister_sysctl_table(sysctl_header); + pr_warn("%s init failed\n", __func__); return -ENOMEM; } @@ -194,6 +220,9 @@ static int ghes_hisi_handle_sei(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_ARM64_SYNC_SEI)) return -ENOENT; + if (!hisi_sei_enabled) + return -ENOENT; + if (!current->mm) return -ENOENT; -- 2.43.0