Some use-cases, such as system management, require the ability to generate a non-maskable event to the OS to request the OS kernel to perform a diagnostic dump and reset the system. The Arm Generic Diagnostic Dump and Reset device enables a maintainer to request the OS to perform a diagnostic dump and reset a system via an SDEI event or an interrupt. This series implements the SDEI path and discards the interrupted context before proceeding to the crash kernel.
D Scott Phillips (1): arm64: sdei: abort running SDEI handlers during crash
Ilkka Koskinen (4): ACPI: tables: Add AGDI to the list of known table signatures ACPI: AGDI: Add driver for Arm Generic Diagnostic Dump and Reset device ACPICA: iASL: Add suppport for AGDI table ACPI: AGDI: Fix missing prototype warning for acpi_agdi_init()
Jia He (3): EDAC/ghes: Add a notifier for reporting memory errors EDAC/ghes: Prepare to make ghes_edac a proper module EDAC/ghes: Make ghes_edac a proper module
Li Yang (1): APEI: GHES: correctly return NULL for ghes_get_devices()
Shuai Xue (1): ACPI: APEI: explicit init of HEST and GHES in apci_init()
arch/arm64/include/asm/sdei.h | 6 ++ arch/arm64/kernel/entry.S | 27 +++++++- arch/arm64/kernel/sdei.c | 3 + arch/arm64/kernel/smp.c | 8 +-- drivers/acpi/apei/ghes.c | 85 ++++++++++++++++++++---- drivers/acpi/arm64/Kconfig | 10 +++ drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/agdi.c | 117 ++++++++++++++++++++++++++++++++++ drivers/acpi/bus.c | 4 ++ drivers/acpi/pci_root.c | 3 - drivers/acpi/tables.c | 2 +- drivers/edac/Kconfig | 4 +- drivers/edac/ghes_edac.c | 90 ++++++++++++++++---------- drivers/firmware/Kconfig | 1 + drivers/firmware/arm_sdei.c | 32 ++++++---- include/acpi/actbl2.h | 20 ++++++ include/acpi/apei.h | 4 +- include/acpi/ghes.h | 34 +++------- include/linux/acpi_agdi.h | 13 ++++ include/linux/arm_sdei.h | 4 ++ 20 files changed, 371 insertions(+), 97 deletions(-) create mode 100644 drivers/acpi/arm64/agdi.c create mode 100644 include/linux/acpi_agdi.h
From: Shuai Xue xueshuai@linux.alibaba.com
mainline inclusion from mainline-v5.18-rc1 commit dc4e8c07e9e2f69387579c49caca26ba239f7270 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Since commit e147133a42cb ("ACPI / APEI: Make hest.c manage the estatus memory pool") was merged, ghes_init() relies on acpi_hest_init() to manage the estatus memory pool. On the other hand, ghes_init() relies on sdei_init() to detect the SDEI version and (un)register events. The dependencies are as follows:
ghes_init() => acpi_hest_init() => acpi_bus_init() => acpi_init() ghes_init() => sdei_init()
HEST is not PCI-specific and initcall ordering is implicit and not well-defined within a level.
Based on the above, remove acpi_hest_init() from acpi_pci_root_init() and convert ghes_init() and sdei_init() from initcalls to explicit calls in the following order:
acpi_hest_init() ghes_init() sdei_init()
Signed-off-by: Shuai Xue xueshuai@linux.alibaba.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/apei/ghes.c | 19 ++++++++----------- drivers/acpi/bus.c | 2 ++ drivers/acpi/pci_root.c | 3 --- drivers/firmware/Kconfig | 1 + drivers/firmware/arm_sdei.c | 13 ++----------- include/acpi/apei.h | 4 +++- include/linux/arm_sdei.h | 2 ++ 7 files changed, 18 insertions(+), 26 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 650a8feda..6535b444a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1473,33 +1473,35 @@ static struct platform_driver ghes_platform_driver = { .remove = ghes_remove, };
-static int __init ghes_init(void) +void __init ghes_init(void) { int rc;
+ sdei_init(); + if (acpi_disabled) - return -ENODEV; + return;
switch (hest_disable) { case HEST_NOT_FOUND: - return -ENODEV; + return; case HEST_DISABLED: pr_info(GHES_PFX "HEST is not enabled!\n"); - return -EINVAL; + return; default: break; }
if (ghes_disable) { pr_info(GHES_PFX "GHES is not enabled!\n"); - return -EINVAL; + return; }
ghes_nmi_init_cxt();
rc = platform_driver_register(&ghes_platform_driver); if (rc) - goto err; + return;
rc = apei_osc_setup(); if (rc == 0 && osc_sb_apei_support_acked) @@ -1510,9 +1512,4 @@ static int __init ghes_init(void) pr_info(GHES_PFX "APEI firmware first mode is enabled by APEI bit.\n"); else pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); - - return 0; -err: - return rc; } -device_initcall(ghes_init); diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 50a65ff16..d8086d49c 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1286,6 +1286,8 @@ static int __init acpi_init(void)
pci_mmcfg_late_init(); acpi_iort_init(); + acpi_hest_init(); + ghes_init(); acpi_scan_init(); acpi_ec_init(); acpi_debugfs_init(); diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index c12b5fb3e..d972ea057 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -20,8 +20,6 @@ #include <linux/slab.h> #include <linux/dmi.h> #include <linux/platform_data/x86/apple.h> -#include <acpi/apei.h> /* for acpi_hest_init() */ - #include "internal.h"
#define ACPI_PCI_ROOT_CLASS "pci_bridge" @@ -950,7 +948,6 @@ struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root,
void __init acpi_pci_root_init(void) { - acpi_hest_init(); if (acpi_pci_disabled) return;
diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index a34d4bfb2..a3996a6fc 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -72,6 +72,7 @@ config ARM_SCPI_POWER_DOMAIN config ARM_SDE_INTERFACE bool "ARM Software Delegated Exception Interface (SDEI)" depends on ARM64 + depends on ACPI_APEI_GHES help The Software Delegated Exception Interface (SDEI) is an ARM standard for registering callbacks from the platform firmware diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 8e9d565c2..0459e1d44 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1081,14 +1081,14 @@ static bool __init sdei_present_acpi(void) return true; }
-static int __init sdei_init(void) +void __init sdei_init(void) { struct platform_device *pdev; int ret;
ret = platform_driver_register(&sdei_driver); if (ret || !sdei_present_acpi()) - return ret; + return;
pdev = platform_device_register_simple(sdei_driver.driver.name, 0, NULL, 0); @@ -1098,17 +1098,8 @@ static int __init sdei_init(void) pr_info("Failed to register ACPI:SDEI platform device %d\n", ret); } - - return ret; }
-/* - * On an ACPI system SDEI needs to be ready before HEST:GHES tries to register - * its events. ACPI is initialised from a subsys_initcall(), GHES is initialised - * by device_initcall(). We want to be called in the middle. - */ -subsys_initcall_sync(sdei_init); - int sdei_event_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { diff --git a/include/acpi/apei.h b/include/acpi/apei.h index 680f80960..a6ac2e8b7 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -27,14 +27,16 @@ extern int hest_disable; extern int erst_disable; #ifdef CONFIG_ACPI_APEI_GHES extern bool ghes_disable; +void __init ghes_init(void); #else #define ghes_disable 1 +static inline void ghes_init(void) { } #endif
#ifdef CONFIG_ACPI_APEI void __init acpi_hest_init(void); #else -static inline void acpi_hest_init(void) { return; } +static inline void acpi_hest_init(void) { } #endif
typedef int (*apei_hest_func_t)(struct acpi_hest_header *hest_hdr, void *data); diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index f3027342c..b1233196c 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -51,9 +51,11 @@ int sdei_unregister_ghes(struct ghes *ghes); /* For use by arch code when CPU hotplug notifiers are not appropriate. */ int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); +void __init sdei_init(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } +static inline void sdei_init(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */
From: Jia He justin.he@arm.com
mainline inclusion from mainline-v6.2-rc1 commit 8e40612f6146da1333e9bb5cfd9af7511c063d93 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
memory errors
In order to make ghes_edac a proper module and disentangle it from the GHES facilities, add a notifier for reporting memory errors. Use an atomic notifier because call sites like ghes_proc_in_irq() run in interrupt context.
[ bp: Massage commit message. ]
Suggested-by: Borislav Petkov bp@alien8.de Signed-off-by: Jia He justin.he@arm.com Signed-off-by: Borislav Petkov bp@suse.de Link: https://lore.kernel.org/r/20221010023559.69655-3-justin.he@arm.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/apei/ghes.c | 16 +++++++++++++++- drivers/edac/ghes_edac.c | 19 +++++++++++++++++-- include/acpi/ghes.h | 10 +++------- 3 files changed, 35 insertions(+), 10 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 6535b444a..7d39819d2 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -94,6 +94,8 @@ #define FIX_APEI_GHES_SDEI_CRITICAL __end_of_fixed_addresses #endif
+static ATOMIC_NOTIFIER_HEAD(ghes_report_chain); + static inline bool is_hest_type_generic_v2(struct ghes *ghes) { return ghes->generic->header.type == ACPI_HEST_TYPE_GENERIC_ERROR_V2; @@ -649,7 +651,7 @@ static bool ghes_do_proc(struct ghes *ghes, if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) { struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
- ghes_edac_report_mem_error(sev, mem_err); + atomic_notifier_call_chain(&ghes_report_chain, sev, mem_err);
arch_apei_report_mem_error(sev, mem_err); queued = ghes_handle_memory_failure(gdata, sev); @@ -1513,3 +1515,15 @@ void __init ghes_init(void) else pr_info(GHES_PFX "Failed to enable APEI firmware first mode.\n"); } + +void ghes_register_report_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_register(&ghes_report_chain, nb); +} +EXPORT_SYMBOL_GPL(ghes_register_report_chain); + +void ghes_unregister_report_chain(struct notifier_block *nb) +{ + atomic_notifier_chain_unregister(&ghes_report_chain, nb); +} +EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index df5897c90..31e1dc470 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -14,6 +14,7 @@ #include <linux/dmi.h> #include "edac_module.h" #include <ras/ras_event.h> +#include <linux/notifier.h>
struct ghes_pvt { struct mem_ctl_info *mci; @@ -240,10 +241,13 @@ static void ghes_scan_system(void) system_scanned = true; }
-void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) +static int ghes_edac_report_mem_error(struct notifier_block *nb, + unsigned long val, void *data) { + struct cper_sec_mem_err *mem_err = (struct cper_sec_mem_err *)data; struct edac_raw_error_desc *e; struct mem_ctl_info *mci; + unsigned long sev = val; struct ghes_pvt *pvt; unsigned long flags; char *p; @@ -254,7 +258,7 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) * know. */ if (WARN_ON_ONCE(in_nmi())) - return; + return NOTIFY_OK;
spin_lock_irqsave(&ghes_lock, flags);
@@ -500,8 +504,15 @@ void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err)
unlock: spin_unlock_irqrestore(&ghes_lock, flags); + + return NOTIFY_OK; }
+static struct notifier_block ghes_edac_mem_err_nb = { + .notifier_call = ghes_edac_report_mem_error, + .priority = 0, +}; + /* * Known systems that are safe to enable this module. */ @@ -629,6 +640,8 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) ghes_pvt = pvt; spin_unlock_irqrestore(&ghes_lock, flags);
+ ghes_register_report_chain(&ghes_edac_mem_err_nb); + /* only set on success */ refcount_set(&ghes_refcount, 1);
@@ -674,6 +687,8 @@ void ghes_edac_unregister(struct ghes *ghes) if (mci) edac_mc_free(mci);
+ ghes_unregister_report_chain(&ghes_edac_mem_err_nb); + unlock: mutex_unlock(&ghes_reg_mutex); } diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 544d92789..f96efb31a 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -76,18 +76,11 @@ int ghes_estatus_pool_init(unsigned int num_ghes); /* From drivers/edac/ghes_edac.c */
#ifdef CONFIG_EDAC_GHES -void ghes_edac_report_mem_error(int sev, struct cper_sec_mem_err *mem_err); - int ghes_edac_register(struct ghes *ghes, struct device *dev);
void ghes_edac_unregister(struct ghes *ghes);
#else -static inline void ghes_edac_report_mem_error(int sev, - struct cper_sec_mem_err *mem_err) -{ -} - static inline int ghes_edac_register(struct ghes *ghes, struct device *dev) { return -ENODEV; @@ -149,4 +142,7 @@ static inline int ghes_notify_sea(void) { return -ENOENT; } extern struct blocking_notifier_head ghes_ts_err_chain; #endif
+struct notifier_block; +extern void ghes_register_report_chain(struct notifier_block *nb); +extern void ghes_unregister_report_chain(struct notifier_block *nb); #endif /* GHES_H */
From: Jia He justin.he@arm.com
mainline inclusion from mainline-v6.2-rc1 commit 9057a3f7ac360e068ceb261938e9ae2b1a7e654c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
To make ghes_edac a proper module, prepare to decouple its dependencies from GHES.
Move the ghes_edac.force_load parameter to ghes.c in order to properly control whether ghes_edac should be force-loaded: In ghes_edac_register() it is too late to set the module flag.
Introduce a helper ghes_get_devices(), which returns the list of GHES devices which got probed when the platform-check passes on the system.
The previous force_load check is not needed in ghes_edac_unregister() since it will be checked in the module's init function of ghes_edac later.
[ bp: Massage. ]
Suggested-by: Toshi Kani toshi.kani@hpe.com Suggested-by: Borislav Petkov bp@alien8.de Signed-off-by: Jia He justin.he@arm.com Signed-off-by: Borislav Petkov bp@suse.de Link: https://lore.kernel.org/r/20221010023559.69655-4-justin.he@arm.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/apei/ghes.c | 50 ++++++++++++++++++++++++++++++++++++++++ drivers/edac/ghes_edac.c | 35 ++-------------------------- include/acpi/ghes.h | 5 ++++ 3 files changed, 57 insertions(+), 33 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7d39819d2..3202ff932 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -109,6 +109,13 @@ static inline bool is_hest_type_generic_v2(struct ghes *ghes) bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0);
+/* + * "ghes.edac_force_enable" forcibly enables ghes_edac and skips the platform + * check. + */ +static bool ghes_edac_force_enable; +module_param_named(edac_force_enable, ghes_edac_force_enable, bool, 0); + /* * All error sources notified with HED (Hardware Error Device) share a * single notifier callback, so they need to be linked and checked one @@ -120,6 +127,13 @@ module_param_named(disable, ghes_disable, bool, 0); static LIST_HEAD(ghes_hed); static DEFINE_MUTEX(ghes_list_mutex);
+/* + * A list of GHES devices which are given to the corresponding EDAC driver + * ghes_edac for further use. + */ +static LIST_HEAD(ghes_devs); +static DEFINE_MUTEX(ghes_devs_mutex); + #ifdef CONFIG_ACPI_APEI_GHES_TS_CORE BLOCKING_NOTIFIER_HEAD(ghes_ts_err_chain); EXPORT_SYMBOL(ghes_ts_err_chain); @@ -1396,6 +1410,12 @@ static int ghes_probe(struct platform_device *ghes_dev)
ghes_edac_register(ghes, &ghes_dev->dev);
+ ghes->dev = &ghes_dev->dev; + + mutex_lock(&ghes_devs_mutex); + list_add_tail(&ghes->elist, &ghes_devs); + mutex_unlock(&ghes_devs_mutex); + /* Handle any pending errors right away */ spin_lock_irqsave(&ghes_notify_lock_irq, flags); ghes_proc(ghes); @@ -1460,6 +1480,10 @@ static int ghes_remove(struct platform_device *ghes_dev)
ghes_edac_unregister(ghes);
+ mutex_lock(&ghes_devs_mutex); + list_del(&ghes->elist); + mutex_unlock(&ghes_devs_mutex); + kfree(ghes);
platform_set_drvdata(ghes_dev, NULL); @@ -1527,3 +1551,29 @@ void ghes_unregister_report_chain(struct notifier_block *nb) atomic_notifier_chain_unregister(&ghes_report_chain, nb); } EXPORT_SYMBOL_GPL(ghes_unregister_report_chain); + +/* + * Known x86 systems that prefer GHES error reporting: + */ +static struct acpi_platform_list plat_list[] = { + {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, + { } /* End */ +}; + +struct list_head *ghes_get_devices(void) +{ + int idx = -1; + + if (IS_ENABLED(CONFIG_X86)) { + idx = acpi_match_platform_list(plat_list); + if (idx < 0) { + if (!ghes_edac_force_enable) + return NULL; + + pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); + } + } + + return &ghes_devs; +} +EXPORT_SYMBOL_GPL(ghes_get_devices); diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index 31e1dc470..e292f6415 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -52,10 +52,6 @@ static DEFINE_MUTEX(ghes_reg_mutex); */ static DEFINE_SPINLOCK(ghes_lock);
-/* "ghes_edac.force_load=1" skips the platform check */ -static bool __read_mostly force_load; -module_param(force_load, bool, 0); - static bool system_scanned;
/* Memory Device - Type 17 of SMBIOS spec */ @@ -513,14 +509,6 @@ static struct notifier_block ghes_edac_mem_err_nb = { .priority = 0, };
-/* - * Known systems that are safe to enable this module. - */ -static struct acpi_platform_list plat_list[] = { - {"HPE ", "Server ", 0, ACPI_SIG_FADT, all_versions}, - { } /* End */ -}; - int ghes_edac_register(struct ghes *ghes, struct device *dev) { bool fake = false; @@ -528,19 +516,8 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) struct ghes_pvt *pvt; struct edac_mc_layer layers[1]; unsigned long flags; - int idx = -1; int rc = 0;
- if (IS_ENABLED(CONFIG_X86)) { - /* Check if safe to enable on this system */ - idx = acpi_match_platform_list(plat_list); - if (!force_load && idx < 0) - return -ENODEV; - } else { - force_load = true; - idx = 0; - } - /* finish another registration/unregistration instance first */ mutex_lock(&ghes_reg_mutex);
@@ -584,15 +561,10 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) pr_info("This system has a very crappy BIOS: It doesn't even list the DIMMS.\n"); pr_info("Its SMBIOS info is wrong. It is doubtful that the error report would\n"); pr_info("work on such system. Use this driver with caution\n"); - } else if (idx < 0) { - pr_info("This EDAC driver relies on BIOS to enumerate memory and get error reports.\n"); - pr_info("Unfortunately, not all BIOSes reflect the memory layout correctly.\n"); - pr_info("So, the end result of using this driver varies from vendor to vendor.\n"); - pr_info("If you find incorrect reports, please contact your hardware vendor\n"); - pr_info("to correct its BIOS.\n"); - pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms); }
+ pr_info("This system has %d DIMM sockets.\n", ghes_hw.num_dimms); + if (!fake) { struct dimm_info *src, *dst; int i = 0; @@ -661,9 +633,6 @@ void ghes_edac_unregister(struct ghes *ghes) struct mem_ctl_info *mci; unsigned long flags;
- if (!force_load) - return; - mutex_lock(&ghes_reg_mutex);
system_scanned = false; diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index f96efb31a..41e9afeab 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -27,6 +27,8 @@ struct ghes { struct timer_list timer; unsigned int irq; }; + struct device *dev; + struct list_head elist; };
struct ghes_estatus_node { @@ -80,6 +82,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev);
void ghes_edac_unregister(struct ghes *ghes);
+struct list_head *ghes_get_devices(void); #else static inline int ghes_edac_register(struct ghes *ghes, struct device *dev) { @@ -89,6 +92,8 @@ static inline int ghes_edac_register(struct ghes *ghes, struct device *dev) static inline void ghes_edac_unregister(struct ghes *ghes) { } + +static inline struct list_head *ghes_get_devices(void) { return NULL; } #endif
static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata)
From: Jia He justin.he@arm.com
mainline inclusion from mainline-v6.2-rc1 commit 802e7f1dfed7cc7fb309995e0c4138f08977fdfc category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Commit
dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()")
introduced a bug that causes ghes_edac_register() to be invoked before edac_init(). Because the "edac" bus hadn't even been registered at that time, this created sysfs nodes as /devices/mc0 instead of /sys/devices/system/edac/mc/mc0 on an Ampere eMag server.
Fix this by turning ghes_edac into a proper module.
The list of GHES devices returned is not protected from being modified concurrently, but it is pretty static: it gets created only during GHES init, and the latter is not a module, so...
[ bp: Massage. ]
Fixes: dc4e8c07e9e2 ("ACPI: APEI: explicit init of HEST and GHES in apci_init()") Co-developed-by: Borislav Petkov bp@alien8.de Signed-off-by: Borislav Petkov bp@alien8.de Signed-off-by: Jia He justin.he@arm.com Signed-off-by: Borislav Petkov bp@suse.de Link: https://lore.kernel.org/r/20221010023559.69655-5-justin.he@arm.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/apei/ghes.c | 4 ---- drivers/edac/Kconfig | 4 ++-- drivers/edac/ghes_edac.c | 40 ++++++++++++++++++++++++++++++++++++++-- include/acpi/ghes.h | 21 ++------------------- 4 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 3202ff932..415c3ca59 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1408,8 +1408,6 @@ static int ghes_probe(struct platform_device *ghes_dev)
platform_set_drvdata(ghes_dev, ghes);
- ghes_edac_register(ghes, &ghes_dev->dev); - ghes->dev = &ghes_dev->dev;
mutex_lock(&ghes_devs_mutex); @@ -1478,8 +1476,6 @@ static int ghes_remove(struct platform_device *ghes_dev)
ghes_fini(ghes);
- ghes_edac_unregister(ghes); - mutex_lock(&ghes_devs_mutex); list_del(&ghes->elist); mutex_unlock(&ghes_devs_mutex); diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 7a47680d6..7366ea1d2 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -53,8 +53,8 @@ config EDAC_DECODE_MCE has been initialized.
config EDAC_GHES - bool "Output ACPI APEI/GHES BIOS detected errors via EDAC" - depends on ACPI_APEI_GHES && (EDAC=y) + tristate "Output ACPI APEI/GHES BIOS detected errors via EDAC" + depends on ACPI_APEI_GHES help Not all machines support hardware-driven error report. Some of those provide a BIOS-driven error report mechanism via ACPI, using the diff --git a/drivers/edac/ghes_edac.c b/drivers/edac/ghes_edac.c index e292f6415..47c1eea10 100644 --- a/drivers/edac/ghes_edac.c +++ b/drivers/edac/ghes_edac.c @@ -54,6 +54,8 @@ static DEFINE_SPINLOCK(ghes_lock);
static bool system_scanned;
+static struct list_head *ghes_devs; + /* Memory Device - Type 17 of SMBIOS spec */ struct memdev_dmi_entry { u8 type; @@ -509,7 +511,7 @@ static struct notifier_block ghes_edac_mem_err_nb = { .priority = 0, };
-int ghes_edac_register(struct ghes *ghes, struct device *dev) +static int ghes_edac_register(struct device *dev) { bool fake = false; struct mem_ctl_info *mci; @@ -628,7 +630,7 @@ int ghes_edac_register(struct ghes *ghes, struct device *dev) return rc; }
-void ghes_edac_unregister(struct ghes *ghes) +static void ghes_edac_unregister(struct ghes *ghes) { struct mem_ctl_info *mci; unsigned long flags; @@ -661,3 +663,37 @@ void ghes_edac_unregister(struct ghes *ghes) unlock: mutex_unlock(&ghes_reg_mutex); } + +static int __init ghes_edac_init(void) +{ + struct ghes *g, *g_tmp; + + ghes_devs = ghes_get_devices(); + if (!ghes_devs) + return -ENODEV; + + if (list_empty(ghes_devs)) { + pr_info("GHES probing device list is empty"); + return -ENODEV; + } + + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) { + ghes_edac_register(g->dev); + } + + return 0; +} +module_init(ghes_edac_init); + +static void __exit ghes_edac_exit(void) +{ + struct ghes *g, *g_tmp; + + list_for_each_entry_safe(g, g_tmp, ghes_devs, elist) { + ghes_edac_unregister(g); + } +} +module_exit(ghes_edac_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Output ACPI APEI/GHES BIOS detected errors via EDAC"); diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 41e9afeab..c2ebc230e 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -71,31 +71,14 @@ int ghes_register_vendor_record_notifier(struct notifier_block *nb); * @nb: pointer to the notifier_block structure of the vendor record handler. */ void ghes_unregister_vendor_record_notifier(struct notifier_block *nb); -#endif - -int ghes_estatus_pool_init(unsigned int num_ghes); - -/* From drivers/edac/ghes_edac.c */ - -#ifdef CONFIG_EDAC_GHES -int ghes_edac_register(struct ghes *ghes, struct device *dev); - -void ghes_edac_unregister(struct ghes *ghes);
struct list_head *ghes_get_devices(void); #else -static inline int ghes_edac_register(struct ghes *ghes, struct device *dev) -{ - return -ENODEV; -} - -static inline void ghes_edac_unregister(struct ghes *ghes) -{ -} - static inline struct list_head *ghes_get_devices(void) { return NULL; } #endif
+int ghes_estatus_pool_init(unsigned int num_ghes); + static inline int acpi_hest_get_version(struct acpi_hest_generic_data *gdata) { return gdata->revision >> 8;
From: Li Yang leoyang.li@nxp.com
mainline inclusion from mainline-v6.5-rc1 commit 9368aa1882ac7178adcd936cee5f0899dbf76dc4 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Since 315bada690e0 ("EDAC: Check for GHES preference in the chipset-specific EDAC drivers"), vendor-specific EDAC drivers will not probe correctly when CONFIG_ACPI_APEI_GHES is enabled but no GHES device is present. Make ghes_get_devices() return NULL when the GHES device list is empty to fix the problem.
Fixes: 9057a3f7ac36 ("EDAC/ghes: Prepare to make ghes_edac a proper module") Signed-off-by: Li Yang leoyang.li@nxp.com Reviewed-by: Tony Luck tony.luck@intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/apei/ghes.c | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 415c3ca59..f4ae71b8f 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -1568,6 +1568,8 @@ struct list_head *ghes_get_devices(void)
pr_warn_once("Force-loading ghes_edac on an unsupported platform. You're on your own!\n"); } + } else if (list_empty(&ghes_devs)) { + return NULL; }
return &ghes_devs;
From: Ilkka Koskinen ilkka@os.amperecomputing.com
mainline inclusion from mainline-v5.18-rc1 commit e86801b0ff1c5c6d1f78232f7e3b52c0b0631560 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Add AGDI to the list of known ACPI table signatures to allow the kernel to recognize it when upgrading tables via initrd.
Signed-off-by: Ilkka Koskinen ilkka@os.amperecomputing.com Reviewed-by: Russell King (Oracle) rmk+kernel@armlinux.org.uk Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 5943ae4f7..3635c7be6 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -509,7 +509,7 @@ static const char table_sigs[][ACPI_NAMESEG_SIZE] __initconst = { ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, - ACPI_SIG_NHLT }; + ACPI_SIG_NHLT, ACPI_SIG_AGDI };
#define ACPI_HEADER_SIZE sizeof(struct acpi_table_header)
From: Ilkka Koskinen ilkka@os.amperecomputing.com
mainline inclusion from mainline-v5.18-rc1 commit a2a591fb76e6f5461dfd04715b69c317e50c43a5 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
ACPI for Arm Components 1.1 Platform Design Document v1.1 [0] specifies Arm Generic Diagnostic Device Interface (AGDI). It allows an admin to issue diagnostic dump and reset via an SDEI event or an interrupt. This patch implements the SDEI path.
[0] https://developer.arm.com/documentation/den0093/latest/
Signed-off-by: Ilkka Koskinen ilkka@os.amperecomputing.com Reviewed-by: Russell King (Oracle) rmk+kernel@armlinux.org.uk Acked-by: Lorenzo Pieralisi lorenzo.pieralisi@arm.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/arm64/Kconfig | 10 ++++ drivers/acpi/arm64/Makefile | 1 + drivers/acpi/arm64/agdi.c | 116 ++++++++++++++++++++++++++++++++++++ drivers/acpi/bus.c | 2 + include/linux/acpi_agdi.h | 13 ++++ 5 files changed, 142 insertions(+) create mode 100644 drivers/acpi/arm64/agdi.c create mode 100644 include/linux/acpi_agdi.h
diff --git a/drivers/acpi/arm64/Kconfig b/drivers/acpi/arm64/Kconfig index 664d2ca05..d6f98f89c 100644 --- a/drivers/acpi/arm64/Kconfig +++ b/drivers/acpi/arm64/Kconfig @@ -11,3 +11,13 @@ config ACPI_GTDT
config ACPI_MPAM bool + +config ACPI_AGDI + bool "Arm Generic Diagnostic Dump and Reset Device Interface" + depends on ARM_SDE_INTERFACE + help + Arm Generic Diagnostic Dump and Reset Device Interface (AGDI) is + a standard that enables issuing a non-maskable diagnostic dump and + reset command. + + If set, the kernel parses AGDI table and listens for the command. diff --git a/drivers/acpi/arm64/Makefile b/drivers/acpi/arm64/Makefile index 2bae08207..7d3c52952 100644 --- a/drivers/acpi/arm64/Makefile +++ b/drivers/acpi/arm64/Makefile @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_ACPI_AGDI) += agdi.o obj-$(CONFIG_ACPI_IORT) += iort.o obj-$(CONFIG_ACPI_GTDT) += gtdt.o obj-$(CONFIG_ACPI_MPAM) += mpam.o mpam_v2.o diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c new file mode 100644 index 000000000..4df337d54 --- /dev/null +++ b/drivers/acpi/arm64/agdi.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file implements handling of + * Arm Generic Diagnostic Dump and Reset Interface table (AGDI) + * + * Copyright (c) 2022, Ampere Computing LLC + */ + +#define pr_fmt(fmt) "ACPI: AGDI: " fmt + +#include <linux/acpi.h> +#include <linux/arm_sdei.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/platform_device.h> + +struct agdi_data { + int sdei_event; +}; + +static int agdi_sdei_handler(u32 sdei_event, struct pt_regs *regs, void *arg) +{ + nmi_panic(regs, "Arm Generic Diagnostic Dump and Reset SDEI event issued"); + return 0; +} + +static int agdi_sdei_probe(struct platform_device *pdev, + struct agdi_data *adata) +{ + int err; + + err = sdei_event_register(adata->sdei_event, agdi_sdei_handler, pdev); + if (err) { + dev_err(&pdev->dev, "Failed to register for SDEI event %d", + adata->sdei_event); + return err; + } + + err = sdei_event_enable(adata->sdei_event); + if (err) { + sdei_event_unregister(adata->sdei_event); + dev_err(&pdev->dev, "Failed to enable event %d\n", + adata->sdei_event); 
+ return err; + } + + return 0; +} + +static int agdi_probe(struct platform_device *pdev) +{ + struct agdi_data *adata = dev_get_platdata(&pdev->dev); + + if (!adata) + return -EINVAL; + + return agdi_sdei_probe(pdev, adata); +} + +static int agdi_remove(struct platform_device *pdev) +{ + struct agdi_data *adata = dev_get_platdata(&pdev->dev); + int err, i; + + err = sdei_event_disable(adata->sdei_event); + if (err) + return err; + + for (i = 0; i < 3; i++) { + err = sdei_event_unregister(adata->sdei_event); + if (err != -EINPROGRESS) + break; + + schedule(); + } + + return err; +} + +static struct platform_driver agdi_driver = { + .driver = { + .name = "agdi", + }, + .probe = agdi_probe, + .remove = agdi_remove, +}; + +void __init acpi_agdi_init(void) +{ + struct acpi_table_agdi *agdi_table; + struct agdi_data pdata; + struct platform_device *pdev; + acpi_status status; + + status = acpi_get_table(ACPI_SIG_AGDI, 0, + (struct acpi_table_header **) &agdi_table); + if (ACPI_FAILURE(status)) + return; + + if (agdi_table->flags & ACPI_AGDI_SIGNALING_MODE) { + pr_warn("Interrupt signaling is not supported"); + goto err_put_table; + } + + pdata.sdei_event = agdi_table->sdei_event; + + pdev = platform_device_register_data(NULL, "agdi", 0, &pdata, sizeof(pdata)); + if (IS_ERR(pdev)) + goto err_put_table; + + if (platform_driver_register(&agdi_driver)) + platform_device_unregister(pdev); + +err_put_table: + acpi_put_table((struct acpi_table_header *)agdi_table); +} diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index d8086d49c..bb213ade9 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -24,6 +24,7 @@ #include <asm/mpspec.h> #include <linux/dmi.h> #endif +#include <linux/acpi_agdi.h> #include <linux/acpi_iort.h> #include <linux/pci.h> #include <acpi/apei.h> @@ -1295,6 +1296,7 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); + acpi_agdi_init(); return 0; }
diff --git a/include/linux/acpi_agdi.h b/include/linux/acpi_agdi.h new file mode 100644 index 000000000..f477f0b45 --- /dev/null +++ b/include/linux/acpi_agdi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ACPI_AGDI_H__ +#define __ACPI_AGDI_H__ + +#include <linux/acpi.h> + +#ifdef CONFIG_ACPI_AGDI +void __init acpi_agdi_init(void); +#else +static inline void acpi_agdi_init(void) {} +#endif +#endif /* __ACPI_AGDI_H__ */
From: Ilkka Koskinen ilkka@os.amperecomputing.com
mainline inclusion from mainline-v5.17-rc1 commit 5579649e7eb756a4e3d5784b6958374e5bfc41de category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
ACPICA commit cf36a6d658ca5aa8c329c2edfc3322c095ffd844
Add support for Arm Generic Diagnostic Dump and Reset Interface, which is described by "ACPI for Arm Components 1.1 Platform Design Document" ARM DEN0093.
Add the necessary types in the ACPICA header files and support for compiling and decompiling the table.
Link: https://github.com/acpica/acpica/commit/cf36a6d6 Signed-off-by: Ilkka Koskinen ilkka@os.amperecomputing.com Signed-off-by: Bob Moore robert.moore@intel.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Qian Zou zouqian4@huawei.com --- include/acpi/actbl2.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 2fe351437..aa4568c4f 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -24,6 +24,7 @@ * file. Useful because they make it more difficult to inadvertently type in * the wrong signature. */ +#define ACPI_SIG_AGDI "AGDI" /* Arm Generic Diagnostic Dump and Reset Device Interface */ #define ACPI_SIG_IORT "IORT" /* IO Remapping Table */ #define ACPI_SIG_IVRS "IVRS" /* I/O Virtualization Reporting Structure */ #define ACPI_SIG_LPIT "LPIT" /* Low Power Idle Table */ @@ -65,6 +66,25 @@ * See http://stackoverflow.com/a/1053662/41661 */
+/******************************************************************************* + * AGDI - Arm Generic Diagnostic Dump and Reset Device Interface + * + * Conforms to "ACPI for Arm Components 1.1, Platform Design Document" + * ARM DEN0093 v1.1 + * + ******************************************************************************/ +struct acpi_table_agdi { + struct acpi_table_header header; /* Common ACPI table header */ + u8 flags; + u8 reserved[3]; + u32 sdei_event; + u32 gsiv; +}; + +/* Mask for Flags field above */ + +#define ACPI_AGDI_SIGNALING_MODE (1) + /******************************************************************************* * * IORT - IO Remapping Table
From: Ilkka Koskinen ilkka@os.amperecomputing.com
mainline inclusion from mainline-v5.19-rc1 commit 988d7a14408db4183202f16bb02b8149b9da3727 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
When building with W=1, we get the following warning:
drivers/acpi/arm64/agdi.c:88:13: warning: no previous prototype for ‘acpi_agdi_init’ [-Wmissing-prototypes] void __init acpi_agdi_init(void)
Include the AGDI driver's header file to pull in the prototype definition for acpi_agdi_init() to get rid of the compiler warning.
Fixes: a2a591fb76e6 ("ACPI: AGDI: Add driver for Arm Generic Diagnostic Dump and Reset device") Reported-by: kernel test robot lkp@intel.com Signed-off-by: Ilkka Koskinen ilkka@os.amperecomputing.com Signed-off-by: Rafael J. Wysocki rafael.j.wysocki@intel.com Signed-off-by: Qian Zou zouqian4@huawei.com --- drivers/acpi/arm64/agdi.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/acpi/arm64/agdi.c b/drivers/acpi/arm64/agdi.c index 4df337d54..cf31abd0e 100644 --- a/drivers/acpi/arm64/agdi.c +++ b/drivers/acpi/arm64/agdi.c @@ -9,6 +9,7 @@ #define pr_fmt(fmt) "ACPI: AGDI: " fmt
#include <linux/acpi.h> +#include <linux/acpi_agdi.h> #include <linux/arm_sdei.h> #include <linux/io.h> #include <linux/kernel.h>
From: D Scott Phillips scott@os.amperecomputing.com
mainline inclusion from mainline-v6.6-rc1 commit 5cd474e57368f0957c343bb21e309cf82826b1ef category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I8DQUX CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Interrupts are blocked in SDEI context, per the SDEI spec: "The client interrupts cannot preempt the event handler." If we crashed in the SDEI handler-running context (as with ACPI's AGDI) then we need to clean up the SDEI state before proceeding to the crash kernel so that the crash kernel can have working interrupts.
Track the active SDEI handler per-cpu so that we can COMPLETE_AND_RESUME the handler, discarding the interrupted context.
Fixes: f5df26961853 ("arm64: kernel: Add arch-specific SDEI entry code and CPU masking") Signed-off-by: D Scott Phillips scott@os.amperecomputing.com Cc: stable@vger.kernel.org Reviewed-by: James Morse james.morse@arm.com Tested-by: Mihai Carabas mihai.carabas@oracle.com Link: https://lore.kernel.org/r/20230627002939.2758-1-scott@os.amperecomputing.com Signed-off-by: Will Deacon will@kernel.org Signed-off-by: Qian Zou zouqian4@huawei.com --- arch/arm64/include/asm/sdei.h | 6 ++++++ arch/arm64/kernel/entry.S | 27 +++++++++++++++++++++++++-- arch/arm64/kernel/sdei.c | 3 +++ arch/arm64/kernel/smp.c | 8 ++++---- drivers/firmware/arm_sdei.c | 19 +++++++++++++++++++ include/linux/arm_sdei.h | 2 ++ 6 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 63e0b92a5..5882c0e29 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -17,6 +17,9 @@
#include <asm/virt.h>
+DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + extern unsigned long sdei_exit_mode;
/* Software Delegated Exception entry point from firmware*/ @@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num, unsigned long pc, unsigned long pstate);
+/* Abort a running handler. Context is discarded. */ +void __sdei_handler_abort(void); + /* * The above entry point does the minimum to call C code. This function does * anything else, before calling the driver. diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 64145bfab..34c58ad95 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1134,9 +1134,13 @@ SYM_CODE_START(__sdei_asm_handler)
mov x19, x1
-#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) + /* Store the registered-event for crash_smp_send_stop() */ ldrb w4, [x19, #SDEI_EVENT_PRIORITY] -#endif + cbnz w4, 1f + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 + b 2f +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 +2: str x19, [x5]
#ifdef CONFIG_VMAP_STACK /* @@ -1201,6 +1205,14 @@ SYM_CODE_START(__sdei_asm_handler)
ldr_l x2, sdei_exit_mode
+ /* Clear the registered-event seen by crash_smp_send_stop() */ + ldrb w3, [x4, #SDEI_EVENT_PRIORITY] + cbnz w3, 1f + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 + b 2f +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 +2: str xzr, [x5] + alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 sdei_handler_exit exit_mode=x2 alternative_else_nop_endif @@ -1211,4 +1223,15 @@ alternative_else_nop_endif #endif SYM_CODE_END(__sdei_asm_handler) NOKPROBE(__sdei_asm_handler) + +SYM_CODE_START(__sdei_handler_abort) + mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME + adr x1, 1f + ldr_l x2, sdei_exit_mode + sdei_handler_exit exit_mode=x2 + // exit the handler and jump to the next instruction. + // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx. +1: ret +SYM_CODE_END(__sdei_handler_abort) +NOKPROBE(__sdei_handler_abort) #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 2132bd953..6d51d873b 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -38,6 +38,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_stack_normal_ptr); DEFINE_PER_CPU(unsigned long *, sdei_stack_critical_ptr); #endif
+DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) { unsigned long *p; diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 08328ec02..23707812f 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1138,10 +1138,8 @@ void crash_smp_send_stop(void) * If this cpu is the only one alive at this point in time, online or * not, there are no stop messages to be sent around, so just back out. */ - if (num_other_online_cpus() == 0) { - sdei_mask_local_cpu(); - return; - } + if (num_other_online_cpus() == 0) + goto skip_ipi;
cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); @@ -1160,7 +1158,9 @@ void crash_smp_send_stop(void) pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(&mask));
+skip_ipi: sdei_mask_local_cpu(); + sdei_handler_abort(); }
bool smp_crash_stop_failed(void) diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 0459e1d44..0f0629a94 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -1114,3 +1114,22 @@ int sdei_event_handler(struct pt_regs *regs, return err; } NOKPROBE_SYMBOL(sdei_event_handler); + +void sdei_handler_abort(void) +{ + /* + * If the crash happened in an SDEI event handler then we need to + * finish the handler with the firmware so that we can have working + * interrupts in the crash kernel. + */ + if (__this_cpu_read(sdei_active_critical_event)) { + pr_warn("still in SDEI critical event context, attempting to finish handler.\n"); + __sdei_handler_abort(); + __this_cpu_write(sdei_active_critical_event, NULL); + } + if (__this_cpu_read(sdei_active_normal_event)) { + pr_warn("still in SDEI normal event context, attempting to finish handler.\n"); + __sdei_handler_abort(); + __this_cpu_write(sdei_active_normal_event, NULL); + } +} diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index b1233196c..28e247dd5 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -52,10 +52,12 @@ int sdei_unregister_ghes(struct ghes *ghes); int sdei_mask_local_cpu(void); int sdei_unmask_local_cpu(void); void __init sdei_init(void); +void sdei_handler_abort(void); #else static inline int sdei_mask_local_cpu(void) { return 0; } static inline int sdei_unmask_local_cpu(void) { return 0; } static inline void sdei_init(void) { } +static inline void sdei_handler_abort(void) { } #endif /* CONFIG_ARM_SDE_INTERFACE */
反馈: 您发送到kernel@openeuler.org的补丁/补丁集,已成功转换为PR! PR链接地址: https://gitee.com/openeuler/kernel/pulls/2795 邮件列表地址:https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...
FeedBack: The patch(es) which you have sent to the kernel@openeuler.org mailing list have been converted to a pull request successfully! Pull request link: https://gitee.com/openeuler/kernel/pulls/2795 Mailing list address: https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/M...