From: Shengwei Luo luoshengwei@huawei.com
kunpeng inclusion category: feature bugzilla: https://bugzilla.openeuler.org/show_bug.cgi?id=45 CVE: NA
The ARM processor error section contains several ARM processor error information structures, several ARM processor context information structures and several vendor-specific error information structures. Report all of this information to userspace via the perf interface.
Shengwei Luo: backport for openEuler 20.xx kernel.
V2: Report severity info to userspace. V1: Fix the error in the original patch and ensure all info is parsed correctly.
Original-Author: Jason Tian jason@os.amperecomputing.com Signed-off-by: Gong Chen chengong15@huawei.com Signed-off-by: Shengwei Luo luoshengwei@huawei.com Cc: Chen Wei chenwei68@huawei.com Reviewed-by: Xiongfeng Wang wangxiongfeng2@huawei.com Signed-off-by: Yang Yingliang yangyingliang@huawei.com --- arch/arm64/kernel/ras.c | 5 +++-- drivers/acpi/apei/ghes.c | 6 +++--- drivers/ras/ras.c | 40 ++++++++++++++++++++++++++++++++++-- include/acpi/ghes.h | 2 +- include/linux/ras.h | 4 ++-- include/ras/ras_event.h | 44 +++++++++++++++++++++++++++++++++++----- 6 files changed, 86 insertions(+), 15 deletions(-)
diff --git a/arch/arm64/kernel/ras.c b/arch/arm64/kernel/ras.c index 0414289b0707c..181e609e1cd4c 100644 --- a/arch/arm64/kernel/ras.c +++ b/arch/arm64/kernel/ras.c @@ -118,13 +118,14 @@ void sea_notify_process(void) } }
-void ghes_arm_process_error(struct ghes *ghes, struct cper_sec_proc_arm *err) +void ghes_arm_process_error(struct ghes *ghes, + struct cper_sec_proc_arm *err, int sec_sev) { int i; bool info_saved = false; struct cper_arm_err_info *err_info;
- log_arm_hw_error(err); + log_arm_hw_error(err, sec_sev);
if ((ghes->generic->notify.type != ACPI_HEST_NOTIFY_SEA) || (ghes->estatus->error_severity != CPER_SEV_RECOVERABLE)) diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e2704619e7015..e807e8f74d1e0 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -465,9 +465,9 @@ static void ghes_handle_aer(struct acpi_hest_generic_data *gdata) }
void __weak ghes_arm_process_error(struct ghes *ghes, - struct cper_sec_proc_arm *err) + struct cper_sec_proc_arm *err, int sec_sev) { - log_arm_hw_error(err); + log_arm_hw_error(err, sec_sev); }
static void ghes_do_proc(struct ghes *ghes, @@ -510,7 +510,7 @@ static void ghes_do_proc(struct ghes *ghes, else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) { struct cper_sec_proc_arm *err = acpi_hest_get_payload(gdata);
- ghes_arm_process_error(ghes, err); + ghes_arm_process_error(ghes, err, sec_sev); } else if (guid_equal(sec_type, &CPER_SEC_TS_CORE)) { blocking_notifier_call_chain(&ghes_ts_err_chain, 0, acpi_hest_get_payload(gdata)); diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 3f38907320dcc..9302ed7f42588 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -21,9 +21,45 @@ void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, trace_non_standard_event(sec_type, fru_id, fru_text, sev, err, len); }
-void log_arm_hw_error(struct cper_sec_proc_arm *err)
+void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev)
 {
-	trace_arm_event(err);
+	struct cper_arm_err_info *err_info;
+	struct cper_arm_ctx_info *ctx_info;
+	u8 *pei_err, *ctx_err, *ven_err_data;
+	u32 pei_len, ctx_len = 0;
+	s32 vsei_len;	/* signed so a too-small section_length is detectable */
+	int n, sz;
+
+	/* The error-info array starts right after the fixed-size header. */
+	pei_len = sizeof(struct cper_arm_err_info) * err->err_info_num;
+	pei_err = (u8 *)err + sizeof(struct cper_sec_proc_arm);
+
+	/* Walk the variable-sized context records to total their length. */
+	err_info = (struct cper_arm_err_info *)(err + 1);
+	ctx_info = (struct cper_arm_ctx_info *)(err_info + err->err_info_num);
+	ctx_err = (u8 *)ctx_info;
+	for (n = 0; n < err->context_info_num; n++) {
+		sz = sizeof(struct cper_arm_ctx_info) + ctx_info->size;
+		ctx_info = (struct cper_arm_ctx_info *)((long)ctx_info + sz);
+		ctx_len += sz;
+	}
+
+	/* Whatever remains of the section is reported as vendor data. */
+	vsei_len = err->section_length - (sizeof(struct cper_sec_proc_arm) +
+					  pei_len + ctx_len);
+	if (vsei_len < 0) {
+		pr_warn(FW_BUG
+			"section length: %d\n", err->section_length);
+		pr_warn(FW_BUG "section length is too small\n");
+		pr_warn(FW_BUG
+			"firmware-generated error record is incorrect\n");
+		vsei_len = 0;
+	}
+	/* ctx_info now points just past the last context record. */
+	ven_err_data = (u8 *)ctx_info;
+
+	trace_arm_event(err, pei_err, pei_len, ctx_err, ctx_len,
+			ven_err_data, vsei_len, sev);
 }
static int __init ras_init(void) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 4c536a2638df7..9aaeaaa3d1a7f 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -124,7 +124,7 @@ int ghes_notify_sea(void); static inline int ghes_notify_sea(void) { return -ENOENT; } #endif extern void ghes_arm_process_error(struct ghes *ghes, - struct cper_sec_proc_arm *err); + struct cper_sec_proc_arm *err, int sec_sev);
struct ghes_mem_err { int notify_type; diff --git a/include/linux/ras.h b/include/linux/ras.h index 7c3debb47c87a..3431b4a5fa42d 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -29,7 +29,7 @@ static inline int cec_add_elem(u64 pfn) { return -ENODEV; } void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id, const char *fru_text, const u8 sev, const u8 *err, const u32 len); -void log_arm_hw_error(struct cper_sec_proc_arm *err); +void log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev); #else static inline void log_non_standard_event(const guid_t *sec_type, @@ -37,7 +37,7 @@ log_non_standard_event(const guid_t *sec_type, const u8 sev, const u8 *err, const u32 len) { return; } static inline void -log_arm_hw_error(struct cper_sec_proc_arm *err) { return; } +log_arm_hw_error(struct cper_sec_proc_arm *err, const u8 sev) { return; } #endif
#endif /* __RAS_H__ */ diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index a0794632fd01a..7c8cb123ba32d 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -168,11 +168,23 @@ TRACE_EVENT(mc_event, * This event is generated when hardware detects an ARM processor error * has occurred. UEFI 2.6 spec section N.2.4.4. */ +#define APEIL "ARM Processor Err Info data len" +#define APEID "ARM Processor Err Info raw data" +#define APECIL "ARM Processor Err Context Info data len" +#define APECID "ARM Processor Err Context Info raw data" +#define VSEIL "Vendor Specific Err Info data len" +#define VSEID "Vendor Specific Err Info raw data" TRACE_EVENT(arm_event,
- TP_PROTO(const struct cper_sec_proc_arm *proc), + TP_PROTO(const struct cper_sec_proc_arm *proc, const u8 *pei_err, + const u32 pei_len, + const u8 *ctx_err, + const u32 ctx_len, + const u8 *oem, + const u32 oem_len, + u8 sev),
- TP_ARGS(proc), + TP_ARGS(proc, pei_err, pei_len, ctx_err, ctx_len, oem, oem_len, sev),
TP_STRUCT__entry( __field(u64, mpidr) @@ -180,6 +192,13 @@ TRACE_EVENT(arm_event, __field(u32, running_state) __field(u32, psci_state) __field(u8, affinity) + __field(u32, pei_len) + __dynamic_array(u8, buf, pei_len) + __field(u32, ctx_len) + __dynamic_array(u8, buf1, ctx_len) + __field(u32, oem_len) + __dynamic_array(u8, buf2, oem_len) + __field(u8, sev) ),
TP_fast_assign( @@ -199,12 +218,27 @@ TRACE_EVENT(arm_event, __entry->running_state = ~0; __entry->psci_state = ~0; } + __entry->pei_len = pei_len; + memcpy(__get_dynamic_array(buf), pei_err, pei_len); + __entry->ctx_len = ctx_len; + memcpy(__get_dynamic_array(buf1), ctx_err, ctx_len); + __entry->oem_len = oem_len; + memcpy(__get_dynamic_array(buf2), oem, oem_len); + __entry->sev = sev; ),
- TP_printk("affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " - "running state: %d; PSCI state: %d", + TP_printk("error: %d; affinity level: %d; MPIDR: %016llx; MIDR: %016llx; " + "running state: %d; PSCI state: %d; " + "%s: %d; %s: %s; %s: %d; %s: %s; %s: %d; %s: %s", + __entry->sev, __entry->affinity, __entry->mpidr, __entry->midr, - __entry->running_state, __entry->psci_state) + __entry->running_state, __entry->psci_state, + APEIL, __entry->pei_len, APEID, + __print_hex(__get_dynamic_array(buf), __entry->pei_len), + APECIL, __entry->ctx_len, APECID, + __print_hex(__get_dynamic_array(buf1), __entry->ctx_len), + VSEIL, __entry->oem_len, VSEID, + __print_hex(__get_dynamic_array(buf2), __entry->oem_len)) );
/*