From 38356c1fbd8cd0f44a32ede2c97f0eb639d06613 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 22 May 2014 16:40:54 +0200 Subject: x86, MCE: Kill CPU_POST_DEAD In conjunction with cleaning up CPU hotplug, we want to get rid of CPU_POST_DEAD. Kill this instance here and rediscover CMCI banks at the end of CPU_DEAD. Link: http://lkml.kernel.org/r/1400750624-19238-1-git-send-email-bp@alien8.de Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index bb92f38..8fecdd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2385,6 +2385,10 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) threshold_cpu_callback(action, cpu); mce_device_remove(cpu); mce_intel_hcpu_update(cpu); + + /* intentionally ignoring frozen here */ + if (!(action & CPU_TASKS_FROZEN)) + cmci_rediscover(); break; case CPU_DOWN_PREPARE: smp_call_function_single(cpu, mce_disable_cpu, &action, 1); @@ -2396,11 +2400,6 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) break; } - if (action == CPU_POST_DEAD) { - /* intentionally ignoring frozen here */ - cmci_rediscover(); - } - return NOTIFY_OK; } -- cgit v0.10.2 From 76ac8275f296b49c58f684825543bf4eb85d43d0 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:54:04 -0700 Subject: trace, RAS: Add basic RAS trace event To avoid confusion and conflict of usage for RAS related trace events, add a unified RAS trace event stub. Start a RAS subsystem menu which will be fleshed out in time, when more features get added to it. 
Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1402475691-30045-2-git-send-email-gong.chen@linux.intel.com Signed-off-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/drivers/Kconfig b/drivers/Kconfig index 0e87a34..4e6e66c 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -176,4 +176,6 @@ source "drivers/powercap/Kconfig" source "drivers/mcb/Kconfig" +source "drivers/ras/Kconfig" + endmenu diff --git a/drivers/Makefile b/drivers/Makefile index f98b50d..65c32b1 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -158,3 +158,4 @@ obj-$(CONFIG_NTB) += ntb/ obj-$(CONFIG_FMC) += fmc/ obj-$(CONFIG_POWERCAP) += powercap/ obj-$(CONFIG_MCB) += mcb/ +obj-$(CONFIG_RAS) += ras/ diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 878f090..d3c0465 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -72,6 +72,7 @@ config EDAC_MCE_INJ config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" + select RAS help Some systems are able to detect and correct errors in main memory. 
EDAC can report statistics on memory error diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 2c694b5..9f134823 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -33,9 +33,6 @@ #include #include "edac_core.h" #include "edac_module.h" - -#define CREATE_TRACE_POINTS -#define TRACE_INCLUDE_PATH ../../include/ras #include /* lock to memory controller's control array */ diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig new file mode 100644 index 0000000..f9da613 --- /dev/null +++ b/drivers/ras/Kconfig @@ -0,0 +1,2 @@ +config RAS + bool diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile new file mode 100644 index 0000000..223e806 --- /dev/null +++ b/drivers/ras/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_RAS) += ras.o diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c new file mode 100644 index 0000000..b0c6ed1 --- /dev/null +++ b/drivers/ras/ras.c @@ -0,0 +1,12 @@ +/* + * Copyright (C) 2014 Intel Corporation + * + * Authors: + * Chen, Gong + */ + +#define CREATE_TRACE_POINTS +#define TRACE_INCLUDE_PATH ../../include/ras +#include + +EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); -- cgit v0.10.2 From 0a2409aad38e97b1db55e6515b990be7b17060f6 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:57:27 -0700 Subject: trace, AER: Move trace into unified interface AER uses a separate trace interface by now. To make it consistent, move it into unified RAS trace interface. 
Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/drivers/pci/pcie/aer/Kconfig b/drivers/pci/pcie/aer/Kconfig index 50e94e0..3894402 100644 --- a/drivers/pci/pcie/aer/Kconfig +++ b/drivers/pci/pcie/aer/Kconfig @@ -5,6 +5,7 @@ config PCIEAER boolean "Root Port Advanced Error Reporting support" depends on PCIEPORTBUS + select RAS default y help This enables PCI Express Root Port Advanced Error Reporting diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 36ed31b5..35d06e1 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -22,9 +22,7 @@ #include #include "aerdrv.h" - -#define CREATE_TRACE_POINTS -#include +#include #define AER_AGENT_RECEIVER 0 #define AER_AGENT_REQUESTER 1 diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index 21cdb0b..acbcbb8 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -8,6 +8,7 @@ #include #include #include +#include /* * Hardware Events Report @@ -94,6 +95,69 @@ TRACE_EVENT(mc_event, __get_str(driver_detail)) ); +/* + * PCIe AER Trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event on a PCIe device. The event report has + * the following structure: + * + * char * dev_name - The name of the slot where the device resides + * ([domain:]bus:device.function). 
+ * u32 status - Either the correctable or uncorrectable register + * indicating what error or errors have been seen + * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED + */ + +#define aer_correctable_errors \ + {BIT(0), "Receiver Error"}, \ + {BIT(6), "Bad TLP"}, \ + {BIT(7), "Bad DLLP"}, \ + {BIT(8), "RELAY_NUM Rollover"}, \ + {BIT(12), "Replay Timer Timeout"}, \ + {BIT(13), "Advisory Non-Fatal"} + +#define aer_uncorrectable_errors \ + {BIT(4), "Data Link Protocol"}, \ + {BIT(12), "Poisoned TLP"}, \ + {BIT(13), "Flow Control Protocol"}, \ + {BIT(14), "Completion Timeout"}, \ + {BIT(15), "Completer Abort"}, \ + {BIT(16), "Unexpected Completion"}, \ + {BIT(17), "Receiver Overflow"}, \ + {BIT(18), "Malformed TLP"}, \ + {BIT(19), "ECRC"}, \ + {BIT(20), "Unsupported Request"} + +TRACE_EVENT(aer_event, + TP_PROTO(const char *dev_name, + const u32 status, + const u8 severity), + + TP_ARGS(dev_name, status, severity), + + TP_STRUCT__entry( + __string( dev_name, dev_name ) + __field( u32, status ) + __field( u8, severity ) + ), + + TP_fast_assign( + __assign_str(dev_name, dev_name); + __entry->status = status; + __entry->severity = severity; + ), + + TP_printk("%s PCIe Bus Error: severity=%s, %s\n", + __get_str(dev_name), + __entry->severity == AER_CORRECTABLE ? "Corrected" : + __entry->severity == AER_FATAL ? + "Fatal" : "Uncorrected, non-fatal", + __entry->severity == AER_CORRECTABLE ? 
+ __print_flags(__entry->status, "|", aer_correctable_errors) : + __print_flags(__entry->status, "|", aer_uncorrectable_errors)) +); + #endif /* _TRACE_HW_EVENT_MC_H */ /* This part must be outside protection */ diff --git a/include/trace/events/ras.h b/include/trace/events/ras.h deleted file mode 100644 index 1c875ad..0000000 --- a/include/trace/events/ras.h +++ /dev/null @@ -1,77 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM ras - -#if !defined(_TRACE_AER_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_AER_H - -#include -#include - - -/* - * PCIe AER Trace event - * - * These events are generated when hardware detects a corrected or - * uncorrected event on a PCIe device. The event report has - * the following structure: - * - * char * dev_name - The name of the slot where the device resides - * ([domain:]bus:device.function). - * u32 status - Either the correctable or uncorrectable register - * indicating what error or errors have been seen - * u8 severity - error severity 0:NONFATAL 1:FATAL 2:CORRECTED - */ - -#define aer_correctable_errors \ - {BIT(0), "Receiver Error"}, \ - {BIT(6), "Bad TLP"}, \ - {BIT(7), "Bad DLLP"}, \ - {BIT(8), "RELAY_NUM Rollover"}, \ - {BIT(12), "Replay Timer Timeout"}, \ - {BIT(13), "Advisory Non-Fatal"} - -#define aer_uncorrectable_errors \ - {BIT(4), "Data Link Protocol"}, \ - {BIT(12), "Poisoned TLP"}, \ - {BIT(13), "Flow Control Protocol"}, \ - {BIT(14), "Completion Timeout"}, \ - {BIT(15), "Completer Abort"}, \ - {BIT(16), "Unexpected Completion"}, \ - {BIT(17), "Receiver Overflow"}, \ - {BIT(18), "Malformed TLP"}, \ - {BIT(19), "ECRC"}, \ - {BIT(20), "Unsupported Request"} - -TRACE_EVENT(aer_event, - TP_PROTO(const char *dev_name, - const u32 status, - const u8 severity), - - TP_ARGS(dev_name, status, severity), - - TP_STRUCT__entry( - __string( dev_name, dev_name ) - __field( u32, status ) - __field( u8, severity ) - ), - - TP_fast_assign( - __assign_str(dev_name, dev_name); - __entry->status = status; - 
__entry->severity = severity; - ), - - TP_printk("%s PCIe Bus Error: severity=%s, %s\n", - __get_str(dev_name), - __entry->severity == AER_CORRECTABLE ? "Corrected" : - __entry->severity == AER_FATAL ? - "Fatal" : "Uncorrected, non-fatal", - __entry->severity == AER_CORRECTABLE ? - __print_flags(__entry->status, "|", aer_correctable_errors) : - __print_flags(__entry->status, "|", aer_uncorrectable_errors)) -); - -#endif /* _TRACE_AER_H */ - -/* This part must be outside protection */ -#include -- cgit v0.10.2 From 27c934158c5be0bebfb2970da521b9d9efc0058b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jun 2014 23:16:45 +0200 Subject: x86, MCE: Robustify mcheck_init_device BorisO reports that misc_register() fails often on xen. The current code unregisters the CPU hotplug notifier in that case. If then a CPU is offlined and onlined back again, we end up with a second timer running on that CPU, leading to soft lockups and system hangs. So let's leave the hotcpu notifier always registered - even if mce_device_create failed for some cores and never unreg it so that we can deal with the timer handling accordingly. Reported-and-Tested-by: Boris Ostrovsky Link: http://lkml.kernel.org/r/1403274493-1371-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 8fecdd3..4fc5797 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -2450,6 +2450,12 @@ static __init int mcheck_init_device(void) for_each_online_cpu(i) { err = mce_device_create(i); if (err) { + /* + * Register notifier anyway (and do not unreg it) so + * that we don't leave undeleted timers, see notifier + * callback above. 
+ */ + __register_hotcpu_notifier(&mce_cpu_notifier); cpu_notifier_register_done(); goto err_device_create; } @@ -2470,10 +2476,6 @@ static __init int mcheck_init_device(void) err_register: unregister_syscore_ops(&mce_syscore_ops); - cpu_notifier_register_begin(); - __unregister_hotcpu_notifier(&mce_cpu_notifier); - cpu_notifier_register_done(); - err_device_create: /* * We didn't keep track of which devices were created above, but -- cgit v0.10.2 From 3760cd20402d4c131e1994c968ecb055fa0f74bc Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 13:59:45 -0700 Subject: CPER: Adjust code flow of some functions Some code can be reorganized as a common function for other usages. Signed-off-by: Chen, Gong Signed-off-by: Tony Luck diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index 1491dd4..ac33a9f 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -34,6 +34,9 @@ #include #define INDENT_SP " " + +static char rcd_decode_str[CPER_REC_LEN]; + /* * CPER record ID need to be unique even after reboot, because record * ID is used as index for ERST storage, while CPER records from @@ -50,18 +53,19 @@ u64 cper_next_record_id(void) } EXPORT_SYMBOL_GPL(cper_next_record_id); -static const char *cper_severity_strs[] = { +static const char * const severity_strs[] = { "recoverable", "fatal", "corrected", "info", }; -static const char *cper_severity_str(unsigned int severity) +const char *cper_severity_str(unsigned int severity) { - return severity < ARRAY_SIZE(cper_severity_strs) ? - cper_severity_strs[severity] : "unknown"; + return severity < ARRAY_SIZE(severity_strs) ? 
+ severity_strs[severity] : "unknown"; } +EXPORT_SYMBOL_GPL(cper_severity_str); /* * cper_print_bits - print strings for set bits @@ -100,32 +104,32 @@ void cper_print_bits(const char *pfx, unsigned int bits, printk("%s\n", buf); } -static const char * const cper_proc_type_strs[] = { +static const char * const proc_type_strs[] = { "IA32/X64", "IA64", }; -static const char * const cper_proc_isa_strs[] = { +static const char * const proc_isa_strs[] = { "IA32", "IA64", "X64", }; -static const char * const cper_proc_error_type_strs[] = { +static const char * const proc_error_type_strs[] = { "cache error", "TLB error", "bus error", "micro-architectural error", }; -static const char * const cper_proc_op_strs[] = { +static const char * const proc_op_strs[] = { "unknown or generic", "data read", "data write", "instruction execution", }; -static const char * const cper_proc_flag_strs[] = { +static const char * const proc_flag_strs[] = { "restartable", "precise IP", "overflow", @@ -137,26 +141,26 @@ static void cper_print_proc_generic(const char *pfx, { if (proc->validation_bits & CPER_PROC_VALID_TYPE) printk("%s""processor_type: %d, %s\n", pfx, proc->proc_type, - proc->proc_type < ARRAY_SIZE(cper_proc_type_strs) ? - cper_proc_type_strs[proc->proc_type] : "unknown"); + proc->proc_type < ARRAY_SIZE(proc_type_strs) ? + proc_type_strs[proc->proc_type] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ISA) printk("%s""processor_isa: %d, %s\n", pfx, proc->proc_isa, - proc->proc_isa < ARRAY_SIZE(cper_proc_isa_strs) ? - cper_proc_isa_strs[proc->proc_isa] : "unknown"); + proc->proc_isa < ARRAY_SIZE(proc_isa_strs) ? 
+ proc_isa_strs[proc->proc_isa] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_ERROR_TYPE) { printk("%s""error_type: 0x%02x\n", pfx, proc->proc_error_type); cper_print_bits(pfx, proc->proc_error_type, - cper_proc_error_type_strs, - ARRAY_SIZE(cper_proc_error_type_strs)); + proc_error_type_strs, + ARRAY_SIZE(proc_error_type_strs)); } if (proc->validation_bits & CPER_PROC_VALID_OPERATION) printk("%s""operation: %d, %s\n", pfx, proc->operation, - proc->operation < ARRAY_SIZE(cper_proc_op_strs) ? - cper_proc_op_strs[proc->operation] : "unknown"); + proc->operation < ARRAY_SIZE(proc_op_strs) ? + proc_op_strs[proc->operation] : "unknown"); if (proc->validation_bits & CPER_PROC_VALID_FLAGS) { printk("%s""flags: 0x%02x\n", pfx, proc->flags); - cper_print_bits(pfx, proc->flags, cper_proc_flag_strs, - ARRAY_SIZE(cper_proc_flag_strs)); + cper_print_bits(pfx, proc->flags, proc_flag_strs, + ARRAY_SIZE(proc_flag_strs)); } if (proc->validation_bits & CPER_PROC_VALID_LEVEL) printk("%s""level: %d\n", pfx, proc->level); @@ -177,7 +181,7 @@ static void cper_print_proc_generic(const char *pfx, printk("%s""IP: 0x%016llx\n", pfx, proc->ip); } -static const char *cper_mem_err_type_strs[] = { +static const char * const mem_err_type_strs[] = { "unknown", "no error", "single-bit ECC", @@ -196,58 +200,99 @@ static const char *cper_mem_err_type_strs[] = { "physical memory map-out event", }; -static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +const char *cper_mem_err_type_str(unsigned int etype) { - if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) - printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); - if (mem->validation_bits & CPER_MEM_VALID_PA) - printk("%s""physical_address: 0x%016llx\n", - pfx, mem->physical_addr); - if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) - printk("%s""physical_address_mask: 0x%016llx\n", - pfx, mem->physical_addr_mask); + return etype < ARRAY_SIZE(mem_err_type_strs) ? 
+ mem_err_type_strs[etype] : "unknown"; +} +EXPORT_SYMBOL_GPL(cper_mem_err_type_str); + +static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) +{ + u32 len, n; + + if (!msg) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; if (mem->validation_bits & CPER_MEM_VALID_NODE) - pr_debug("node: %d\n", mem->node); + n += scnprintf(msg + n, len - n, "node: %d ", mem->node); if (mem->validation_bits & CPER_MEM_VALID_CARD) - pr_debug("card: %d\n", mem->card); + n += scnprintf(msg + n, len - n, "card: %d ", mem->card); if (mem->validation_bits & CPER_MEM_VALID_MODULE) - pr_debug("module: %d\n", mem->module); + n += scnprintf(msg + n, len - n, "module: %d ", mem->module); if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER) - pr_debug("rank: %d\n", mem->rank); + n += scnprintf(msg + n, len - n, "rank: %d ", mem->rank); if (mem->validation_bits & CPER_MEM_VALID_BANK) - pr_debug("bank: %d\n", mem->bank); + n += scnprintf(msg + n, len - n, "bank: %d ", mem->bank); if (mem->validation_bits & CPER_MEM_VALID_DEVICE) - pr_debug("device: %d\n", mem->device); + n += scnprintf(msg + n, len - n, "device: %d ", mem->device); if (mem->validation_bits & CPER_MEM_VALID_ROW) - pr_debug("row: %d\n", mem->row); + n += scnprintf(msg + n, len - n, "row: %d ", mem->row); if (mem->validation_bits & CPER_MEM_VALID_COLUMN) - pr_debug("column: %d\n", mem->column); + n += scnprintf(msg + n, len - n, "column: %d ", mem->column); if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION) - pr_debug("bit_position: %d\n", mem->bit_pos); + n += scnprintf(msg + n, len - n, "bit_position: %d ", + mem->bit_pos); if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID) - pr_debug("requestor_id: 0x%016llx\n", mem->requestor_id); + n += scnprintf(msg + n, len - n, "requestor_id: 0x%016llx ", + mem->requestor_id); if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID) - pr_debug("responder_id: 0x%016llx\n", mem->responder_id); + n += scnprintf(msg + n, len - n, "responder_id: 0x%016llx ", + 
mem->responder_id); if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID) - pr_debug("target_id: 0x%016llx\n", mem->target_id); + scnprintf(msg + n, len - n, "target_id: 0x%016llx ", + mem->target_id); + + msg[n] = '\0'; + return n; +} + +static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) +{ + u32 len, n; + const char *bank = NULL, *device = NULL; + + if (!msg || !(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE)) + return 0; + + n = 0; + len = CPER_REC_LEN - 1; + dmi_memdev_name(mem->mem_dev_handle, &bank, &device); + if (bank && device) + n = snprintf(msg, len, "DIMM location: %s %s ", bank, device); + else + n = snprintf(msg, len, + "DIMM location: not present. DMI handle: 0x%.4x ", + mem->mem_dev_handle); + + msg[n] = '\0'; + return n; +} + +static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) +{ + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) + printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); + if (mem->validation_bits & CPER_MEM_VALID_PA) + printk("%s""physical_address: 0x%016llx\n", + pfx, mem->physical_addr); + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + printk("%s""physical_address_mask: 0x%016llx\n", + pfx, mem->physical_addr_mask); + if (cper_mem_err_location(mem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, - etype < ARRAY_SIZE(cper_mem_err_type_strs) ? 
- cper_mem_err_type_strs[etype] : "unknown"); - } - if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) { - const char *bank = NULL, *device = NULL; - dmi_memdev_name(mem->mem_dev_handle, &bank, &device); - if (bank != NULL && device != NULL) - printk("%s""DIMM location: %s %s", pfx, bank, device); - else - printk("%s""DIMM DMI handle: 0x%.4x", - pfx, mem->mem_dev_handle); + cper_mem_err_type_str(etype)); } + if (cper_dimm_err_location(mem, rcd_decode_str)) + printk("%s%s\n", pfx, rcd_decode_str); } -static const char *cper_pcie_port_type_strs[] = { +static const char * const pcie_port_type_strs[] = { "PCIe end point", "legacy PCI end point", "unknown", @@ -266,8 +311,8 @@ static void cper_print_pcie(const char *pfx, const struct cper_sec_pcie *pcie, { if (pcie->validation_bits & CPER_PCIE_VALID_PORT_TYPE) printk("%s""port_type: %d, %s\n", pfx, pcie->port_type, - pcie->port_type < ARRAY_SIZE(cper_pcie_port_type_strs) ? - cper_pcie_port_type_strs[pcie->port_type] : "unknown"); + pcie->port_type < ARRAY_SIZE(pcie_port_type_strs) ? + pcie_port_type_strs[pcie->port_type] : "unknown"); if (pcie->validation_bits & CPER_PCIE_VALID_VERSION) printk("%s""version: %d.%d\n", pfx, pcie->version.major, pcie->version.minor); diff --git a/include/linux/cper.h b/include/linux/cper.h index 2fc0ec3..ed088b9 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -36,6 +36,13 @@ #define CPER_RECORD_REV 0x0100 /* + * CPER record length contains the CPER fields which are relevant for further + * handling of a memory error in userspace (we don't carry all the fields + * defined in the UEFI spec because some of them don't make any sense.) + * Currently, a length of 256 should be more than enough. 
+ */ +#define CPER_REC_LEN 256 +/* * Severity difinition for error_severity in struct cper_record_header * and section_severity in struct cper_section_descriptor */ @@ -395,6 +402,8 @@ struct cper_sec_pcie { #pragma pack() u64 cper_next_record_id(void); +const char *cper_severity_str(unsigned int); +const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); -- cgit v0.10.2 From d963cd95bea93b7db9390a71d1e2cabbb3b2c3ea Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 14:02:20 -0700 Subject: RAS, debugfs: Add debugfs interface for RAS subsystem Implement a new debugfs interface for RAS subsystem. A file named daemon_active is added there accordingly. This file is used to track if user space daemon accesses perf/trace interface or not. One can track which daemon opens it via "lsof /path/to/debugfs/ras/daemon_active". Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1402475691-30045-5-git-send-email-gong.chen@linux.intel.com Signed-off-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/drivers/ras/Makefile b/drivers/ras/Makefile index 223e806..d7f7334 100644 --- a/drivers/ras/Makefile +++ b/drivers/ras/Makefile @@ -1 +1 @@ -obj-$(CONFIG_RAS) += ras.o +obj-$(CONFIG_RAS) += ras.o debugfs.o diff --git a/drivers/ras/debugfs.c b/drivers/ras/debugfs.c new file mode 100644 index 0000000..0322acf --- /dev/null +++ b/drivers/ras/debugfs.c @@ -0,0 +1,56 @@ +#include + +static struct dentry *ras_debugfs_dir; + +static atomic_t trace_count = ATOMIC_INIT(0); + +int ras_userspace_consumers(void) +{ + return atomic_read(&trace_count); +} +EXPORT_SYMBOL_GPL(ras_userspace_consumers); + +static int trace_show(struct seq_file *m, void *v) +{ + return atomic_read(&trace_count); +} + +static int trace_open(struct inode *inode, struct file *file) +{ + atomic_inc(&trace_count); + return single_open(file, trace_show, NULL); +} + +static int 
trace_release(struct inode *inode, struct file *file) +{ + atomic_dec(&trace_count); + return single_release(inode, file); +} + +static const struct file_operations trace_fops = { + .open = trace_open, + .read = seq_read, + .llseek = seq_lseek, + .release = trace_release, +}; + +int __init ras_add_daemon_trace(void) +{ + struct dentry *fentry; + + if (!ras_debugfs_dir) + return -ENOENT; + + fentry = debugfs_create_file("daemon_active", S_IRUSR, ras_debugfs_dir, + NULL, &trace_fops); + if (!fentry) + return -ENODEV; + + return 0; + +} + +void __init ras_debugfs_init(void) +{ + ras_debugfs_dir = debugfs_create_dir("ras", NULL); +} diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index b0c6ed1..4cac43a 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -5,8 +5,22 @@ * Chen, Gong */ +#include +#include + #define CREATE_TRACE_POINTS #define TRACE_INCLUDE_PATH ../../include/ras #include +static int __init ras_init(void) +{ + int rc = 0; + + ras_debugfs_init(); + rc = ras_add_daemon_trace(); + + return rc; +} +subsys_initcall(ras_init); + EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/ras.h b/include/linux/ras.h new file mode 100644 index 0000000..2aceeaf --- /dev/null +++ b/include/linux/ras.h @@ -0,0 +1,14 @@ +#ifndef __RAS_H__ +#define __RAS_H__ + +#ifdef CONFIG_DEBUG_FS +int ras_userspace_consumers(void); +void ras_debugfs_init(void); +int ras_add_daemon_trace(void); +#else +static inline int ras_userspace_consumers(void) { return 0; } +static inline void ras_debugfs_init(void) { return; } +static inline int ras_add_daemon_trace(void) { return 0; } +#endif + +#endif -- cgit v0.10.2 From 2dfb7d51a61d7ca91b131c8db612f27d9390f2d5 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Tue, 17 Jun 2014 22:33:07 -0400 Subject: trace, RAS: Add eMCA trace event interface Add trace interface to elaborate all H/W error related information. 
Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index a34a228..206942b 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -370,6 +370,7 @@ config ACPI_EXTLOG tristate "Extended Error Log support" depends on X86_MCE && X86_LOCAL_APIC select UEFI_CPER + select RAS default n help Certain usages such as Predictive Failure Analysis (PFA) require @@ -384,6 +385,7 @@ config ACPI_EXTLOG Enhanced MCA Logging allows firmware to provide additional error information to system software, synchronous with MCE or CMCI. This - driver adds support for that functionality. + driver adds support for that functionality with corresponding + tracepoint which carries that information to userspace. endif # ACPI diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 1853341..e61da95 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -16,6 +16,7 @@ #include #include "apei/apei-internal.h" +#include #define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */ @@ -137,8 +138,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, struct mce *mce = (struct mce *)data; int bank = mce->bank; int cpu = mce->extcpu; - struct acpi_generic_status *estatus; - int rc; + struct acpi_generic_status *estatus, *tmp; + struct acpi_generic_data *gdata; + const uuid_le *fru_id = &NULL_UUID_LE; + char *fru_text = ""; + uuid_le *sec_type; + static u32 err_seq; estatus = extlog_elog_entry_check(cpu, bank); if (estatus == NULL) @@ -148,7 +153,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, /* clear record status to enable BIOS to update it again */ estatus->block_status = 0; - rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu); + tmp = (struct acpi_generic_status *)elog_buf; + print_extlog_rcd(NULL, tmp, cpu); + + /* log event via trace */ + err_seq++; + gdata = (struct acpi_generic_data 
*)(tmp + 1); + if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID) + fru_id = (uuid_le *)gdata->fru_id; + if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT) + fru_text = gdata->fru_text; + sec_type = (uuid_le *)gdata->section_type; + if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) { + struct cper_sec_mem_err *mem = (void *)(gdata + 1); + if (gdata->error_data_length >= sizeof(*mem)) + trace_extlog_mem_event(mem, err_seq, fru_id, fru_text, + (u8)gdata->error_severity); + } return NOTIFY_STOP; } diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c index ac33a9f..437e6fd 100644 --- a/drivers/firmware/efi/cper.c +++ b/drivers/firmware/efi/cper.c @@ -207,7 +207,7 @@ const char *cper_mem_err_type_str(unsigned int etype) } EXPORT_SYMBOL_GPL(cper_mem_err_type_str); -static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) +static int cper_mem_err_location(struct cper_mem_err_compact *mem, char *msg) { u32 len, n; @@ -249,7 +249,7 @@ static int cper_mem_err_location(const struct cper_sec_mem_err *mem, char *msg) return n; } -static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) +static int cper_dimm_err_location(struct cper_mem_err_compact *mem, char *msg) { u32 len, n; const char *bank = NULL, *device = NULL; @@ -271,8 +271,44 @@ static int cper_dimm_err_location(const struct cper_sec_mem_err *mem, char *msg) return n; } +void cper_mem_err_pack(const struct cper_sec_mem_err *mem, + struct cper_mem_err_compact *cmem) +{ + cmem->validation_bits = mem->validation_bits; + cmem->node = mem->node; + cmem->card = mem->card; + cmem->module = mem->module; + cmem->bank = mem->bank; + cmem->device = mem->device; + cmem->row = mem->row; + cmem->column = mem->column; + cmem->bit_pos = mem->bit_pos; + cmem->requestor_id = mem->requestor_id; + cmem->responder_id = mem->responder_id; + cmem->target_id = mem->target_id; + cmem->rank = mem->rank; + cmem->mem_array_handle = mem->mem_array_handle; + 
cmem->mem_dev_handle = mem->mem_dev_handle; +} + +const char *cper_mem_err_unpack(struct trace_seq *p, + struct cper_mem_err_compact *cmem) +{ + const char *ret = p->buffer + p->len; + + if (cper_mem_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + if (cper_dimm_err_location(cmem, rcd_decode_str)) + trace_seq_printf(p, "%s", rcd_decode_str); + trace_seq_putc(p, '\0'); + + return ret; +} + static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) { + struct cper_mem_err_compact cmem; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS) printk("%s""error_status: 0x%016llx\n", pfx, mem->error_status); if (mem->validation_bits & CPER_MEM_VALID_PA) @@ -281,14 +317,15 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem) if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) printk("%s""physical_address_mask: 0x%016llx\n", pfx, mem->physical_addr_mask); - if (cper_mem_err_location(mem, rcd_decode_str)) + cper_mem_err_pack(mem, &cmem); + if (cper_mem_err_location(&cmem, rcd_decode_str)) printk("%s%s\n", pfx, rcd_decode_str); if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) { u8 etype = mem->error_type; printk("%s""error_type: %d, %s\n", pfx, etype, cper_mem_err_type_str(etype)); } - if (cper_dimm_err_location(mem, rcd_decode_str)) + if (cper_dimm_err_location(&cmem, rcd_decode_str)) printk("%s%s\n", pfx, rcd_decode_str); } diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c index 4cac43a..b67dd36 100644 --- a/drivers/ras/ras.c +++ b/drivers/ras/ras.c @@ -23,4 +23,7 @@ static int __init ras_init(void) } subsys_initcall(ras_init); +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event); diff --git a/include/linux/cper.h b/include/linux/cper.h index ed088b9..76abba4 100644 --- a/include/linux/cper.h +++ b/include/linux/cper.h @@ -22,6 +22,7 @@ #define LINUX_CPER_H #include 
+#include /* CPER record signature and the size */ #define CPER_SIG_RECORD "CPER" @@ -363,6 +364,24 @@ struct cper_sec_mem_err { __u16 mem_dev_handle; /* module handle in UEFI 2.4 */ }; +struct cper_mem_err_compact { + __u64 validation_bits; + __u16 node; + __u16 card; + __u16 module; + __u16 bank; + __u16 device; + __u16 row; + __u16 column; + __u16 bit_pos; + __u64 requestor_id; + __u64 responder_id; + __u64 target_id; + __u16 rank; + __u16 mem_array_handle; + __u16 mem_dev_handle; +}; + struct cper_sec_pcie { __u64 validation_bits; __u32 port_type; @@ -406,5 +425,9 @@ const char *cper_severity_str(unsigned int); const char *cper_mem_err_type_str(unsigned int); void cper_print_bits(const char *prefix, unsigned int bits, const char * const strs[], unsigned int strs_size); +void cper_mem_err_pack(const struct cper_sec_mem_err *, + struct cper_mem_err_compact *); +const char *cper_mem_err_unpack(struct trace_seq *, + struct cper_mem_err_compact *); #endif diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index acbcbb8..47da53c 100644 --- a/include/ras/ras_event.h +++ b/include/ras/ras_event.h @@ -9,6 +9,70 @@ #include #include #include +#include + +/* + * MCE Extended Error Log trace event + * + * These events are generated when hardware detects a corrected or + * uncorrected event. 
+ */ + +/* memory trace event */ + +#if defined(CONFIG_ACPI_EXTLOG) || defined(CONFIG_ACPI_EXTLOG_MODULE) +TRACE_EVENT(extlog_mem_event, + TP_PROTO(struct cper_sec_mem_err *mem, + u32 err_seq, + const uuid_le *fru_id, + const char *fru_text, + u8 sev), + + TP_ARGS(mem, err_seq, fru_id, fru_text, sev), + + TP_STRUCT__entry( + __field(u32, err_seq) + __field(u8, etype) + __field(u8, sev) + __field(u64, pa) + __field(u8, pa_mask_lsb) + __field_struct(uuid_le, fru_id) + __string(fru_text, fru_text) + __field_struct(struct cper_mem_err_compact, data) + ), + + TP_fast_assign( + __entry->err_seq = err_seq; + if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) + __entry->etype = mem->error_type; + else + __entry->etype = ~0; + __entry->sev = sev; + if (mem->validation_bits & CPER_MEM_VALID_PA) + __entry->pa = mem->physical_addr; + else + __entry->pa = ~0ull; + + if (mem->validation_bits & CPER_MEM_VALID_PA_MASK) + __entry->pa_mask_lsb = (u8)__ffs64(mem->physical_addr_mask); + else + __entry->pa_mask_lsb = ~0; + __entry->fru_id = *fru_id; + __assign_str(fru_text, fru_text); + cper_mem_err_pack(mem, &__entry->data); + ), + + TP_printk("{%d} %s error: %s physical addr: %016llx (mask lsb: %x) %sFRU: %pUl %.20s", + __entry->err_seq, + cper_severity_str(__entry->sev), + cper_mem_err_type_str(__entry->etype), + __entry->pa, + __entry->pa_mask_lsb, + cper_mem_err_unpack(p, &__entry->data), + &__entry->fru_id, + __get_str(fru_text)) +); +#endif /* * Hardware Events Report -- cgit v0.10.2 From d6cae935ec5b7873a8ccd8f0331bef2df729e86a Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 04:34:50 -0400 Subject: trace, eMCA: Add a knob to adjust where to save event log To avoid saving two copies for one H/W event, add a new file under debugfs to control how to save event log. Once this file is opened, the perf/trace will be used, in the meanwhile, kernel will stop printing event log to the console. 
On the other hand, if this file is closed, the kernel will print the event log to the console again. Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index e61da95..a99d4a6 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -154,7 +155,11 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, estatus->block_status = 0; tmp = (struct acpi_generic_status *)elog_buf; - print_extlog_rcd(NULL, tmp, cpu); + + if (!ras_userspace_consumers()) { + print_extlog_rcd(NULL, tmp, cpu); + goto out; + } /* log event via trace */ err_seq++; @@ -171,6 +176,7 @@ static int extlog_print(struct notifier_block *nb, unsigned long val, (u8)gdata->error_severity); } +out: return NOTIFY_STOP; } -- cgit v0.10.2 From 7c76bb5f7a3d052339b873374333dd0dcc35ce28 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Wed, 11 Jun 2014 04:34:51 -0400 Subject: RAS, extlog: Adjust init flow Unless the platform has eMCA-related capability, there is no need to check whether there is a conflict with the EDAC driver. 
Signed-off-by: Chen, Gong Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index a99d4a6..0ad6f38 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -223,19 +223,16 @@ static int __init extlog_init(void) u64 cap; int rc; + rdmsrl(MSR_IA32_MCG_CAP, cap); + + if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) + return -ENODEV; + if (get_edac_report_status() == EDAC_REPORTING_FORCE) { pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); return -EPERM; } - rc = -ENODEV; - rdmsrl(MSR_IA32_MCG_CAP, cap); - if (!(cap & MCG_ELOG_P)) - return rc; - - if (!extlog_get_l1addr()) - return rc; - rc = -EINVAL; /* get L1 header to fetch necessary information */ l1_hdr_size = sizeof(struct extlog_l1_head); -- cgit v0.10.2 From 9dae3d0d9e64c3cb8bb172f041d4e66d4b92088a Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Tue, 22 Jul 2014 11:20:11 +0200 Subject: apei, mce: Factor out APEI architecture specific MCE calls. This commit abstracts MCE calls and provides corresponding weak default implementations for those architectures which do not need arch-specific actions. Each platform that needs to perform additional architectural actions should provide the desired function definition. This allows us to avoid wrapping code in #ifdef in generic code and also spares new platforms from introducing dummy stub functions. Initially, there are two APEI arch-specific calls: - arch_apei_enable_cmcff() - arch_apei_report_mem_error() Both interact with the MCE driver on the x86 architecture. 
Signed-off-by: Tomasz Nowicki Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index 163b225..3242e59 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o +obj-$(CONFIG_ACPI_APEI) += apei.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c new file mode 100644 index 0000000..12b13de --- /dev/null +++ b/arch/x86/kernel/acpi/apei.c @@ -0,0 +1,56 @@ +/* + * Arch-specific APEI-related functions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include + +#include + +int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data) +{ +#ifdef CONFIG_X86_MCE + int i; + struct acpi_hest_ia_corrected *cmc; + struct acpi_hest_ia_error_bank *mc_bank; + + if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) + return 0; + + cmc = (struct acpi_hest_ia_corrected *)hest_hdr; + if (!cmc->enabled) + return 0; + + /* + * We expect HEST to provide a list of MC banks that report errors + * in firmware first mode. Otherwise, return non-zero value to + * indicate that we are done parsing HEST. 
+ */ + if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || + !cmc->num_hardware_banks) + return 1; + + pr_info("HEST: Enabling Firmware First mode for corrected errors.\n"); + + mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1); + for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) + mce_disable_bank(mc_bank->bank_number); +#endif + return 1; +} + +void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) +{ +#ifdef CONFIG_X86_MCE + apei_mce_report_mem_error(sev, mem_err); +#endif +} diff --git a/drivers/acpi/apei/apei-base.c b/drivers/acpi/apei/apei-base.c index 8678dfe..2cd7bdd 100644 --- a/drivers/acpi/apei/apei-base.c +++ b/drivers/acpi/apei/apei-base.c @@ -745,6 +745,19 @@ struct dentry *apei_get_debugfs_dir(void) } EXPORT_SYMBOL_GPL(apei_get_debugfs_dir); +int __weak arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, + void *data) +{ + return 1; +} +EXPORT_SYMBOL_GPL(arch_apei_enable_cmcff); + +void __weak arch_apei_report_mem_error(int sev, + struct cper_sec_mem_err *mem_err) +{ +} +EXPORT_SYMBOL_GPL(arch_apei_report_mem_error); + int apei_osc_setup(void) { static u8 whea_uuid_str[] = "ed855e0c-6c90-47bf-a62a-26de0fc5ad5c"; diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index dab7cb7..352170a 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include @@ -455,9 +455,7 @@ static void ghes_do_proc(struct ghes *ghes, mem_err = (struct cper_sec_mem_err *)(gdata+1); ghes_edac_report_mem_error(ghes, sev, mem_err); -#ifdef CONFIG_X86_MCE - apei_mce_report_mem_error(sev, mem_err); -#endif + arch_apei_report_mem_error(sev, mem_err); ghes_handle_memory_failure(gdata, sev); } #ifdef CONFIG_ACPI_APEI_PCIEAER diff --git a/drivers/acpi/apei/hest.c b/drivers/acpi/apei/hest.c index f5e37f3..06e9b41 100644 --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -36,7 +36,6 @@ #include #include #include -#include #include "apei-internal.h" 
@@ -128,33 +127,7 @@ EXPORT_SYMBOL_GPL(apei_hest_parse); */ static int __init hest_parse_cmc(struct acpi_hest_header *hest_hdr, void *data) { -#ifdef CONFIG_X86_MCE - int i; - struct acpi_hest_ia_corrected *cmc; - struct acpi_hest_ia_error_bank *mc_bank; - - if (hest_hdr->type != ACPI_HEST_TYPE_IA32_CORRECTED_CHECK) - return 0; - - cmc = (struct acpi_hest_ia_corrected *)hest_hdr; - if (!cmc->enabled) - return 0; - - /* - * We expect HEST to provide a list of MC banks that report errors - * in firmware first mode. Otherwise, return non-zero value to - * indicate that we are done parsing HEST. - */ - if (!(cmc->flags & ACPI_HEST_FIRMWARE_FIRST) || !cmc->num_hardware_banks) - return 1; - - pr_info(HEST_PFX "Enabling Firmware First mode for corrected errors.\n"); - - mc_bank = (struct acpi_hest_ia_error_bank *)(cmc + 1); - for (i = 0; i < cmc->num_hardware_banks; i++, mc_bank++) - mce_disable_bank(mc_bank->bank_number); -#endif - return 1; + return arch_apei_enable_cmcff(hest_hdr, data); } struct ghes_arr { diff --git a/include/acpi/apei.h b/include/acpi/apei.h index 04f349d..8a23c95 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -42,5 +42,8 @@ ssize_t erst_read(u64 record_id, struct cper_record_header *record, size_t buflen); int erst_clear(u64 record_id); +int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data); +void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err); + #endif #endif -- cgit v0.10.2 From 44a69f6195628f6f940566d133a72987559e102d Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Tue, 22 Jul 2014 11:20:12 +0200 Subject: acpi, apei, ghes: Make NMI error notification to be GHES architecture extension. Currently APEI depends on x86 architecture. It is because of NMI hardware error notification of GHES which is currently supported by x86 only. However, many other APEI features can be still used perfectly by other architectures. This commit adds two symbols: 1. 
HAVE_ACPI_APEI for those archs which support APEI. 2. HAVE_ACPI_APEI_NMI which is used for NMI code isolation in ghes.c file. NMI related data and functions are grouped so they can be wrapped inside one #ifdef section. Appropriate function stubs are provided for !NMI case. Note there are no functional changes for x86 because it hard-selects the HAVE_ACPI_APEI and HAVE_ACPI_APEI_NMI symbols. Signed-off-by: Tomasz Nowicki Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d24887b..4387344 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -132,6 +132,8 @@ config X86 select GENERIC_CPU_AUTOPROBE select HAVE_ARCH_AUDITSYSCALL select ARCH_SUPPORTS_ATOMIC_RMW + select HAVE_ACPI_APEI if ACPI + select HAVE_ACPI_APEI_NMI if ACPI config INSTRUCTION_DECODER def_bool y diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index c4dac71..b0140c8 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -1,9 +1,15 @@ +config HAVE_ACPI_APEI + bool + +config HAVE_ACPI_APEI_NMI + bool + config ACPI_APEI bool "ACPI Platform Error Interface (APEI)" select MISC_FILESYSTEMS select PSTORE select UEFI_CPER - depends on X86 + depends on HAVE_ACPI_APEI help APEI allows to report errors (for example from the chipset) to the operating system. This improves NMI handling diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 352170a..7fcf4d7 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -47,11 +47,11 @@ #include #include #include +#include #include #include #include -#include #include "apei-internal.h" @@ -86,8 +86,6 @@ bool ghes_disable; module_param_named(disable, ghes_disable, bool, 0); -static int ghes_panic_timeout __read_mostly = 30; - /* * All error sources notified with SCI shares one notifier function, * so they need to be linked and checked one by one. 
This is applied @@ -97,16 +95,9 @@ static int ghes_panic_timeout __read_mostly = 30; * list changing, not for traversing. */ static LIST_HEAD(ghes_sci); -static LIST_HEAD(ghes_nmi); static DEFINE_MUTEX(ghes_list_mutex); /* - * NMI may be triggered on any CPU, so ghes_nmi_lock is used for - * mutual exclusion. - */ -static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); - -/* * Because the memory area used to transfer hardware error information * from BIOS to Linux can be determined only in NMI, IRQ or timer * handler, but general ioremap can not be used in atomic context, so @@ -130,18 +121,8 @@ static struct vm_struct *ghes_ioremap_area; static DEFINE_RAW_SPINLOCK(ghes_ioremap_lock_nmi); static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); -/* - * printk is not safe in NMI context. So in NMI handler, we allocate - * required memory from lock-less memory allocator - * (ghes_estatus_pool), save estatus into it, put them into lock-less - * list (ghes_estatus_llist), then delay printk into IRQ context via - * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record - * required pool size by all NMI error source. - */ static struct gen_pool *ghes_estatus_pool; static unsigned long ghes_estatus_pool_size_request; -static struct llist_head ghes_estatus_llist; -static struct irq_work ghes_proc_irq_work; struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; @@ -249,11 +230,6 @@ static int ghes_estatus_pool_expand(unsigned long len) return 0; } -static void ghes_estatus_pool_shrink(unsigned long len) -{ - ghes_estatus_pool_size_request -= PAGE_ALIGN(len); -} - static struct ghes *ghes_new(struct acpi_hest_generic *generic) { struct ghes *ghes; @@ -732,6 +708,32 @@ static int ghes_notify_sci(struct notifier_block *this, return ret; } +static struct notifier_block ghes_notifier_sci = { + .notifier_call = ghes_notify_sci, +}; + +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +/* + * printk is not safe in NMI context. 
So in NMI handler, we allocate + * required memory from lock-less memory allocator + * (ghes_estatus_pool), save estatus into it, put them into lock-less + * list (ghes_estatus_llist), then delay printk into IRQ context via + * irq_work (ghes_proc_irq_work). ghes_estatus_size_request record + * required pool size by all NMI error source. + */ +static struct llist_head ghes_estatus_llist; +static struct irq_work ghes_proc_irq_work; + +/* + * NMI may be triggered on any CPU, so ghes_nmi_lock is used for + * mutual exclusion. + */ +static DEFINE_RAW_SPINLOCK(ghes_nmi_lock); + +static LIST_HEAD(ghes_nmi); + +static int ghes_panic_timeout __read_mostly = 30; + static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) { struct llist_node *next, *tail = NULL; @@ -875,10 +877,6 @@ out: return ret; } -static struct notifier_block ghes_notifier_sci = { - .notifier_call = ghes_notify_sci, -}; - static unsigned long ghes_esource_prealloc_size( const struct acpi_hest_generic *generic) { @@ -894,11 +892,71 @@ static unsigned long ghes_esource_prealloc_size( return prealloc_size; } +static void ghes_estatus_pool_shrink(unsigned long len) +{ + ghes_estatus_pool_size_request -= PAGE_ALIGN(len); +} + +static void ghes_nmi_add(struct ghes *ghes) +{ + unsigned long len; + + len = ghes_esource_prealloc_size(ghes->generic); + ghes_estatus_pool_expand(len); + mutex_lock(&ghes_list_mutex); + if (list_empty(&ghes_nmi)) + register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, "ghes"); + list_add_rcu(&ghes->list, &ghes_nmi); + mutex_unlock(&ghes_list_mutex); +} + +static void ghes_nmi_remove(struct ghes *ghes) +{ + unsigned long len; + + mutex_lock(&ghes_list_mutex); + list_del_rcu(&ghes->list); + if (list_empty(&ghes_nmi)) + unregister_nmi_handler(NMI_LOCAL, "ghes"); + mutex_unlock(&ghes_list_mutex); + /* + * To synchronize with NMI handler, ghes can only be + * freed after NMI handler finishes. 
+ */ + synchronize_rcu(); + len = ghes_esource_prealloc_size(ghes->generic); + ghes_estatus_pool_shrink(len); +} + +static void ghes_nmi_init_cxt(void) +{ + init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); +} +#else /* CONFIG_HAVE_ACPI_APEI_NMI */ +static inline void ghes_nmi_add(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to add NMI notification which is not supported!\n", + ghes->generic->header.source_id); + BUG(); +} + +static inline void ghes_nmi_remove(struct ghes *ghes) +{ + pr_err(GHES_PFX "ID: %d, trying to remove NMI notification which is not supported!\n", + ghes->generic->header.source_id); + BUG(); +} + +static inline void ghes_nmi_init_cxt(void) +{ +} +#endif /* CONFIG_HAVE_ACPI_APEI_NMI */ + static int ghes_probe(struct platform_device *ghes_dev) { struct acpi_hest_generic *generic; struct ghes *ghes = NULL; - unsigned long len; + int rc = -EINVAL; generic = *(struct acpi_hest_generic **)ghes_dev->dev.platform_data; @@ -909,7 +967,13 @@ static int ghes_probe(struct platform_device *ghes_dev) case ACPI_HEST_NOTIFY_POLLED: case ACPI_HEST_NOTIFY_EXTERNAL: case ACPI_HEST_NOTIFY_SCI: + break; case ACPI_HEST_NOTIFY_NMI: + if (!IS_ENABLED(CONFIG_HAVE_ACPI_APEI_NMI)) { + pr_warn(GHES_PFX "Generic hardware error source: %d notified via NMI interrupt is not supported!\n", + generic->header.source_id); + goto err; + } break; case ACPI_HEST_NOTIFY_LOCAL: pr_warning(GHES_PFX "Generic hardware error source: %d notified via local interrupt is not supported!\n", @@ -970,14 +1034,7 @@ static int ghes_probe(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: - len = ghes_esource_prealloc_size(generic); - ghes_estatus_pool_expand(len); - mutex_lock(&ghes_list_mutex); - if (list_empty(&ghes_nmi)) - register_nmi_handler(NMI_LOCAL, ghes_notify_nmi, 0, - "ghes"); - list_add_rcu(&ghes->list, &ghes_nmi); - mutex_unlock(&ghes_list_mutex); + ghes_nmi_add(ghes); break; default: BUG(); @@ -999,7 +1056,6 @@ static 
int ghes_remove(struct platform_device *ghes_dev) { struct ghes *ghes; struct acpi_hest_generic *generic; - unsigned long len; ghes = platform_get_drvdata(ghes_dev); generic = ghes->generic; @@ -1020,18 +1076,7 @@ static int ghes_remove(struct platform_device *ghes_dev) mutex_unlock(&ghes_list_mutex); break; case ACPI_HEST_NOTIFY_NMI: - mutex_lock(&ghes_list_mutex); - list_del_rcu(&ghes->list); - if (list_empty(&ghes_nmi)) - unregister_nmi_handler(NMI_LOCAL, "ghes"); - mutex_unlock(&ghes_list_mutex); - /* - * To synchronize with NMI handler, ghes can only be - * freed after NMI handler finishes. - */ - synchronize_rcu(); - len = ghes_esource_prealloc_size(generic); - ghes_estatus_pool_shrink(len); + ghes_nmi_remove(ghes); break; default: BUG(); @@ -1075,7 +1120,7 @@ static int __init ghes_init(void) return -EINVAL; } - init_irq_work(&ghes_proc_irq_work, ghes_proc_in_irq); + ghes_nmi_init_cxt(); rc = ghes_ioremap_init(); if (rc) diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 447775e..1d2a6ab 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -63,4 +63,8 @@ extern int proc_dowatchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); #endif +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +#include +#endif + #endif -- cgit v0.10.2 From 594c7255dce7a13cac50cf2470cc56e2c3b0494e Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Tue, 22 Jul 2014 11:20:13 +0200 Subject: acpi, apei, ghes: Factor out ioremap virtual memory for IRQ and NMI context. GHES currently maps two pages with atomic_ioremap. From now on, NMI support is architecture dependent, so there is no need to allocate an NMI page for platforms without NMI support. To make it possible to not use a second page, swap the existing page order so that the IRQ context page is first, and the optional NMI context page is second. Then, use HAVE_ACPI_APEI_NMI to decide how many pages are to be allocated. 
Signed-off-by: Tomasz Nowicki Acked-by: Borislav Petkov Signed-off-by: Tony Luck diff --git a/arch/x86/kernel/acpi/apei.c b/arch/x86/kernel/acpi/apei.c index 12b13de..c280df6 100644 --- a/arch/x86/kernel/acpi/apei.c +++ b/arch/x86/kernel/acpi/apei.c @@ -15,6 +15,7 @@ #include #include +#include int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data) { @@ -54,3 +55,8 @@ void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err) apei_mce_report_mem_error(sev, mem_err); #endif } + +void arch_apei_flush_tlb_one(unsigned long addr) +{ + __flush_tlb_one(addr); +} diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 7fcf4d7..e05d84e7 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -105,12 +105,16 @@ static DEFINE_MUTEX(ghes_list_mutex); */ /* - * Two virtual pages are used, one for NMI context, the other for - * IRQ/PROCESS context + * Two virtual pages are used, one for IRQ/PROCESS context, the other for + * NMI context (optionally). 
*/ -#define GHES_IOREMAP_PAGES 2 -#define GHES_IOREMAP_NMI_PAGE(base) (base) -#define GHES_IOREMAP_IRQ_PAGE(base) ((base) + PAGE_SIZE) +#ifdef CONFIG_HAVE_ACPI_APEI_NMI +#define GHES_IOREMAP_PAGES 2 +#else +#define GHES_IOREMAP_PAGES 1 +#endif +#define GHES_IOREMAP_IRQ_PAGE(base) (base) +#define GHES_IOREMAP_NMI_PAGE(base) ((base) + PAGE_SIZE) /* virtual memory area for atomic ioremap */ static struct vm_struct *ghes_ioremap_area; @@ -173,7 +177,7 @@ static void ghes_iounmap_nmi(void __iomem *vaddr_ptr) BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_NMI_PAGE(base)); unmap_kernel_range_noflush(vaddr, PAGE_SIZE); - __flush_tlb_one(vaddr); + arch_apei_flush_tlb_one(vaddr); } static void ghes_iounmap_irq(void __iomem *vaddr_ptr) @@ -183,7 +187,7 @@ static void ghes_iounmap_irq(void __iomem *vaddr_ptr) BUG_ON(vaddr != (unsigned long)GHES_IOREMAP_IRQ_PAGE(base)); unmap_kernel_range_noflush(vaddr, PAGE_SIZE); - __flush_tlb_one(vaddr); + arch_apei_flush_tlb_one(vaddr); } static int ghes_estatus_pool_init(void) diff --git a/include/acpi/apei.h b/include/acpi/apei.h index 8a23c95..76284bb 100644 --- a/include/acpi/apei.h +++ b/include/acpi/apei.h @@ -44,6 +44,7 @@ int erst_clear(u64 record_id); int arch_apei_enable_cmcff(struct acpi_hest_header *hest_hdr, void *data); void arch_apei_report_mem_error(int sev, struct cper_sec_mem_err *mem_err); +void arch_apei_flush_tlb_one(unsigned long addr); #endif #endif -- cgit v0.10.2 From 5ccb8225abf2ac51cd023a99f28366ac9823bd0d Mon Sep 17 00:00:00 2001 From: Mike Qiu Date: Tue, 29 Jul 2014 10:49:25 -0700 Subject: x86/ras: Fix build warnings in <linux/aer.h> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build warning due to a missing forward declaration in <linux/aer.h>. We need struct pci_dev to be forward declared so we can define pointers to it, but we don't need to pull in the whole definition. 
build log: In file included from include/ras/ras_event.h:11:0, from drivers/ras/ras.c:13: include/linux/aer.h:42:129: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] include/linux/aer.h:42:129: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default] include/linux/aer.h:46:130: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] include/linux/aer.h:50:136: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] include/linux/aer.h:57:14: warning: ‘struct pci_dev’ declared inside parameter list [enabled by default] Signed-off-by: Mike Qiu Link: http://lkml.kernel.org/r/53d7dea511471321bb@agluck-desk.sc.intel.com Acked-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Bjorn Helgaas Signed-off-by: Tony Luck Signed-off-by: H. Peter Anvin diff --git a/include/linux/aer.h b/include/linux/aer.h index 4dbaa70..c826d1c 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -11,6 +11,8 @@ #define AER_FATAL 1 #define AER_CORRECTABLE 2 +struct pci_dev; + struct aer_header_log_regs { unsigned int dw0; unsigned int dw1; -- cgit v0.10.2