From 8d21d4c91efaed3eb60cc64d43889665a4bd7a36 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Mon, 28 Jul 2014 02:50:59 -0400 Subject: APEI, GHES: Cleanup unnecessary function for lockless list We have a generic function to reverse a lockless list, kill homegrown copy. Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1406530260-26078-2-git-send-email-gong.chen@linux.intel.com Acked-by: Tony Luck Acked-by: Naoya Horiguchi [ Boris: correct commit msg ] Signed-off-by: Borislav Petkov diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index fc5f780..9dcc915 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -738,20 +738,6 @@ static LIST_HEAD(ghes_nmi); static int ghes_panic_timeout __read_mostly = 30; -static struct llist_node *llist_nodes_reverse(struct llist_node *llnode) -{ - struct llist_node *next, *tail = NULL; - - while (llnode) { - next = llnode->next; - llnode->next = tail; - tail = llnode; - llnode = next; - } - - return tail; -} - static void ghes_proc_in_irq(struct irq_work *irq_work) { struct llist_node *llnode, *next; @@ -765,7 +751,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work) * Because the time order of estatus in list is reversed, * revert it back to proper order. */ - llnode = llist_nodes_reverse(llnode); + llnode = llist_reverse_order(llnode); while (llnode) { next = llnode->next; estatus_node = llist_entry(llnode, struct ghes_estatus_node, @@ -798,7 +784,7 @@ static void ghes_print_queued_estatus(void) * Because the time order of estatus in list is reversed, * revert it back to proper order. */ - llnode = llist_nodes_reverse(llnode); + llnode = llist_reverse_order(llnode); while (llnode) { estatus_node = llist_entry(llnode, struct ghes_estatus_node, llnode); -- cgit v0.10.2 From 8f7c31f6cd499877aa6b96decd31b406a6cd4ddf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 29 Sep 2014 13:33:17 +0200 Subject: GHES: Make ghes_estatus_caches static It is used only in ghes.c. Signed-off-by: Borislav Petkov diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 9dcc915..1b6aa51 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -128,7 +128,7 @@ static DEFINE_SPINLOCK(ghes_ioremap_lock_irq); static struct gen_pool *ghes_estatus_pool; static unsigned long ghes_estatus_pool_size_request; -struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; +static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE]; static atomic_t ghes_estatus_cache_alloced; static int ghes_ioremap_init(void) -- cgit v0.10.2 From 6dc52cbe0b181818450fc1de4c0c850226ce0e68 Mon Sep 17 00:00:00 2001 From: "Chen, Gong" Date: Mon, 28 Jul 2014 02:51:00 -0400 Subject: RAS, HWPOISON: Fix wrong error recovery status When Uncorrected error happens, if the poisoned page is referenced by more than one user after error recovery, the recovery is not successful. But currently the display result is wrong. Before this patch: MCE 0x44e336: dirty mlocked LRU page recovery: Recovered MCE 0x44e336: dirty mlocked LRU page still referenced by 1 users mce: Memory error not recovered After this patch: MCE 0x44e336: dirty mlocked LRU page recovery: Failed MCE 0x44e336: dirty mlocked LRU page still referenced by 1 users mce: Memory error not recovered Signed-off-by: Chen, Gong Link: http://lkml.kernel.org/r/1406530260-26078-3-git-send-email-gong.chen@linux.intel.com Acked-by: Naoya Horiguchi Acked-by: Tony Luck Signed-off-by: Borislav Petkov diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8639f6b..b852b10 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -860,7 +860,6 @@ static int page_action(struct page_state *ps, struct page *p, int count; result = ps->action(p, pfn); - action_result(pfn, ps->msg, result); count = page_count(p) - 1; if (ps->action == me_swapcache_dirty && result == DELAYED) @@ -871,6 +870,7 @@ static int page_action(struct page_state *ps, struct page *p, pfn, ps->msg, count); result = FAILED; } + action_result(pfn, ps->msg, result); /* Could do more checks here if page looks ok */ /* -- cgit v0.10.2 From 4b737d78a8081cb2def7ced262a212c31363539a Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 23 Sep 2014 10:16:01 +0800 Subject: x86, MCE, AMD: Use macros to compute bank MSRs Avoid open coded calculations for bank MSRs by hiding the index of higher bank MSRs in well-defined macros. No semantic changes. Signed-off-by: Chen Yucong Link: http://lkml.kernel.org/r/1411438561-24319-1-git-send-email-slaoub@gmail.com Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 5d4999f..f8c56bd 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -217,7 +217,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) - address = MSR_IA32_MC0_MISC + bank * 4; + address = MSR_IA32_MCx_MISC(bank); else if (block == 1) { address = (low & MASK_BLKPTR_LO) >> 21; if (!address) @@ -281,7 +281,7 @@ static void amd_threshold_interrupt(void) continue; for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) { - address = MSR_IA32_MC0_MISC + bank * 4; + address = MSR_IA32_MCx_MISC(bank); } else if (block == 1) { address = (low & MASK_BLKPTR_LO) >> 21; if (!address) @@ -314,8 +314,7 @@ static void amd_threshold_interrupt(void) if (high & MASK_OVERFLOW_HI) { rdmsrl(address, m.misc); - rdmsrl(MSR_IA32_MC0_STATUS + bank * 4, - m.status); + rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block; @@ -617,8 +616,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank) } } - err = allocate_threshold_blocks(cpu, bank, 0, - MSR_IA32_MC0_MISC + bank * 4); + err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank)); if (!err) goto out; -- cgit v0.10.2 From 44612a3ac671d7b9a9b987ab73dcc776204ac4d5 Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Thu, 2 Oct 2014 14:48:19 +0200 Subject: x86, MCE, AMD: Correct thresholding error logging mce_setup() does not gather the content of IA32_MCG_STATUS, so it should be read explicitly. Moreover, we need to clear IA32_MCx_STATUS to avoid that mce_log() logs the processed threshold event again at next time. But we do the logging ourselves and machine_check_poll() is completely useless there. So kill it. Signed-off-by: Chen Yucong Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f8c56bd..9ce64955 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -270,14 +270,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) static void amd_threshold_interrupt(void) { u32 low = 0, high = 0, address = 0; + int cpu = smp_processor_id(); unsigned int bank, block; struct mce m; - mce_setup(&m); - /* assume first bank caused it */ for (bank = 0; bank < mca_cfg.banks; ++bank) { - if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) + if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; for (block = 0; block < NR_BLOCKS; ++block) { if (block == 0) { @@ -309,20 +308,21 @@ static void amd_threshold_interrupt(void) * Log the machine check that caused the threshold * event. */ - machine_check_poll(MCP_TIMESTAMP, - this_cpu_ptr(&mce_poll_banks)); - - if (high & MASK_OVERFLOW_HI) { - rdmsrl(address, m.misc); - rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); - m.bank = K8_MCE_THRESHOLD_BASE - + bank * NR_BLOCKS - + block; - mce_log(&m); - return; - } + if (high & MASK_OVERFLOW_HI) + goto log; } } + return; + +log: + mce_setup(&m); + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + rdmsrl(address, m.misc); + rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); + m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block; + mce_log(&m); + + wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); } /* -- cgit v0.10.2 From 69b957583580bf40624553c64d802fefb54199cb Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Thu, 2 Oct 2014 23:20:12 +0800 Subject: x86, MCE, AMD: Move invariant code out from loop body Assigning to mce_threshold_vector is loop-invariant code in mce_amd_feature_init(). So do it only once, out of loop body. Signed-off-by: Chen Yucong Link: http://lkml.kernel.org/r/1412263212.8085.6.camel@debian [ Boris: commit message corrections. ] Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9ce64955..9af7bd7 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -253,7 +253,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) } mce_threshold_block_init(&b, offset); - mce_threshold_vector = amd_threshold_interrupt; + + if (mce_threshold_vector != amd_threshold_interrupt) + mce_threshold_vector = amd_threshold_interrupt; } } } -- cgit v0.10.2 From a3a529d104ec5149fb9a667dce988635941be1ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Oct 2014 22:19:59 +0200 Subject: x86, MCE, AMD: Drop software-defined bank in error thresholding Aravind had the good question about why we're assigning a software-defined bank when reporting error thresholding errors instead of simply using the bank which reports the last error causing the overflow. Digging through git history, it pointed to 95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors") which added that functionality. The problem with this, however, is that tools don't know about software-defined banks and get puzzled. So drop that K8_MCE_THRESHOLD_BASE and simply use the hw bank reporting the thresholding interrupt. Save us a couple of MSR reads while at it. Reported-by: Aravind Gopalakrishnan Link: https://lkml.kernel.org/r/5435B206.60402@amd.com Signed-off-by: Borislav Petkov diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 958b90f..276392f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -78,7 +78,6 @@ /* Software defined banks */ #define MCE_EXTENDED_BANK 128 #define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) -#define K8_MCE_THRESHOLD_BASE (MCE_EXTENDED_BANK + 1) #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 9af7bd7..6606523 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -318,10 +318,9 @@ static void amd_threshold_interrupt(void) log: mce_setup(&m); - rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); - rdmsrl(address, m.misc); rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status); - m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block; + m.misc = ((u64)high << 32) | low; + m.bank = bank; mce_log(&m); wrmsrl(MSR_IA32_MCx_STATUS(bank), 0); -- cgit v0.10.2 From 8dcf32ea220d87ca517e164de85d336480c9d172 Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Sat, 1 Nov 2014 11:23:32 +0100 Subject: x86, MCE, AMD: Assign interrupt handler only when bank supports it There are some AMD CPU models which have thresholding banks but which cannot generate a thresholding interrupt. This is denoted by the bit MCi_MISC[IntP]. Make sure to check that bit before assigning the thresholding interrupt handler. Signed-off-by: Chen Yucong [ Boris: save an indentation level and rewrite commit message. ] Link: http://lkml.kernel.org/r/1412662128.28440.18.camel@debian Signed-off-by: Borislav Petkov diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 6606523..f1c3769 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -212,7 +212,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) unsigned int cpu = smp_processor_id(); u32 low = 0, high = 0, address = 0; unsigned int bank, block; - int offset = -1; + int offset = -1, new; for (bank = 0; bank < mca_cfg.banks; ++bank) { for (block = 0; block < NR_BLOCKS; ++block) { @@ -247,15 +247,18 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) b.address = address; b.interrupt_capable = lvt_interrupt_supported(bank, high); - if (b.interrupt_capable) { - int new = (high & MASK_LVTOFF_HI) >> 20; - offset = setup_APIC_mce(offset, new); - } + if (!b.interrupt_capable) + goto init; - mce_threshold_block_init(&b, offset); + new = (high & MASK_LVTOFF_HI) >> 20; + offset = setup_APIC_mce(offset, new); - if (mce_threshold_vector != amd_threshold_interrupt) + if ((offset == new) && + (mce_threshold_vector != amd_threshold_interrupt)) mce_threshold_vector = amd_threshold_interrupt; + +init: + mce_threshold_block_init(&b, offset); } } } -- cgit v0.10.2 From e3480271f59253cb60d030aa5e615bf00b731fea Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 18 Nov 2014 10:09:19 +0800 Subject: x86, mce, severity: Extend the the mce_severity mechanism to handle UCNA/DEFERRED error Until now, the mce_severity mechanism can only identify the severity of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter out DEFERRED error for AMD platform. This patch extends the mce_severity mechanism for handling UCNA/DEFERRED error. In order to do this, the patch introduces a new severity level - MCE_UCNA/DEFERRED_SEVERITY. In addition, mce_severity is specific to machine check exception, and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity mechanism in non-exception context, the patch also introduces a new argument (is_excp) for mce_severity. `is_excp' is used to explicitly specify the calling context of mce_severity. Reviewed-by: Aravind Gopalakrishnan Signed-off-by: Chen Yucong Signed-off-by: Tony Luck diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 276392f..51b26e89 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -34,6 +34,10 @@ #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ #define MCI_STATUS_AR (1ULL<<55) /* Action required */ +/* AMD-specific bits */ +#define MCI_STATUS_DEFERRED (1ULL<<44) /* declare an uncorrected error */ +#define MCI_STATUS_POISON (1ULL<<43) /* access poisonous data */ + /* * Note that the full MCACOD field of IA32_MCi_STATUS MSR is * bits 15:0. But bit 12 is the 'F' bit, defined for corrected diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 09edd0b..10b4690 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -3,6 +3,8 @@ enum severity_level { MCE_NO_SEVERITY, + MCE_DEFERRED_SEVERITY, + MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY, MCE_KEEP_SEVERITY, MCE_SOME_SEVERITY, MCE_AO_SEVERITY, @@ -21,7 +23,7 @@ struct mce_bank { char attrname[ATTR_LEN]; /* attribute name */ }; -int mce_severity(struct mce *a, int tolerant, char **msg); +int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp); struct dentry *mce_get_debugfs_dir(void); extern struct mce_bank *mce_banks; diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index c370e1c..8bb4330 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c @@ -31,6 +31,7 @@ enum context { IN_KERNEL = 1, IN_USER = 2 }; enum ser { SER_REQUIRED = 1, NO_SER = 2 }; +enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 }; static struct severity { u64 mask; @@ -40,6 +41,7 @@ static struct severity { unsigned char mcgres; unsigned char ser; unsigned char context; + unsigned char excp; unsigned char covered; char *msg; } severities[] = { @@ -48,6 +50,8 @@ static struct severity { #define USER .context = IN_USER #define SER .ser = SER_REQUIRED #define NOSER .ser = NO_SER +#define EXCP .excp = EXCP_CONTEXT +#define NOEXCP .excp = NO_EXCP #define BITCLR(x) .mask = x, .result = 0 #define BITSET(x) .mask = x, .result = x #define MCGMASK(x, y) .mcgmask = x, .mcgres = y @@ -62,7 +66,7 @@ static struct severity { ), MCESEV( NO, "Not enabled", - BITCLR(MCI_STATUS_EN) + EXCP, BITCLR(MCI_STATUS_EN) ), MCESEV( PANIC, "Processor context corrupt", @@ -71,16 +75,20 @@ static struct severity { /* When MCIP is not set something is very confused */ MCESEV( PANIC, "MCIP not set in MCA handler", - MCGMASK(MCG_STATUS_MCIP, 0) + EXCP, MCGMASK(MCG_STATUS_MCIP, 0) ), /* Neither return not error IP -- no chance to recover -> PANIC */ MCESEV( PANIC, "Neither restart nor error IP", - MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) + EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0) ), MCESEV( PANIC, "In kernel and no restart IP", - KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) + EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0) + ), + MCESEV( + DEFERRED, "Deferred error", + NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED) ), MCESEV( KEEP, "Corrected error", @@ -89,7 +97,7 @@ static struct severity { /* ignore OVER for UCNA */ MCESEV( - KEEP, "Uncorrected no action required", + UCNA, "Uncorrected no action required", SER, MASK(MCI_UC_SAR, MCI_STATUS_UC) ), MCESEV( @@ -178,8 +186,9 @@ static int error_context(struct mce *m) return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL; } -int mce_severity(struct mce *m, int tolerant, char **msg) +int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp) { + enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP); enum context ctx = error_context(m); struct severity *s; @@ -194,6 +203,8 @@ int mce_severity(struct mce *m, int tolerant, char **msg) continue; if (s->context && ctx != s->context) continue; + if (s->excp && excp != s->excp) + continue; if (msg) *msg = s->msg; s->covered = 1; diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 61a9668ce..453e9bf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -668,7 +668,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp, if (quirk_no_way_out) quirk_no_way_out(i, m, regs); } - if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY) + if (mce_severity(m, mca_cfg.tolerant, msg, true) >= + MCE_PANIC_SEVERITY) ret = 1; } return ret; @@ -754,7 +755,7 @@ static void mce_reign(void) for_each_possible_cpu(cpu) { int severity = mce_severity(&per_cpu(mces_seen, cpu), mca_cfg.tolerant, - &nmsg); + &nmsg, true); if (severity > global_worst) { msg = nmsg; global_worst = severity; @@ -1095,13 +1096,14 @@ void do_machine_check(struct pt_regs *regs, long error_code) */ add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - severity = mce_severity(&m, cfg->tolerant, NULL); + severity = mce_severity(&m, cfg->tolerant, NULL, true); /* - * When machine check was for corrected handler don't touch, - * unless we're panicing. + * When machine check was for corrected/deferred handler don't + * touch, unless we're panicing. */ - if (severity == MCE_KEEP_SEVERITY && !no_way_out) + if ((severity == MCE_KEEP_SEVERITY || + severity == MCE_UCNA_SEVERITY) && !no_way_out) continue; __set_bit(i, toclear); if (severity == MCE_NO_SEVERITY) { diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h index 51b7e3a..c2359a1 100644 --- a/drivers/edac/mce_amd.h +++ b/drivers/edac/mce_amd.h @@ -32,9 +32,6 @@ #define R4(x) (((x) >> 4) & 0xf) #define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!") -#define MCI_STATUS_DEFERRED BIT_64(44) -#define MCI_STATUS_POISON BIT_64(43) - extern const char * const pp_msgs[]; enum tt_ids { -- cgit v0.10.2 From fa92c58694268a7e9f7fa2c6881c1482221c2788 Mon Sep 17 00:00:00 2001 From: Chen Yucong Date: Tue, 18 Nov 2014 10:09:20 +0800 Subject: x86, mce: Support memory error recovery for both UCNA and Deferred error in machine_check_poll Uncorrected no action required (UCNA) - is a uncorrected recoverable machine check error that is not signaled via a machine check exception and, instead, is reported to system software as a corrected machine check error. UCNA errors indicate that some data in the system is corrupted, but the data has not been consumed and the processor state is valid and you may continue execution on this processor. UCNA errors require no action from system software to continue execution. Note that UCNA errors are supported by the processor only when IA32_MCG_CAP[24] (MCG_SER_P) is set. -- Intel SDM Volume 3B Deferred errors are errors that cannot be corrected by hardware, but do not cause an immediate interruption in program flow, loss of data integrity, or corruption of processor state. These errors indicate that data has been corrupted but not consumed. Hardware writes information to the status and address registers in the corresponding bank that identifies the source of the error if deferred errors are enabled for logging. Deferred errors are not reported via machine check exceptions; they can be seen by polling the MCi_STATUS registers. -- AMD64 APM Volume 2 Above two items, both UCNA and Deferred errors belong to detected errors, but they can't be corrected by hardware, and this is very similar to Software Recoverable Action Optional (SRAO) errors. Therefore, we can take some actions that have been used for handling SRAO errors to handle UCNA and Deferred errors. Acked-by: Borislav Petkov Signed-off-by: Chen Yucong Signed-off-by: Tony Luck diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 453e9bf..cfb16f6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -575,6 +575,37 @@ static void mce_read_aux(struct mce *m, int i) } } +static bool memory_error(struct mce *m) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + + if (c->x86_vendor == X86_VENDOR_AMD) { + /* + * coming soon + */ + return false; + } else if (c->x86_vendor == X86_VENDOR_INTEL) { + /* + * Intel SDM Volume 3B - 15.9.2 Compound Error Codes + * + * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for + * indicating a memory error. Bit 8 is used for indicating a + * cache hierarchy error. The combination of bit 2 and bit 3 + * is used for indicating a `generic' cache hierarchy error + * But we can't just blindly check the above bits, because if + * bit 11 is set, then it is a bus/interconnect error - and + * either way the above bits just gives more detail on what + * bus/interconnect error happened. Note that bit 12 can be + * ignored, as it's the "filter" bit. + */ + return (m->status & 0xef80) == BIT(7) || + (m->status & 0xef00) == BIT(8) || + (m->status & 0xeffc) == 0xc; + } + + return false; +} + DEFINE_PER_CPU(unsigned, mce_poll_count); /* @@ -595,6 +626,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count); void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) { struct mce m; + int severity; int i; this_cpu_inc(mce_poll_count); @@ -630,6 +662,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) if (!(flags & MCP_TIMESTAMP)) m.tsc = 0; + + severity = mce_severity(&m, mca_cfg.tolerant, NULL, false); + + /* + * In the cases where we don't have a valid address after all, + * do not add it into the ring buffer. + */ + if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) { + if (m.status & MCI_STATUS_ADDRV) { + mce_ring_add(m.addr >> PAGE_SHIFT); + mce_schedule_work(); + } + } + /* * Don't get the IP here because it's unlikely to * have anything to do with the actual error location. -- cgit v0.10.2 From c7c9b3929b6a57ad47ab4021c77e46f7ff21c007 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 3 Dec 2014 22:36:45 +0100 Subject: x86/mce: Spell "panicked" correctly We need the additional "k" to make it a hard-c: https://en.wiktionary.org/wiki/panicked Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1417642605-15730-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index cfb16f6..d2c6116 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -292,10 +292,10 @@ static void print_mce(struct mce *m) #define PANIC_TIMEOUT 5 /* 5 seconds */ -static atomic_t mce_paniced; +static atomic_t mce_panicked; static int fake_panic; -static atomic_t mce_fake_paniced; +static atomic_t mce_fake_panicked; /* Panic in progress. Enable interrupts and wait for final IPI */ static void wait_for_panic(void) @@ -319,7 +319,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) /* * Make sure only one CPU runs in machine check panic */ - if (atomic_inc_return(&mce_paniced) > 1) + if (atomic_inc_return(&mce_panicked) > 1) wait_for_panic(); barrier(); @@ -327,7 +327,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp) console_verbose(); } else { /* Don't log too much for fake panic */ - if (atomic_inc_return(&mce_fake_paniced) > 1) + if (atomic_inc_return(&mce_fake_panicked) > 1) return; } /* First print corrected ones that are still unlogged */ @@ -744,7 +744,7 @@ static int mce_timed_out(u64 *t) * might have been modified by someone else. */ rmb(); - if (atomic_read(&mce_paniced)) + if (atomic_read(&mce_panicked)) wait_for_panic(); if (!mca_cfg.monarch_timeout) goto out; @@ -2568,7 +2568,7 @@ struct dentry *mce_get_debugfs_dir(void) static void mce_reset(void) { cpu_missing = 0; - atomic_set(&mce_fake_paniced, 0); + atomic_set(&mce_fake_panicked, 0); atomic_set(&mce_executing, 0); atomic_set(&mce_callin, 0); atomic_set(&global_nwo, 0); -- cgit v0.10.2