From 8d21d4c91efaed3eb60cc64d43889665a4bd7a36 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 28 Jul 2014 02:50:59 -0400
Subject: APEI, GHES: Cleanup unnecessary function for lockless list

We have a generic function to reverse a lockless list, kill homegrown
copy.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1406530260-26078-2-git-send-email-gong.chen@linux.intel.com
Acked-by: Tony Luck <tony.luck@intel.com>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
[ Boris: correct commit msg ]
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index fc5f780..9dcc915 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -738,20 +738,6 @@ static LIST_HEAD(ghes_nmi);
 
 static int ghes_panic_timeout	__read_mostly = 30;
 
-static struct llist_node *llist_nodes_reverse(struct llist_node *llnode)
-{
-	struct llist_node *next, *tail = NULL;
-
-	while (llnode) {
-		next = llnode->next;
-		llnode->next = tail;
-		tail = llnode;
-		llnode = next;
-	}
-
-	return tail;
-}
-
 static void ghes_proc_in_irq(struct irq_work *irq_work)
 {
 	struct llist_node *llnode, *next;
@@ -765,7 +751,7 @@ static void ghes_proc_in_irq(struct irq_work *irq_work)
 	 * Because the time order of estatus in list is reversed,
 	 * revert it back to proper order.
 	 */
-	llnode = llist_nodes_reverse(llnode);
+	llnode = llist_reverse_order(llnode);
 	while (llnode) {
 		next = llnode->next;
 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
@@ -798,7 +784,7 @@ static void ghes_print_queued_estatus(void)
 	 * Because the time order of estatus in list is reversed,
 	 * revert it back to proper order.
 	 */
-	llnode = llist_nodes_reverse(llnode);
+	llnode = llist_reverse_order(llnode);
 	while (llnode) {
 		estatus_node = llist_entry(llnode, struct ghes_estatus_node,
 					   llnode);
-- 
cgit v0.10.2


From 8f7c31f6cd499877aa6b96decd31b406a6cd4ddf Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Mon, 29 Sep 2014 13:33:17 +0200
Subject: GHES: Make ghes_estatus_caches static

It is used only in ghes.c.

Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9dcc915..1b6aa51 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -128,7 +128,7 @@ static DEFINE_SPINLOCK(ghes_ioremap_lock_irq);
 static struct gen_pool *ghes_estatus_pool;
 static unsigned long ghes_estatus_pool_size_request;
 
-struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
+static struct ghes_estatus_cache *ghes_estatus_caches[GHES_ESTATUS_CACHES_SIZE];
 static atomic_t ghes_estatus_cache_alloced;
 
 static int ghes_ioremap_init(void)
-- 
cgit v0.10.2


From 6dc52cbe0b181818450fc1de4c0c850226ce0e68 Mon Sep 17 00:00:00 2001
From: "Chen, Gong" <gong.chen@linux.intel.com>
Date: Mon, 28 Jul 2014 02:51:00 -0400
Subject: RAS, HWPOISON: Fix wrong error recovery status

When Uncorrected error happens, if the poisoned page is referenced
by more than one user after error recovery, the recovery is not
successful. But currently the display result is wrong.
Before this patch:

MCE 0x44e336: dirty mlocked LRU page recovery: Recovered
MCE 0x44e336: dirty mlocked LRU page still referenced by 1 users
mce: Memory error not recovered

After this patch:

MCE 0x44e336: dirty mlocked LRU page recovery: Failed
MCE 0x44e336: dirty mlocked LRU page still referenced by 1 users
mce: Memory error not recovered

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
Link: http://lkml.kernel.org/r/1406530260-26078-3-git-send-email-gong.chen@linux.intel.com
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8639f6b..b852b10 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -860,7 +860,6 @@ static int page_action(struct page_state *ps, struct page *p,
 	int count;
 
 	result = ps->action(p, pfn);
-	action_result(pfn, ps->msg, result);
 
 	count = page_count(p) - 1;
 	if (ps->action == me_swapcache_dirty && result == DELAYED)
@@ -871,6 +870,7 @@ static int page_action(struct page_state *ps, struct page *p,
 		       pfn, ps->msg, count);
 		result = FAILED;
 	}
+	action_result(pfn, ps->msg, result);
 
 	/* Could do more checks here if page looks ok */
 	/*
-- 
cgit v0.10.2


From 4b737d78a8081cb2def7ced262a212c31363539a Mon Sep 17 00:00:00 2001
From: Chen Yucong <slaoub@gmail.com>
Date: Tue, 23 Sep 2014 10:16:01 +0800
Subject: x86, MCE, AMD: Use macros to compute bank MSRs

Avoid open coded calculations for bank MSRs by hiding the index
of higher bank MSRs in well-defined macros.

No semantic changes.

Signed-off-by: Chen Yucong <slaoub@gmail.com>
Link: http://lkml.kernel.org/r/1411438561-24319-1-git-send-email-slaoub@gmail.com
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 5d4999f..f8c56bd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -217,7 +217,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
 			if (block == 0)
-				address = MSR_IA32_MC0_MISC + bank * 4;
+				address = MSR_IA32_MCx_MISC(bank);
 			else if (block == 1) {
 				address = (low & MASK_BLKPTR_LO) >> 21;
 				if (!address)
@@ -281,7 +281,7 @@ static void amd_threshold_interrupt(void)
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
 			if (block == 0) {
-				address = MSR_IA32_MC0_MISC + bank * 4;
+				address = MSR_IA32_MCx_MISC(bank);
 			} else if (block == 1) {
 				address = (low & MASK_BLKPTR_LO) >> 21;
 				if (!address)
@@ -314,8 +314,7 @@ static void amd_threshold_interrupt(void)
 
 			if (high & MASK_OVERFLOW_HI) {
 				rdmsrl(address, m.misc);
-				rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
-				       m.status);
+				rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
 				m.bank = K8_MCE_THRESHOLD_BASE
 				       + bank * NR_BLOCKS
 				       + block;
@@ -617,8 +616,7 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
 		}
 	}
 
-	err = allocate_threshold_blocks(cpu, bank, 0,
-					MSR_IA32_MC0_MISC + bank * 4);
+	err = allocate_threshold_blocks(cpu, bank, 0, MSR_IA32_MCx_MISC(bank));
 	if (!err)
 		goto out;
 
-- 
cgit v0.10.2


From 44612a3ac671d7b9a9b987ab73dcc776204ac4d5 Mon Sep 17 00:00:00 2001
From: Chen Yucong <slaoub@gmail.com>
Date: Thu, 2 Oct 2014 14:48:19 +0200
Subject: x86, MCE, AMD: Correct thresholding error logging

mce_setup() does not gather the content of IA32_MCG_STATUS, so it
should be read explicitly. Moreover, we need to clear IA32_MCx_STATUS
to avoid that mce_log() logs the processed threshold event again
at next time.

But we do the logging ourselves and machine_check_poll() is completely
useless there. So kill it.

Signed-off-by: Chen Yucong <slaoub@gmail.com>
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index f8c56bd..9ce64955 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -270,14 +270,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 static void amd_threshold_interrupt(void)
 {
 	u32 low = 0, high = 0, address = 0;
+	int cpu = smp_processor_id();
 	unsigned int bank, block;
 	struct mce m;
 
-	mce_setup(&m);
-
 	/* assume first bank caused it */
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
-		if (!(per_cpu(bank_map, m.cpu) & (1 << bank)))
+		if (!(per_cpu(bank_map, cpu) & (1 << bank)))
 			continue;
 		for (block = 0; block < NR_BLOCKS; ++block) {
 			if (block == 0) {
@@ -309,20 +308,21 @@ static void amd_threshold_interrupt(void)
 			 * Log the machine check that caused the threshold
 			 * event.
 			 */
-			machine_check_poll(MCP_TIMESTAMP,
-					this_cpu_ptr(&mce_poll_banks));
-
-			if (high & MASK_OVERFLOW_HI) {
-				rdmsrl(address, m.misc);
-				rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
-				m.bank = K8_MCE_THRESHOLD_BASE
-				       + bank * NR_BLOCKS
-				       + block;
-				mce_log(&m);
-				return;
-			}
+			if (high & MASK_OVERFLOW_HI)
+				goto log;
 		}
 	}
+	return;
+
+log:
+	mce_setup(&m);
+	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+	rdmsrl(address, m.misc);
+	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
+	m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block;
+	mce_log(&m);
+
+	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
 }
 
 /*
-- 
cgit v0.10.2


From 69b957583580bf40624553c64d802fefb54199cb Mon Sep 17 00:00:00 2001
From: Chen Yucong <slaoub@gmail.com>
Date: Thu, 2 Oct 2014 23:20:12 +0800
Subject: x86, MCE, AMD: Move invariant code out from loop body

Assigning to mce_threshold_vector is loop-invariant code in
mce_amd_feature_init(). So do it only once, out of loop body.

Signed-off-by: Chen Yucong <slaoub@gmail.com>
Link: http://lkml.kernel.org/r/1412263212.8085.6.camel@debian
[ Boris: commit message corrections. ]
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9ce64955..9af7bd7 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -253,7 +253,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			}
 
 			mce_threshold_block_init(&b, offset);
-			mce_threshold_vector = amd_threshold_interrupt;
+
+			if (mce_threshold_vector != amd_threshold_interrupt)
+				mce_threshold_vector = amd_threshold_interrupt;
 		}
 	}
 }
-- 
cgit v0.10.2


From a3a529d104ec5149fb9a667dce988635941be1ed Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Tue, 21 Oct 2014 22:19:59 +0200
Subject: x86, MCE, AMD: Drop software-defined bank in error thresholding

Aravind had the good question about why we're assigning a
software-defined bank when reporting error thresholding errors instead
of simply using the bank which reports the last error causing the
overflow.

Digging through git history, it pointed to

95268664390b ("[PATCH] x86_64: mce_amd support for family 0x10 processors")

which added that functionality. The problem with this, however, is that
tools don't know about software-defined banks and get puzzled. So drop
that K8_MCE_THRESHOLD_BASE and simply use the hw bank reporting the
thresholding interrupt.

Save us a couple of MSR reads while at it.

Reported-by: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
Link: https://lkml.kernel.org/r/5435B206.60402@amd.com
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 958b90f..276392f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -78,7 +78,6 @@
 /* Software defined banks */
 #define MCE_EXTENDED_BANK	128
 #define MCE_THERMAL_BANK	(MCE_EXTENDED_BANK + 0)
-#define K8_MCE_THRESHOLD_BASE   (MCE_EXTENDED_BANK + 1)
 
 #define MCE_LOG_LEN 32
 #define MCE_LOG_SIGNATURE	"MACHINECHECK"
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 9af7bd7..6606523 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -318,10 +318,9 @@ static void amd_threshold_interrupt(void)
 
 log:
 	mce_setup(&m);
-	rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
-	rdmsrl(address, m.misc);
 	rdmsrl(MSR_IA32_MCx_STATUS(bank), m.status);
-	m.bank = K8_MCE_THRESHOLD_BASE + bank * NR_BLOCKS + block;
+	m.misc = ((u64)high << 32) | low;
+	m.bank = bank;
 	mce_log(&m);
 
 	wrmsrl(MSR_IA32_MCx_STATUS(bank), 0);
-- 
cgit v0.10.2


From 8dcf32ea220d87ca517e164de85d336480c9d172 Mon Sep 17 00:00:00 2001
From: Chen Yucong <slaoub@gmail.com>
Date: Sat, 1 Nov 2014 11:23:32 +0100
Subject: x86, MCE, AMD: Assign interrupt handler only when bank supports it

There are some AMD CPU models which have thresholding banks but which
cannot generate a thresholding interrupt. This is denoted by the bit
MCi_MISC[IntP]. Make sure to check that bit before assigning the
thresholding interrupt handler.

Signed-off-by: Chen Yucong <slaoub@gmail.com>
[ Boris: save an indentation level and rewrite commit message. ]
Link: http://lkml.kernel.org/r/1412662128.28440.18.camel@debian
Signed-off-by: Borislav Petkov <bp@suse.de>

diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 6606523..f1c3769 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -212,7 +212,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	unsigned int cpu = smp_processor_id();
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
-	int offset = -1;
+	int offset = -1, new;
 
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -247,15 +247,18 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			b.address		= address;
 			b.interrupt_capable	= lvt_interrupt_supported(bank, high);
 
-			if (b.interrupt_capable) {
-				int new = (high & MASK_LVTOFF_HI) >> 20;
-				offset  = setup_APIC_mce(offset, new);
-			}
+			if (!b.interrupt_capable)
+				goto init;
 
-			mce_threshold_block_init(&b, offset);
+			new	= (high & MASK_LVTOFF_HI) >> 20;
+			offset  = setup_APIC_mce(offset, new);
 
-			if (mce_threshold_vector != amd_threshold_interrupt)
+			if ((offset == new) &&
+			    (mce_threshold_vector != amd_threshold_interrupt))
 				mce_threshold_vector = amd_threshold_interrupt;
+
+init:
+			mce_threshold_block_init(&b, offset);
 		}
 	}
 }
-- 
cgit v0.10.2


From e3480271f59253cb60d030aa5e615bf00b731fea Mon Sep 17 00:00:00 2001
From: Chen Yucong <slaoub@gmail.com>
Date: Tue, 18 Nov 2014 10:09:19 +0800
Subject: x86, mce, severity: Extend the the mce_severity mechanism to handle
 UCNA/DEFERRED error

Until now, the mce_severity mechanism can only identify the severity
of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
out DEFERRED error for AMD platform.

This patch extends the mce_severity mechanism for handling
UCNA/DEFERRED error. In order to do this, the patch introduces a new
severity level - MCE_UCNA/DEFERRED_SEVERITY.

In addition, mce_severity is specific to machine check exception,
and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
mechanism in non-exception context, the patch also introduces a new
argument (is_excp) for mce_severity. `is_excp' is used to explicitly
specify the calling context of mce_severity.

Reviewed-by: Aravind Gopalakrishnan <Aravind.Gopalakrishnan@amd.com>
Signed-off-by: Chen Yucong <slaoub@gmail.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 276392f..51b26e89 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -34,6 +34,10 @@
 #define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
 #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
 
+/* AMD-specific bits */
+#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* declare an uncorrected error */
+#define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
+
 /*
  * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
  * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 09edd0b..10b4690 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -3,6 +3,8 @@
 
 enum severity_level {
 	MCE_NO_SEVERITY,
+	MCE_DEFERRED_SEVERITY,
+	MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
 	MCE_KEEP_SEVERITY,
 	MCE_SOME_SEVERITY,
 	MCE_AO_SEVERITY,
@@ -21,7 +23,7 @@ struct mce_bank {
 	char			attrname[ATTR_LEN];	/* attribute name */
 };
 
-int mce_severity(struct mce *a, int tolerant, char **msg);
+int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
 struct dentry *mce_get_debugfs_dir(void);
 
 extern struct mce_bank *mce_banks;
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index c370e1c..8bb4330 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -31,6 +31,7 @@
 
 enum context { IN_KERNEL = 1, IN_USER = 2 };
 enum ser { SER_REQUIRED = 1, NO_SER = 2 };
+enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
 
 static struct severity {
 	u64 mask;
@@ -40,6 +41,7 @@ static struct severity {
 	unsigned char mcgres;
 	unsigned char ser;
 	unsigned char context;
+	unsigned char excp;
 	unsigned char covered;
 	char *msg;
 } severities[] = {
@@ -48,6 +50,8 @@ static struct severity {
 #define  USER		.context = IN_USER
 #define  SER		.ser = SER_REQUIRED
 #define  NOSER		.ser = NO_SER
+#define  EXCP		.excp = EXCP_CONTEXT
+#define  NOEXCP		.excp = NO_EXCP
 #define  BITCLR(x)	.mask = x, .result = 0
 #define  BITSET(x)	.mask = x, .result = x
 #define  MCGMASK(x, y)	.mcgmask = x, .mcgres = y
@@ -62,7 +66,7 @@ static struct severity {
 		),
 	MCESEV(
 		NO, "Not enabled",
-		BITCLR(MCI_STATUS_EN)
+		EXCP, BITCLR(MCI_STATUS_EN)
 		),
 	MCESEV(
 		PANIC, "Processor context corrupt",
@@ -71,16 +75,20 @@ static struct severity {
 	/* When MCIP is not set something is very confused */
 	MCESEV(
 		PANIC, "MCIP not set in MCA handler",
-		MCGMASK(MCG_STATUS_MCIP, 0)
+		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
 		),
 	/* Neither return not error IP -- no chance to recover -> PANIC */
 	MCESEV(
 		PANIC, "Neither restart nor error IP",
-		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
+		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
 		),
 	MCESEV(
 		PANIC, "In kernel and no restart IP",
-		KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
+		),
+	MCESEV(
+		DEFERRED, "Deferred error",
+		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
 		),
 	MCESEV(
 		KEEP, "Corrected error",
@@ -89,7 +97,7 @@ static struct severity {
 
 	/* ignore OVER for UCNA */
 	MCESEV(
-		KEEP, "Uncorrected no action required",
+		UCNA, "Uncorrected no action required",
 		SER, MASK(MCI_UC_SAR, MCI_STATUS_UC)
 		),
 	MCESEV(
@@ -178,8 +186,9 @@ static int error_context(struct mce *m)
 	return ((m->cs & 3) == 3) ? IN_USER : IN_KERNEL;
 }
 
-int mce_severity(struct mce *m, int tolerant, char **msg)
+int mce_severity(struct mce *m, int tolerant, char **msg, bool is_excp)
 {
+	enum exception excp = (is_excp ? EXCP_CONTEXT : NO_EXCP);
 	enum context ctx = error_context(m);
 	struct severity *s;
 
@@ -194,6 +203,8 @@ int mce_severity(struct mce *m, int tolerant, char **msg)
 			continue;
 		if (s->context && ctx != s->context)
 			continue;
+		if (s->excp && excp != s->excp)
+			continue;
 		if (msg)
 			*msg = s->msg;
 		s->covered = 1;
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 61a9668ce..453e9bf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -668,7 +668,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
 			if (quirk_no_way_out)
 				quirk_no_way_out(i, m, regs);
 		}
-		if (mce_severity(m, mca_cfg.tolerant, msg) >= MCE_PANIC_SEVERITY)
+		if (mce_severity(m, mca_cfg.tolerant, msg, true) >=
+		    MCE_PANIC_SEVERITY)
 			ret = 1;
 	}
 	return ret;
@@ -754,7 +755,7 @@ static void mce_reign(void)
 	for_each_possible_cpu(cpu) {
 		int severity = mce_severity(&per_cpu(mces_seen, cpu),
 					    mca_cfg.tolerant,
-					    &nmsg);
+					    &nmsg, true);
 		if (severity > global_worst) {
 			msg = nmsg;
 			global_worst = severity;
@@ -1095,13 +1096,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
 		 */
 		add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
 
-		severity = mce_severity(&m, cfg->tolerant, NULL);
+		severity = mce_severity(&m, cfg->tolerant, NULL, true);
 
 		/*
-		 * When machine check was for corrected handler don't touch,
-		 * unless we're panicing.
+		 * When machine check was for corrected/deferred handler don't
+		 * touch, unless we're panicing.
 		 */
-		if (severity == MCE_KEEP_SEVERITY && !no_way_out)
+		if ((severity == MCE_KEEP_SEVERITY ||
+		     severity == MCE_UCNA_SEVERITY) && !no_way_out)
 			continue;
 		__set_bit(i, toclear);
 		if (severity == MCE_NO_SEVERITY) {
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 51b7e3a..c2359a1 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -32,9 +32,6 @@
 #define R4(x)				(((x) >> 4) & 0xf)
 #define R4_MSG(x)			((R4(x) < 9) ?  rrrr_msgs[R4(x)] : "Wrong R4!")
 
-#define MCI_STATUS_DEFERRED		BIT_64(44)
-#define MCI_STATUS_POISON		BIT_64(43)
-
 extern const char * const pp_msgs[];
 
 enum tt_ids {
-- 
cgit v0.10.2


From fa92c58694268a7e9f7fa2c6881c1482221c2788 Mon Sep 17 00:00:00 2001
From: Chen Yucong <slaoub@gmail.com>
Date: Tue, 18 Nov 2014 10:09:20 +0800
Subject: x86, mce: Support memory error recovery for both UCNA and Deferred
 error in machine_check_poll

Uncorrected no action required (UCNA) - is a uncorrected recoverable
machine check error that is not signaled via a machine check exception
and, instead, is reported to system software as a corrected machine
check error. UCNA errors indicate that some data in the system is
corrupted, but the data has not been consumed and the processor state
is valid and you may continue execution on this processor. UCNA errors
require no action from system software to continue execution. Note that
UCNA errors are supported by the processor only when IA32_MCG_CAP[24]
(MCG_SER_P) is set.
                                               -- Intel SDM Volume 3B

Deferred errors are errors that cannot be corrected by hardware, but
do not cause an immediate interruption in program flow, loss of data
integrity, or corruption of processor state. These errors indicate
that data has been corrupted but not consumed. Hardware writes information
to the status and address registers in the corresponding bank that
identifies the source of the error if deferred errors are enabled for
logging. Deferred errors are not reported via machine check exceptions;
they can be seen by polling the MCi_STATUS registers.
                                                -- AMD64 APM Volume 2

Above two items, both UCNA and Deferred errors belong to detected
errors, but they can't be corrected by hardware, and this is very
similar to Software Recoverable Action Optional (SRAO) errors.
Therefore, we can take some actions that have been used for handling
SRAO errors to handle UCNA and Deferred errors.

Acked-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Chen Yucong <slaoub@gmail.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 453e9bf..cfb16f6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -575,6 +575,37 @@ static void mce_read_aux(struct mce *m, int i)
 	}
 }
 
+static bool memory_error(struct mce *m)
+{
+	struct cpuinfo_x86 *c = &boot_cpu_data;
+
+	if (c->x86_vendor == X86_VENDOR_AMD) {
+		/*
+		 * coming soon
+		 */
+		return false;
+	} else if (c->x86_vendor == X86_VENDOR_INTEL) {
+		/*
+		 * Intel SDM Volume 3B - 15.9.2 Compound Error Codes
+		 *
+		 * Bit 7 of the MCACOD field of IA32_MCi_STATUS is used for
+		 * indicating a memory error. Bit 8 is used for indicating a
+		 * cache hierarchy error. The combination of bit 2 and bit 3
+		 * is used for indicating a `generic' cache hierarchy error
+		 * But we can't just blindly check the above bits, because if
+		 * bit 11 is set, then it is a bus/interconnect error - and
+		 * either way the above bits just gives more detail on what
+		 * bus/interconnect error happened. Note that bit 12 can be
+		 * ignored, as it's the "filter" bit.
+		 */
+		return (m->status & 0xef80) == BIT(7) ||
+		       (m->status & 0xef00) == BIT(8) ||
+		       (m->status & 0xeffc) == 0xc;
+	}
+
+	return false;
+}
+
 DEFINE_PER_CPU(unsigned, mce_poll_count);
 
 /*
@@ -595,6 +626,7 @@ DEFINE_PER_CPU(unsigned, mce_poll_count);
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
 	struct mce m;
+	int severity;
 	int i;
 
 	this_cpu_inc(mce_poll_count);
@@ -630,6 +662,20 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 
 		if (!(flags & MCP_TIMESTAMP))
 			m.tsc = 0;
+
+		severity = mce_severity(&m, mca_cfg.tolerant, NULL, false);
+
+		/*
+		 * In the cases where we don't have a valid address after all,
+		 * do not add it into the ring buffer.
+		 */
+		if (severity == MCE_DEFERRED_SEVERITY && memory_error(&m)) {
+			if (m.status & MCI_STATUS_ADDRV) {
+				mce_ring_add(m.addr >> PAGE_SHIFT);
+				mce_schedule_work();
+			}
+		}
+
 		/*
 		 * Don't get the IP here because it's unlikely to
 		 * have anything to do with the actual error location.
-- 
cgit v0.10.2


From c7c9b3929b6a57ad47ab4021c77e46f7ff21c007 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Wed, 3 Dec 2014 22:36:45 +0100
Subject: x86/mce: Spell "panicked" correctly

We need the additional "k" to make it a hard-c:

  https://en.wiktionary.org/wiki/panicked

Signed-off-by: Borislav Petkov <bp@suse.de>
Link: http://lkml.kernel.org/r/1417642605-15730-1-git-send-email-bp@alien8.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index cfb16f6..d2c6116 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -292,10 +292,10 @@ static void print_mce(struct mce *m)
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
 
-static atomic_t mce_paniced;
+static atomic_t mce_panicked;
 
 static int fake_panic;
-static atomic_t mce_fake_paniced;
+static atomic_t mce_fake_panicked;
 
 /* Panic in progress. Enable interrupts and wait for final IPI */
 static void wait_for_panic(void)
@@ -319,7 +319,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		/*
 		 * Make sure only one CPU runs in machine check panic
 		 */
-		if (atomic_inc_return(&mce_paniced) > 1)
+		if (atomic_inc_return(&mce_panicked) > 1)
 			wait_for_panic();
 		barrier();
 
@@ -327,7 +327,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 		console_verbose();
 	} else {
 		/* Don't log too much for fake panic */
-		if (atomic_inc_return(&mce_fake_paniced) > 1)
+		if (atomic_inc_return(&mce_fake_panicked) > 1)
 			return;
 	}
 	/* First print corrected ones that are still unlogged */
@@ -744,7 +744,7 @@ static int mce_timed_out(u64 *t)
 	 * might have been modified by someone else.
 	 */
 	rmb();
-	if (atomic_read(&mce_paniced))
+	if (atomic_read(&mce_panicked))
 		wait_for_panic();
 	if (!mca_cfg.monarch_timeout)
 		goto out;
@@ -2568,7 +2568,7 @@ struct dentry *mce_get_debugfs_dir(void)
 static void mce_reset(void)
 {
 	cpu_missing = 0;
-	atomic_set(&mce_fake_paniced, 0);
+	atomic_set(&mce_fake_panicked, 0);
 	atomic_set(&mce_executing, 0);
 	atomic_set(&mce_callin, 0);
 	atomic_set(&global_nwo, 0);
-- 
cgit v0.10.2