summaryrefslogtreecommitdiff
path: root/drivers/edac
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/Kconfig14
-rw-r--r--drivers/edac/Makefile5
-rw-r--r--drivers/edac/amd64_edac.c104
-rw-r--r--drivers/edac/amd64_edac.h23
-rw-r--r--drivers/edac/amd64_edac_inj.c49
-rw-r--r--drivers/edac/edac_mce_amd.c32
6 files changed, 146 insertions, 81 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 02127e5..55c9c59 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -47,6 +47,18 @@ config EDAC_DEBUG_VERBOSE
Source file name and line number where debugging message
printed will be added to debugging message.
+ config EDAC_DECODE_MCE
+ tristate "Decode MCEs in human-readable form (only on AMD for now)"
+ depends on CPU_SUP_AMD && X86_MCE
+ default y
+ ---help---
+ Enable this option if you want to decode Machine Check Exceptions
+ occuring on your machine in human-readable form.
+
+ You should definitely say Y here in case you want to decode MCEs
+ which occur really early upon boot, before the module infrastructure
+ has been initialized.
+
config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting"
help
@@ -59,7 +71,7 @@ config EDAC_MM_EDAC
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
- depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD
+ depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE
help
Support for error detection and correction on the AMD 64
Families of Memory Controllers (K8, F10h and F11h)
diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile
index 7a473bb..bc5dc23 100644
--- a/drivers/edac/Makefile
+++ b/drivers/edac/Makefile
@@ -6,7 +6,6 @@
# GNU General Public License.
#
-
obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
@@ -17,9 +16,7 @@ ifdef CONFIG_PCI
edac_core-objs += edac_pci.o edac_pci_sysfs.o
endif
-ifdef CONFIG_CPU_SUP_AMD
-edac_core-objs += edac_mce_amd.o
-endif
+obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 4e551e6..4f4ac82 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,8 +15,8 @@ module_param(ecc_enable_override, int, 0644);
/* Lookup table for all possible MC control instances */
struct amd64_pvt;
-static struct mem_ctl_info *mci_lookup[MAX_NUMNODES];
-static struct amd64_pvt *pvt_lookup[MAX_NUMNODES];
+static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
+static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES];
/*
* See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only
@@ -189,7 +189,10 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
/* Map from a CSROW entry to the mask entry that operates on it */
static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow)
{
- return csrow >> (pvt->num_dcsm >> 3);
+ if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F)
+ return csrow;
+ else
+ return csrow >> 1;
}
/* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */
@@ -279,29 +282,26 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
intlv_en = pvt->dram_IntlvEn[0];
if (intlv_en == 0) {
- for (node_id = 0; ; ) {
+ for (node_id = 0; node_id < DRAM_REG_COUNT; node_id++) {
if (amd64_base_limit_match(pvt, sys_addr, node_id))
- break;
-
- if (++node_id >= DRAM_REG_COUNT)
- goto err_no_match;
+ goto found;
}
- goto found;
+ goto err_no_match;
}
- if (unlikely((intlv_en != (0x01 << 8)) &&
- (intlv_en != (0x03 << 8)) &&
- (intlv_en != (0x07 << 8)))) {
+ if (unlikely((intlv_en != 0x01) &&
+ (intlv_en != 0x03) &&
+ (intlv_en != 0x07))) {
amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
"IntlvEn field of DRAM Base Register for node 0: "
- "This probably indicates a BIOS bug.\n", intlv_en);
+ "this probably indicates a BIOS bug.\n", intlv_en);
return NULL;
}
bits = (((u32) sys_addr) >> 12) & intlv_en;
for (node_id = 0; ; ) {
- if ((pvt->dram_limit[node_id] & intlv_en) == bits)
+ if ((pvt->dram_IntlvSel[node_id] & intlv_en) == bits)
break; /* intlv_sel field matches */
if (++node_id >= DRAM_REG_COUNT)
@@ -311,10 +311,10 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
/* sanity test for sys_addr */
if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
amd64_printk(KERN_WARNING,
- "%s(): sys_addr 0x%lx falls outside base/limit "
- "address range for node %d with node interleaving "
- "enabled.\n", __func__, (unsigned long)sys_addr,
- node_id);
+ "%s(): sys_addr 0x%llx falls outside base/limit "
+ "address range for node %d with node interleaving "
+ "enabled.\n",
+ __func__, sys_addr, node_id);
return NULL;
}
@@ -377,7 +377,7 @@ static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr)
* base/mask register pair, test the condition shown near the start of
* section 3.5.4 (p. 84, BKDG #26094, K8, revA-E).
*/
- for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+ for (csrow = 0; csrow < pvt->cs_count; csrow++) {
/* This DRAM chip select is disabled on this node */
if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0)
@@ -734,7 +734,7 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow,
u64 base, mask;
pvt = mci->pvt_info;
- BUG_ON((csrow < 0) || (csrow >= CHIPSELECT_COUNT));
+ BUG_ON((csrow < 0) || (csrow >= pvt->cs_count));
base = base_from_dct_base(pvt, csrow);
mask = mask_from_dct_mask(pvt, csrow);
@@ -962,35 +962,27 @@ err_reg:
*/
static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
{
- if (pvt->ext_model >= OPTERON_CPU_REV_F) {
+
+ if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) {
+ pvt->dcsb_base = REV_E_DCSB_BASE_BITS;
+ pvt->dcsm_mask = REV_E_DCSM_MASK_BITS;
+ pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS;
+ pvt->dcs_shift = REV_E_DCS_SHIFT;
+ pvt->cs_count = 8;
+ pvt->num_dcsm = 8;
+ } else {
pvt->dcsb_base = REV_F_F1Xh_DCSB_BASE_BITS;
pvt->dcsm_mask = REV_F_F1Xh_DCSM_MASK_BITS;
pvt->dcs_mask_notused = REV_F_F1Xh_DCS_NOTUSED_BITS;
pvt->dcs_shift = REV_F_F1Xh_DCS_SHIFT;
- switch (boot_cpu_data.x86) {
- case 0xf:
- pvt->num_dcsm = REV_F_DCSM_COUNT;
- break;
-
- case 0x10:
- pvt->num_dcsm = F10_DCSM_COUNT;
- break;
-
- case 0x11:
- pvt->num_dcsm = F11_DCSM_COUNT;
- break;
-
- default:
- amd64_printk(KERN_ERR, "Unsupported family!\n");
- break;
+ if (boot_cpu_data.x86 == 0x11) {
+ pvt->cs_count = 4;
+ pvt->num_dcsm = 2;
+ } else {
+ pvt->cs_count = 8;
+ pvt->num_dcsm = 4;
}
- } else {
- pvt->dcsb_base = REV_E_DCSB_BASE_BITS;
- pvt->dcsm_mask = REV_E_DCSM_MASK_BITS;
- pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS;
- pvt->dcs_shift = REV_E_DCS_SHIFT;
- pvt->num_dcsm = REV_E_DCSM_COUNT;
}
}
@@ -1003,7 +995,7 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
amd64_set_dct_base_and_mask(pvt);
- for (cs = 0; cs < CHIPSELECT_COUNT; cs++) {
+ for (cs = 0; cs < pvt->cs_count; cs++) {
reg = K8_DCSB0 + (cs * 4);
err = pci_read_config_dword(pvt->dram_f2_ctl, reg,
&pvt->dcsb0[cs]);
@@ -1130,7 +1122,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
debugf0("Reading K8_DRAM_BASE_LOW failed\n");
/* Extract parts into separate data entries */
- pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
+ pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 24;
pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
pvt->dram_rw_en[dram] = (low & 0x3);
@@ -1143,7 +1135,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
* Extract parts into separate data entries. Limit is the HIGHEST memory
* location of the region, so lower 24 bits need to be all ones
*/
- pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF;
+ pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 24) | 0x00FFFFFF;
pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7;
pvt->dram_DstNode[dram] = (low & 0x7);
}
@@ -1193,7 +1185,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
* different from the node that detected the error.
*/
src_mci = find_mc_by_sys_addr(mci, SystemAddress);
- if (src_mci) {
+ if (!src_mci) {
amd64_mc_printk(mci, KERN_ERR,
"failed to map error address 0x%lx to a node\n",
(unsigned long)SystemAddress);
@@ -1376,8 +1368,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7;
- pvt->dram_base[dram] = (((((u64) high_base & 0x000000FF) << 32) |
- ((u64) low_base & 0xFFFF0000))) << 8;
+ pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) |
+ (((u64)low_base & 0xFFFF0000) << 24);
low_offset = K8_DRAM_LIMIT_LOW + (dram << 3);
high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
@@ -1398,9 +1390,9 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
* Extract address values and form a LIMIT address. Limit is the HIGHEST
* memory location of the region, so low 24 bits need to be all ones.
*/
- low_limit |= 0x0000FFFF;
- pvt->dram_limit[dram] =
- ((((u64) high_limit << 32) + (u64) low_limit) << 8) | (0xFF);
+ pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) |
+ (((u64) low_limit & 0xFFFF0000) << 24) |
+ 0x00FFFFFF;
}
static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
@@ -1566,7 +1558,7 @@ static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
debugf1("InputAddr=0x%x channelselect=%d\n", in_addr, cs);
- for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) {
+ for (csrow = 0; csrow < pvt->cs_count; csrow++) {
cs_base = amd64_get_dct_base(pvt, cs, csrow);
if (!(cs_base & K8_DCSB_CS_ENABLE))
@@ -2497,7 +2489,7 @@ err_reg:
* NOTE: CPU Revision Dependent code
*
* Input:
- * @csrow_nr ChipSelect Row Number (0..CHIPSELECT_COUNT-1)
+ * @csrow_nr ChipSelect Row Number (0..pvt->cs_count-1)
* k8 private pointer to -->
* DRAM Bank Address mapping register
* node_id
@@ -2577,7 +2569,7 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
(pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
);
- for (i = 0; i < CHIPSELECT_COUNT; i++) {
+ for (i = 0; i < pvt->cs_count; i++) {
csrow = &mci->csrows[i];
if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) {
@@ -2988,7 +2980,7 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
goto err_exit;
ret = -ENOMEM;
- mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id);
+ mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id);
if (!mci)
goto err_exit;
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 8ea07e2..c6f359a 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -132,6 +132,8 @@
#define EDAC_AMD64_VERSION " Ver: 3.2.0 " __DATE__
#define EDAC_MOD_STR "amd64_edac"
+#define EDAC_MAX_NUMNODES 8
+
/* Extended Model from CPUID, for CPU Revision numbers */
#define OPTERON_CPU_LE_REV_C 0
#define OPTERON_CPU_REV_D 1
@@ -142,7 +144,7 @@
#define OPTERON_CPU_REV_FA 5
/* Hardware limit on ChipSelect rows per MC and processors per system */
-#define CHIPSELECT_COUNT 8
+#define MAX_CS_COUNT 8
#define DRAM_REG_COUNT 8
@@ -193,7 +195,6 @@
*/
#define REV_E_DCSB_BASE_BITS (0xFFE0FE00ULL)
#define REV_E_DCS_SHIFT 4
-#define REV_E_DCSM_COUNT 8
#define REV_F_F1Xh_DCSB_BASE_BITS (0x1FF83FE0ULL)
#define REV_F_F1Xh_DCS_SHIFT 8
@@ -204,9 +205,6 @@
*/
#define REV_F_DCSB_BASE_BITS (0x1FF83FE0ULL)
#define REV_F_DCS_SHIFT 8
-#define REV_F_DCSM_COUNT 4
-#define F10_DCSM_COUNT 4
-#define F11_DCSM_COUNT 2
/* DRAM CS Mask Registers */
#define K8_DCSM0 0x60
@@ -374,13 +372,11 @@ enum {
#define SET_NB_DRAM_INJECTION_WRITE(word, bits) \
(BIT(((word) & 0xF) + 20) | \
- BIT(17) | \
- ((bits) & 0xF))
+ BIT(17) | bits)
#define SET_NB_DRAM_INJECTION_READ(word, bits) \
(BIT(((word) & 0xF) + 20) | \
- BIT(16) | \
- ((bits) & 0xF))
+ BIT(16) | bits)
#define K8_NBCAP 0xE8
#define K8_NBCAP_CORES (BIT(12)|BIT(13))
@@ -445,12 +441,12 @@ struct amd64_pvt {
u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */
/* DRAM CS Base Address Registers F2x[1,0][5C:40] */
- u32 dcsb0[CHIPSELECT_COUNT];
- u32 dcsb1[CHIPSELECT_COUNT];
+ u32 dcsb0[MAX_CS_COUNT];
+ u32 dcsb1[MAX_CS_COUNT];
/* DRAM CS Mask Registers F2x[1,0][6C:60] */
- u32 dcsm0[CHIPSELECT_COUNT];
- u32 dcsm1[CHIPSELECT_COUNT];
+ u32 dcsm0[MAX_CS_COUNT];
+ u32 dcsm1[MAX_CS_COUNT];
/*
* Decoded parts of DRAM BASE and LIMIT Registers
@@ -470,6 +466,7 @@ struct amd64_pvt {
*/
u32 dcsb_base; /* DCSB base bits */
u32 dcsm_mask; /* DCSM mask bits */
+ u32 cs_count; /* num chip selects (== num DCSB registers) */
u32 num_dcsm; /* Number of DCSM registers */
u32 dcs_mask_notused; /* DCSM notused mask bits */
u32 dcs_shift; /* DCSB and DCSM shift value */
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
index d3675b7..29f1f7a 100644
--- a/drivers/edac/amd64_edac_inj.c
+++ b/drivers/edac/amd64_edac_inj.c
@@ -1,5 +1,11 @@
#include "amd64_edac.h"
+static ssize_t amd64_inject_section_show(struct mem_ctl_info *mci, char *buf)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ return sprintf(buf, "0x%x\n", pvt->injection.section);
+}
+
/*
* store error injection section value which refers to one of 4 16-byte sections
* within a 64-byte cacheline
@@ -15,12 +21,26 @@ static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci,
ret = strict_strtoul(data, 10, &value);
if (ret != -EINVAL) {
+
+ if (value > 3) {
+ amd64_printk(KERN_WARNING,
+ "%s: invalid section 0x%lx\n",
+ __func__, value);
+ return -EINVAL;
+ }
+
pvt->injection.section = (u32) value;
return count;
}
return ret;
}
+static ssize_t amd64_inject_word_show(struct mem_ctl_info *mci, char *buf)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ return sprintf(buf, "0x%x\n", pvt->injection.word);
+}
+
/*
* store error injection word value which refers to one of 9 16-bit word of the
* 16-byte (128-bit + ECC bits) section
@@ -37,14 +57,25 @@ static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci,
ret = strict_strtoul(data, 10, &value);
if (ret != -EINVAL) {
- value = (value <= 8) ? value : 0;
- pvt->injection.word = (u32) value;
+ if (value > 8) {
+ amd64_printk(KERN_WARNING,
+ "%s: invalid word 0x%lx\n",
+ __func__, value);
+ return -EINVAL;
+ }
+ pvt->injection.word = (u32) value;
return count;
}
return ret;
}
+static ssize_t amd64_inject_ecc_vector_show(struct mem_ctl_info *mci, char *buf)
+{
+ struct amd64_pvt *pvt = mci->pvt_info;
+ return sprintf(buf, "0x%x\n", pvt->injection.bit_map);
+}
+
/*
* store 16 bit error injection vector which enables injecting errors to the
* corresponding bit within the error injection word above. When used during a
@@ -60,8 +91,14 @@ static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci,
ret = strict_strtoul(data, 16, &value);
if (ret != -EINVAL) {
- pvt->injection.bit_map = (u32) value & 0xFFFF;
+ if (value & 0xFFFF0000) {
+ amd64_printk(KERN_WARNING,
+ "%s: invalid EccVector: 0x%lx\n",
+ __func__, value);
+ return -EINVAL;
+ }
+ pvt->injection.bit_map = (u32) value;
return count;
}
return ret;
@@ -147,7 +184,7 @@ struct mcidev_sysfs_attribute amd64_inj_attrs[] = {
.name = "inject_section",
.mode = (S_IRUGO | S_IWUSR)
},
- .show = NULL,
+ .show = amd64_inject_section_show,
.store = amd64_inject_section_store,
},
{
@@ -155,7 +192,7 @@ struct mcidev_sysfs_attribute amd64_inj_attrs[] = {
.name = "inject_word",
.mode = (S_IRUGO | S_IWUSR)
},
- .show = NULL,
+ .show = amd64_inject_word_show,
.store = amd64_inject_word_store,
},
{
@@ -163,7 +200,7 @@ struct mcidev_sysfs_attribute amd64_inj_attrs[] = {
.name = "inject_ecc_vector",
.mode = (S_IRUGO | S_IWUSR)
},
- .show = NULL,
+ .show = amd64_inject_ecc_vector_show,
.store = amd64_inject_ecc_vector_store,
},
{
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 0c21c37..713ed7d 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -3,6 +3,7 @@
static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
+static void (*orig_mce_callback)(struct mce *m);
void amd_report_gart_errors(bool v)
{
@@ -362,7 +363,7 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
-void decode_mce(struct mce *m)
+static void amd_decode_mce(struct mce *m)
{
struct err_regs regs;
int node, ecc;
@@ -420,3 +421,32 @@ void decode_mce(struct mce *m)
amd_decode_err_code(m->status & 0xffff);
}
+
+static int __init mce_amd_init(void)
+{
+ /*
+ * We can decode MCEs for Opteron and later CPUs:
+ */
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
+ (boot_cpu_data.x86 >= 0xf)) {
+ /* safe the default decode mce callback */
+ orig_mce_callback = x86_mce_decode_callback;
+
+ x86_mce_decode_callback = amd_decode_mce;
+ }
+
+ return 0;
+}
+early_initcall(mce_amd_init);
+
+#ifdef MODULE
+static void __exit mce_amd_exit(void)
+{
+ x86_mce_decode_callback = orig_mce_callback;
+}
+
+MODULE_DESCRIPTION("AMD MCE decoder");
+MODULE_ALIAS("edac-mce-amd");
+MODULE_LICENSE("GPL");
+module_exit(mce_amd_exit);
+#endif