diff options
Diffstat (limited to 'drivers/edac')
-rw-r--r-- | drivers/edac/Kconfig | 14 | ||||
-rw-r--r-- | drivers/edac/Makefile | 5 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.c | 104 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 23 | ||||
-rw-r--r-- | drivers/edac/amd64_edac_inj.c | 49 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.c | 32 |
6 files changed, 146 insertions, 81 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 02127e5..55c9c59 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -47,6 +47,18 @@ config EDAC_DEBUG_VERBOSE Source file name and line number where debugging message printed will be added to debugging message. + config EDAC_DECODE_MCE + tristate "Decode MCEs in human-readable form (only on AMD for now)" + depends on CPU_SUP_AMD && X86_MCE + default y + ---help--- + Enable this option if you want to decode Machine Check Exceptions + occuring on your machine in human-readable form. + + You should definitely say Y here in case you want to decode MCEs + which occur really early upon boot, before the module infrastructure + has been initialized. + config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" help @@ -59,7 +71,7 @@ config EDAC_MM_EDAC config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" - depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD + depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE help Support for error detection and correction on the AMD 64 Families of Memory Controllers (K8, F10h and F11h) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 7a473bb..bc5dc23 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,7 +6,6 @@ # GNU General Public License. # - obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o @@ -17,9 +16,7 @@ ifdef CONFIG_PCI edac_core-objs += edac_pci.o edac_pci_sysfs.o endif -ifdef CONFIG_CPU_SUP_AMD -edac_core-objs += edac_mce_amd.o -endif +obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 4e551e6..4f4ac82 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -15,8 +15,8 @@ module_param(ecc_enable_override, int, 0644); /* Lookup table for all possible MC control instances */ struct amd64_pvt; -static struct mem_ctl_info *mci_lookup[MAX_NUMNODES]; -static struct amd64_pvt *pvt_lookup[MAX_NUMNODES]; +static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES]; +static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES]; /* * See F2x80 for K8 and F2x[1,0]80 for Fam10 and later. The table below is only @@ -189,7 +189,10 @@ static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw) /* Map from a CSROW entry to the mask entry that operates on it */ static inline u32 amd64_map_to_dcs_mask(struct amd64_pvt *pvt, int csrow) { - return csrow >> (pvt->num_dcsm >> 3); + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) + return csrow; + else + return csrow >> 1; } /* return the 'base' address the i'th CS entry of the 'dct' DRAM controller */ @@ -279,29 +282,26 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, intlv_en = pvt->dram_IntlvEn[0]; if (intlv_en == 0) { - for (node_id = 0; ; ) { + for (node_id = 0; node_id < DRAM_REG_COUNT; node_id++) { if (amd64_base_limit_match(pvt, sys_addr, node_id)) - break; - - if (++node_id >= DRAM_REG_COUNT) - goto err_no_match; + goto found; } - goto found; + goto err_no_match; } - if (unlikely((intlv_en != (0x01 << 8)) && - (intlv_en != (0x03 << 8)) && - (intlv_en != (0x07 << 8)))) { + if (unlikely((intlv_en != 0x01) && + (intlv_en != 0x03) && + (intlv_en != 0x07))) { amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from " "IntlvEn field of DRAM Base Register for node 0: " - "This probably indicates a BIOS bug.\n", intlv_en); + "this probably indicates a BIOS bug.\n", intlv_en); return NULL; } bits = (((u32) sys_addr) >> 12) & intlv_en; for (node_id = 0; ; ) { - if ((pvt->dram_limit[node_id] & intlv_en) == bits) + if ((pvt->dram_IntlvSel[node_id] & intlv_en) == bits) break; /* intlv_sel field matches */ if (++node_id >= DRAM_REG_COUNT) @@ -311,10 +311,10 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci, /* sanity test for sys_addr */ if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) { amd64_printk(KERN_WARNING, - "%s(): sys_addr 0x%lx falls outside base/limit " - "address range for node %d with node interleaving " - "enabled.\n", __func__, (unsigned long)sys_addr, - node_id); + "%s(): sys_addr 0x%llx falls outside base/limit " + "address range for node %d with node interleaving " + "enabled.\n", + __func__, sys_addr, node_id); return NULL; } @@ -377,7 +377,7 @@ static int input_addr_to_csrow(struct mem_ctl_info *mci, u64 input_addr) * base/mask register pair, test the condition shown near the start of * section 3.5.4 (p. 84, BKDG #26094, K8, revA-E). */ - for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) { + for (csrow = 0; csrow < pvt->cs_count; csrow++) { /* This DRAM chip select is disabled on this node */ if ((pvt->dcsb0[csrow] & K8_DCSB_CS_ENABLE) == 0) @@ -734,7 +734,7 @@ static void find_csrow_limits(struct mem_ctl_info *mci, int csrow, u64 base, mask; pvt = mci->pvt_info; - BUG_ON((csrow < 0) || (csrow >= CHIPSELECT_COUNT)); + BUG_ON((csrow < 0) || (csrow >= pvt->cs_count)); base = base_from_dct_base(pvt, csrow); mask = mask_from_dct_mask(pvt, csrow); @@ -962,35 +962,27 @@ err_reg: */ static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt) { - if (pvt->ext_model >= OPTERON_CPU_REV_F) { + + if (boot_cpu_data.x86 == 0xf && pvt->ext_model < OPTERON_CPU_REV_F) { + pvt->dcsb_base = REV_E_DCSB_BASE_BITS; + pvt->dcsm_mask = REV_E_DCSM_MASK_BITS; + pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS; + pvt->dcs_shift = REV_E_DCS_SHIFT; + pvt->cs_count = 8; + pvt->num_dcsm = 8; + } else { pvt->dcsb_base = REV_F_F1Xh_DCSB_BASE_BITS; pvt->dcsm_mask = REV_F_F1Xh_DCSM_MASK_BITS; pvt->dcs_mask_notused = REV_F_F1Xh_DCS_NOTUSED_BITS; pvt->dcs_shift = REV_F_F1Xh_DCS_SHIFT; - switch (boot_cpu_data.x86) { - case 0xf: - pvt->num_dcsm = REV_F_DCSM_COUNT; - break; - - case 0x10: - pvt->num_dcsm = F10_DCSM_COUNT; - break; - - case 0x11: - pvt->num_dcsm = F11_DCSM_COUNT; - break; - - default: - amd64_printk(KERN_ERR, "Unsupported family!\n"); - break; + if (boot_cpu_data.x86 == 0x11) { + pvt->cs_count = 4; + pvt->num_dcsm = 2; + } else { + pvt->cs_count = 8; + pvt->num_dcsm = 4; } - } else { - pvt->dcsb_base = REV_E_DCSB_BASE_BITS; - pvt->dcsm_mask = REV_E_DCSM_MASK_BITS; - pvt->dcs_mask_notused = REV_E_DCS_NOTUSED_BITS; - pvt->dcs_shift = REV_E_DCS_SHIFT; - pvt->num_dcsm = REV_E_DCSM_COUNT; } } @@ -1003,7 +995,7 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt) amd64_set_dct_base_and_mask(pvt); - for (cs = 0; cs < CHIPSELECT_COUNT; cs++) { + for (cs = 0; cs < pvt->cs_count; cs++) { reg = K8_DCSB0 + (cs * 4); err = pci_read_config_dword(pvt->dram_f2_ctl, reg, &pvt->dcsb0[cs]); @@ -1130,7 +1122,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) debugf0("Reading K8_DRAM_BASE_LOW failed\n"); /* Extract parts into separate data entries */ - pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8; + pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 24; pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7; pvt->dram_rw_en[dram] = (low & 0x3); @@ -1143,7 +1135,7 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram) * Extract parts into separate data entries. Limit is the HIGHEST memory * location of the region, so lower 24 bits need to be all ones */ - pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 8) | 0x00FFFFFF; + pvt->dram_limit[dram] = (((u64) low & 0xFFFF0000) << 24) | 0x00FFFFFF; pvt->dram_IntlvSel[dram] = (low >> 8) & 0x7; pvt->dram_DstNode[dram] = (low & 0x7); } @@ -1193,7 +1185,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci, * different from the node that detected the error. */ src_mci = find_mc_by_sys_addr(mci, SystemAddress); - if (src_mci) { + if (!src_mci) { amd64_mc_printk(mci, KERN_ERR, "failed to map error address 0x%lx to a node\n", (unsigned long)SystemAddress); @@ -1376,8 +1368,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) pvt->dram_IntlvEn[dram] = (low_base >> 8) & 0x7; - pvt->dram_base[dram] = (((((u64) high_base & 0x000000FF) << 32) | - ((u64) low_base & 0xFFFF0000))) << 8; + pvt->dram_base[dram] = (((u64)high_base & 0x000000FF) << 40) | + (((u64)low_base & 0xFFFF0000) << 24); low_offset = K8_DRAM_LIMIT_LOW + (dram << 3); high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3); @@ -1398,9 +1390,9 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram) * Extract address values and form a LIMIT address. Limit is the HIGHEST * memory location of the region, so low 24 bits need to be all ones. */ - low_limit |= 0x0000FFFF; - pvt->dram_limit[dram] = - ((((u64) high_limit << 32) + (u64) low_limit) << 8) | (0xFF); + pvt->dram_limit[dram] = (((u64)high_limit & 0x000000FF) << 40) | + (((u64) low_limit & 0xFFFF0000) << 24) | + 0x00FFFFFF; } static void f10_read_dram_ctl_register(struct amd64_pvt *pvt) @@ -1566,7 +1558,7 @@ static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs) debugf1("InputAddr=0x%x channelselect=%d\n", in_addr, cs); - for (csrow = 0; csrow < CHIPSELECT_COUNT; csrow++) { + for (csrow = 0; csrow < pvt->cs_count; csrow++) { cs_base = amd64_get_dct_base(pvt, cs, csrow); if (!(cs_base & K8_DCSB_CS_ENABLE)) @@ -2497,7 +2489,7 @@ err_reg: * NOTE: CPU Revision Dependent code * * Input: - * @csrow_nr ChipSelect Row Number (0..CHIPSELECT_COUNT-1) + * @csrow_nr ChipSelect Row Number (0..pvt->cs_count-1) * k8 private pointer to --> * DRAM Bank Address mapping register * node_id @@ -2577,7 +2569,7 @@ static int amd64_init_csrows(struct mem_ctl_info *mci) (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled" ); - for (i = 0; i < CHIPSELECT_COUNT; i++) { + for (i = 0; i < pvt->cs_count; i++) { csrow = &mci->csrows[i]; if ((pvt->dcsb0[i] & K8_DCSB_CS_ENABLE) == 0) { @@ -2988,7 +2980,7 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt) goto err_exit; ret = -ENOMEM; - mci = edac_mc_alloc(0, CHIPSELECT_COUNT, pvt->channel_count, node_id); + mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id); if (!mci) goto err_exit; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 8ea07e2..c6f359a 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -132,6 +132,8 @@ #define EDAC_AMD64_VERSION " Ver: 3.2.0 " __DATE__ #define EDAC_MOD_STR "amd64_edac" +#define EDAC_MAX_NUMNODES 8 + /* Extended Model from CPUID, for CPU Revision numbers */ #define OPTERON_CPU_LE_REV_C 0 #define OPTERON_CPU_REV_D 1 @@ -142,7 +144,7 @@ #define OPTERON_CPU_REV_FA 5 /* Hardware limit on ChipSelect rows per MC and processors per system */ -#define CHIPSELECT_COUNT 8 +#define MAX_CS_COUNT 8 #define DRAM_REG_COUNT 8 @@ -193,7 +195,6 @@ */ #define REV_E_DCSB_BASE_BITS (0xFFE0FE00ULL) #define REV_E_DCS_SHIFT 4 -#define REV_E_DCSM_COUNT 8 #define REV_F_F1Xh_DCSB_BASE_BITS (0x1FF83FE0ULL) #define REV_F_F1Xh_DCS_SHIFT 8 @@ -204,9 +205,6 @@ */ #define REV_F_DCSB_BASE_BITS (0x1FF83FE0ULL) #define REV_F_DCS_SHIFT 8 -#define REV_F_DCSM_COUNT 4 -#define F10_DCSM_COUNT 4 -#define F11_DCSM_COUNT 2 /* DRAM CS Mask Registers */ #define K8_DCSM0 0x60 @@ -374,13 +372,11 @@ enum { #define SET_NB_DRAM_INJECTION_WRITE(word, bits) \ (BIT(((word) & 0xF) + 20) | \ - BIT(17) | \ - ((bits) & 0xF)) + BIT(17) | bits) #define SET_NB_DRAM_INJECTION_READ(word, bits) \ (BIT(((word) & 0xF) + 20) | \ - BIT(16) | \ - ((bits) & 0xF)) + BIT(16) | bits) #define K8_NBCAP 0xE8 #define K8_NBCAP_CORES (BIT(12)|BIT(13)) @@ -445,12 +441,12 @@ struct amd64_pvt { u32 dbam1; /* DRAM Base Address Mapping reg for DCT1 */ /* DRAM CS Base Address Registers F2x[1,0][5C:40] */ - u32 dcsb0[CHIPSELECT_COUNT]; - u32 dcsb1[CHIPSELECT_COUNT]; + u32 dcsb0[MAX_CS_COUNT]; + u32 dcsb1[MAX_CS_COUNT]; /* DRAM CS Mask Registers F2x[1,0][6C:60] */ - u32 dcsm0[CHIPSELECT_COUNT]; - u32 dcsm1[CHIPSELECT_COUNT]; + u32 dcsm0[MAX_CS_COUNT]; + u32 dcsm1[MAX_CS_COUNT]; /* * Decoded parts of DRAM BASE and LIMIT Registers @@ -470,6 +466,7 @@ struct amd64_pvt { */ u32 dcsb_base; /* DCSB base bits */ u32 dcsm_mask; /* DCSM mask bits */ + u32 cs_count; /* num chip selects (== num DCSB registers) */ u32 num_dcsm; /* Number of DCSM registers */ u32 dcs_mask_notused; /* DCSM notused mask bits */ u32 dcs_shift; /* DCSB and DCSM shift value */ diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index d3675b7..29f1f7a 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -1,5 +1,11 @@ #include "amd64_edac.h" +static ssize_t amd64_inject_section_show(struct mem_ctl_info *mci, char *buf) +{ + struct amd64_pvt *pvt = mci->pvt_info; + return sprintf(buf, "0x%x\n", pvt->injection.section); +} + /* * store error injection section value which refers to one of 4 16-byte sections * within a 64-byte cacheline @@ -15,12 +21,26 @@ static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci, ret = strict_strtoul(data, 10, &value); if (ret != -EINVAL) { + + if (value > 3) { + amd64_printk(KERN_WARNING, + "%s: invalid section 0x%lx\n", + __func__, value); + return -EINVAL; + } + pvt->injection.section = (u32) value; return count; } return ret; } +static ssize_t amd64_inject_word_show(struct mem_ctl_info *mci, char *buf) +{ + struct amd64_pvt *pvt = mci->pvt_info; + return sprintf(buf, "0x%x\n", pvt->injection.word); +} + /* * store error injection word value which refers to one of 9 16-bit word of the * 16-byte (128-bit + ECC bits) section @@ -37,14 +57,25 @@ static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci, ret = strict_strtoul(data, 10, &value); if (ret != -EINVAL) { - value = (value <= 8) ? value : 0; - pvt->injection.word = (u32) value; + if (value > 8) { + amd64_printk(KERN_WARNING, + "%s: invalid word 0x%lx\n", + __func__, value); + return -EINVAL; + } + pvt->injection.word = (u32) value; return count; } return ret; } +static ssize_t amd64_inject_ecc_vector_show(struct mem_ctl_info *mci, char *buf) +{ + struct amd64_pvt *pvt = mci->pvt_info; + return sprintf(buf, "0x%x\n", pvt->injection.bit_map); +} + /* * store 16 bit error injection vector which enables injecting errors to the * corresponding bit within the error injection word above. When used during a @@ -60,8 +91,14 @@ static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci, ret = strict_strtoul(data, 16, &value); if (ret != -EINVAL) { - pvt->injection.bit_map = (u32) value & 0xFFFF; + if (value & 0xFFFF0000) { + amd64_printk(KERN_WARNING, + "%s: invalid EccVector: 0x%lx\n", + __func__, value); + return -EINVAL; + } + pvt->injection.bit_map = (u32) value; return count; } return ret; @@ -147,7 +184,7 @@ struct mcidev_sysfs_attribute amd64_inj_attrs[] = { .name = "inject_section", .mode = (S_IRUGO | S_IWUSR) }, - .show = NULL, + .show = amd64_inject_section_show, .store = amd64_inject_section_store, }, { @@ -155,7 +192,7 @@ struct mcidev_sysfs_attribute amd64_inj_attrs[] = { .name = "inject_word", .mode = (S_IRUGO | S_IWUSR) }, - .show = NULL, + .show = amd64_inject_word_show, .store = amd64_inject_word_store, }, { @@ -163,7 +200,7 @@ struct mcidev_sysfs_attribute amd64_inj_attrs[] = { .name = "inject_ecc_vector", .mode = (S_IRUGO | S_IWUSR) }, - .show = NULL, + .show = amd64_inject_ecc_vector_show, .store = amd64_inject_ecc_vector_store, }, { diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 0c21c37..713ed7d 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -3,6 +3,7 @@ static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); +static void (*orig_mce_callback)(struct mce *m); void amd_report_gart_errors(bool v) { @@ -362,7 +363,7 @@ static inline void amd_decode_err_code(unsigned int ec) pr_warning("Huh? Unknown MCE error 0x%x\n", ec); } -void decode_mce(struct mce *m) +static void amd_decode_mce(struct mce *m) { struct err_regs regs; int node, ecc; @@ -420,3 +421,32 @@ void decode_mce(struct mce *m) amd_decode_err_code(m->status & 0xffff); } + +static int __init mce_amd_init(void) +{ + /* + * We can decode MCEs for Opteron and later CPUs: + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (boot_cpu_data.x86 >= 0xf)) { + /* safe the default decode mce callback */ + orig_mce_callback = x86_mce_decode_callback; + + x86_mce_decode_callback = amd_decode_mce; + } + + return 0; +} +early_initcall(mce_amd_init); + +#ifdef MODULE +static void __exit mce_amd_exit(void) +{ + x86_mce_decode_callback = orig_mce_callback; +} + +MODULE_DESCRIPTION("AMD MCE decoder"); +MODULE_ALIAS("edac-mce-amd"); +MODULE_LICENSE("GPL"); +module_exit(mce_amd_exit); +#endif |