summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2012-04-18 22:19:40 (GMT)
committerTony Luck <tony.luck@intel.com>2012-04-19 16:12:43 (GMT)
commit95022b8cf6ed7f3292b60c8e85fe59a12bfb1c9e (patch)
treed0af3bb590eadc51cf0ad8f08a4b91fad2cd3433 /arch/x86/kernel/cpu
parent0034102808e0dbbf3a2394b82b1bb40b5778de9e (diff)
downloadlinux-fsl-qoriq-95022b8cf6ed7f3292b60c8e85fe59a12bfb1c9e.tar.xz
x86/mce: Avoid reading every machine check bank register twice.
Reading machine check bank registers is slow. There is a trend of increasing the number of banks, and the number of cores. The main section of do_machine_check() is a serialized section where each cpu in turn checks every bank. Even on a little two socket SandyBridge-EP system that multiplies out as: 2 sockets * 8 cores * 2 hyperthreads * 20 banks = 640 MSRs We already scan the banks in parallel in mce_no_way_out() to see if there is a fatal error anywhere in the system. If we build a cache of VALID bits during this scan, we can avoid uselessly re-reading banks that have no data. Note that this cache is only a hint. If the valid bit is set in a shared bank, all cpus that share that bank will see it during the parallel scan, but the first to find it in the sequential scan will (usually) clear the bank. Acked-by: Borislav Petkov <borislav.petkov@amd.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c16
1 files changed, 11 insertions, 5 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index d086a09..66e1c51 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -641,16 +641,18 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
* Do a quick check if any of the events requires a panic.
* This decides if we keep the events around or clear them.
*/
-static int mce_no_way_out(struct mce *m, char **msg)
+static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp)
{
- int i;
+ int i, ret = 0;
for (i = 0; i < banks; i++) {
m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
+ if (m->status & MCI_STATUS_VAL)
+ __set_bit(i, validp);
if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
- return 1;
+ ret = 1;
}
- return 0;
+ return ret;
}
/*
@@ -1011,6 +1013,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
*/
int kill_it = 0;
DECLARE_BITMAP(toclear, MAX_NR_BANKS);
+ DECLARE_BITMAP(valid_banks, MAX_NR_BANKS);
char *msg = "Unknown";
atomic_inc(&mce_entry);
@@ -1025,7 +1028,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
final = &__get_cpu_var(mces_seen);
*final = m;
- no_way_out = mce_no_way_out(&m, &msg);
+ memset(valid_banks, 0, sizeof(valid_banks));
+ no_way_out = mce_no_way_out(&m, &msg, valid_banks);
barrier();
@@ -1045,6 +1049,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
order = mce_start(&no_way_out);
for (i = 0; i < banks; i++) {
__clear_bit(i, toclear);
+ if (!test_bit(i, valid_banks))
+ continue;
if (!mce_banks[i].ctl)
continue;