summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/smpboot.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/smpboot.c')
-rw-r--r--arch/x86/kernel/smpboot.c134
1 files changed, 107 insertions, 27 deletions
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 8b3bfc4..6af1185 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,7 +62,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mtrr.h>
-#include <asm/vmi.h>
+#include <asm/mwait.h>
#include <asm/apic.h>
#include <asm/setup.h>
#include <asm/uv/uv.h>
@@ -299,23 +299,16 @@ notrace static void __cpuinit start_secondary(void *unused)
* fragile that we want to limit the things done here to the
* most necessary things.
*/
+ cpu_init();
+ preempt_disable();
+ smp_callin();
#ifdef CONFIG_X86_32
- /*
- * Switch away from the trampoline page-table
- *
- * Do this before cpu_init() because it needs to access per-cpu
- * data which may not be mapped in the trampoline page-table.
- */
+ /* switch away from the initial page table */
load_cr3(swapper_pg_dir);
__flush_tlb_all();
#endif
- vmi_bringup();
- cpu_init();
- preempt_disable();
- smp_callin();
-
/* otherwise gcc will move up smp_processor_id before the cpu_init */
barrier();
/*
@@ -324,9 +317,9 @@ notrace static void __cpuinit start_secondary(void *unused)
check_tsc_sync_target();
if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
enable_NMI_through_LVT0();
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
}
/* This must be done before setting cpu_online_mask */
@@ -397,6 +390,19 @@ void __cpuinit smp_store_cpu_info(int id)
identify_secondary_cpu(c);
}
+static void __cpuinit link_thread_siblings(int cpu1, int cpu2)
+{
+ struct cpuinfo_x86 *c1 = &cpu_data(cpu1);
+ struct cpuinfo_x86 *c2 = &cpu_data(cpu2);
+
+ cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1));
+ cpumask_set_cpu(cpu1, cpu_core_mask(cpu2));
+ cpumask_set_cpu(cpu2, cpu_core_mask(cpu1));
+ cpumask_set_cpu(cpu1, c2->llc_shared_map);
+ cpumask_set_cpu(cpu2, c1->llc_shared_map);
+}
+
void __cpuinit set_cpu_sibling_map(int cpu)
{
@@ -409,14 +415,13 @@ void __cpuinit set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
struct cpuinfo_x86 *o = &cpu_data(i);
- if (c->phys_proc_id == o->phys_proc_id &&
- c->cpu_core_id == o->cpu_core_id) {
- cpumask_set_cpu(i, cpu_sibling_mask(cpu));
- cpumask_set_cpu(cpu, cpu_sibling_mask(i));
- cpumask_set_cpu(i, cpu_core_mask(cpu));
- cpumask_set_cpu(cpu, cpu_core_mask(i));
- cpumask_set_cpu(i, c->llc_shared_map);
- cpumask_set_cpu(cpu, o->llc_shared_map);
+ if (cpu_has(c, X86_FEATURE_TOPOEXT)) {
+ if (c->phys_proc_id == o->phys_proc_id &&
+ c->compute_unit_id == o->compute_unit_id)
+ link_thread_siblings(cpu, i);
+ } else if (c->phys_proc_id == o->phys_proc_id &&
+ c->cpu_core_id == o->cpu_core_id) {
+ link_thread_siblings(cpu, i);
}
}
} else {
@@ -774,7 +779,6 @@ do_rest:
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
irq_ctx_init(cpu);
- initial_page_table = __pa(&trampoline_pg_dir);
#else
clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
initial_gs = per_cpu_offset(cpu);
@@ -923,7 +927,6 @@ int __cpuinit native_cpu_up(unsigned int cpu)
per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
err = do_boot_cpu(apicid, cpu);
-
if (err) {
pr_debug("do_boot_cpu failed %d\n", err);
return -EIO;
@@ -1109,8 +1112,6 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
}
set_cpu_sibling_map(0);
- enable_IR_x2apic();
- default_setup_apic_routing();
if (smp_sanity_check(max_cpus) < 0) {
printk(KERN_INFO "SMP disabled\n");
@@ -1118,6 +1119,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
goto out;
}
+ default_setup_apic_routing();
+
preempt_disable();
if (read_apic_id() != boot_cpu_physical_apicid) {
panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
@@ -1383,11 +1386,88 @@ void play_dead_common(void)
local_irq_disable();
}
+/*
+ * We need to flush the caches before going to sleep, lest we have
+ * dirty data in our caches when we come back up.
+ */
+static inline void mwait_play_dead(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int highest_cstate = 0;
+ unsigned int highest_subcstate = 0;
+ int i;
+ void *mwait_ptr;
+
+ if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+ return;
+ if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+ return;
+ if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return;
+
+ eax = CPUID_MWAIT_LEAF;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ /*
+ * eax will be 0 if EDX enumeration is not valid.
+ * Initialized below to cstate, sub_cstate value when EDX is valid.
+ */
+ if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED)) {
+ eax = 0;
+ } else {
+ edx >>= MWAIT_SUBSTATE_SIZE;
+ for (i = 0; i < 7 && edx; i++, edx >>= MWAIT_SUBSTATE_SIZE) {
+ if (edx & MWAIT_SUBSTATE_MASK) {
+ highest_cstate = i;
+ highest_subcstate = edx & MWAIT_SUBSTATE_MASK;
+ }
+ }
+ eax = (highest_cstate << MWAIT_SUBSTATE_SIZE) |
+ (highest_subcstate - 1);
+ }
+
+ /*
+ * This should be a memory location in a cache line which is
+ * unlikely to be touched by other processors. The actual
+ * content is immaterial as it is not actually modified in any way.
+ */
+ mwait_ptr = &current_thread_info()->flags;
+
+ wbinvd();
+
+ while (1) {
+ /*
+ * The CLFLUSH is a workaround for erratum AAI65 for
+ * the Xeon 7400 series. It's not clear it is actually
+ * needed, but it should be harmless in either case.
+ * The WBINVD is insufficient due to the spurious-wakeup
+ * case where we return around the loop.
+ */
+ clflush(mwait_ptr);
+ __monitor(mwait_ptr, 0, 0);
+ mb();
+ __mwait(eax, 0);
+ }
+}
+
+static inline void hlt_play_dead(void)
+{
+ if (current_cpu_data.x86 >= 4)
+ wbinvd();
+
+ while (1) {
+ native_halt();
+ }
+}
+
void native_play_dead(void)
{
play_dead_common();
tboot_shutdown(TB_SHUTDOWN_WFS);
- wbinvd_halt();
+
+ mwait_play_dead(); /* Only returns on failure */
+ hlt_play_dead();
}
#else /* ... !CONFIG_HOTPLUG_CPU */