Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                    |    1
-rw-r--r--  arch/x86/kernel/acpi/boot.c                 |   59
-rw-r--r--  arch/x86/kernel/alternative.c               |   52
-rw-r--r--  arch/x86/kernel/amd_nb.c                    |  142
-rw-r--r--  arch/x86/kernel/apb_timer.c                 |    1
-rw-r--r--  arch/x86/kernel/aperture_64.c               |   54
-rw-r--r--  arch/x86/kernel/apic/Makefile               |    5
-rw-r--r--  arch/x86/kernel/apic/apic.c                 |  192
-rw-r--r--  arch/x86/kernel/apic/hw_nmi.c               |   39
-rw-r--r--  arch/x86/kernel/apic/io_apic.c              |  115
-rw-r--r--  arch/x86/kernel/apic/nmi.c                  |  567
-rw-r--r--  arch/x86/kernel/apic/x2apic_uv_x.c          |   69
-rw-r--r--  arch/x86/kernel/cpu/amd.c                   |    2
-rw-r--r--  arch/x86/kernel/cpu/common.c                |    1
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c   |    4
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c       |  151
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce-inject.c     |    5
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce.c            |   20
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd.c        |  135
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel.c      |    2
-rw-r--r--  arch/x86/kernel/cpu/mcheck/therm_throt.c    |   40
-rw-r--r--  arch/x86/kernel/cpu/perf_event.c            |  104
-rw-r--r--  arch/x86/kernel/cpu/perf_event_amd.c        |   16
-rw-r--r--  arch/x86/kernel/cpu/perf_event_intel.c      |   30
-rw-r--r--  arch/x86/kernel/cpu/perf_event_p4.c         |   28
-rw-r--r--  arch/x86/kernel/cpu/perfctr-watchdog.c      |  644
-rw-r--r--  arch/x86/kernel/dumpstack.c                 |   18
-rw-r--r--  arch/x86/kernel/dumpstack_32.c              |   25
-rw-r--r--  arch/x86/kernel/dumpstack_64.c              |   24
-rw-r--r--  arch/x86/kernel/early_printk.c              |    3
-rw-r--r--  arch/x86/kernel/early_printk_mrst.c         |  319
-rw-r--r--  arch/x86/kernel/entry_64.S                  |   36
-rw-r--r--  arch/x86/kernel/ftrace.c                    |    9
-rw-r--r--  arch/x86/kernel/head32.c                    |    3
-rw-r--r--  arch/x86/kernel/head_32.S                   |   83
-rw-r--r--  arch/x86/kernel/hw_breakpoint.c             |   12
-rw-r--r--  arch/x86/kernel/irq.c                       |    6
-rw-r--r--  arch/x86/kernel/irq_32.c                    |    4
-rw-r--r--  arch/x86/kernel/kgdb.c                      |    7
-rw-r--r--  arch/x86/kernel/kprobes.c                   |  127
-rw-r--r--  arch/x86/kernel/microcode_amd.c             |   34
-rw-r--r--  arch/x86/kernel/mpparse.c                   |  114
-rw-r--r--  arch/x86/kernel/pci-gart_64.c               |   34
-rw-r--r--  arch/x86/kernel/process.c                   |   14
-rw-r--r--  arch/x86/kernel/process_32.c                |    2
-rw-r--r--  arch/x86/kernel/process_64.c                |    2
-rw-r--r--  arch/x86/kernel/reboot.c                    |    5
-rw-r--r--  arch/x86/kernel/reboot_fixups_32.c          |   16
-rw-r--r--  arch/x86/kernel/setup.c                     |   13
-rw-r--r--  arch/x86/kernel/smpboot.c                   |   54
-rw-r--r--  arch/x86/kernel/stacktrace.c                |    8
-rw-r--r--  arch/x86/kernel/time.c                      |   18
-rw-r--r--  arch/x86/kernel/trampoline_64.S             |    2
-rw-r--r--  arch/x86/kernel/traps.c                     |  131
-rw-r--r--  arch/x86/kernel/tsc.c                       |   98
-rw-r--r--  arch/x86/kernel/verify_cpu.S (renamed from arch/x86/kernel/verify_cpu_64.S) |   49
-rw-r--r--  arch/x86/kernel/vmlinux.lds.S               |    8
57 files changed, 1213 insertions(+), 2543 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 1e99475..34244b2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -85,7 +85,6 @@ obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_VM86) += vm86_32.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
-obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_APB_TIMER) += apb_timer.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 71232b9..ec881c6 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -198,6 +198,11 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
{
unsigned int ver = 0;
+ if (id >= (MAX_LOCAL_APIC-1)) {
+ printk(KERN_INFO PREFIX "skipped apicid that is too big\n");
+ return;
+ }
+
if (!enabled) {
++disabled_cpus;
return;
@@ -847,18 +852,6 @@ static int __init acpi_parse_fadt(struct acpi_table_header *table)
* returns 0 on success, < 0 on error
*/
-static void __init acpi_register_lapic_address(unsigned long address)
-{
- mp_lapic_addr = address;
-
- set_fixmap_nocache(FIX_APIC_BASE, address);
- if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = read_apic_id();
- apic_version[boot_cpu_physical_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
- }
-}
-
static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
{
int count;
@@ -880,7 +873,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
return count;
}
- acpi_register_lapic_address(acpi_lapic_addr);
+ register_lapic_address(acpi_lapic_addr);
return count;
}
@@ -907,16 +900,16 @@ static int __init acpi_parse_madt_lapic_entries(void)
return count;
}
- acpi_register_lapic_address(acpi_lapic_addr);
+ register_lapic_address(acpi_lapic_addr);
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC,
- acpi_parse_sapic, MAX_APICS);
+ acpi_parse_sapic, MAX_LOCAL_APIC);
if (!count) {
x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC,
- acpi_parse_x2apic, MAX_APICS);
+ acpi_parse_x2apic, MAX_LOCAL_APIC);
count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC,
- acpi_parse_lapic, MAX_APICS);
+ acpi_parse_lapic, MAX_LOCAL_APIC);
}
if (!count && !x2count) {
printk(KERN_ERR PREFIX "No LAPIC entries present\n");
@@ -949,32 +942,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
extern int es7000_plat;
#endif
-static void assign_to_mp_irq(struct mpc_intsrc *m,
- struct mpc_intsrc *mp_irq)
-{
- memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
-}
-
-static void save_mp_irq(struct mpc_intsrc *m)
-{
- int i;
-
- for (i = 0; i < mp_irq_entries; i++) {
- if (!mp_irq_cmp(&mp_irqs[i], m))
- return;
- }
-
- assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
-
void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
int ioapic;
@@ -1005,7 +972,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
mp_irq.dstirq = pin; /* INTIN# */
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
isa_irq_to_gsi[bus_irq] = gsi;
}
@@ -1080,7 +1047,7 @@ void __init mp_config_acpi_legacy_irqs(void)
mp_irq.srcbusirq = i; /* Identity mapped */
mp_irq.dstirq = pin;
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
}
}
@@ -1117,7 +1084,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger,
mp_irq.dstapic = mp_ioapics[ioapic].apicid;
mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
- save_mp_irq(&mp_irq);
+ mp_save_irq(&mp_irq);
#endif
return 0;
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5079f24..1236085 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -353,6 +353,7 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
mutex_unlock(&smp_alt);
}
+bool skip_smp_alternatives;
void alternatives_smp_switch(int smp)
{
struct smp_alt_module *mod;
@@ -368,7 +369,7 @@ void alternatives_smp_switch(int smp)
printk("lockdep: fixing up alternatives.\n");
#endif
- if (noreplace_smp || smp_alt_once)
+ if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
return;
BUG_ON(!smp && (num_online_cpus() > 1));
@@ -591,17 +592,21 @@ static atomic_t stop_machine_first;
static int wrote_text;
struct text_poke_params {
- void *addr;
- const void *opcode;
- size_t len;
+ struct text_poke_param *params;
+ int nparams;
};
static int __kprobes stop_machine_text_poke(void *data)
{
struct text_poke_params *tpp = data;
+ struct text_poke_param *p;
+ int i;
if (atomic_dec_and_test(&stop_machine_first)) {
- text_poke(tpp->addr, tpp->opcode, tpp->len);
+ for (i = 0; i < tpp->nparams; i++) {
+ p = &tpp->params[i];
+ text_poke(p->addr, p->opcode, p->len);
+ }
smp_wmb(); /* Make sure other cpus see that this has run */
wrote_text = 1;
} else {
@@ -610,8 +615,12 @@ static int __kprobes stop_machine_text_poke(void *data)
smp_mb(); /* Load wrote_text before following execution */
}
- flush_icache_range((unsigned long)tpp->addr,
- (unsigned long)tpp->addr + tpp->len);
+ for (i = 0; i < tpp->nparams; i++) {
+ p = &tpp->params[i];
+ flush_icache_range((unsigned long)p->addr,
+ (unsigned long)p->addr + p->len);
+ }
+
return 0;
}
@@ -631,10 +640,13 @@ static int __kprobes stop_machine_text_poke(void *data)
void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
{
struct text_poke_params tpp;
+ struct text_poke_param p;
- tpp.addr = addr;
- tpp.opcode = opcode;
- tpp.len = len;
+ p.addr = addr;
+ p.opcode = opcode;
+ p.len = len;
+ tpp.params = &p;
+ tpp.nparams = 1;
atomic_set(&stop_machine_first, 1);
wrote_text = 0;
/* Use __stop_machine() because the caller already got online_cpus. */
@@ -642,6 +654,26 @@ void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
return addr;
}
+/**
+ * text_poke_smp_batch - Update instructions on a live kernel on SMP
+ * @params: an array of text_poke parameters
+ * @n: the number of elements in params.
+ *
+ * Modify multi-byte instructions by using stop_machine() on SMP. Since
+ * stop_machine() is a heavy operation, it is better to aggregate
+ * text_poke requests and do them all in one call if possible.
+ *
+ * Note: Must be called under get_online_cpus() and text_mutex.
+ */
+void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
+{
+ struct text_poke_params tpp = {.params = params, .nparams = n};
+
+ atomic_set(&stop_machine_first, 1);
+ wrote_text = 0;
+ stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
+}
+
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)
#ifdef CONFIG_X86_64
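The kernel-doc above fixes the calling contract for the new batch API: hold get_online_cpus() and text_mutex, hand over an array of poke requests, pay for one stop_machine(). As a hedged illustration (the patch sites and replacement opcodes below are hypothetical, not from this patch), a kprobes-style caller would aggregate its pokes like this:

        #include <linux/cpu.h>          /* get_online_cpus()/put_online_cpus() */
        #include <linux/memory.h>       /* text_mutex */
        #include <asm/alternative.h>    /* struct text_poke_param, text_poke_smp_batch() */

        /* Hypothetical sketch: patch two 5-byte sites with one stop_machine(). */
        static void patch_two_sites(void *site1, const void *insn1,
                                    void *site2, const void *insn2)
        {
                struct text_poke_param params[] = {
                        { .addr = site1, .opcode = insn1, .len = 5 },
                        { .addr = site2, .opcode = insn2, .len = 5 },
                };

                get_online_cpus();              /* required by the API contract */
                mutex_lock(&text_mutex);
                text_poke_smp_batch(params, ARRAY_SIZE(params));
                mutex_unlock(&text_mutex);
                put_online_cpus();
        }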
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 8f6463d..0a99f71 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -12,95 +12,123 @@
static u32 *flush_words;
-struct pci_device_id k8_nb_ids[] = {
+struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
{}
};
-EXPORT_SYMBOL(k8_nb_ids);
+EXPORT_SYMBOL(amd_nb_misc_ids);
-struct k8_northbridge_info k8_northbridges;
-EXPORT_SYMBOL(k8_northbridges);
+const struct amd_nb_bus_dev_range amd_nb_bus_dev_ranges[] __initconst = {
+ { 0x00, 0x18, 0x20 },
+ { 0xff, 0x00, 0x20 },
+ { 0xfe, 0x00, 0x20 },
+ { }
+};
+
+struct amd_northbridge_info amd_northbridges;
+EXPORT_SYMBOL(amd_northbridges);
-static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
+static struct pci_dev *next_northbridge(struct pci_dev *dev,
+ struct pci_device_id *ids)
{
do {
dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
if (!dev)
break;
- } while (!pci_match_id(&k8_nb_ids[0], dev));
+ } while (!pci_match_id(ids, dev));
return dev;
}
-int cache_k8_northbridges(void)
+int amd_cache_northbridges(void)
{
- int i;
- struct pci_dev *dev;
+ int i = 0;
+ struct amd_northbridge *nb;
+ struct pci_dev *misc;
- if (k8_northbridges.num)
+ if (amd_nb_num())
return 0;
- dev = NULL;
- while ((dev = next_k8_northbridge(dev)) != NULL)
- k8_northbridges.num++;
+ misc = NULL;
+ while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
+ i++;
- /* some CPU families (e.g. family 0x11) do not support GART */
- if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
- boot_cpu_data.x86 == 0x15)
- k8_northbridges.gart_supported = 1;
+ if (i == 0)
+ return 0;
- k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
- sizeof(void *), GFP_KERNEL);
- if (!k8_northbridges.nb_misc)
+ nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
+ if (!nb)
return -ENOMEM;
- if (!k8_northbridges.num) {
- k8_northbridges.nb_misc[0] = NULL;
- return 0;
- }
+ amd_northbridges.nb = nb;
+ amd_northbridges.num = i;
- if (k8_northbridges.gart_supported) {
- flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
- GFP_KERNEL);
- if (!flush_words) {
- kfree(k8_northbridges.nb_misc);
- return -ENOMEM;
- }
- }
+ misc = NULL;
+ for (i = 0; i != amd_nb_num(); i++) {
+ node_to_amd_nb(i)->misc = misc =
+ next_northbridge(misc, amd_nb_misc_ids);
+ }
+
+ /* some CPU families (e.g. family 0x11) do not support GART */
+ if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
+ boot_cpu_data.x86 == 0x15)
+ amd_northbridges.flags |= AMD_NB_GART;
+
+ /*
+ * Some CPU families support L3 Cache Index Disable. There are some
+ * limitations because of E382 and E388 on family 0x10.
+ */
+ if (boot_cpu_data.x86 == 0x10 &&
+ boot_cpu_data.x86_model >= 0x8 &&
+ (boot_cpu_data.x86_model > 0x9 ||
+ boot_cpu_data.x86_mask >= 0x1))
+ amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;
- dev = NULL;
- i = 0;
- while ((dev = next_k8_northbridge(dev)) != NULL) {
- k8_northbridges.nb_misc[i] = dev;
- if (k8_northbridges.gart_supported)
- pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
- }
- k8_northbridges.nb_misc[i] = NULL;
return 0;
}
-EXPORT_SYMBOL_GPL(cache_k8_northbridges);
+EXPORT_SYMBOL_GPL(amd_cache_northbridges);
/* Ignores subdevice/subvendor but as far as I can figure out
they're useless anyways */
-int __init early_is_k8_nb(u32 device)
+int __init early_is_amd_nb(u32 device)
{
struct pci_device_id *id;
u32 vendor = device & 0xffff;
device >>= 16;
- for (id = k8_nb_ids; id->vendor; id++)
+ for (id = amd_nb_misc_ids; id->vendor; id++)
if (vendor == id->vendor && device == id->device)
return 1;
return 0;
}
-void k8_flush_garts(void)
+int amd_cache_gart(void)
+{
+ int i;
+
+ if (!amd_nb_has_feature(AMD_NB_GART))
+ return 0;
+
+ flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
+ if (!flush_words) {
+ amd_northbridges.flags &= ~AMD_NB_GART;
+ return -ENOMEM;
+ }
+
+ for (i = 0; i != amd_nb_num(); i++)
+ pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+ &flush_words[i]);
+
+ return 0;
+}
+
+void amd_flush_garts(void)
{
int flushed, i;
unsigned long flags;
static DEFINE_SPINLOCK(gart_lock);
- if (!k8_northbridges.gart_supported)
+ if (!amd_nb_has_feature(AMD_NB_GART))
return;
/* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +137,16 @@ void k8_flush_garts(void)
that it doesn't matter to serialize more. -AK */
spin_lock_irqsave(&gart_lock, flags);
flushed = 0;
- for (i = 0; i < k8_northbridges.num; i++) {
- pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
- flush_words[i]|1);
+ for (i = 0; i < amd_nb_num(); i++) {
+ pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
+ flush_words[i] | 1);
flushed++;
}
- for (i = 0; i < k8_northbridges.num; i++) {
+ for (i = 0; i < amd_nb_num(); i++) {
u32 w;
/* Make sure the hardware actually executed the flush*/
for (;;) {
- pci_read_config_dword(k8_northbridges.nb_misc[i],
+ pci_read_config_dword(node_to_amd_nb(i)->misc,
0x9c, &w);
if (!(w & 1))
break;
@@ -129,19 +157,23 @@ void k8_flush_garts(void)
if (!flushed)
printk("nothing to flush?\n");
}
-EXPORT_SYMBOL_GPL(k8_flush_garts);
+EXPORT_SYMBOL_GPL(amd_flush_garts);
-static __init int init_k8_nbs(void)
+static __init int init_amd_nbs(void)
{
int err = 0;
- err = cache_k8_northbridges();
+ err = amd_cache_northbridges();
if (err < 0)
- printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
+ printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");
+
+ if (amd_cache_gart() < 0)
+ printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
+ "GART support disabled.\n");
return err;
}
/* This has to go after the PCI subsystem */
-fs_initcall(init_k8_nbs);
+fs_initcall(init_amd_nbs);
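With the k8_northbridges array gone, callers walk the cached devices through the new accessors instead of indexing nb_misc[] directly. A short sketch of the new idiom, modeled on the amd_cache_gart() hunk above (0x9c is the GART flush-word register used there):

        #include <linux/pci.h>
        #include <asm/amd_nb.h>         /* amd_nb_num(), node_to_amd_nb(), AMD_NB_GART */

        /* Sketch: read the GART flush word from every cached northbridge. */
        static void read_all_flush_words(u32 *words)
        {
                int i;

                if (!amd_nb_has_feature(AMD_NB_GART))
                        return;

                for (i = 0; i < amd_nb_num(); i++)
                        pci_read_config_dword(node_to_amd_nb(i)->misc,
                                              0x9c, &words[i]);
        }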
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
index 92543c7..7c9ab59 100644
--- a/arch/x86/kernel/apb_timer.c
+++ b/arch/x86/kernel/apb_timer.c
@@ -315,6 +315,7 @@ static void apbt_setup_irq(struct apbt_dev *adev)
if (system_state == SYSTEM_BOOTING) {
irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
+ irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
/* APB timer irqs are set up as mp_irqs, timer is edge type */
__set_irq_handler(adev->irq, handle_edge_irq, 0, "edge");
if (request_irq(adev->irq, apbt_interrupt_handler,
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c
index b3a16e8..5955a78 100644
--- a/arch/x86/kernel/aperture_64.c
+++ b/arch/x86/kernel/aperture_64.c
@@ -39,18 +39,6 @@ int fallback_aper_force __initdata;
int fix_aperture __initdata = 1;
-struct bus_dev_range {
- int bus;
- int dev_base;
- int dev_limit;
-};
-
-static struct bus_dev_range bus_dev_ranges[] __initdata = {
- { 0x00, 0x18, 0x20},
- { 0xff, 0x00, 0x20},
- { 0xfe, 0x00, 0x20}
-};
-
static struct resource gart_resource = {
.name = "GART",
.flags = IORESOURCE_MEM,
@@ -206,7 +194,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
* Do an PCI bus scan by hand because we're running before the PCI
* subsystem.
*
- * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
+ * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
* generically. It's probably overkill to always scan all slots because
* the AGP bridges should be always an own bus on the HT hierarchy,
* but do it here for future safety.
@@ -294,16 +282,16 @@ void __init early_gart_iommu_check(void)
search_agp_bridge(&agp_aper_order, &valid_agp);
fix = 0;
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
- if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+ if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -349,16 +337,16 @@ void __init early_gart_iommu_check(void)
return;
/* disable them all at first */
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
- if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+ if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -390,17 +378,17 @@ int __init gart_iommu_hole_init(void)
fix = 0;
node = 0;
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus;
int dev_base, dev_limit;
u32 ctl;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
- if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+ if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
iommu_detected = 1;
@@ -505,7 +493,7 @@ out:
}
/* Fix up the north bridges */
- for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
+ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) {
int bus, dev_base, dev_limit;
/*
@@ -514,11 +502,11 @@ out:
*/
u32 ctl = DISTLBWALKPRB | aper_order << 1;
- bus = bus_dev_ranges[i].bus;
- dev_base = bus_dev_ranges[i].dev_base;
- dev_limit = bus_dev_ranges[i].dev_limit;
+ bus = amd_nb_bus_dev_ranges[i].bus;
+ dev_base = amd_nb_bus_dev_ranges[i].dev_base;
+ dev_limit = amd_nb_bus_dev_ranges[i].dev_limit;
for (slot = dev_base; slot < dev_limit; slot++) {
- if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
+ if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
continue;
write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
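aperture_64.c no longer carries a private table; it scans the shared, sentinel-terminated amd_nb_bus_dev_ranges[] exported from amd_nb.c above (the trailing { } entry has dev_limit == 0, which is what ends the loop). The bare pattern, pulled out of the hunks for reference:

        #include <asm/amd_nb.h>         /* amd_nb_bus_dev_ranges[], early_is_amd_nb() */
        #include <asm/pci-direct.h>     /* read_pci_config() */

        /* Sketch: probe every possible AMD NB function 3 before PCI is up. */
        static void __init scan_nb_ranges(void)
        {
                int i, slot;

                for (i = 0; amd_nb_bus_dev_ranges[i].dev_limit; i++) {
                        int bus = amd_nb_bus_dev_ranges[i].bus;

                        for (slot = amd_nb_bus_dev_ranges[i].dev_base;
                             slot < amd_nb_bus_dev_ranges[i].dev_limit; slot++) {
                                if (!early_is_amd_nb(read_pci_config(bus, slot,
                                                                     3, 0x00)))
                                        continue;
                                /* ... bus:slot.3 is an AMD northbridge ... */
                        }
                }
        }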
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 910f20b4..3966b56 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -3,10 +3,7 @@
#
obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
-ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
-obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
-endif
-obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
+obj-y += hw_nmi.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 7821813..06c196d 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -31,7 +31,6 @@
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/dmi.h>
-#include <linux/nmi.h>
#include <linux/smp.h>
#include <linux/mm.h>
@@ -50,8 +49,8 @@
#include <asm/mtrr.h>
#include <asm/smp.h>
#include <asm/mce.h>
-#include <asm/kvm_para.h>
#include <asm/tsc.h>
+#include <asm/hypervisor.h>
unsigned int num_processors;
@@ -432,17 +431,18 @@ int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
reserved = reserve_eilvt_offset(offset, new);
if (reserved != new) {
- pr_err(FW_BUG "cpu %d, try to setup vector 0x%x, but "
- "vector 0x%x was already reserved by another core, "
- "APIC%lX=0x%x\n",
- smp_processor_id(), new, reserved, reg, old);
+ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+ "vector 0x%x, but the register is already in use for "
+ "vector 0x%x on another cpu\n",
+ smp_processor_id(), reg, offset, new, reserved);
return -EINVAL;
}
if (!eilvt_entry_is_changeable(old, new)) {
- pr_err(FW_BUG "cpu %d, try to setup vector 0x%x but "
- "register already in use, APIC%lX=0x%x\n",
- smp_processor_id(), new, reg, old);
+ pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+ "vector 0x%x, but the register is already in use for "
+ "vector 0x%x on this cpu\n",
+ smp_processor_id(), reg, offset, new, old);
return -EBUSY;
}
@@ -516,7 +516,7 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
- if (cpu_has(&current_cpu_data, X86_FEATURE_ARAT)) {
+ if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
/* Make LAPIC timer preferrable over percpu HPET */
lapic_clockevent.rating = 150;
@@ -684,7 +684,7 @@ static int __init calibrate_APIC_clock(void)
lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
lapic_clockevent.shift);
lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
lapic_clockevent.min_delta_ns =
clockevent_delta2ns(0xF, &lapic_clockevent);
@@ -799,11 +799,7 @@ void __init setup_boot_APIC_clock(void)
* PIT/HPET going. Otherwise register lapic as a dummy
* device.
*/
- if (nmi_watchdog != NMI_IO_APIC)
- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
- else
- pr_warning("APIC timer registered as dummy,"
- " due to nmi_watchdog=%d!\n", nmi_watchdog);
+ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
/* Setup the lapic or request the broadcast */
setup_APIC_timer();
@@ -1195,12 +1191,15 @@ static void __cpuinit lapic_setup_esr(void)
oldvalue, value);
}
-
/**
* setup_local_APIC - setup the local APIC
+ *
+ * Used to set up the local APIC while initializing the BSP or bringing up APs.
+ * Always called with preemption disabled.
*/
void __cpuinit setup_local_APIC(void)
{
+ int cpu = smp_processor_id();
unsigned int value, queued;
int i, j, acked = 0;
unsigned long long tsc = 0, ntsc;
@@ -1225,8 +1224,6 @@ void __cpuinit setup_local_APIC(void)
#endif
perf_events_lapic_init();
- preempt_disable();
-
/*
* Double-check whether this APIC is really registered.
* This is meaningless in clustered apic mode, so we skip it.
@@ -1342,21 +1339,19 @@ void __cpuinit setup_local_APIC(void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!smp_processor_id() && (pic_mode || !value)) {
+ if (!cpu && (pic_mode || !value)) {
value = APIC_DM_EXTINT;
- apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n",
- smp_processor_id());
+ apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
} else {
value = APIC_DM_EXTINT | APIC_LVT_MASKED;
- apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n",
- smp_processor_id());
+ apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
}
apic_write(APIC_LVT0, value);
/*
* only the BP should see the LINT1 NMI signal, obviously.
*/
- if (!smp_processor_id())
+ if (!cpu)
value = APIC_DM_NMI;
else
value = APIC_DM_NMI | APIC_LVT_MASKED;
@@ -1364,11 +1359,9 @@ void __cpuinit setup_local_APIC(void)
value |= APIC_LVT_LEVEL_TRIGGER;
apic_write(APIC_LVT1, value);
- preempt_enable();
-
#ifdef CONFIG_X86_MCE_INTEL
/* Recheck CMCI information after local APIC is up on CPU #0 */
- if (smp_processor_id() == 0)
+ if (!cpu)
cmci_recheck();
#endif
}
@@ -1387,7 +1380,6 @@ void __cpuinit end_local_APIC_setup(void)
}
#endif
- setup_apic_nmi_watchdog(NULL);
apic_pm_activate();
/*
@@ -1484,7 +1476,8 @@ void __init enable_IR_x2apic(void)
/* IR is required if there is APIC ID > 255 even when running
* under KVM
*/
- if (max_physical_apicid > 255 || !kvm_para_available())
+ if (max_physical_apicid > 255 ||
+ !hypervisor_x2apic_available())
goto nox2apic;
/*
* without IR all CPUs can be addressed by IOAPIC/MSI
@@ -1538,13 +1531,60 @@ static int __init detect_init_APIC(void)
return 0;
}
#else
+
+static int apic_verify(void)
+{
+ u32 features, h, l;
+
+ /*
+ * The APIC feature bit should now be enabled
+ * in `cpuid'
+ */
+ features = cpuid_edx(1);
+ if (!(features & (1 << X86_FEATURE_APIC))) {
+ pr_warning("Could not enable APIC!\n");
+ return -1;
+ }
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+ /* The BIOS may have set up the APIC at some other address */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (l & MSR_IA32_APICBASE_ENABLE)
+ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+
+ pr_info("Found and enabled local APIC!\n");
+ return 0;
+}
+
+int apic_force_enable(void)
+{
+ u32 h, l;
+
+ if (disable_apic)
+ return -1;
+
+ /*
+ * Some BIOSes disable the local APIC in the APIC_BASE
+ * MSR. This can only be done in software for Intel P6 or later
+ * and AMD K7 (Model > 1) or later.
+ */
+ rdmsr(MSR_IA32_APICBASE, l, h);
+ if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+ pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+ l &= ~MSR_IA32_APICBASE_BASE;
+ l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
+ wrmsr(MSR_IA32_APICBASE, l, h);
+ enabled_via_apicbase = 1;
+ }
+ return apic_verify();
+}
+
/*
* Detect and initialize APIC
*/
static int __init detect_init_APIC(void)
{
- u32 h, l, features;
-
/* Disabled by kernel option? */
if (disable_apic)
return -1;
@@ -1574,38 +1614,12 @@ static int __init detect_init_APIC(void)
"you can enable it with \"lapic\"\n");
return -1;
}
- /*
- * Some BIOSes disable the local APIC in the APIC_BASE
- * MSR. This can only be done in software for Intel P6 or later
- * and AMD K7 (Model > 1) or later.
- */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (!(l & MSR_IA32_APICBASE_ENABLE)) {
- pr_info("Local APIC disabled by BIOS -- reenabling.\n");
- l &= ~MSR_IA32_APICBASE_BASE;
- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE;
- wrmsr(MSR_IA32_APICBASE, l, h);
- enabled_via_apicbase = 1;
- }
- }
- /*
- * The APIC feature bit should now be enabled
- * in `cpuid'
- */
- features = cpuid_edx(1);
- if (!(features & (1 << X86_FEATURE_APIC))) {
- pr_warning("Could not enable APIC!\n");
- return -1;
+ if (apic_force_enable())
+ return -1;
+ } else {
+ if (apic_verify())
+ return -1;
}
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
-
- /* The BIOS may have set up the APIC at some other address */
- rdmsr(MSR_IA32_APICBASE, l, h);
- if (l & MSR_IA32_APICBASE_ENABLE)
- mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
-
- pr_info("Found and enabled local APIC!\n");
apic_pm_activate();
@@ -1617,28 +1631,6 @@ no_apic:
}
#endif
-#ifdef CONFIG_X86_64
-void __init early_init_lapic_mapping(void)
-{
- /*
- * If no local APIC can be found then go out
- * : it means there is no mpatable and MADT
- */
- if (!smp_found_config)
- return;
-
- set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
- apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
- APIC_BASE, mp_lapic_addr);
-
- /*
- * Fetch the APIC ID of the BSP in case we have a
- * default configuration (or the MP table is broken).
- */
- boot_cpu_physical_apicid = read_apic_id();
-}
-#endif
-
/**
* init_apic_mappings - initialize APIC mappings
*/
@@ -1664,10 +1656,7 @@ void __init init_apic_mappings(void)
* acpi_register_lapic_address()
*/
if (!acpi_lapic && !smp_found_config)
- set_fixmap_nocache(FIX_APIC_BASE, apic_phys);
-
- apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n",
- APIC_BASE, apic_phys);
+ register_lapic_address(apic_phys);
}
/*
@@ -1689,11 +1678,27 @@ void __init init_apic_mappings(void)
}
}
+void __init register_lapic_address(unsigned long address)
+{
+ mp_lapic_addr = address;
+
+ if (!x2apic_mode) {
+ set_fixmap_nocache(FIX_APIC_BASE, address);
+ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+ APIC_BASE, mp_lapic_addr);
+ }
+ if (boot_cpu_physical_apicid == -1U) {
+ boot_cpu_physical_apicid = read_apic_id();
+ apic_version[boot_cpu_physical_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+}
+
/*
* This initializes the IO-APIC and APIC hardware if this is
* a UP kernel.
*/
-int apic_version[MAX_APICS];
+int apic_version[MAX_LOCAL_APIC];
int __init APIC_init_uniprocessor(void)
{
@@ -1758,17 +1763,10 @@ int __init APIC_init_uniprocessor(void)
setup_IO_APIC();
else {
nr_ioapics = 0;
- localise_nmi_watchdog();
}
-#else
- localise_nmi_watchdog();
#endif
x86_init.timers.setup_percpu_clockev();
-#ifdef CONFIG_X86_64
- check_nmi_watchdog();
-#endif
-
return 0;
}
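Among the mechanical NMI-watchdog removals in this file, one behavioral change is easy to miss: max_delta_ns is now derived from 0x7FFFFFFF instead of 0x7FFFFF, i.e. the full positive range of the 32-bit APIC initial-count register, roughly 256 times longer one-shot sleeps. A sketch of the scaling clockevent_delta2ns() performs (the real helper also rounds and clamps; this is only the core conversion):

        /*
         * Sketch only: clockevents convert a tick delta back to nanoseconds
         * by inverting the mult/shift scaling computed during calibration
         * (here, lapic_clockevent.mult from div_sc() in calibrate_APIC_clock).
         */
        static inline unsigned long long delta2ns_sketch(unsigned long delta,
                                                         unsigned int mult,
                                                         unsigned int shift)
        {
                return ((unsigned long long)delta << shift) / mult;
        }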
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 62f6e1e..79fd43c 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -17,20 +17,31 @@
#include <linux/nmi.h>
#include <linux/module.h>
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(void)
{
return (u64)(cpu_khz) * 1000 * 60;
}
+#endif
-#ifdef ARCH_HAS_NMI_WATCHDOG
-
+#ifdef arch_trigger_all_cpu_backtrace
/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
void arch_trigger_all_cpu_backtrace(void)
{
int i;
+ if (test_and_set_bit(0, &backtrace_flag))
+ /*
+ * If another trigger_all_cpu_backtrace() is already in progress
+ * (backtrace_flag == 1), don't print the CPU dumps a second time.
+ */
+ return;
+
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
printk(KERN_INFO "sending NMI to all CPUs:\n");
@@ -42,6 +53,9 @@ void arch_trigger_all_cpu_backtrace(void)
break;
mdelay(1);
}
+
+ clear_bit(0, &backtrace_flag);
+ smp_mb__after_clear_bit();
}
static int __kprobes
@@ -50,11 +64,10 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
{
struct die_args *args = __args;
struct pt_regs *regs;
- int cpu = smp_processor_id();
+ int cpu;
switch (cmd) {
case DIE_NMI:
- case DIE_NMI_IPI:
break;
default:
@@ -62,6 +75,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
}
regs = args->regs;
+ cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
@@ -81,7 +95,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
static __read_mostly struct notifier_block backtrace_notifier = {
.notifier_call = arch_trigger_all_cpu_backtrace_handler,
.next = NULL,
- .priority = 1
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static int __init register_trigger_all_cpu_backtrace(void)
@@ -91,18 +105,3 @@ static int __init register_trigger_all_cpu_backtrace(void)
}
early_initcall(register_trigger_all_cpu_backtrace);
#endif
-
-/* STUB calls to mimic old nmi_watchdog behaviour */
-#if defined(CONFIG_X86_LOCAL_APIC)
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-void acpi_nmi_enable(void) { return; }
-void acpi_nmi_disable(void) { return; }
-#endif
-atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
-void cpu_nmi_set_wd_enabled(void) { return; }
-void stop_apic_nmi_watchdog(void *unused) { return; }
-void setup_apic_nmi_watchdog(void *unused) { return; }
-int __init check_nmi_watchdog(void) { return 0; }
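The backtrace_flag added earlier in this file's hunks is a standard one-shot re-entry guard: first caller claims bit 0 and does the work, concurrent callers bail out. The same pattern stripped of the NMI specifics (names below are generic, not from the patch):

        #include <linux/bitops.h>

        static unsigned long in_progress;       /* bit 0: section is running */

        static void run_once_at_a_time(void)
        {
                if (test_and_set_bit(0, &in_progress))
                        return;         /* another caller already owns the section */

                /* ... non-reentrant work: send NMIs, wait for the dumps ... */

                clear_bit(0, &in_progress);
                smp_mb__after_clear_bit();      /* make the release visible */
        }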
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fadcd74..697dc34 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -54,7 +54,6 @@
#include <asm/dma.h>
#include <asm/timer.h>
#include <asm/i8259.h>
-#include <asm/nmi.h>
#include <asm/msidef.h>
#include <asm/hypertransport.h>
#include <asm/setup.h>
@@ -126,6 +125,26 @@ static int __init parse_noapic(char *str)
}
early_param("noapic", parse_noapic);
+/* Called by the mpparse/acpi/sfi code to save IRQ info */
+void mp_save_irq(struct mpc_intsrc *m)
+{
+ int i;
+
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
+ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+ m->srcbusirq, m->dstapic, m->dstirq);
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
+ return;
+ }
+
+ memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
struct irq_pin_list {
int apic, pin;
struct irq_pin_list *next;
@@ -136,6 +155,7 @@ static struct irq_pin_list *alloc_irq_pin_list(int node)
return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}
+
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
@@ -1934,8 +1954,7 @@ void disable_IO_APIC(void)
*
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
-
-void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
{
union IO_APIC_reg_00 reg_00;
physid_mask_t phys_id_present_map;
@@ -1944,15 +1963,6 @@ void __init setup_ioapic_ids_from_mpc(void)
unsigned char old_id;
unsigned long flags;
- if (acpi_ioapic)
- return;
- /*
- * Don't check I/O APIC IDs for xAPIC systems. They have
- * no meaning without the serial APIC bus.
- */
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
- return;
/*
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
@@ -2006,7 +2016,6 @@ void __init setup_ioapic_ids_from_mpc(void)
physids_or(phys_id_present_map, phys_id_present_map, tmp);
}
-
/*
* We need to adjust the IRQ routing table
* if the ID changed.
@@ -2018,9 +2027,12 @@ void __init setup_ioapic_ids_from_mpc(void)
= mp_ioapics[apic_id].apicid;
/*
- * Read the right value from the MPC table and
- * write it into the ID register.
+ * Update the ID register according to the right value
+ * from the MPC table if they are different.
*/
+ if (mp_ioapics[apic_id].apicid == reg_00.bits.ID)
+ continue;
+
apic_printk(APIC_VERBOSE, KERN_INFO
"...changing IO-APIC physical APIC ID to %d ...",
mp_ioapics[apic_id].apicid);
@@ -2042,6 +2054,21 @@ void __init setup_ioapic_ids_from_mpc(void)
apic_printk(APIC_VERBOSE, " ok.\n");
}
}
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+ if (acpi_ioapic)
+ return;
+ /*
+ * Don't check I/O APIC IDs for xAPIC systems. They have
+ * no meaning without the serial APIC bus.
+ */
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return;
+ setup_ioapic_ids_from_mpc_nocheck();
+}
#endif
int no_timer_check __initdata;
@@ -2302,7 +2329,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
unsigned int irr;
struct irq_desc *desc;
struct irq_cfg *cfg;
- irq = __get_cpu_var(vector_irq)[vector];
+ irq = __this_cpu_read(vector_irq[vector]);
if (irq == -1)
continue;
@@ -2336,7 +2363,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void)
apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
goto unlock;
}
- __get_cpu_var(vector_irq)[vector] = -1;
+ __this_cpu_write(vector_irq[vector], -1);
unlock:
raw_spin_unlock(&desc->lock);
}
@@ -2642,24 +2669,6 @@ static void lapic_register_intr(int irq)
"edge");
}
-static void __init setup_nmi(void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
- enable_NMI_through_LVT0();
-
- apic_printk(APIC_VERBOSE, " done.\n");
-}
-
/*
* This looks a bit hackish but it's about the only one way of sending
* a few INTA cycles to 8259As and any associated glue logic. ICR does
@@ -2765,15 +2774,6 @@ static inline void __init check_timer(void)
*/
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
legacy_pic->init(1);
-#ifdef CONFIG_X86_32
- {
- unsigned int ver;
-
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
- }
-#endif
pin1 = find_isa_irq_pin(0, mp_INT);
apic1 = find_isa_irq_apic(0, mp_INT);
@@ -2821,10 +2821,6 @@ static inline void __init check_timer(void)
unmask_ioapic(cfg);
}
if (timer_irq_works()) {
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- legacy_pic->unmask(0);
- }
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
goto out;
@@ -2850,11 +2846,6 @@ static inline void __init check_timer(void)
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
- if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->mask(0);
- setup_nmi();
- legacy_pic->unmask(0);
- }
goto out;
}
/*
@@ -2866,15 +2857,6 @@ static inline void __init check_timer(void)
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
- if (nmi_watchdog == NMI_IO_APIC) {
- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
- "through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = NMI_NONE;
- }
-#ifdef CONFIG_X86_32
- timer_ack = 0;
-#endif
-
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
@@ -3639,7 +3621,7 @@ int __init io_apic_get_redir_entries (int ioapic)
return reg_01.bits.entries + 1;
}
-void __init probe_nr_irqs_gsi(void)
+static void __init probe_nr_irqs_gsi(void)
{
int nr;
@@ -3956,7 +3938,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics)
return res;
}
-void __init ioapic_init_mappings(void)
+void __init ioapic_and_gsi_init(void)
{
unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
struct resource *ioapic_res;
@@ -3994,6 +3976,8 @@ fake_ioapic_page:
ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
+
+ probe_nr_irqs_gsi();
}
void __init ioapic_insert_resources(void)
@@ -4103,7 +4087,8 @@ void __init pre_init_apic_IRQ0(void)
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
- phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+ physid_set_mask_of_physid(boot_cpu_physical_apicid,
+ &phys_cpu_present_map);
#endif
/* Make sure the irq descriptor is set up */
cfg = alloc_irq_and_cfg_at(0, 0);
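The mp_save_irq() introduced at the top of this file consolidates the duplicated assign_to_mp_irq()/mp_irq_cmp()/save_mp_irq() helpers that the acpi/boot.c hunks delete. A caller fills in a struct mpc_intsrc and lets mp_save_irq() de-duplicate against mp_irqs[]. A sketch modeled on mp_override_legacy_irq() above (the routing values are illustrative):

        #include <asm/mpspec.h>         /* struct mpc_intsrc, MP_INTSRC, mp_INT */

        /* Sketch: record one ISA IRQ -> I/O APIC pin routing entry. */
        static void __init save_isa_routing(u8 bus_irq, u8 polarity, u8 trigger,
                                            u8 dstapic, u8 pin)
        {
                struct mpc_intsrc mp_irq;

                mp_irq.type     = MP_INTSRC;
                mp_irq.irqtype  = mp_INT;
                mp_irq.irqflag  = (trigger << 2) | polarity;    /* MADT-style flags */
                mp_irq.srcbus   = MP_ISA_BUS;
                mp_irq.srcbusirq = bus_irq;     /* IRQ on the source bus */
                mp_irq.dstapic  = dstapic;      /* target I/O APIC ID */
                mp_irq.dstirq   = pin;          /* INTIN# */

                mp_save_irq(&mp_irq);   /* no-op if an identical entry exists */
        }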
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
deleted file mode 100644
index c90041c..0000000
--- a/arch/x86/kernel/apic/nmi.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- * NMI watchdog support on APIC systems
- *
- * Started by Ingo Molnar <mingo@redhat.com>
- *
- * Fixes:
- * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
- * Mikael Pettersson : Power Management for local APIC NMI watchdog.
- * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog.
- * Pavel Machek and
- * Mikael Pettersson : PM converted to driver model. Disable/enable API.
- */
-
-#include <asm/apic.h>
-
-#include <linux/nmi.h>
-#include <linux/mm.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/sysdev.h>
-#include <linux/sysctl.h>
-#include <linux/percpu.h>
-#include <linux/kprobes.h>
-#include <linux/cpumask.h>
-#include <linux/kernel_stat.h>
-#include <linux/kdebug.h>
-#include <linux/smp.h>
-
-#include <asm/i8259.h>
-#include <asm/io_apic.h>
-#include <asm/proto.h>
-#include <asm/timer.h>
-
-#include <asm/mce.h>
-
-#include <asm/mach_traps.h>
-
-int unknown_nmi_panic;
-int nmi_watchdog_enabled;
-
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
-/* nmi_active:
- * >0: the lapic NMI watchdog is active, but can be disabled
- * <0: the lapic NMI watchdog has not been set up, and cannot
- * be enabled
- * 0: the lapic NMI watchdog is disabled, but can be enabled
- */
-atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
-EXPORT_SYMBOL(nmi_active);
-
-unsigned int nmi_watchdog = NMI_NONE;
-EXPORT_SYMBOL(nmi_watchdog);
-
-static int panic_on_timeout;
-
-static unsigned int nmi_hz = HZ;
-static DEFINE_PER_CPU(short, wd_enabled);
-static int endflag __initdata;
-
-static inline unsigned int get_nmi_count(int cpu)
-{
- return per_cpu(irq_stat, cpu).__nmi_count;
-}
-
-static inline int mce_in_progress(void)
-{
-#if defined(CONFIG_X86_MCE)
- return atomic_read(&mce_entry) > 0;
-#endif
- return 0;
-}
-
-/*
- * Take the local apic timer and PIT/HPET into account. We don't
- * know which one is active, when we have highres/dyntick on
- */
-static inline unsigned int get_timer_irqs(int cpu)
-{
- return per_cpu(irq_stat, cpu).apic_timer_irqs +
- per_cpu(irq_stat, cpu).irq0_irqs;
-}
-
-#ifdef CONFIG_SMP
-/*
- * The performance counters used by NMI_LOCAL_APIC don't trigger when
- * the CPU is idle. To make sure the NMI watchdog really ticks on all
- * CPUs during the test make them busy.
- */
-static __init void nmi_cpu_busy(void *data)
-{
- local_irq_enable_in_hardirq();
- /*
- * Intentionally don't use cpu_relax here. This is
- * to make sure that the performance counter really ticks,
- * even if there is a simulator or similar that catches the
- * pause instruction. On a real HT machine this is fine because
- * all other CPUs are busy with "useless" delay loops and don't
- * care if they get somewhat less cycles.
- */
- while (endflag == 0)
- mb();
-}
-#endif
-
-static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count)
-{
- printk(KERN_CONT "\n");
-
- printk(KERN_WARNING
- "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n",
- cpu, prev_nmi_count[cpu], get_nmi_count(cpu));
-
- printk(KERN_WARNING
- "Please report this to bugzilla.kernel.org,\n");
- printk(KERN_WARNING
- "and attach the output of the 'dmesg' command.\n");
-
- per_cpu(wd_enabled, cpu) = 0;
- atomic_dec(&nmi_active);
-}
-
-static void __acpi_nmi_disable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED);
-}
-
-int __init check_nmi_watchdog(void)
-{
- unsigned int *prev_nmi_count;
- int cpu;
-
- if (!nmi_watchdog_active() || !atomic_read(&nmi_active))
- return 0;
-
- prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
- if (!prev_nmi_count)
- goto error;
-
- printk(KERN_INFO "Testing NMI watchdog ... ");
-
-#ifdef CONFIG_SMP
- if (nmi_watchdog == NMI_LOCAL_APIC)
- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
-#endif
-
- for_each_possible_cpu(cpu)
- prev_nmi_count[cpu] = get_nmi_count(cpu);
- local_irq_enable();
- mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */
-
- for_each_online_cpu(cpu) {
- if (!per_cpu(wd_enabled, cpu))
- continue;
- if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5)
- report_broken_nmi(cpu, prev_nmi_count);
- }
- endflag = 1;
- if (!atomic_read(&nmi_active)) {
- kfree(prev_nmi_count);
- atomic_set(&nmi_active, -1);
- goto error;
- }
- printk("OK.\n");
-
- /*
- * now that we know it works we can reduce NMI frequency to
- * something more reasonable; makes a difference in some configs
- */
- if (nmi_watchdog == NMI_LOCAL_APIC)
- nmi_hz = lapic_adjust_nmi_hz(1);
-
- kfree(prev_nmi_count);
- return 0;
-error:
- if (nmi_watchdog == NMI_IO_APIC) {
- if (!timer_through_8259)
- legacy_pic->mask(0);
- on_each_cpu(__acpi_nmi_disable, NULL, 1);
- }
-
-#ifdef CONFIG_X86_32
- timer_ack = 0;
-#endif
- return -1;
-}
-
-static int __init setup_nmi_watchdog(char *str)
-{
- unsigned int nmi;
-
- if (!strncmp(str, "panic", 5)) {
- panic_on_timeout = 1;
- str = strchr(str, ',');
- if (!str)
- return 1;
- ++str;
- }
-
- if (!strncmp(str, "lapic", 5))
- nmi_watchdog = NMI_LOCAL_APIC;
- else if (!strncmp(str, "ioapic", 6))
- nmi_watchdog = NMI_IO_APIC;
- else {
- get_option(&str, &nmi);
- if (nmi >= NMI_INVALID)
- return 0;
- nmi_watchdog = nmi;
- }
-
- return 1;
-}
-__setup("nmi_watchdog=", setup_nmi_watchdog);
-
-/*
- * Suspend/resume support
- */
-#ifdef CONFIG_PM
-
-static int nmi_pm_active; /* nmi_active before suspend */
-
-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
-{
- /* only CPU0 goes here, other CPUs should be offline */
- nmi_pm_active = atomic_read(&nmi_active);
- stop_apic_nmi_watchdog(NULL);
- BUG_ON(atomic_read(&nmi_active) != 0);
- return 0;
-}
-
-static int lapic_nmi_resume(struct sys_device *dev)
-{
- /* only CPU0 goes here, other CPUs should be offline */
- if (nmi_pm_active > 0) {
- setup_apic_nmi_watchdog(NULL);
- touch_nmi_watchdog();
- }
- return 0;
-}
-
-static struct sysdev_class nmi_sysclass = {
- .name = "lapic_nmi",
- .resume = lapic_nmi_resume,
- .suspend = lapic_nmi_suspend,
-};
-
-static struct sys_device device_lapic_nmi = {
- .id = 0,
- .cls = &nmi_sysclass,
-};
-
-static int __init init_lapic_nmi_sysfs(void)
-{
- int error;
-
- /*
- * should really be a BUG_ON but b/c this is an
- * init call, it just doesn't work. -dcz
- */
- if (nmi_watchdog != NMI_LOCAL_APIC)
- return 0;
-
- if (atomic_read(&nmi_active) < 0)
- return 0;
-
- error = sysdev_class_register(&nmi_sysclass);
- if (!error)
- error = sysdev_register(&device_lapic_nmi);
- return error;
-}
-
-/* must come after the local APIC's device_initcall() */
-late_initcall(init_lapic_nmi_sysfs);
-
-#endif /* CONFIG_PM */
-
-static void __acpi_nmi_enable(void *__unused)
-{
- apic_write(APIC_LVT0, APIC_DM_NMI);
-}
-
-/*
- * Enable timer based NMIs on all CPUs:
- */
-void acpi_nmi_enable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_enable, NULL, 1);
-}
-
-/*
- * Disable timer based NMIs on all CPUs:
- */
-void acpi_nmi_disable(void)
-{
- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
- on_each_cpu(__acpi_nmi_disable, NULL, 1);
-}
-
-/*
- * This function is called as soon the LAPIC NMI watchdog driver has everything
- * in place and it's ready to check if the NMIs belong to the NMI watchdog
- */
-void cpu_nmi_set_wd_enabled(void)
-{
- __get_cpu_var(wd_enabled) = 1;
-}
-
-void setup_apic_nmi_watchdog(void *unused)
-{
- if (__get_cpu_var(wd_enabled))
- return;
-
- /* cheap hack to support suspend/resume */
- /* if cpu0 is not active neither should the other cpus */
- if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0)
- return;
-
- switch (nmi_watchdog) {
- case NMI_LOCAL_APIC:
- if (lapic_watchdog_init(nmi_hz) < 0) {
- __get_cpu_var(wd_enabled) = 0;
- return;
- }
- /* FALL THROUGH */
- case NMI_IO_APIC:
- __get_cpu_var(wd_enabled) = 1;
- atomic_inc(&nmi_active);
- }
-}
-
-void stop_apic_nmi_watchdog(void *unused)
-{
- /* only support LOCAL and IO APICs for now */
- if (!nmi_watchdog_active())
- return;
- if (__get_cpu_var(wd_enabled) == 0)
- return;
- if (nmi_watchdog == NMI_LOCAL_APIC)
- lapic_watchdog_stop();
- else
- __acpi_nmi_disable(NULL);
- __get_cpu_var(wd_enabled) = 0;
- atomic_dec(&nmi_active);
-}
-
-/*
- * the best way to detect whether a CPU has a 'hard lockup' problem
- * is to check it's local APIC timer IRQ counts. If they are not
- * changing then that CPU has some problem.
- *
- * as these watchdog NMI IRQs are generated on every CPU, we only
- * have to check the current processor.
- *
- * since NMIs don't listen to _any_ locks, we have to be extremely
- * careful not to rely on unsafe variables. The printk might lock
- * up though, so we have to break up any console locks first ...
- * [when there will be more tty-related locks, break them up here too!]
- */
-
-static DEFINE_PER_CPU(unsigned, last_irq_sum);
-static DEFINE_PER_CPU(long, alert_counter);
-static DEFINE_PER_CPU(int, nmi_touch);
-
-void touch_nmi_watchdog(void)
-{
- if (nmi_watchdog_active()) {
- unsigned cpu;
-
- /*
- * Tell other CPUs to reset their alert counters. We cannot
- * do it ourselves because the alert count increase is not
- * atomic.
- */
- for_each_present_cpu(cpu) {
- if (per_cpu(nmi_touch, cpu) != 1)
- per_cpu(nmi_touch, cpu) = 1;
- }
- }
-
- /*
- * Tickle the softlockup detector too:
- */
- touch_softlockup_watchdog();
-}
-EXPORT_SYMBOL(touch_nmi_watchdog);
-
-notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
-{
- /*
- * Since current_thread_info()-> is always on the stack, and we
- * always switch the stack NMI-atomically, it's safe to use
- * smp_processor_id().
- */
- unsigned int sum;
- int touched = 0;
- int cpu = smp_processor_id();
- int rc = 0;
-
- sum = get_timer_irqs(cpu);
-
- if (__get_cpu_var(nmi_touch)) {
- __get_cpu_var(nmi_touch) = 0;
- touched = 1;
- }
-
- /* We can be called before check_nmi_watchdog, hence NULL check. */
- if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */
-
- raw_spin_lock(&lock);
- printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
- show_regs(regs);
- dump_stack();
- raw_spin_unlock(&lock);
- cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
-
- rc = 1;
- }
-
- /* Could check oops_in_progress here too, but it's safer not to */
- if (mce_in_progress())
- touched = 1;
-
- /* if the none of the timers isn't firing, this cpu isn't doing much */
- if (!touched && __get_cpu_var(last_irq_sum) == sum) {
- /*
- * Ayiee, looks like this CPU is stuck ...
- * wait a few IRQs (5 seconds) before doing the oops ...
- */
- __this_cpu_inc(alert_counter);
- if (__this_cpu_read(alert_counter) == 5 * nmi_hz)
- /*
- * die_nmi will return ONLY if NOTIFY_STOP happens..
- */
- die_nmi("BUG: NMI Watchdog detected LOCKUP",
- regs, panic_on_timeout);
- } else {
- __get_cpu_var(last_irq_sum) = sum;
- __this_cpu_write(alert_counter, 0);
- }
-
- /* see if the nmi watchdog went off */
- if (!__get_cpu_var(wd_enabled))
- return rc;
- switch (nmi_watchdog) {
- case NMI_LOCAL_APIC:
- rc |= lapic_wd_event(nmi_hz);
- break;
- case NMI_IO_APIC:
- /*
- * don't know how to accurately check for this.
- * just assume it was a watchdog timer interrupt
- * This matches the old behaviour.
- */
- rc = 1;
- break;
- }
- return rc;
-}
-
-#ifdef CONFIG_SYSCTL
-
-static void enable_ioapic_nmi_watchdog_single(void *unused)
-{
- __get_cpu_var(wd_enabled) = 1;
- atomic_inc(&nmi_active);
- __acpi_nmi_enable(NULL);
-}
-
-static void enable_ioapic_nmi_watchdog(void)
-{
- on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1);
- touch_nmi_watchdog();
-}
-
-static void disable_ioapic_nmi_watchdog(void)
-{
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-}
-
-static int __init setup_unknown_nmi_panic(char *str)
-{
- unknown_nmi_panic = 1;
- return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
- unsigned char reason = get_nmi_reason();
- char buf[64];
-
- sprintf(buf, "NMI received for unknown reason %02x\n", reason);
- die_nmi(buf, regs, 1); /* Always panic here */
- return 0;
-}
-
-/*
- * proc handler for /proc/sys/kernel/nmi
- */
-int proc_nmi_enabled(struct ctl_table *table, int write,
- void __user *buffer, size_t *length, loff_t *ppos)
-{
- int old_state;
-
- nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
- old_state = nmi_watchdog_enabled;
- proc_dointvec(table, write, buffer, length, ppos);
- if (!!old_state == !!nmi_watchdog_enabled)
- return 0;
-
- if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) {
- printk(KERN_WARNING
- "NMI watchdog is permanently disabled\n");
- return -EIO;
- }
-
- if (nmi_watchdog == NMI_LOCAL_APIC) {
- if (nmi_watchdog_enabled)
- enable_lapic_nmi_watchdog();
- else
- disable_lapic_nmi_watchdog();
- } else if (nmi_watchdog == NMI_IO_APIC) {
- if (nmi_watchdog_enabled)
- enable_ioapic_nmi_watchdog();
- else
- disable_ioapic_nmi_watchdog();
- } else {
- printk(KERN_WARNING
- "NMI watchdog doesn't know what hardware to touch\n");
- return -EIO;
- }
- return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
- if (unknown_nmi_panic)
- return unknown_nmi_panic_callback(regs, cpu);
-#endif
- return 0;
-}
-
-void arch_trigger_all_cpu_backtrace(void)
-{
- int i;
-
- cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
-
- printk(KERN_INFO "sending NMI to all CPUs:\n");
- apic->send_IPI_all(NMI_VECTOR);
-
- /* Wait for up to 10 seconds for all CPUs to do the backtrace */
- for (i = 0; i < 10 * 1000; i++) {
- if (cpumask_empty(to_cpumask(backtrace_mask)))
- break;
- mdelay(1);
- }
-}
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index c1c52c3..bd16b58 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -48,6 +48,16 @@ unsigned int uv_apicid_hibits;
EXPORT_SYMBOL_GPL(uv_apicid_hibits);
static DEFINE_SPINLOCK(uv_nmi_lock);
+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+ unsigned long val, *mmr;
+
+ mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+ val = *mmr;
+ early_iounmap(mmr, sizeof(*mmr));
+ return val;
+}
+
static inline bool is_GRU_range(u64 start, u64 end)
{
return start >= gru_start_paddr && end <= gru_end_paddr;
@@ -58,28 +68,24 @@ static bool uv_is_untracked_pat_range(u64 start, u64 end)
return is_ISA_range(start, end) || is_GRU_range(start, end);
}
-static int early_get_nodeid(void)
+static int __init early_get_pnodeid(void)
{
union uvh_node_id_u node_id;
- unsigned long *mmr;
-
- mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_NODE_ID, sizeof(*mmr));
- node_id.v = *mmr;
- early_iounmap(mmr, sizeof(*mmr));
+ union uvh_rh_gam_config_mmr_u m_n_config;
+ int pnode;
/* Currently, all blades have same revision number */
+ node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+ m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
uv_min_hub_revision_id = node_id.s.revision;
- return node_id.s.node_id;
+ pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+ return pnode;
}
static void __init early_get_apic_pnode_shift(void)
{
- unsigned long *mmr;
-
- mmr = early_ioremap(UV_LOCAL_MMR_BASE | UVH_APICID, sizeof(*mmr));
- uvh_apicid.v = *mmr;
- early_iounmap(mmr, sizeof(*mmr));
+ uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
if (!uvh_apicid.v)
/*
* Old bios, use default value
@@ -95,21 +101,17 @@ static void __init early_get_apic_pnode_shift(void)
static void __init uv_set_apicid_hibit(void)
{
union uvh_lb_target_physical_apic_id_mask_u apicid_mask;
- unsigned long *mmr;
- mmr = early_ioremap(UV_LOCAL_MMR_BASE |
- UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK, sizeof(*mmr));
- apicid_mask.v = *mmr;
- early_iounmap(mmr, sizeof(*mmr));
+ apicid_mask.v = uv_early_read_mmr(UVH_LB_TARGET_PHYSICAL_APIC_ID_MASK);
uv_apicid_hibits = apicid_mask.s.bit_enables & UV_APICID_HIBIT_MASK;
}
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- int nodeid;
+ int pnodeid;
if (!strcmp(oem_id, "SGI")) {
- nodeid = early_get_nodeid();
+ pnodeid = early_get_pnodeid();
early_get_apic_pnode_shift();
x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
x86_platform.nmi_init = uv_nmi_init;
@@ -118,8 +120,8 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
else if (!strcmp(oem_table_id, "UVX"))
uv_system_type = UV_X2APIC;
else if (!strcmp(oem_table_id, "UVH")) {
- __get_cpu_var(x2apic_extra_bits) =
- nodeid << (uvh_apicid.s.pnode_shift - 1);
+ __this_cpu_write(x2apic_extra_bits,
+ pnodeid << uvh_apicid.s.pnode_shift);
uv_system_type = UV_NON_UNIQUE_APIC;
uv_set_apicid_hibit();
return 1;
@@ -284,7 +286,7 @@ static unsigned int x2apic_get_apic_id(unsigned long x)
unsigned int id;
WARN_ON(preemptible() && num_online_cpus() > 1);
- id = x | __get_cpu_var(x2apic_extra_bits);
+ id = x | __this_cpu_read(x2apic_extra_bits);
return id;
}
@@ -376,7 +378,7 @@ struct apic __refdata apic_x2apic_uv_x = {
static __cpuinit void set_x2apic_extra_bits(int pnode)
{
- __get_cpu_var(x2apic_extra_bits) = (pnode << 6);
+ __this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
}
/*
@@ -639,7 +641,7 @@ void __cpuinit uv_cpu_init(void)
*/
int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
{
- if (reason != DIE_NMI_IPI)
+ if (reason != DIE_NMIUNKNOWN)
return NOTIFY_OK;
if (in_crash_kexec)
@@ -682,27 +684,32 @@ void uv_nmi_init(void)
void __init uv_system_init(void)
{
union uvh_rh_gam_config_mmr_u m_n_config;
+ union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
union uvh_node_id_u node_id;
unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
- int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+ int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val, n_io;
int gnode_extra, max_pnode = 0;
unsigned long mmr_base, present, paddr;
- unsigned short pnode_mask;
+ unsigned short pnode_mask, pnode_io_mask;
map_low_mmrs();
m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
m_val = m_n_config.s.m_skt;
n_val = m_n_config.s.n_skt;
+ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
+ n_io = mmioh.s.n_io;
mmr_base =
uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
~UV_MMR_ENABLE;
pnode_mask = (1 << n_val) - 1;
+ pnode_io_mask = (1 << n_io) - 1;
+
node_id.v = uv_read_local_mmr(UVH_NODE_ID);
gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
gnode_upper = ((unsigned long)gnode_extra << m_val);
- printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n",
- n_val, m_val, gnode_upper, gnode_extra);
+ printk(KERN_INFO "UV: N %d, M %d, N_IO: %d, gnode_upper 0x%lx, gnode_extra 0x%x, pnode_mask 0x%x, pnode_io_mask 0x%x\n",
+ n_val, m_val, n_io, gnode_upper, gnode_extra, pnode_mask, pnode_io_mask);
printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base);
@@ -735,7 +742,7 @@ void __init uv_system_init(void)
for (j = 0; j < 64; j++) {
if (!test_bit(j, &present))
continue;
- pnode = (i * 64 + j);
+ pnode = (i * 64 + j) & pnode_mask;
uv_blade_info[blade].pnode = pnode;
uv_blade_info[blade].nr_possible_cpus = 0;
uv_blade_info[blade].nr_online_cpus = 0;
@@ -756,6 +763,7 @@ void __init uv_system_init(void)
/*
* apic_pnode_shift must be set before calling uv_apicid_to_pnode();
*/
+ uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
pnode = uv_apicid_to_pnode(apicid);
blade = boot_pnode_to_blade(pnode);
@@ -772,7 +780,6 @@ void __init uv_system_init(void)
uv_cpu_hub_info(cpu)->numa_blade_id = blade;
uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
uv_cpu_hub_info(cpu)->pnode = pnode;
- uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
@@ -796,7 +803,7 @@ void __init uv_system_init(void)
map_gru_high(max_pnode);
map_mmr_high(max_pnode);
- map_mmioh_high(max_pnode);
+ map_mmioh_high(max_pnode & pnode_io_mask);
uv_cpu_init();
uv_scir_register_cpu_notifier();
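
The new early_get_pnodeid() above derives the physical node number by dropping the low bit of node_id and masking the result down to the n_skt-bit node width read from the GAM config MMR. A worked example of that arithmetic, with illustrative register values rather than anything read from real hardware:

    /* hypothetical sample: node_id.s.node_id = 36, m_n_config.s.n_skt = 6 */
    unsigned int node_id = 36;
    unsigned int n_skt = 6;
    unsigned int pnode = (node_id >> 1) & ((1 << n_skt) - 1);
    /* (36 >> 1) & 63 == 18: the pnode fits in n_skt bits */
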
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9e093f8..7c7bedb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL_GPL(amd_erratum_383);
bool cpu_has_amd_erratum(const int *erratum)
{
- struct cpuinfo_x86 *cpu = &current_cpu_data;
+ struct cpuinfo_x86 *cpu = __this_cpu_ptr(&cpu_info);
int osvw_id = *erratum++;
u32 range;
u32 ms;
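
This is part of a tree-wide move from __get_cpu_var()/current_cpu_data to the __this_cpu_*() accessors, which read or update the local CPU's copy in place instead of first forming its address. A minimal sketch of the difference, using a hypothetical per-CPU variable:

    static DEFINE_PER_CPU(int, demo_count);    /* hypothetical */

    /* old style: compute this CPU's lvalue, then access it */
    int v = __get_cpu_var(demo_count);

    /* new style: single-operation local access */
    v = __this_cpu_read(demo_count);
    __this_cpu_write(demo_count, v + 1);
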
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4b68bda..1d59834 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -894,7 +894,6 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
- init_hw_perf_events();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 491977b..35c7e65 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -521,7 +521,7 @@ static void check_supported_cpu(void *_rc)
*rc = -ENODEV;
- if (current_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ if (__this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_AMD)
return;
eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
@@ -1377,7 +1377,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
static void query_values_on_cpu(void *_err)
{
int *err = _err;
- struct powernow_k8_data *data = __get_cpu_var(powernow_data);
+ struct powernow_k8_data *data = __this_cpu_read(powernow_data);
*err = query_current_values_with_pending_wait(data);
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 17ad033..7283e98 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
};
struct amd_l3_cache {
- struct pci_dev *dev;
- bool can_disable;
+ struct amd_northbridge *nb;
unsigned indices;
u8 subcaches[4];
};
@@ -266,7 +265,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
line_size = l2.line_size;
lines_per_tag = l2.lines_per_tag;
/* cpu_data has errata corrections for K7 applied */
- size_in_kb = current_cpu_data.x86_cache_size;
+ size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
break;
case 3:
if (!l3.val)
@@ -288,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
eax->split.type = types[leaf];
eax->split.level = levels[leaf];
eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+ eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
if (assoc == 0xffff)
@@ -311,14 +310,12 @@ struct _cache_attr {
/*
* L3 cache descriptors
*/
-static struct amd_l3_cache **__cpuinitdata l3_caches;
-
static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
unsigned int sc0, sc1, sc2, sc3;
u32 val = 0;
- pci_read_config_dword(l3->dev, 0x1C4, &val);
+ pci_read_config_dword(l3->nb->misc, 0x1C4, &val);
/* calculate subcache sizes */
l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}
-static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
-{
- struct amd_l3_cache *l3;
- struct pci_dev *dev = node_to_k8_nb_misc(node);
-
- l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
- if (!l3) {
- printk(KERN_WARNING "Error allocating L3 struct\n");
- return NULL;
- }
-
- l3->dev = dev;
-
- amd_calc_l3_indices(l3);
-
- return l3;
-}
-
-static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
- int index)
+static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
+ int index)
{
+ static struct amd_l3_cache *__cpuinitdata l3_caches;
int node;
- if (boot_cpu_data.x86 != 0x10)
- return;
-
- if (index < 3)
- return;
-
- /* see errata #382 and #388 */
- if (boot_cpu_data.x86_model < 0x8)
- return;
-
- if ((boot_cpu_data.x86_model == 0x8 ||
- boot_cpu_data.x86_model == 0x9)
- &&
- boot_cpu_data.x86_mask < 0x1)
- return;
-
- /* not in virtualized environments */
- if (k8_northbridges.num == 0)
+ /* only for L3, and not in virtualized environments */
+ if (index < 3 || amd_nb_num() == 0)
return;
/*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
* never freed but this is done only on shutdown so it doesn't matter.
*/
if (!l3_caches) {
- int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
+ int size = amd_nb_num() * sizeof(struct amd_l3_cache);
l3_caches = kzalloc(size, GFP_ATOMIC);
if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
node = amd_get_nb_id(smp_processor_id());
- if (!l3_caches[node]) {
- l3_caches[node] = amd_init_l3_cache(node);
- l3_caches[node]->can_disable = true;
+ if (!l3_caches[node].nb) {
+ l3_caches[node].nb = node_to_amd_nb(node);
+ amd_calc_l3_indices(&l3_caches[node]);
}
- WARN_ON(!l3_caches[node]);
-
- this_leaf->l3 = l3_caches[node];
+ this_leaf->l3 = &l3_caches[node];
}
/*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
unsigned int reg = 0;
- pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
+ pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);
/* check whether this slot is activated already */
if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
{
int index;
- if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+ if (!this_leaf->l3 ||
+ !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
return -EINVAL;
index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
if (!l3->subcaches[i])
continue;
- pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+ pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
/*
* We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
wbinvd_on_cpu(cpu);
reg |= BIT(31);
- pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
+ pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
}
}
@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (!this_leaf->l3 || !this_leaf->l3->can_disable)
+ if (!this_leaf->l3 ||
+ !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
return -EINVAL;
cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -545,7 +509,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
#define STORE_CACHE_DISABLE(slot) \
static ssize_t \
store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
- const char *buf, size_t count) \
+ const char *buf, size_t count) \
{ \
return store_cache_disable(this_leaf, buf, count, slot); \
}
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
show_cache_disable_1, store_cache_disable_1);
#else /* CONFIG_AMD_NB */
-static void __cpuinit
-amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
-{
-};
+#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */
static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,
if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
amd_cpuid4(index, &eax, &ebx, &ecx);
- amd_check_l3_disable(this_leaf, index);
+ amd_init_l3_cache(this_leaf, index);
} else {
cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
}
@@ -983,30 +944,48 @@ define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);
-#define DEFAULT_SYSFS_CACHE_ATTRS \
- &type.attr, \
- &level.attr, \
- &coherency_line_size.attr, \
- &physical_line_partition.attr, \
- &ways_of_associativity.attr, \
- &number_of_sets.attr, \
- &size.attr, \
- &shared_cpu_map.attr, \
- &shared_cpu_list.attr
-
static struct attribute *default_attrs[] = {
- DEFAULT_SYSFS_CACHE_ATTRS,
+ &type.attr,
+ &level.attr,
+ &coherency_line_size.attr,
+ &physical_line_partition.attr,
+ &ways_of_associativity.attr,
+ &number_of_sets.attr,
+ &size.attr,
+ &shared_cpu_map.attr,
+ &shared_cpu_list.attr,
NULL
};
-static struct attribute *default_l3_attrs[] = {
- DEFAULT_SYSFS_CACHE_ATTRS,
#ifdef CONFIG_AMD_NB
- &cache_disable_0.attr,
- &cache_disable_1.attr,
+static struct attribute ** __cpuinit amd_l3_attrs(void)
+{
+ static struct attribute **attrs;
+ int n;
+
+ if (attrs)
+ return attrs;
+
+ n = sizeof(default_attrs) / sizeof(struct attribute *);
+
+ if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
+ n += 2;
+
+ attrs = kzalloc(n * sizeof(struct attribute *), GFP_KERNEL);
+ if (attrs == NULL)
+ return attrs = default_attrs;
+
+ for (n = 0; default_attrs[n]; n++)
+ attrs[n] = default_attrs[n];
+
+ if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
+ attrs[n++] = &cache_disable_0.attr;
+ attrs[n++] = &cache_disable_1.attr;
+ }
+
+ return attrs;
+}
#endif
- NULL
-};
static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
this_leaf = CPUID4_INFO_IDX(cpu, i);
- if (this_leaf->l3 && this_leaf->l3->can_disable)
- ktype_cache.default_attrs = default_l3_attrs;
- else
- ktype_cache.default_attrs = default_attrs;
-
+ ktype_cache.default_attrs = default_attrs;
+#ifdef CONFIG_AMD_NB
+ if (this_leaf->l3)
+ ktype_cache.default_attrs = amd_l3_attrs();
+#endif
retval = kobject_init_and_add(&(this_object->kobj),
&ktype_cache,
per_cpu(ici_cache_kobject, cpu),
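
amd_l3_attrs() replaces the two static attribute arrays with one list built lazily at first call, sized according to whether the L3 index-disable feature is present. The general shape of such a NULL-terminated, build-once sysfs attribute list, reduced to a sketch with hypothetical base/extra arrays:

    static struct attribute **build_attrs(struct attribute **base,
                                          struct attribute **extra)
    {
        static struct attribute **attrs;
        int n, i;

        if (attrs)                      /* already built */
            return attrs;
        for (n = 0; base[n]; n++)
            ;
        for (i = 0; extra[i]; i++)
            ;
        attrs = kzalloc((n + i + 1) * sizeof(*attrs), GFP_KERNEL);
        if (!attrs)
            return base;                /* degrade to the static list */
        for (n = 0; base[n]; n++)
            attrs[n] = base[n];
        for (i = 0; extra[i]; i++)
            attrs[n + i] = extra[i];
        return attrs;                   /* trailing slot stays NULL */
    }
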
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7..a779719 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
#include <linux/gfp.h>
#include <asm/mce.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
struct die_args *args = (struct die_args *)data;
int cpu = smp_processor_id();
struct mce *m = &__get_cpu_var(injectm);
- if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+ if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
return NOTIFY_DONE;
cpumask_clear_cpu(cpu, mce_inject_cpumask);
if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
static struct notifier_block mce_raise_nb = {
.notifier_call = mce_raise_notify,
- .priority = 1000,
+ .priority = NMI_LOCAL_NORMAL_PRIOR,
};
/* Inject mce on current CPU */
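
With DIE_NMI_IPI folded into DIE_NMI elsewhere in this series, NMI consumers are ordered purely by notifier priority, so the bare 1000 becomes the symbolic NMI_LOCAL_NORMAL_PRIOR. A hedged sketch of the registration pattern (the handler body here is hypothetical):

    static int demo_nmi_notify(struct notifier_block *nb,
                               unsigned long reason, void *data)
    {
        if (reason != DIE_NMI)
            return NOTIFY_DONE;     /* not an NMI we care about */
        /* ... inspect and handle the NMI here ... */
        return NOTIFY_STOP;         /* claimed; stop the call chain */
    }

    static struct notifier_block demo_nmi_nb = {
        .notifier_call = demo_nmi_notify,
        .priority      = NMI_LOCAL_NORMAL_PRIOR,
    };

    /* somewhere in an init path: */
    register_die_notifier(&demo_nmi_nb);
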
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 7a35b72..d916183 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -326,7 +326,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr)
{
- unsigned bank = __get_cpu_var(injectm.bank);
+ unsigned bank = __this_cpu_read(injectm.bank);
if (msr == rip_msr)
return offsetof(struct mce, ip);
@@ -346,7 +346,7 @@ static u64 mce_rdmsrl(u32 msr)
{
u64 v;
- if (__get_cpu_var(injectm).finished) {
+ if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
if (offset < 0)
@@ -369,7 +369,7 @@ static u64 mce_rdmsrl(u32 msr)
static void mce_wrmsrl(u32 msr, u64 v)
{
- if (__get_cpu_var(injectm).finished) {
+ if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
if (offset >= 0)
@@ -1159,7 +1159,7 @@ static void mce_start_timer(unsigned long data)
WARN_ON(smp_processor_id() != data);
- if (mce_available(&current_cpu_data)) {
+ if (mce_available(__this_cpu_ptr(&cpu_info))) {
machine_check_poll(MCP_TIMESTAMP,
&__get_cpu_var(mce_poll_banks));
}
@@ -1767,7 +1767,7 @@ static int mce_shutdown(struct sys_device *dev)
static int mce_resume(struct sys_device *dev)
{
__mcheck_cpu_init_generic();
- __mcheck_cpu_init_vendor(&current_cpu_data);
+ __mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
return 0;
}
@@ -1775,7 +1775,7 @@ static int mce_resume(struct sys_device *dev)
static void mce_cpu_restart(void *data)
{
del_timer_sync(&__get_cpu_var(mce_timer));
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
@@ -1790,7 +1790,7 @@ static void mce_restart(void)
/* Toggle features for corrected errors */
static void mce_disable_ce(void *all)
{
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (all)
del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1799,7 +1799,7 @@ static void mce_disable_ce(void *all)
static void mce_enable_ce(void *all)
{
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
cmci_reenable();
cmci_recheck();
@@ -2022,7 +2022,7 @@ static void __cpuinit mce_disable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
@@ -2040,7 +2040,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
unsigned long action = *(unsigned long *)h;
int i;
- if (!mce_available(&current_cpu_data))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)))
return;
if (!(action & CPU_TASKS_FROZEN))
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 80c4823..5bf2fac 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
#include <asm/mce.h>
#include <asm/msr.h>
-#define PFX "mce_threshold: "
-#define VERSION "version 1.1.1"
#define NR_BANKS 6
#define NR_BLOCKS 9
#define THRESHOLD_MAX 0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
struct list_head miscj;
};
-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
- .interrupt_enable = 0,
- .threshold_limit = THRESHOLD_MAX,
-};
-
struct threshold_bank {
struct kobject *kobj;
struct threshold_block *blocks;
@@ -89,50 +81,101 @@ static void amd_threshold_interrupt(void);
struct thresh_restart {
struct threshold_block *b;
int reset;
+ int set_lvt_off;
+ int lvt_off;
u16 old_limit;
};
+static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
+{
+ int msr = (hi & MASK_LVTOFF_HI) >> 20;
+
+ if (apic < 0) {
+ pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
+ "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
+ b->bank, b->block, b->address, hi, lo);
+ return 0;
+ }
+
+ if (apic != msr) {
+ pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
+ "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
+ b->cpu, apic, b->bank, b->block, b->address, hi, lo);
+ return 0;
+ }
+
+ return 1;
+}
+
/* must be called with correct cpu affinity */
/* Called via smp_call_function_single() */
static void threshold_restart_bank(void *_tr)
{
struct thresh_restart *tr = _tr;
- u32 mci_misc_hi, mci_misc_lo;
+ u32 hi, lo;
- rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+ rdmsr(tr->b->address, lo, hi);
- if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+ if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
tr->reset = 1; /* limit cannot be lower than err count */
if (tr->reset) { /* reset err count and overflow bit */
- mci_misc_hi =
- (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+ hi =
+ (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
(THRESHOLD_MAX - tr->b->threshold_limit);
} else if (tr->old_limit) { /* change limit w/o reset */
- int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+ int new_count = (hi & THRESHOLD_MAX) +
(tr->old_limit - tr->b->threshold_limit);
- mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+ hi = (hi & ~MASK_ERR_COUNT_HI) |
(new_count & THRESHOLD_MAX);
}
+ if (tr->set_lvt_off) {
+ if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
+ /* set new lvt offset */
+ hi &= ~MASK_LVTOFF_HI;
+ hi |= tr->lvt_off << 20;
+ }
+ }
+
tr->b->interrupt_enable ?
- (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
- (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+ (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+ (hi &= ~MASK_INT_TYPE_HI);
- mci_misc_hi |= MASK_COUNT_EN_HI;
- wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+ hi |= MASK_COUNT_EN_HI;
+ wrmsr(tr->b->address, lo, hi);
+}
+
+static void mce_threshold_block_init(struct threshold_block *b, int offset)
+{
+ struct thresh_restart tr = {
+ .b = b,
+ .set_lvt_off = 1,
+ .lvt_off = offset,
+ };
+
+ b->threshold_limit = THRESHOLD_MAX;
+ threshold_restart_bank(&tr);
+}
+
+static int setup_APIC_mce(int reserved, int new)
+{
+ if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
+ APIC_EILVT_MSG_FIX, 0))
+ return new;
+
+ return reserved;
}
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
+ struct threshold_block b;
unsigned int cpu = smp_processor_id();
u32 low = 0, high = 0, address = 0;
unsigned int bank, block;
- struct thresh_restart tr;
- int lvt_off = -1;
- u8 offset;
+ int offset = -1;
for (bank = 0; bank < NR_BANKS; ++bank) {
for (block = 0; block < NR_BLOCKS; ++block) {
@@ -163,39 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
if (shared_bank[bank] && c->cpu_core_id)
break;
#endif
- offset = (high & MASK_LVTOFF_HI) >> 20;
- if (lvt_off < 0) {
- if (setup_APIC_eilvt(offset,
- THRESHOLD_APIC_VECTOR,
- APIC_EILVT_MSG_FIX, 0)) {
- pr_err(FW_BUG "cpu %d, failed to "
- "setup threshold interrupt "
- "for bank %d, block %d "
- "(MSR%08X=0x%x%08x)",
- smp_processor_id(), bank, block,
- address, high, low);
- continue;
- }
- lvt_off = offset;
- } else if (lvt_off != offset) {
- pr_err(FW_BUG "cpu %d, invalid threshold "
- "interrupt offset %d for bank %d,"
- "block %d (MSR%08X=0x%x%08x)",
- smp_processor_id(), lvt_off, bank,
- block, address, high, low);
- continue;
- }
-
- high &= ~MASK_LVTOFF_HI;
- high |= lvt_off << 20;
- wrmsr(address, low, high);
+ offset = setup_APIC_mce(offset,
+ (high & MASK_LVTOFF_HI) >> 20);
- threshold_defaults.address = address;
- tr.b = &threshold_defaults;
- tr.reset = 0;
- tr.old_limit = 0;
- threshold_restart_bank(&tr);
+ memset(&b, 0, sizeof(b));
+ b.cpu = cpu;
+ b.bank = bank;
+ b.block = block;
+ b.address = address;
+ mce_threshold_block_init(&b, offset);
mce_threshold_vector = amd_threshold_interrupt;
}
}
@@ -298,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
b->interrupt_enable = !!new;
+ memset(&tr, 0, sizeof(tr));
tr.b = b;
- tr.reset = 0;
- tr.old_limit = 0;
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
@@ -321,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
if (new < 1)
new = 1;
+ memset(&tr, 0, sizeof(tr));
tr.old_limit = b->threshold_limit;
b->threshold_limit = new;
tr.b = b;
- tr.reset = 0;
smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
@@ -603,9 +622,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
continue;
err = threshold_create_bank(cpu, bank);
if (err)
- goto out;
+ return err;
}
-out:
+
return err;
}
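
setup_APIC_mce() condenses the old first-caller-claims logic: the EILVT entry is programmed at most once, every later block simply inherits the already-reserved offset (or the failure), and lvt_off_valid() reports firmware bugs afterwards. The claim-once idiom in isolation, with a hypothetical reservation primitive standing in for setup_APIC_eilvt():

    /* A negative 'reserved' means nothing has been claimed yet. */
    static int claim_once(int reserved, int new)
    {
        if (reserved < 0 && try_claim(new) == 0)   /* try_claim: hypothetical */
            return new;         /* first successful claim wins */
        return reserved;        /* otherwise keep the earlier result */
    }
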
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 6fcd093..8694ef56 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ void cmci_recheck(void)
unsigned long flags;
int banks;
- if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+ if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
return;
local_irq_save(flags);
machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 4b68326..e12246f 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,13 @@ struct thermal_state {
struct _thermal_state core_power_limit;
struct _thermal_state package_throttle;
struct _thermal_state package_power_limit;
+ struct _thermal_state core_thresh0;
+ struct _thermal_state core_thresh1;
};
+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+
static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -200,6 +205,22 @@ static int therm_throt_process(bool new_event, int event, int level)
return 0;
}
+static int thresh_event_valid(int event)
+{
+ struct _thermal_state *state;
+ unsigned int this_cpu = smp_processor_id();
+ struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+ u64 now = get_jiffies_64();
+
+ state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+ if (time_before64(now, state->next_check))
+ return 0;
+
+ state->next_check = now + CHECK_INTERVAL;
+ return 1;
+}
+
#ifdef CONFIG_SYSFS
/* Add/Remove thermal_throttle interface for CPU device: */
static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,6 +334,22 @@ device_initcall(thermal_throttle_init_device);
#define PACKAGE_THROTTLED ((__u64)2 << 62)
#define PACKAGE_POWER_LIMIT ((__u64)3 << 62)
+static void notify_thresholds(__u64 msr_val)
+{
+ /* check whether the interrupt handler is defined;
+ * otherwise simply return
+ */
+ if (!platform_thermal_notify)
+ return;
+
+ /* lower threshold reached */
+ if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+ platform_thermal_notify(msr_val);
+ /* higher threshold reached */
+ if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+ platform_thermal_notify(msr_val);
+}
+
/* Thermal transition interrupt handler */
static void intel_thermal_interrupt(void)
{
@@ -321,6 +358,9 @@ static void intel_thermal_interrupt(void)
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
+ /* Check for violation of core thermal thresholds */
+ notify_thresholds(msr_val);
+
if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
CORE_LEVEL) != 0)
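
thresh_event_valid() applies the file's existing CHECK_INTERVAL throttling to the new platform_thermal_notify() callback, keeping one next_check deadline per threshold per CPU. The rate-limiting idiom on its own, as a minimal sketch:

    /* Forward an event at most once per CHECK_INTERVAL jiffies. */
    static bool event_due(u64 *next_check)
    {
        u64 now = get_jiffies_64();

        if (time_before64(now, *next_check))
            return false;               /* still inside the quiet window */
        *next_check = now + CHECK_INTERVAL;
        return true;
    }
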
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 6d75b91..9d977a2 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -330,9 +330,6 @@ static bool reserve_pmc_hardware(void)
{
int i;
- if (nmi_watchdog == NMI_LOCAL_APIC)
- disable_lapic_nmi_watchdog();
-
for (i = 0; i < x86_pmu.num_counters; i++) {
if (!reserve_perfctr_nmi(x86_pmu.perfctr + i))
goto perfctr_fail;
@@ -355,9 +352,6 @@ perfctr_fail:
for (i--; i >= 0; i--)
release_perfctr_nmi(x86_pmu.perfctr + i);
- if (nmi_watchdog == NMI_LOCAL_APIC)
- enable_lapic_nmi_watchdog();
-
return false;
}
@@ -369,9 +363,6 @@ static void release_pmc_hardware(void)
release_perfctr_nmi(x86_pmu.perfctr + i);
release_evntsel_nmi(x86_pmu.eventsel + i);
}
-
- if (nmi_watchdog == NMI_LOCAL_APIC)
- enable_lapic_nmi_watchdog();
}
#else
@@ -384,15 +375,53 @@ static void release_pmc_hardware(void) {}
static bool check_hw_exists(void)
{
u64 val, val_new = 0;
- int ret = 0;
+ int i, reg, ret = 0;
+
+ /*
+ * Check to see if the BIOS enabled any of the counters; if so,
+ * complain and bail.
+ */
+ for (i = 0; i < x86_pmu.num_counters; i++) {
+ reg = x86_pmu.eventsel + i;
+ ret = rdmsrl_safe(reg, &val);
+ if (ret)
+ goto msr_fail;
+ if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
+ goto bios_fail;
+ }
+ if (x86_pmu.num_counters_fixed) {
+ reg = MSR_ARCH_PERFMON_FIXED_CTR_CTRL;
+ ret = rdmsrl_safe(reg, &val);
+ if (ret)
+ goto msr_fail;
+ for (i = 0; i < x86_pmu.num_counters_fixed; i++) {
+ if (val & (0x03 << i*4))
+ goto bios_fail;
+ }
+ }
+
+ /*
+ * Now write a value and read it back to see if it matches;
+ * this is needed to detect certain hardware emulators (qemu/kvm)
+ * that don't trap on the MSR access and always return 0s.
+ */
val = 0xabcdUL;
- ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+ ret = checking_wrmsrl(x86_pmu.perfctr, val);
ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
if (ret || val != val_new)
- return false;
+ goto msr_fail;
return true;
+
+bios_fail:
+ printk(KERN_CONT "Broken BIOS detected, using software events only.\n");
+ printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val);
+ return false;
+
+msr_fail:
+ printk(KERN_CONT "Broken PMU hardware detected, using software events only.\n");
+ return false;
}
static void reserve_ds_buffers(void);
@@ -451,7 +480,7 @@ static int x86_setup_perfctr(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
u64 config;
- if (!hwc->sample_period) {
+ if (!is_sampling_event(event)) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);
@@ -968,8 +997,7 @@ x86_perf_event_set_period(struct perf_event *event)
static void x86_pmu_enable_event(struct perf_event *event)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- if (cpuc->enabled)
+ if (__this_cpu_read(cpu_hw_events.enabled))
__x86_pmu_enable_event(&event->hw,
ARCH_PERFMON_EVENTSEL_ENABLE);
}
@@ -1239,11 +1267,10 @@ perf_event_nmi_handler(struct notifier_block *self,
switch (cmd) {
case DIE_NMI:
- case DIE_NMI_IPI:
break;
case DIE_NMIUNKNOWN:
this_nmi = percpu_read(irq_stat.__nmi_count);
- if (this_nmi != __get_cpu_var(pmu_nmi).marked)
+ if (this_nmi != __this_cpu_read(pmu_nmi.marked))
/* let the kernel handle the unknown nmi */
return NOTIFY_DONE;
/*
@@ -1267,8 +1294,8 @@ perf_event_nmi_handler(struct notifier_block *self,
this_nmi = percpu_read(irq_stat.__nmi_count);
if ((handled > 1) ||
/* the next nmi could be a back-to-back nmi */
- ((__get_cpu_var(pmu_nmi).marked == this_nmi) &&
- (__get_cpu_var(pmu_nmi).handled > 1))) {
+ ((__this_cpu_read(pmu_nmi.marked) == this_nmi) &&
+ (__this_cpu_read(pmu_nmi.handled) > 1))) {
/*
* We could have two subsequent back-to-back nmis: The
* first handles more than one counter, the 2nd
@@ -1279,8 +1306,8 @@ perf_event_nmi_handler(struct notifier_block *self,
* handling more than one counter. We will mark the
* next (3rd) and then drop it if unhandled.
*/
- __get_cpu_var(pmu_nmi).marked = this_nmi + 1;
- __get_cpu_var(pmu_nmi).handled = handled;
+ __this_cpu_write(pmu_nmi.marked, this_nmi + 1);
+ __this_cpu_write(pmu_nmi.handled, handled);
}
return NOTIFY_STOP;
@@ -1289,7 +1316,7 @@ perf_event_nmi_handler(struct notifier_block *self,
static __read_mostly struct notifier_block perf_event_nmi_notifier = {
.notifier_call = perf_event_nmi_handler,
.next = NULL,
- .priority = 1
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
static struct event_constraint unconstrained;
@@ -1362,7 +1389,7 @@ static void __init pmu_check_apic(void)
pr_info("no hardware sampling interrupt available.\n");
}
-void __init init_hw_perf_events(void)
+int __init init_hw_perf_events(void)
{
struct event_constraint *c;
int err;
@@ -1377,20 +1404,18 @@ void __init init_hw_perf_events(void)
err = amd_pmu_init();
break;
default:
- return;
+ return 0;
}
if (err != 0) {
pr_cont("no PMU driver, software events only.\n");
- return;
+ return 0;
}
pmu_check_apic();
/* sanity check that the hardware exists or is emulated */
- if (!check_hw_exists()) {
- pr_cont("Broken PMU hardware detected, software events only.\n");
- return;
- }
+ if (!check_hw_exists())
+ return 0;
pr_cont("%s PMU driver.\n", x86_pmu.name);
@@ -1438,9 +1463,12 @@ void __init init_hw_perf_events(void)
pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed);
pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl);
- perf_pmu_register(&pmu);
+ perf_pmu_register(&pmu, "cpu", PERF_TYPE_RAW);
perf_cpu_notifier(x86_pmu_notifier);
+
+ return 0;
}
+early_initcall(init_hw_perf_events);
static inline void x86_pmu_read(struct perf_event *event)
{
@@ -1454,11 +1482,9 @@ static inline void x86_pmu_read(struct perf_event *event)
*/
static void x86_pmu_start_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
perf_pmu_disable(pmu);
- cpuc->group_flag |= PERF_EVENT_TXN;
- cpuc->n_txn = 0;
+ __this_cpu_or(cpu_hw_events.group_flag, PERF_EVENT_TXN);
+ __this_cpu_write(cpu_hw_events.n_txn, 0);
}
/*
@@ -1468,14 +1494,12 @@ static void x86_pmu_start_txn(struct pmu *pmu)
*/
static void x86_pmu_cancel_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-
- cpuc->group_flag &= ~PERF_EVENT_TXN;
+ __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN);
/*
* Truncate the collected events.
*/
- cpuc->n_added -= cpuc->n_txn;
- cpuc->n_events -= cpuc->n_txn;
+ __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn));
+ __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn));
perf_pmu_enable(pmu);
}
@@ -1686,7 +1710,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
perf_callchain_store(entry, regs->ip);
- dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
+ dump_trace(NULL, regs, NULL, &backtrace_ops, entry);
}
#ifdef CONFIG_COMPAT
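
Rather than being called from identify_boot_cpu() (see the common.c hunk above), PMU setup now runs as an early_initcall, which is why init_hw_perf_events() grows an int return; returning 0 even when no PMU is usable keeps boot going with software events only. The conversion pattern, on a hypothetical init function:

    static int __init demo_init(void)
    {
        if (!demo_hw_present())     /* demo_hw_present: hypothetical probe */
            return 0;               /* not fatal: just skip the feature */
        /* ... register counters, notifiers, etc. ... */
        return 0;
    }
    early_initcall(demo_init);
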
diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c
index e421b8c..67e2202 100644
--- a/arch/x86/kernel/cpu/perf_event_amd.c
+++ b/arch/x86/kernel/cpu/perf_event_amd.c
@@ -1,7 +1,5 @@
#ifdef CONFIG_CPU_SUP_AMD
-static DEFINE_RAW_SPINLOCK(amd_nb_lock);
-
static __initconst const u64 amd_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -275,7 +273,7 @@ done:
return &emptyconstraint;
}
-static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
+static struct amd_nb *amd_alloc_nb(int cpu)
{
struct amd_nb *nb;
int i;
@@ -285,7 +283,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
if (!nb)
return NULL;
- nb->nb_id = nb_id;
+ nb->nb_id = -1;
/*
* initialize all possible NB constraints
@@ -306,7 +304,7 @@ static int amd_pmu_cpu_prepare(int cpu)
if (boot_cpu_data.x86_max_cores < 2)
return NOTIFY_OK;
- cpuc->amd_nb = amd_alloc_nb(cpu, -1);
+ cpuc->amd_nb = amd_alloc_nb(cpu);
if (!cpuc->amd_nb)
return NOTIFY_BAD;
@@ -325,8 +323,6 @@ static void amd_pmu_cpu_starting(int cpu)
nb_id = amd_get_nb_id(cpu);
WARN_ON_ONCE(nb_id == BAD_APICID);
- raw_spin_lock(&amd_nb_lock);
-
for_each_online_cpu(i) {
nb = per_cpu(cpu_hw_events, i).amd_nb;
if (WARN_ON_ONCE(!nb))
@@ -341,8 +337,6 @@ static void amd_pmu_cpu_starting(int cpu)
cpuc->amd_nb->nb_id = nb_id;
cpuc->amd_nb->refcnt++;
-
- raw_spin_unlock(&amd_nb_lock);
}
static void amd_pmu_cpu_dead(int cpu)
@@ -354,8 +348,6 @@ static void amd_pmu_cpu_dead(int cpu)
cpuhw = &per_cpu(cpu_hw_events, cpu);
- raw_spin_lock(&amd_nb_lock);
-
if (cpuhw->amd_nb) {
struct amd_nb *nb = cpuhw->amd_nb;
@@ -364,8 +356,6 @@ static void amd_pmu_cpu_dead(int cpu)
cpuhw->amd_nb = NULL;
}
-
- raw_spin_unlock(&amd_nb_lock);
}
static __initconst const struct x86_pmu amd_pmu = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c08..008835c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -649,7 +649,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
if (unlikely(hwc->idx == X86_PMC_IDX_FIXED_BTS)) {
- if (!__get_cpu_var(cpu_hw_events).enabled)
+ if (!__this_cpu_read(cpu_hw_events.enabled))
return;
intel_pmu_enable_bts(hwc->config);
@@ -679,7 +679,7 @@ static int intel_pmu_save_and_restart(struct perf_event *event)
static void intel_pmu_reset(void)
{
- struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds;
+ struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
unsigned long flags;
int idx;
@@ -816,6 +816,32 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;
+ if (event->attr.precise_ip &&
+ (event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
+ /*
+ * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
+ * (0x003c) so that we can use it with PEBS.
+ *
+ * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
+ * PEBS capable. However we can use INST_RETIRED.ANY_P
+ * (0x00c0), which is a PEBS capable event, to get the same
+ * count.
+ *
+ * INST_RETIRED.ANY_P counts the number of cycles that retire
+ * CNTMASK instructions. By setting CNTMASK to a value (16)
+ * larger than the maximum number of instructions that can be
+ * retired per cycle (4) and then inverting the condition, we
+ * count all cycles that retire 16 or fewer instructions, which
+ * is every cycle.
+ *
+ * Thereby we gain a PEBS capable cycle counter.
+ */
+ u64 alt_config = 0x108000c0; /* INST_RETIRED.TOTAL_CYCLES */
+
+ alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
+ event->hw.config = alt_config;
+ }
+
if (event->attr.type != PERF_TYPE_RAW)
return 0;
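
The magic 0x108000c0 above decomposes along the architectural EVNTSEL layout: event select 0xc0 (INST_RETIRED.ANY_P) in bits 0-7, the invert flag in bit 23, and a counter mask of 16 in bits 24-31. Spelled out:

    /* 0x000000c0   event select: INST_RETIRED.ANY_P
     * 0x00800000   INV, bit 23: invert the cmask comparison
     * 0x10000000   CMASK = 16, bits 24-31
     * ----------
     * 0x108000c0   "INST_RETIRED.TOTAL_CYCLES"
     */
    u64 alt_config = 0xc0 | (1ULL << 23) | (16ULL << 24);
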
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index 81400b9..e56b9bf 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -753,19 +753,21 @@ out:
static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
{
- int overflow = 0;
- u32 low, high;
+ u64 v;
- rdmsr(hwc->config_base + hwc->idx, low, high);
-
- /* we need to check high bit for unflagged overflows */
- if ((low & P4_CCCR_OVF) || !(high & (1 << 31))) {
- overflow = 1;
- (void)checking_wrmsrl(hwc->config_base + hwc->idx,
- ((u64)low) & ~P4_CCCR_OVF);
+ /* the official way to indicate an overflow */
+ rdmsrl(hwc->config_base + hwc->idx, v);
+ if (v & P4_CCCR_OVF) {
+ wrmsrl(hwc->config_base + hwc->idx, v & ~P4_CCCR_OVF);
+ return 1;
}
- return overflow;
+ /* it might be an unflagged overflow */
+ rdmsrl(hwc->event_base + hwc->idx, v);
+ if (!(v & ARCH_P4_CNTRVAL_MASK))
+ return 1;
+
+ return 0;
}
static void p4_pmu_disable_pebs(void)
@@ -1152,9 +1154,9 @@ static __initconst const struct x86_pmu p4_pmu = {
*/
.num_counters = ARCH_P4_MAX_CCCR,
.apic = 1,
- .cntval_bits = 40,
- .cntval_mask = (1ULL << 40) - 1,
- .max_period = (1ULL << 39) - 1,
+ .cntval_bits = ARCH_P4_CNTRVAL_BITS,
+ .cntval_mask = ARCH_P4_CNTRVAL_MASK,
+ .max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
/*
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index d9f4ff8..d5a2366 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -16,32 +16,12 @@
#include <linux/kernel.h>
#include <linux/bitops.h>
#include <linux/smp.h>
-#include <linux/nmi.h>
+#include <asm/nmi.h>
#include <linux/kprobes.h>
#include <asm/apic.h>
#include <asm/perf_event.h>
-struct nmi_watchdog_ctlblk {
- unsigned int cccr_msr;
- unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
- unsigned int evntsel_msr; /* the MSR to select the events to handle */
-};
-
-/* Interface defining a CPU specific perfctr watchdog */
-struct wd_ops {
- int (*reserve)(void);
- void (*unreserve)(void);
- int (*setup)(unsigned nmi_hz);
- void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz);
- void (*stop)(void);
- unsigned perfctr;
- unsigned evntsel;
- u64 checkbit;
-};
-
-static const struct wd_ops *wd_ops;
-
/*
* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
* offset from MSR_P4_BSU_ESCR0.
@@ -60,8 +40,6 @@ static const struct wd_ops *wd_ops;
static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS);
static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS);
-static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
-
/* converts an msr to an appropriate reservation bit */
static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
{
@@ -172,623 +150,3 @@ void release_evntsel_nmi(unsigned int msr)
clear_bit(counter, evntsel_nmi_owner);
}
EXPORT_SYMBOL(release_evntsel_nmi);
-
-void disable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- if (atomic_read(&nmi_active) <= 0)
- return;
-
- on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
-
- if (wd_ops)
- wd_ops->unreserve();
-
- BUG_ON(atomic_read(&nmi_active) != 0);
-}
-
-void enable_lapic_nmi_watchdog(void)
-{
- BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
-
- /* are we already enabled */
- if (atomic_read(&nmi_active) != 0)
- return;
-
- /* are we lapic aware */
- if (!wd_ops)
- return;
- if (!wd_ops->reserve()) {
- printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n");
- return;
- }
-
- on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
- touch_nmi_watchdog();
-}
-
-/*
- * Activate the NMI watchdog via the local APIC.
- */
-
-static unsigned int adjust_for_32bit_ctr(unsigned int hz)
-{
- u64 counter_val;
- unsigned int retval = hz;
-
- /*
- * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter
- * are writable, with higher bits sign extending from bit 31.
- * So, we can only program the counter with 31 bit values and
- * 32nd bit should be 1, for 33.. to be 1.
- * Find the appropriate nmi_hz
- */
- counter_val = (u64)cpu_khz * 1000;
- do_div(counter_val, retval);
- if (counter_val > 0x7fffffffULL) {
- u64 count = (u64)cpu_khz * 1000;
- do_div(count, 0x7fffffffUL);
- retval = count + 1;
- }
- return retval;
-}
-
-static void write_watchdog_counter(unsigned int perfctr_msr,
- const char *descr, unsigned nmi_hz)
-{
- u64 count = (u64)cpu_khz * 1000;
-
- do_div(count, nmi_hz);
- if (descr)
- pr_debug("setting %s to -0x%08Lx\n", descr, count);
- wrmsrl(perfctr_msr, 0 - count);
-}
-
-static void write_watchdog_counter32(unsigned int perfctr_msr,
- const char *descr, unsigned nmi_hz)
-{
- u64 count = (u64)cpu_khz * 1000;
-
- do_div(count, nmi_hz);
- if (descr)
- pr_debug("setting %s to -0x%08Lx\n", descr, count);
- wrmsr(perfctr_msr, (u32)(-count), 0);
-}
-
-/*
- * AMD K7/K8/Family10h/Family11h support.
- * AMD keeps this interface nicely stable so there is not much variety
- */
-#define K7_EVNTSEL_ENABLE (1 << 22)
-#define K7_EVNTSEL_INT (1 << 20)
-#define K7_EVNTSEL_OS (1 << 17)
-#define K7_EVNTSEL_USR (1 << 16)
-#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
-#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
-
-static int setup_k7_watchdog(unsigned nmi_hz)
-{
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- perfctr_msr = wd_ops->perfctr;
- evntsel_msr = wd_ops->evntsel;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = K7_EVNTSEL_INT
- | K7_EVNTSEL_OS
- | K7_EVNTSEL_USR
- | K7_NMI_EVENT;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
-
- /* initialize the wd struct before enabling */
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; /* unused */
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= K7_EVNTSEL_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- return 1;
-}
-
-static void single_msr_stop_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int single_msr_reserve(void)
-{
- if (!reserve_perfctr_nmi(wd_ops->perfctr))
- return 0;
-
- if (!reserve_evntsel_nmi(wd_ops->evntsel)) {
- release_perfctr_nmi(wd_ops->perfctr);
- return 0;
- }
- return 1;
-}
-
-static void single_msr_unreserve(void)
-{
- release_evntsel_nmi(wd_ops->evntsel);
- release_perfctr_nmi(wd_ops->perfctr);
-}
-
-static void __kprobes
-single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
- /* start the cycle over again */
- write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops k7_wd_ops = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_k7_watchdog,
- .rearm = single_msr_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_K7_PERFCTR0,
- .evntsel = MSR_K7_EVNTSEL0,
- .checkbit = 1ULL << 47,
-};
-
-/*
- * Intel Model 6 (PPro+,P2,P3,P-M,Core1)
- */
-#define P6_EVNTSEL0_ENABLE (1 << 22)
-#define P6_EVNTSEL_INT (1 << 20)
-#define P6_EVNTSEL_OS (1 << 17)
-#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
-
-static int setup_p6_watchdog(unsigned nmi_hz)
-{
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- perfctr_msr = wd_ops->perfctr;
- evntsel_msr = wd_ops->evntsel;
-
- /* KVM doesn't implement this MSR */
- if (wrmsr_safe(perfctr_msr, 0, 0) < 0)
- return 0;
-
- evntsel = P6_EVNTSEL_INT
- | P6_EVNTSEL_OS
- | P6_EVNTSEL_USR
- | P6_NMI_EVENT;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
-
- /* initialize the wd struct before enabling */
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; /* unused */
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= P6_EVNTSEL0_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
-
- return 1;
-}
-
-static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
- /*
- * P6 based Pentium M need to re-unmask
- * the apic vector but it doesn't hurt
- * other P6 variant.
- * ArchPerfom/Core Duo also needs this
- */
- apic_write(APIC_LVTPC, APIC_DM_NMI);
-
- /* P6/ARCH_PERFMON has 32 bit counter write */
- write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p6_wd_ops = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_p6_watchdog,
- .rearm = p6_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_P6_PERFCTR0,
- .evntsel = MSR_P6_EVNTSEL0,
- .checkbit = 1ULL << 39,
-};
-
-/*
- * Intel P4 performance counters.
- * By far the most complicated of all.
- */
-#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1 << 7)
-#define P4_ESCR_EVENT_SELECT(N) ((N) << 25)
-#define P4_ESCR_OS (1 << 3)
-#define P4_ESCR_USR (1 << 2)
-#define P4_CCCR_OVF_PMI0 (1 << 26)
-#define P4_CCCR_OVF_PMI1 (1 << 27)
-#define P4_CCCR_THRESHOLD(N) ((N) << 20)
-#define P4_CCCR_COMPLEMENT (1 << 19)
-#define P4_CCCR_COMPARE (1 << 18)
-#define P4_CCCR_REQUIRED (3 << 16)
-#define P4_CCCR_ESCR_SELECT(N) ((N) << 13)
-#define P4_CCCR_ENABLE (1 << 12)
-#define P4_CCCR_OVF (1 << 31)
-
-#define P4_CONTROLS 18
-static unsigned int p4_controls[18] = {
- MSR_P4_BPU_CCCR0,
- MSR_P4_BPU_CCCR1,
- MSR_P4_BPU_CCCR2,
- MSR_P4_BPU_CCCR3,
- MSR_P4_MS_CCCR0,
- MSR_P4_MS_CCCR1,
- MSR_P4_MS_CCCR2,
- MSR_P4_MS_CCCR3,
- MSR_P4_FLAME_CCCR0,
- MSR_P4_FLAME_CCCR1,
- MSR_P4_FLAME_CCCR2,
- MSR_P4_FLAME_CCCR3,
- MSR_P4_IQ_CCCR0,
- MSR_P4_IQ_CCCR1,
- MSR_P4_IQ_CCCR2,
- MSR_P4_IQ_CCCR3,
- MSR_P4_IQ_CCCR4,
- MSR_P4_IQ_CCCR5,
-};
-/*
- * Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
- * CRU_ESCR0 (with any non-null event selector) through a complemented
- * max threshold. [IA32-Vol3, Section 14.9.9]
- */
-static int setup_p4_watchdog(unsigned nmi_hz)
-{
- unsigned int perfctr_msr, evntsel_msr, cccr_msr;
- unsigned int evntsel, cccr_val;
- unsigned int misc_enable, dummy;
- unsigned int ht_num;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
- if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
- return 0;
-
-#ifdef CONFIG_SMP
- /* detect which hyperthread we are on */
- if (smp_num_siblings == 2) {
- unsigned int ebx, apicid;
-
- ebx = cpuid_ebx(1);
- apicid = (ebx >> 24) & 0xff;
- ht_num = apicid & 1;
- } else
-#endif
- ht_num = 0;
-
- /*
- * performance counters are shared resources
- * assign each hyperthread its own set
- * (re-use the ESCR0 register, seems safe
- * and keeps the cccr_val the same)
- */
- if (!ht_num) {
- /* logical cpu 0 */
- perfctr_msr = MSR_P4_IQ_PERFCTR0;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR0;
- cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
-
- /*
- * If we're on the kdump kernel or other situation, we may
- * still have other performance counter registers set to
- * interrupt and they'll keep interrupting forever because
- * of the P4_CCCR_OVF quirk. So we need to ACK all the
- * pending interrupts and disable all the registers here,
- * before reenabling the NMI delivery. Refer to p4_rearm()
- * about the P4_CCCR_OVF quirk.
- */
- if (reset_devices) {
- unsigned int low, high;
- int i;
-
- for (i = 0; i < P4_CONTROLS; i++) {
- rdmsr(p4_controls[i], low, high);
- low &= ~(P4_CCCR_ENABLE | P4_CCCR_OVF);
- wrmsr(p4_controls[i], low, high);
- }
- }
- } else {
- /* logical cpu 1 */
- perfctr_msr = MSR_P4_IQ_PERFCTR1;
- evntsel_msr = MSR_P4_CRU_ESCR0;
- cccr_msr = MSR_P4_IQ_CCCR1;
-
- /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */
- if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4)
- cccr_val = P4_CCCR_OVF_PMI0;
- else
- cccr_val = P4_CCCR_OVF_PMI1;
- cccr_val |= P4_CCCR_ESCR_SELECT(4);
- }
-
- evntsel = P4_ESCR_EVENT_SELECT(0x3F)
- | P4_ESCR_OS
- | P4_ESCR_USR;
-
- cccr_val |= P4_CCCR_THRESHOLD(15)
- | P4_CCCR_COMPLEMENT
- | P4_CCCR_COMPARE
- | P4_CCCR_REQUIRED;
-
- wrmsr(evntsel_msr, evntsel, 0);
- wrmsr(cccr_msr, cccr_val, 0);
- write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = cccr_msr;
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- cccr_val |= P4_CCCR_ENABLE;
- wrmsr(cccr_msr, cccr_val, 0);
- return 1;
-}
-
-static void stop_p4_watchdog(void)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- wrmsr(wd->cccr_msr, 0, 0);
- wrmsr(wd->evntsel_msr, 0, 0);
-}
-
-static int p4_reserve(void)
-{
- if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0))
- return 0;
-#ifdef CONFIG_SMP
- if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1))
- goto fail1;
-#endif
- if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0))
- goto fail2;
- /* RED-PEN why is ESCR1 not reserved here? */
- return 1;
- fail2:
-#ifdef CONFIG_SMP
- if (smp_num_siblings > 1)
- release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
- fail1:
-#endif
- release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
- return 0;
-}
-
-static void p4_unreserve(void)
-{
-#ifdef CONFIG_SMP
- if (smp_num_siblings > 1)
- release_perfctr_nmi(MSR_P4_IQ_PERFCTR1);
-#endif
- release_evntsel_nmi(MSR_P4_CRU_ESCR0);
- release_perfctr_nmi(MSR_P4_IQ_PERFCTR0);
-}
-
-static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
-{
- unsigned dummy;
- /*
- * P4 quirks:
- * - An overflown perfctr will assert its interrupt
- * until the OVF flag in its CCCR is cleared.
- * - LVTPC is masked on interrupt and must be
- * unmasked by the LVTPC handler.
- */
- rdmsrl(wd->cccr_msr, dummy);
- dummy &= ~P4_CCCR_OVF;
- wrmsrl(wd->cccr_msr, dummy);
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- /* start the cycle over again */
- write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz);
-}
-
-static const struct wd_ops p4_wd_ops = {
- .reserve = p4_reserve,
- .unreserve = p4_unreserve,
- .setup = setup_p4_watchdog,
- .rearm = p4_rearm,
- .stop = stop_p4_watchdog,
- /* RED-PEN this is wrong for the other sibling */
- .perfctr = MSR_P4_BPU_PERFCTR0,
- .evntsel = MSR_P4_BSU_ESCR0,
- .checkbit = 1ULL << 39,
-};
-
-/*
- * Watchdog using the Intel architected PerfMon.
- * Used for Core2 and hopefully all future Intel CPUs.
- */
-#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
-#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
-
-static struct wd_ops intel_arch_wd_ops;
-
-static int setup_intel_arch_watchdog(unsigned nmi_hz)
-{
- unsigned int ebx;
- union cpuid10_eax eax;
- unsigned int unused;
- unsigned int perfctr_msr, evntsel_msr;
- unsigned int evntsel;
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
-
- /*
- * Check whether the Architectural PerfMon supports
- * Unhalted Core Cycles Event or not.
- * NOTE: Corresponding bit = 0 in ebx indicates event present.
- */
- cpuid(10, &(eax.full), &ebx, &unused, &unused);
- if ((eax.split.mask_length <
- (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
- (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
- return 0;
-
- perfctr_msr = wd_ops->perfctr;
- evntsel_msr = wd_ops->evntsel;
-
- wrmsrl(perfctr_msr, 0UL);
-
- evntsel = ARCH_PERFMON_EVENTSEL_INT
- | ARCH_PERFMON_EVENTSEL_OS
- | ARCH_PERFMON_EVENTSEL_USR
- | ARCH_PERFMON_NMI_EVENT_SEL
- | ARCH_PERFMON_NMI_EVENT_UMASK;
-
- /* setup the timer */
- wrmsr(evntsel_msr, evntsel, 0);
- nmi_hz = adjust_for_32bit_ctr(nmi_hz);
- write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz);
-
- wd->perfctr_msr = perfctr_msr;
- wd->evntsel_msr = evntsel_msr;
- wd->cccr_msr = 0; /* unused */
-
- /* ok, everything is initialized, announce that we're set */
- cpu_nmi_set_wd_enabled();
-
- apic_write(APIC_LVTPC, APIC_DM_NMI);
- evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE;
- wrmsr(evntsel_msr, evntsel, 0);
- intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1);
- return 1;
-}
-
-static struct wd_ops intel_arch_wd_ops __read_mostly = {
- .reserve = single_msr_reserve,
- .unreserve = single_msr_unreserve,
- .setup = setup_intel_arch_watchdog,
- .rearm = p6_rearm,
- .stop = single_msr_stop_watchdog,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
- .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
-};
-
-static void probe_nmi_watchdog(void)
-{
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_AMD:
- if (boot_cpu_data.x86 == 6 ||
- (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15))
- wd_ops = &k7_wd_ops;
- return;
- case X86_VENDOR_INTEL:
- /* Work around where perfctr1 doesn't have a working enable
- * bit as described in the following errata:
- * AE49 Core Duo and Intel Core Solo 65 nm
- * AN49 Intel Pentium Dual-Core
- * AF49 Dual-Core Intel Xeon Processor LV
- */
- if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) ||
- ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 &&
- boot_cpu_data.x86_mask == 4))) {
- intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0;
- intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0;
- }
- if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
- wd_ops = &intel_arch_wd_ops;
- break;
- }
- switch (boot_cpu_data.x86) {
- case 6:
- if (boot_cpu_data.x86_model > 13)
- return;
-
- wd_ops = &p6_wd_ops;
- break;
- case 15:
- wd_ops = &p4_wd_ops;
- break;
- default:
- return;
- }
- break;
- }
-}
-
-/* Interface to nmi.c */
-
-int lapic_watchdog_init(unsigned nmi_hz)
-{
- if (!wd_ops) {
- probe_nmi_watchdog();
- if (!wd_ops) {
- printk(KERN_INFO "NMI watchdog: CPU not supported\n");
- return -1;
- }
-
- if (!wd_ops->reserve()) {
- printk(KERN_ERR
- "NMI watchdog: cannot reserve perfctrs\n");
- return -1;
- }
- }
-
- if (!(wd_ops->setup(nmi_hz))) {
- printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n",
- raw_smp_processor_id());
- return -1;
- }
-
- return 0;
-}
-
-void lapic_watchdog_stop(void)
-{
- if (wd_ops)
- wd_ops->stop();
-}
-
-unsigned lapic_adjust_nmi_hz(unsigned hz)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
- wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1)
- hz = adjust_for_32bit_ctr(hz);
- return hz;
-}
-
-int __kprobes lapic_wd_event(unsigned nmi_hz)
-{
- struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
- u64 ctr;
-
- rdmsrl(wd->perfctr_msr, ctr);
- if (ctr & wd_ops->checkbit) /* perfctr still running? */
- return 0;
-
- wd_ops->rearm(wd, nmi_hz);
- return 1;
-}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6e8752c..d6fb146 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -175,21 +175,21 @@ static const struct stacktrace_ops print_trace_ops = {
void
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl)
+ unsigned long *stack, char *log_lvl)
{
printk("%sCall Trace:\n", log_lvl);
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
+ dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
}
void show_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp)
+ unsigned long *stack)
{
- show_trace_log_lvl(task, regs, stack, bp, "");
+ show_trace_log_lvl(task, regs, stack, "");
}
void show_stack(struct task_struct *task, unsigned long *sp)
{
- show_stack_log_lvl(task, NULL, sp, 0, "");
+ show_stack_log_lvl(task, NULL, sp, "");
}
/*
@@ -197,20 +197,14 @@ void show_stack(struct task_struct *task, unsigned long *sp)
*/
void dump_stack(void)
{
- unsigned long bp = 0;
unsigned long stack;
-#ifdef CONFIG_FRAME_POINTER
- if (!bp)
- get_bp(bp);
-#endif
-
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
current->pid, current->comm, print_tainted(),
init_utsname()->release,
(int)strcspn(init_utsname()->version, " "),
init_utsname()->version);
- show_trace(NULL, NULL, &stack, bp);
+ show_trace(NULL, NULL, &stack);
}
EXPORT_SYMBOL(dump_stack);
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 1bc7f75..74cc1ed 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -17,11 +17,12 @@
#include <asm/stacktrace.h>
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+ struct pt_regs *regs, unsigned long *stack,
const struct stacktrace_ops *ops, void *data)
{
int graph = 0;
+ unsigned long bp;
if (!task)
task = current;
@@ -34,18 +35,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
stack = (unsigned long *)task->thread.sp;
}
-#ifdef CONFIG_FRAME_POINTER
- if (!bp) {
- if (task == current) {
- /* Grab bp right from our regs */
- get_bp(bp);
- } else {
- /* bp is the last reg pushed by switch_to */
- bp = *(unsigned long *) task->thread.sp;
- }
- }
-#endif
-
+ bp = stack_frame(task, regs);
for (;;) {
struct thread_info *context;
@@ -65,7 +55,7 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ unsigned long *sp, char *log_lvl)
{
unsigned long *stack;
int i;
@@ -87,7 +77,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
touch_nmi_watchdog();
}
printk(KERN_CONT "\n");
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ show_trace_log_lvl(task, regs, sp, log_lvl);
}
@@ -112,8 +102,7 @@ void show_registers(struct pt_regs *regs)
u8 *ip;
printk(KERN_EMERG "Stack:\n");
- show_stack_log_lvl(NULL, regs, &regs->sp,
- 0, KERN_EMERG);
+ show_stack_log_lvl(NULL, regs, &regs->sp, KERN_EMERG);
printk(KERN_EMERG "Code: ");
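
Both dump_trace() variants now take the frame pointer from a stack_frame(task, regs) helper instead of a caller-supplied bp. A sketch of what such a helper plausibly does, folding in the #ifdef CONFIG_FRAME_POINTER block deleted above; the struct layouts are simplified stand-ins, and a compiler builtin replaces the kernel's get_bp():

#include <stddef.h>

struct pt_regs   { unsigned long bp; };
struct thread_st { unsigned long sp; };
struct task_st   { struct thread_st thread; };

static struct task_st *current_task;	/* stand-in for "current" */

static unsigned long stack_frame(struct task_st *task, struct pt_regs *regs)
{
	if (regs)
		return regs->bp;	/* interrupted context carries bp */

	if (task == current_task)	/* grab bp from our own frame */
		return (unsigned long)__builtin_frame_address(0);

	/* bp is the last register pushed by switch_to */
	return *(unsigned long *)task->thread.sp;
}

int main(void)
{
	struct pt_regs regs = { .bp = 42 };
	return stack_frame(NULL, &regs) == 42 ? 0 : 1;
}
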
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6a34048..6410133 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -139,8 +139,8 @@ fixup_bp_irq_link(unsigned long bp, unsigned long *stack,
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
*/
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp,
+void dump_trace(struct task_struct *task,
+ struct pt_regs *regs, unsigned long *stack,
const struct stacktrace_ops *ops, void *data)
{
const unsigned cpu = get_cpu();
@@ -149,6 +149,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned used = 0;
struct thread_info *tinfo;
int graph = 0;
+ unsigned long bp;
if (!task)
task = current;
@@ -160,18 +161,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
stack = (unsigned long *)task->thread.sp;
}
-#ifdef CONFIG_FRAME_POINTER
- if (!bp) {
- if (task == current) {
- /* Grab bp right from our regs */
- get_bp(bp);
- } else {
- /* bp is the last reg pushed by switch_to */
- bp = *(unsigned long *) task->thread.sp;
- }
- }
-#endif
-
+ bp = stack_frame(task, regs);
/*
* Print function call entries in all stacks, starting at the
* current stack address. If the stacks consist of nested
@@ -235,7 +225,7 @@ EXPORT_SYMBOL(dump_trace);
void
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl)
+ unsigned long *sp, char *log_lvl)
{
unsigned long *irq_stack_end;
unsigned long *irq_stack;
@@ -279,7 +269,7 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
preempt_enable();
printk(KERN_CONT "\n");
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
+ show_trace_log_lvl(task, regs, sp, log_lvl);
}
void show_registers(struct pt_regs *regs)
@@ -308,7 +298,7 @@ void show_registers(struct pt_regs *regs)
printk(KERN_EMERG "Stack:\n");
show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
- regs->bp, KERN_EMERG);
+ KERN_EMERG);
printk(KERN_EMERG "Code: ");
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 4572f25..cd28a35 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -240,7 +240,7 @@ static int __init setup_early_printk(char *buf)
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
-#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+#ifdef CONFIG_EARLY_PRINTK_MRST
if (!strncmp(buf, "mrst", 4)) {
mrst_early_console_init();
early_console_register(&early_mrst_console, keep);
@@ -250,7 +250,6 @@ static int __init setup_early_printk(char *buf)
hsu_early_console_init();
early_console_register(&early_hsu_console, keep);
}
-
#endif
buf++;
}
diff --git a/arch/x86/kernel/early_printk_mrst.c b/arch/x86/kernel/early_printk_mrst.c
deleted file mode 100644
index 65df603..0000000
--- a/arch/x86/kernel/early_printk_mrst.c
+++ /dev/null
@@ -1,319 +0,0 @@
-/*
- * early_printk_mrst.c - early consoles for Intel MID platforms
- *
- * Copyright (c) 2008-2010, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/*
- * This file implements two early consoles named mrst and hsu.
- * mrst is based on Maxim3110 spi-uart device, it exists in both
- * Moorestown and Medfield platforms, while hsu is based on a High
- * Speed UART device which only exists in the Medfield platform
- */
-
-#include <linux/serial_reg.h>
-#include <linux/serial_mfd.h>
-#include <linux/kmsg_dump.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/init.h>
-#include <linux/io.h>
-
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/mrst.h>
-
-#define MRST_SPI_TIMEOUT 0x200000
-#define MRST_REGBASE_SPI0 0xff128000
-#define MRST_REGBASE_SPI1 0xff128400
-#define MRST_CLK_SPI0_REG 0xff11d86c
-
-/* Bit fields in CTRLR0 */
-#define SPI_DFS_OFFSET 0
-
-#define SPI_FRF_OFFSET 4
-#define SPI_FRF_SPI 0x0
-#define SPI_FRF_SSP 0x1
-#define SPI_FRF_MICROWIRE 0x2
-#define SPI_FRF_RESV 0x3
-
-#define SPI_MODE_OFFSET 6
-#define SPI_SCPH_OFFSET 6
-#define SPI_SCOL_OFFSET 7
-#define SPI_TMOD_OFFSET 8
-#define SPI_TMOD_TR 0x0 /* xmit & recv */
-#define SPI_TMOD_TO 0x1 /* xmit only */
-#define SPI_TMOD_RO 0x2 /* recv only */
-#define SPI_TMOD_EPROMREAD 0x3 /* eeprom read mode */
-
-#define SPI_SLVOE_OFFSET 10
-#define SPI_SRL_OFFSET 11
-#define SPI_CFS_OFFSET 12
-
-/* Bit fields in SR, 7 bits */
-#define SR_MASK 0x7f /* cover 7 bits */
-#define SR_BUSY (1 << 0)
-#define SR_TF_NOT_FULL (1 << 1)
-#define SR_TF_EMPT (1 << 2)
-#define SR_RF_NOT_EMPT (1 << 3)
-#define SR_RF_FULL (1 << 4)
-#define SR_TX_ERR (1 << 5)
-#define SR_DCOL (1 << 6)
-
-struct dw_spi_reg {
- u32 ctrl0;
- u32 ctrl1;
- u32 ssienr;
- u32 mwcr;
- u32 ser;
- u32 baudr;
- u32 txfltr;
- u32 rxfltr;
- u32 txflr;
- u32 rxflr;
- u32 sr;
- u32 imr;
- u32 isr;
- u32 risr;
- u32 txoicr;
- u32 rxoicr;
- u32 rxuicr;
- u32 msticr;
- u32 icr;
- u32 dmacr;
- u32 dmatdlr;
- u32 dmardlr;
- u32 idr;
- u32 version;
-
- /* Currently operates as 32 bits, though only the low 16 bits matter */
- u32 dr;
-} __packed;
-
-#define dw_readl(dw, name) __raw_readl(&(dw)->name)
-#define dw_writel(dw, name, val) __raw_writel((val), &(dw)->name)
-
-/* Default to the SPI0 registers for mrst; we will detect Penwell and use SPI1 */
-static unsigned long mrst_spi_paddr = MRST_REGBASE_SPI0;
-
-static u32 *pclk_spi0;
-/* Always contains an accessible address, start with 0 */
-static struct dw_spi_reg *pspi;
-
-static struct kmsg_dumper dw_dumper;
-static int dumper_registered;
-
-static void dw_kmsg_dump(struct kmsg_dumper *dumper,
- enum kmsg_dump_reason reason,
- const char *s1, unsigned long l1,
- const char *s2, unsigned long l2)
-{
- int i;
-
-	/* When we get here, we'd better re-init the HW */
- mrst_early_console_init();
-
- for (i = 0; i < l1; i++)
- early_mrst_console.write(&early_mrst_console, s1 + i, 1);
- for (i = 0; i < l2; i++)
- early_mrst_console.write(&early_mrst_console, s2 + i, 1);
-}
-
-/* Set the baud rate to 115200, 8n1, IRQ disabled */
-static void max3110_write_config(void)
-{
- u16 config;
-
- config = 0xc001;
- dw_writel(pspi, dr, config);
-}
-
-/* Translate char to an eligible word and send to max3110 */
-static void max3110_write_data(char c)
-{
- u16 data;
-
- data = 0x8000 | c;
- dw_writel(pspi, dr, data);
-}
-
-void mrst_early_console_init(void)
-{
- u32 ctrlr0 = 0;
- u32 spi0_cdiv;
-	u32 freq;	/* Frequency info only needs to be looked up once */
-
- /* Base clk is 100 MHz, the actual clk = 100M / (clk_divider + 1) */
- pclk_spi0 = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
- MRST_CLK_SPI0_REG);
- spi0_cdiv = ((*pclk_spi0) & 0xe00) >> 9;
- freq = 100000000 / (spi0_cdiv + 1);
-
- if (mrst_identify_cpu() == MRST_CPU_CHIP_PENWELL)
- mrst_spi_paddr = MRST_REGBASE_SPI1;
-
- pspi = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
- mrst_spi_paddr);
-
- /* Disable SPI controller */
- dw_writel(pspi, ssienr, 0);
-
- /* Set control param, 8 bits, transmit only mode */
- ctrlr0 = dw_readl(pspi, ctrl0);
-
- ctrlr0 &= 0xfcc0;
- ctrlr0 |= 0xf | (SPI_FRF_SPI << SPI_FRF_OFFSET)
- | (SPI_TMOD_TO << SPI_TMOD_OFFSET);
- dw_writel(pspi, ctrl0, ctrlr0);
-
- /*
- * Change the spi0 clk to comply with 115200 bps, use 100000 to
-	 * calculate the clk divisor to make the clock a little slower
- * than real baud rate.
- */
- dw_writel(pspi, baudr, freq/100000);
-
- /* Disable all INT for early phase */
- dw_writel(pspi, imr, 0x0);
-
- /* Set the cs to spi-uart */
- dw_writel(pspi, ser, 0x2);
-
- /* Enable the HW, the last step for HW init */
- dw_writel(pspi, ssienr, 0x1);
-
- /* Set the default configuration */
- max3110_write_config();
-
- /* Register the kmsg dumper */
- if (!dumper_registered) {
- dw_dumper.dump = dw_kmsg_dump;
- kmsg_dump_register(&dw_dumper);
- dumper_registered = 1;
- }
-}
-
-/* Slave select should be called in the read/write function */
-static void early_mrst_spi_putc(char c)
-{
- unsigned int timeout;
- u32 sr;
-
- timeout = MRST_SPI_TIMEOUT;
- /* Early putc needs to make sure the TX FIFO is not full */
- while (--timeout) {
- sr = dw_readl(pspi, sr);
- if (!(sr & SR_TF_NOT_FULL))
- cpu_relax();
- else
- break;
- }
-
- if (!timeout)
- pr_warning("MRST earlycon: timed out\n");
- else
- max3110_write_data(c);
-}
-
-/* Early SPI only uses polling mode */
-static void early_mrst_spi_write(struct console *con, const char *str, unsigned n)
-{
- int i;
-
- for (i = 0; i < n && *str; i++) {
- if (*str == '\n')
- early_mrst_spi_putc('\r');
- early_mrst_spi_putc(*str);
- str++;
- }
-}
-
-struct console early_mrst_console = {
- .name = "earlymrst",
- .write = early_mrst_spi_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-
-/*
- * Following is the early console based on Medfield HSU (High
- * Speed UART) device.
- */
-#define HSU_PORT2_PADDR 0xffa28180
-
-static void __iomem *phsu;
-
-void hsu_early_console_init(void)
-{
- u8 lcr;
-
- phsu = (void *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE,
- HSU_PORT2_PADDR);
-
- /* Disable FIFO */
- writeb(0x0, phsu + UART_FCR);
-
- /* Set to default 115200 bps, 8n1 */
- lcr = readb(phsu + UART_LCR);
- writeb((0x80 | lcr), phsu + UART_LCR);
- writeb(0x18, phsu + UART_DLL);
- writeb(lcr, phsu + UART_LCR);
- writel(0x3600, phsu + UART_MUL*4);
-
- writeb(0x8, phsu + UART_MCR);
- writeb(0x7, phsu + UART_FCR);
- writeb(0x3, phsu + UART_LCR);
-
- /* Clear IRQ status */
- readb(phsu + UART_LSR);
- readb(phsu + UART_RX);
- readb(phsu + UART_IIR);
- readb(phsu + UART_MSR);
-
- /* Enable FIFO */
- writeb(0x7, phsu + UART_FCR);
-}
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-static void early_hsu_putc(char ch)
-{
- unsigned int timeout = 10000; /* 10ms */
- u8 status;
-
- while (--timeout) {
- status = readb(phsu + UART_LSR);
- if (status & BOTH_EMPTY)
- break;
- udelay(1);
- }
-
- /* Only write the char when there was no timeout */
- if (timeout)
- writeb(ch, phsu + UART_TX);
-}
-
-static void early_hsu_write(struct console *con, const char *str, unsigned n)
-{
- int i;
-
- for (i = 0; i < n && *str; i++) {
- if (*str == '\n')
- early_hsu_putc('\r');
- early_hsu_putc(*str);
- str++;
- }
-}
-
-struct console early_hsu_console = {
- .name = "earlyhsu",
- .write = early_hsu_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
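
Both consoles deleted here share one shape: poll a status register in a bounded spin, then emit the byte, expanding '\n' to "\r\n". A distilled user-space sketch of that loop, with the hardware accessors replaced by trivial simulations (the real code read SR/LSR through dw_readl()/readb() and wrote via the SPI data register or UART_TX):

#include <stdio.h>

#define TX_READY	(1 << 1)	/* plays the role of SR_TF_NOT_FULL */
#define TX_TIMEOUT	100000

/* Simulated register accessors; assumptions for illustration only. */
static unsigned char read_status(void)	{ return TX_READY; }
static void write_byte(char c)		{ putchar(c); }

static void early_putc(char c)
{
	unsigned int timeout = TX_TIMEOUT;

	/* Bounded poll: never wedge the boot on dead hardware. */
	while (--timeout) {
		if (read_status() & TX_READY)
			break;
	}

	/* Transmit only if the FIFO drained before the timeout. */
	if (timeout)
		write_byte(c);
}

static void early_write(const char *s, unsigned int n)
{
	while (n-- && *s) {
		if (*s == '\n')
			early_putc('\r');	/* LF -> CRLF for raw UARTs */
		early_putc(*s++);
	}
}

int main(void)
{
	early_write("hello, early console\n", 21);
	return 0;
}
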
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e3ba417..d3b895f 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -299,17 +299,21 @@ ENDPROC(native_usergs_sysret64)
ENTRY(save_args)
XCPT_FRAME
cld
- movq_cfi rdi, RDI+16-ARGOFFSET
- movq_cfi rsi, RSI+16-ARGOFFSET
- movq_cfi rdx, RDX+16-ARGOFFSET
- movq_cfi rcx, RCX+16-ARGOFFSET
- movq_cfi rax, RAX+16-ARGOFFSET
- movq_cfi r8, R8+16-ARGOFFSET
- movq_cfi r9, R9+16-ARGOFFSET
- movq_cfi r10, R10+16-ARGOFFSET
- movq_cfi r11, R11+16-ARGOFFSET
-
- leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
+	/*
+	 * Start from rbp in pt_regs and jump over the
+	 * return address.
+	 */
+ movq_cfi rdi, RDI+8-RBP
+ movq_cfi rsi, RSI+8-RBP
+ movq_cfi rdx, RDX+8-RBP
+ movq_cfi rcx, RCX+8-RBP
+ movq_cfi rax, RAX+8-RBP
+ movq_cfi r8, R8+8-RBP
+ movq_cfi r9, R9+8-RBP
+ movq_cfi r10, R10+8-RBP
+ movq_cfi r11, R11+8-RBP
+
+ leaq -RBP+8(%rsp),%rdi /* arg1 for handler */
movq_cfi rbp, 8 /* push %rbp */
leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
testl $3, CS(%rdi)
@@ -782,8 +786,9 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- subq $ORIG_RAX-ARGOFFSET+8, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-ARGOFFSET+8
+ /* reserve pt_regs for scratch regs and rbp */
+ subq $ORIG_RAX-RBP, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
call save_args
PARTIAL_FRAME 0
call \func
@@ -808,9 +813,14 @@ ret_from_intr:
TRACE_IRQS_OFF
decl PER_CPU_VAR(irq_count)
leaveq
+
CFI_RESTORE rbp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
+
+	/* We did not save rbx; restore only from ARGOFFSET */
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
exit_intr:
GET_THREAD_INFO(%rcx)
testl $3,CS-ARGOFFSET(%rsp)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 3afb33f..382eb29 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
+#include <linux/module.h>
#include <trace/syscall.h>
@@ -49,6 +50,7 @@ static DEFINE_PER_CPU(int, save_modifying_code);
int ftrace_arch_code_modify_prepare(void)
{
set_kernel_text_rw();
+ set_all_modules_text_rw();
modifying_code = 1;
return 0;
}
@@ -56,6 +58,7 @@ int ftrace_arch_code_modify_prepare(void)
int ftrace_arch_code_modify_post_process(void)
{
modifying_code = 0;
+ set_all_modules_text_ro();
set_kernel_text_ro();
return 0;
}
@@ -167,9 +170,9 @@ static void ftrace_mod_code(void)
void ftrace_nmi_enter(void)
{
- __get_cpu_var(save_modifying_code) = modifying_code;
+ __this_cpu_write(save_modifying_code, modifying_code);
- if (!__get_cpu_var(save_modifying_code))
+ if (!__this_cpu_read(save_modifying_code))
return;
if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) {
@@ -183,7 +186,7 @@ void ftrace_nmi_enter(void)
void ftrace_nmi_exit(void)
{
- if (!__get_cpu_var(save_modifying_code))
+ if (!__this_cpu_read(save_modifying_code))
return;
/* Finish all executions before clearing nmi_running */
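
The __get_cpu_var() to __this_cpu_read()/__this_cpu_write() conversion seen here recurs through the rest of the patch (hw_breakpoint, irq, kprobes, smpboot, tsc): the old idiom computes this CPU's address and then dereferences it, while the named accessors let x86 fold the whole access into a single %gs-relative instruction. A toy analogy using thread-local storage in place of per-CPU sections (an analogy only; the kernel does not use TLS for this):

#include <stdio.h>

static _Thread_local int save_modifying_code;	/* "per-CPU" variable */

/* Old idiom: form the variable's address, then dereference it. */
#define get_cpu_var(var)	(*(&(var)))

/* New idiom: one named read/write operation, no address formation. */
#define this_cpu_read(var)	(var)
#define this_cpu_write(var, v)	((var) = (v))

int main(void)
{
	get_cpu_var(save_modifying_code) = 1;	/* before */
	this_cpu_write(save_modifying_code, 2);	/* after  */
	printf("%d\n", this_cpu_read(save_modifying_code));
	return 0;
}
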
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c
index 7633101..7f138b3 100644
--- a/arch/x86/kernel/head32.c
+++ b/arch/x86/kernel/head32.c
@@ -61,6 +61,9 @@ void __init i386_start_kernel(void)
case X86_SUBARCH_MRST:
x86_mrst_early_setup();
break;
+ case X86_SUBARCH_CE4100:
+ x86_ce4100_early_setup();
+ break;
default:
i386_default_early_setup();
break;
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index c0dbd9a..9f54b20 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -139,39 +139,6 @@ ENTRY(startup_32)
movl %eax, pa(olpc_ofw_pgd)
#endif
-#ifdef CONFIG_PARAVIRT
-	/* This can only trip for a broken bootloader... */
- cmpw $0x207, pa(boot_params + BP_version)
- jb default_entry
-
- /* Paravirt-compatible boot parameters. Look to see what architecture
- we're booting under. */
- movl pa(boot_params + BP_hardware_subarch), %eax
- cmpl $num_subarch_entries, %eax
- jae bad_subarch
-
- movl pa(subarch_entries)(,%eax,4), %eax
- subl $__PAGE_OFFSET, %eax
- jmp *%eax
-
-bad_subarch:
-WEAK(lguest_entry)
-WEAK(xen_entry)
- /* Unknown implementation; there's really
- nothing we can do at this point. */
- ud2a
-
- __INITDATA
-
-subarch_entries:
- .long default_entry /* normal x86/PC */
- .long lguest_entry /* lguest hypervisor */
- .long xen_entry /* Xen hypervisor */
- .long default_entry /* Moorestown MID */
-num_subarch_entries = (. - subarch_entries) / 4
-.previous
-#endif /* CONFIG_PARAVIRT */
-
/*
* Initialize page tables. This creates a PDE and a set of page
* tables, which are located immediately beyond __brk_base. The variable
@@ -181,7 +148,6 @@ num_subarch_entries = (. - subarch_entries) / 4
*
* Note that the stack is not yet set up!
*/
-default_entry:
#ifdef CONFIG_X86_PAE
/*
@@ -261,7 +227,42 @@ page_pde_offset = (__PAGE_OFFSET >> 20);
movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax
movl %eax,pa(initial_page_table+0xffc)
#endif
- jmp 3f
+
+#ifdef CONFIG_PARAVIRT
+	/* This can only trip for a broken bootloader... */
+ cmpw $0x207, pa(boot_params + BP_version)
+ jb default_entry
+
+ /* Paravirt-compatible boot parameters. Look to see what architecture
+ we're booting under. */
+ movl pa(boot_params + BP_hardware_subarch), %eax
+ cmpl $num_subarch_entries, %eax
+ jae bad_subarch
+
+ movl pa(subarch_entries)(,%eax,4), %eax
+ subl $__PAGE_OFFSET, %eax
+ jmp *%eax
+
+bad_subarch:
+WEAK(lguest_entry)
+WEAK(xen_entry)
+ /* Unknown implementation; there's really
+ nothing we can do at this point. */
+ ud2a
+
+ __INITDATA
+
+subarch_entries:
+ .long default_entry /* normal x86/PC */
+ .long lguest_entry /* lguest hypervisor */
+ .long xen_entry /* Xen hypervisor */
+ .long default_entry /* Moorestown MID */
+num_subarch_entries = (. - subarch_entries) / 4
+.previous
+#else
+ jmp default_entry
+#endif /* CONFIG_PARAVIRT */
+
/*
* Non-boot CPU entry point; entered from trampoline.S
* We can't lgdt here, because lgdt itself uses a data segment, but
@@ -282,7 +283,7 @@ ENTRY(startup_32_smp)
movl %eax,%fs
movl %eax,%gs
#endif /* CONFIG_SMP */
-3:
+default_entry:
/*
* New page tables may be in 4Mbyte page mode and may
@@ -316,6 +317,10 @@ ENTRY(startup_32_smp)
subl $0x80000001, %eax
cmpl $(0x8000ffff-0x80000001), %eax
ja 6f
+
+ /* Clear bogus XD_DISABLE bits */
+ call verify_cpu
+
mov $0x80000001, %eax
cpuid
/* Execute Disable bit supported? */
@@ -611,6 +616,8 @@ ignore_int:
#endif
iret
+#include "verify_cpu.S"
+
__REFDATA
.align 4
ENTRY(initial_code)
@@ -622,13 +629,13 @@ ENTRY(initial_code)
__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
#ifdef CONFIG_X86_PAE
-ENTRY(initial_pg_pmd)
+initial_pg_pmd:
.fill 1024*KPMDS,4,0
#else
ENTRY(initial_page_table)
.fill 1024,4,0
#endif
-ENTRY(initial_pg_fixmap)
+initial_pg_fixmap:
.fill 1024,4,0
ENTRY(empty_zero_page)
.fill 4096,1,0
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 42c5942..02f0763 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -122,7 +122,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp)
return -EBUSY;
set_debugreg(info->address, i);
- __get_cpu_var(cpu_debugreg[i]) = info->address;
+ __this_cpu_write(cpu_debugreg[i], info->address);
dr7 = &__get_cpu_var(cpu_dr7);
*dr7 |= encode_dr7(i, info->len, info->type);
@@ -397,12 +397,12 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
void hw_breakpoint_restore(void)
{
- set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0);
- set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1);
- set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2);
- set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3);
+ set_debugreg(__this_cpu_read(cpu_debugreg[0]), 0);
+ set_debugreg(__this_cpu_read(cpu_debugreg[1]), 1);
+ set_debugreg(__this_cpu_read(cpu_debugreg[2]), 2);
+ set_debugreg(__this_cpu_read(cpu_debugreg[3]), 3);
set_debugreg(current->thread.debugreg6, 6);
- set_debugreg(__get_cpu_var(cpu_dr7), 7);
+ set_debugreg(__this_cpu_read(cpu_dr7), 7);
}
EXPORT_SYMBOL_GPL(hw_breakpoint_restore);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 83ec017..3a43caa 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -234,7 +234,7 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
exit_idle();
irq_enter();
- irq = __get_cpu_var(vector_irq)[vector];
+ irq = __this_cpu_read(vector_irq[vector]);
if (!handle_irq(irq, regs)) {
ack_APIC_irq();
@@ -350,12 +350,12 @@ void fixup_irqs(void)
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
unsigned int irr;
- if (__get_cpu_var(vector_irq)[vector] < 0)
+ if (__this_cpu_read(vector_irq[vector]) < 0)
continue;
irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
if (irr & (1 << (vector % 32))) {
- irq = __get_cpu_var(vector_irq)[vector];
+ irq = __this_cpu_read(vector_irq[vector]);
data = irq_get_irq_data(irq);
raw_spin_lock(&desc->lock);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 96656f2..48ff6dc 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -79,7 +79,7 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
u32 *isp, arg1, arg2;
curctx = (union irq_ctx *) current_thread_info();
- irqctx = __get_cpu_var(hardirq_ctx);
+ irqctx = __this_cpu_read(hardirq_ctx);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -166,7 +166,7 @@ asmlinkage void do_softirq(void)
if (local_softirq_pending()) {
curctx = current_thread_info();
- irqctx = __get_cpu_var(softirq_ctx);
+ irqctx = __this_cpu_read(softirq_ctx);
irqctx->tinfo.task = curctx->task;
irqctx->tinfo.previous_esp = current_stack_pointer;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index cd21b65..a413000 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -48,6 +48,7 @@
#include <asm/apicdef.h>
#include <asm/system.h>
#include <asm/apic.h>
+#include <asm/nmi.h>
struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{
@@ -525,10 +526,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
}
return NOTIFY_DONE;
- case DIE_NMI_IPI:
- /* Just ignore, we will handle the roundup on DIE_NMI. */
- return NOTIFY_DONE;
-
case DIE_NMIUNKNOWN:
if (was_in_debug_nmi[raw_smp_processor_id()]) {
was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -606,7 +603,7 @@ static struct notifier_block kgdb_notifier = {
/*
* Lowest-prio notifier priority, we want to be notified last:
*/
- .priority = -INT_MAX,
+ .priority = NMI_LOCAL_LOW_PRIOR,
};
/**
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 1cbd54c..d91c477 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -403,7 +403,7 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
kcb->kprobe_old_flags = kcb->prev_kprobe.old_flags;
kcb->kprobe_saved_flags = kcb->prev_kprobe.saved_flags;
@@ -412,7 +412,7 @@ static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = p;
+ __this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_flags = kcb->kprobe_old_flags
= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
if (is_IF_modifier(p->ainsn.insn))
@@ -586,7 +586,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
preempt_enable_no_resched();
return 1;
} else if (kprobe_running()) {
- p = __get_cpu_var(current_kprobe);
+ p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
setup_singlestep(p, regs, kcb, 0);
return 1;
@@ -759,11 +759,11 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs)
orig_ret_address = (unsigned long)ri->ret_addr;
if (ri->rp && ri->rp->handler) {
- __get_cpu_var(current_kprobe) = &ri->rp->kp;
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
ri->ret_addr = correct_ret_addr;
ri->rp->handler(ri, regs);
- __get_cpu_var(current_kprobe) = NULL;
+ __this_cpu_write(current_kprobe, NULL);
}
recycle_rp_inst(ri, &empty_rp);
@@ -1184,6 +1184,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+	/* This is possible if op is under delayed unoptimization */
+ if (kprobe_disabled(&op->kp))
+ return;
+
preempt_disable();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
@@ -1198,10 +1202,10 @@ static void __kprobes optimized_callback(struct optimized_kprobe *op,
regs->ip = (unsigned long)op->kp.addr + INT3_SIZE;
regs->orig_ax = ~0UL;
- __get_cpu_var(current_kprobe) = &op->kp;
+ __this_cpu_write(current_kprobe, &op->kp);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
opt_pre_handler(&op->kp, regs);
- __get_cpu_var(current_kprobe) = NULL;
+ __this_cpu_write(current_kprobe, NULL);
}
preempt_enable_no_resched();
}
@@ -1401,10 +1405,16 @@ int __kprobes arch_prepare_optimized_kprobe(struct optimized_kprobe *op)
return 0;
}
-/* Replace a breakpoint (int3) with a relative jump. */
-int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
+#define MAX_OPTIMIZE_PROBES 256
+static struct text_poke_param *jump_poke_params;
+static struct jump_poke_buffer {
+ u8 buf[RELATIVEJUMP_SIZE];
+} *jump_poke_bufs;
+
+static void __kprobes setup_optimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
{
- unsigned char jmp_code[RELATIVEJUMP_SIZE];
s32 rel = (s32)((long)op->optinsn.insn -
((long)op->kp.addr + RELATIVEJUMP_SIZE));
@@ -1412,16 +1422,79 @@ int __kprobes arch_optimize_kprobe(struct optimized_kprobe *op)
memcpy(op->optinsn.copied_insn, op->kp.addr + INT3_SIZE,
RELATIVE_ADDR_SIZE);
- jmp_code[0] = RELATIVEJUMP_OPCODE;
- *(s32 *)(&jmp_code[1]) = rel;
+ insn_buf[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&insn_buf[1]) = rel;
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Replace breakpoints (int3) with relative jumps.
+ * Caller must hold kprobe_mutex and text_mutex.
+ */
+void __kprobes arch_optimize_kprobes(struct list_head *oplist)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ WARN_ON(kprobe_disabled(&op->kp));
+ /* Setup param */
+ setup_optimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_del_init(&op->list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
/*
* text_poke_smp doesn't support NMI/MCE code modifying.
* However, since kprobes itself also doesn't support NMI/MCE
* code probing, it's not a problem.
*/
- text_poke_smp(op->kp.addr, jmp_code, RELATIVEJUMP_SIZE);
- return 0;
+ text_poke_smp_batch(jump_poke_params, c);
+}
+
+static void __kprobes setup_unoptimize_kprobe(struct text_poke_param *tprm,
+ u8 *insn_buf,
+ struct optimized_kprobe *op)
+{
+ /* Set int3 to first byte for kprobes */
+ insn_buf[0] = BREAKPOINT_INSTRUCTION;
+ memcpy(insn_buf + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ tprm->addr = op->kp.addr;
+ tprm->opcode = insn_buf;
+ tprm->len = RELATIVEJUMP_SIZE;
+}
+
+/*
+ * Recover original instructions and breakpoints from relative jumps.
+ * Caller must hold kprobe_mutex.
+ */
+extern void arch_unoptimize_kprobes(struct list_head *oplist,
+ struct list_head *done_list)
+{
+ struct optimized_kprobe *op, *tmp;
+ int c = 0;
+
+ list_for_each_entry_safe(op, tmp, oplist, list) {
+ /* Setup param */
+ setup_unoptimize_kprobe(&jump_poke_params[c],
+ jump_poke_bufs[c].buf, op);
+ list_move(&op->list, done_list);
+ if (++c >= MAX_OPTIMIZE_PROBES)
+ break;
+ }
+
+ /*
+ * text_poke_smp doesn't support NMI/MCE code modifying.
+ * However, since kprobes itself also doesn't support NMI/MCE
+ * code probing, it's not a problem.
+ */
+ text_poke_smp_batch(jump_poke_params, c);
}
/* Replace a relative jump with a breakpoint (int3). */
@@ -1453,11 +1526,35 @@ static int __kprobes setup_detour_execution(struct kprobe *p,
}
return 0;
}
+
+static int __kprobes init_poke_params(void)
+{
+ /* Allocate code buffer and parameter array */
+ jump_poke_bufs = kmalloc(sizeof(struct jump_poke_buffer) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_bufs)
+ return -ENOMEM;
+
+ jump_poke_params = kmalloc(sizeof(struct text_poke_param) *
+ MAX_OPTIMIZE_PROBES, GFP_KERNEL);
+ if (!jump_poke_params) {
+ kfree(jump_poke_bufs);
+ jump_poke_bufs = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+#else /* !CONFIG_OPTPROBES */
+static int __kprobes init_poke_params(void)
+{
+ return 0;
+}
#endif
int __init arch_init_kprobes(void)
{
- return 0;
+ return init_poke_params();
}
int __kprobes arch_trampoline_kprobe(struct kprobe *p)
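
arch_optimize_kprobes() now collects up to MAX_OPTIMIZE_PROBES jump patches into one parameter array and hands them to text_poke_smp_batch() in a single call, so the expensive cross-CPU rendezvous is paid once per batch rather than once per probe. The batching pattern in isolation, with the poke machinery reduced to a stub (a sketch; unlike the kernel version, which leaves overflow probes on the list for a later pass, this one simply flushes and continues):

#include <stdio.h>

#define MAX_BATCH	256
#define JUMP_SIZE	5	/* RELATIVEJUMP_SIZE on x86 */

struct poke_param {
	void *addr;
	const unsigned char *opcode;
	int len;
};

static struct poke_param params[MAX_BATCH];

/* Stand-in for text_poke_smp_batch(): one rendezvous per batch. */
static void poke_batch(struct poke_param *p, int n)
{
	printf("patched %d sites with one synchronization\n", n);
}

static void optimize_all(void **sites, const unsigned char **insns, int total)
{
	int i, c = 0;

	for (i = 0; i < total; i++) {
		params[c].addr   = sites[i];
		params[c].opcode = insns[i];
		params[c].len    = JUMP_SIZE;
		if (++c >= MAX_BATCH) {		/* flush a full batch */
			poke_batch(params, c);
			c = 0;
		}
	}
	if (c)
		poke_batch(params, c);		/* flush the remainder */
}

int main(void)
{
	static unsigned char jmp[JUMP_SIZE];
	static int site;
	void *sites[] = { &site };
	const unsigned char *insns[] = { jmp };

	optimize_all(sites, insns, 1);
	return 0;
}
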
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index ce0cb47..0fe6d1a 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -155,12 +155,6 @@ static int apply_microcode_amd(int cpu)
return 0;
}
-static int get_ucode_data(void *to, const u8 *from, size_t n)
-{
- memcpy(to, from, n);
- return 0;
-}
-
static void *
get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
{
@@ -168,8 +162,7 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
u8 section_hdr[UCODE_CONTAINER_SECTION_HDR];
void *mc;
- if (get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR))
- return NULL;
+ get_ucode_data(section_hdr, buf, UCODE_CONTAINER_SECTION_HDR);
if (section_hdr[0] != UCODE_UCODE_TYPE) {
pr_err("error: invalid type field in container file section header\n");
@@ -183,16 +176,13 @@ get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size)
return NULL;
}
- mc = vmalloc(UCODE_MAX_SIZE);
- if (mc) {
- memset(mc, 0, UCODE_MAX_SIZE);
- if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR,
- total_size)) {
- vfree(mc);
- mc = NULL;
- } else
- *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
- }
+ mc = vzalloc(UCODE_MAX_SIZE);
+ if (!mc)
+ return NULL;
+
+ get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size);
+ *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR;
+
return mc;
}
@@ -202,8 +192,7 @@ static int install_equiv_cpu_table(const u8 *buf)
unsigned int *buf_pos = (unsigned int *)container_hdr;
unsigned long size;
- if (get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE))
- return 0;
+ get_ucode_data(&container_hdr, buf, UCODE_CONTAINER_HEADER_SIZE);
size = buf_pos[2];
@@ -219,10 +208,7 @@ static int install_equiv_cpu_table(const u8 *buf)
}
buf += UCODE_CONTAINER_HEADER_SIZE;
- if (get_ucode_data(equiv_cpu_table, buf, size)) {
- vfree(equiv_cpu_table);
- return 0;
- }
+ get_ucode_data(equiv_cpu_table, buf, size);
return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */
}
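
Two simplifications land in this file: get_ucode_data()'s never-failing error path goes away, and vmalloc()+memset() collapses into vzalloc(), so the allocation reduces to a single NULL check. A user-space rendering of the latter, with calloc() standing in for vzalloc():

#include <stdlib.h>
#include <string.h>

#define UCODE_MAX_SIZE 2048	/* illustrative size */

/* Before: allocate, then zero by hand, with a two-step failure path. */
static void *alloc_ucode_old(void)
{
	void *mc = malloc(UCODE_MAX_SIZE);
	if (mc)
		memset(mc, 0, UCODE_MAX_SIZE);
	return mc;
}

/* After: one call that returns zeroed memory or NULL. */
static void *alloc_ucode_new(void)
{
	return calloc(1, UCODE_MAX_SIZE);
}

int main(void)
{
	void *a = alloc_ucode_old(), *b = alloc_ucode_new();
	free(a);
	free(b);
	return 0;
}
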
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 9af64d9..01b0f6d 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -118,21 +118,8 @@ static void __init MP_bus_info(struct mpc_bus *m)
static void __init MP_ioapic_info(struct mpc_ioapic *m)
{
- if (!(m->flags & MPC_APIC_USABLE))
- return;
-
- printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
- m->apicid, m->apicver, m->apicaddr);
-
- mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
-}
-
-static void print_MP_intsrc_info(struct mpc_intsrc *m)
-{
- apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
- " IRQ %02x, APIC ID %x, APIC INT %02x\n",
- m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
- m->srcbusirq, m->dstapic, m->dstirq);
+ if (m->flags & MPC_APIC_USABLE)
+ mp_register_ioapic(m->apicid, m->apicaddr, gsi_top);
}
static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
@@ -144,73 +131,11 @@ static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
}
-static void __init assign_to_mp_irq(struct mpc_intsrc *m,
- struct mpc_intsrc *mp_irq)
-{
- mp_irq->dstapic = m->dstapic;
- mp_irq->type = m->type;
- mp_irq->irqtype = m->irqtype;
- mp_irq->irqflag = m->irqflag;
- mp_irq->srcbus = m->srcbus;
- mp_irq->srcbusirq = m->srcbusirq;
- mp_irq->dstirq = m->dstirq;
-}
-
-static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- m->dstapic = mp_irq->dstapic;
- m->type = mp_irq->type;
- m->irqtype = mp_irq->irqtype;
- m->irqflag = mp_irq->irqflag;
- m->srcbus = mp_irq->srcbus;
- m->srcbusirq = mp_irq->srcbusirq;
- m->dstirq = mp_irq->dstirq;
-}
-
-static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
- struct mpc_intsrc *m)
-{
- if (mp_irq->dstapic != m->dstapic)
- return 1;
- if (mp_irq->type != m->type)
- return 2;
- if (mp_irq->irqtype != m->irqtype)
- return 3;
- if (mp_irq->irqflag != m->irqflag)
- return 4;
- if (mp_irq->srcbus != m->srcbus)
- return 5;
- if (mp_irq->srcbusirq != m->srcbusirq)
- return 6;
- if (mp_irq->dstirq != m->dstirq)
- return 7;
-
- return 0;
-}
-
-static void __init MP_intsrc_info(struct mpc_intsrc *m)
-{
- int i;
-
- print_MP_intsrc_info(m);
-
- for (i = 0; i < mp_irq_entries; i++) {
- if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
- return;
- }
-
- assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
- panic("Max # of irq sources exceeded!!\n");
-}
#else /* CONFIG_X86_IO_APIC */
static inline void __init MP_bus_info(struct mpc_bus *m) {}
static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
-static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
#endif /* CONFIG_X86_IO_APIC */
-
static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
{
apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
@@ -222,7 +147,6 @@ static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
/*
* Read/parse the MPC
*/
-
static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
{
@@ -275,18 +199,6 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
-static void __init smp_register_lapic_address(unsigned long address)
-{
- mp_lapic_addr = address;
-
- set_fixmap_nocache(FIX_APIC_BASE, address);
- if (boot_cpu_physical_apicid == -1U) {
- boot_cpu_physical_apicid = read_apic_id();
- apic_version[boot_cpu_physical_apicid] =
- GET_APIC_VERSION(apic_read(APIC_LVR));
- }
-}
-
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -301,17 +213,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
#ifdef CONFIG_X86_32
generic_mps_oem_check(mpc, oem, str);
#endif
- /* save the local APIC address, it might be non-default */
+ /* Initialize the lapic mapping */
if (!acpi_lapic)
- mp_lapic_addr = mpc->lapic;
+ register_lapic_address(mpc->lapic);
if (early)
return 1;
- /* Initialize the lapic mapping */
- if (!acpi_lapic)
- smp_register_lapic_address(mpc->lapic);
-
if (mpc->oemptr)
x86_init.mpparse.smp_read_mpc_oem(mpc);
@@ -337,7 +245,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
break;
case MP_INTSRC:
- MP_intsrc_info((struct mpc_intsrc *)mpt);
+ mp_save_irq((struct mpc_intsrc *)mpt);
skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
break;
case MP_LINTSRC:
@@ -429,13 +337,13 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
intsrc.srcbusirq = i;
intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
- MP_intsrc_info(&intsrc);
+ mp_save_irq(&intsrc);
}
intsrc.irqtype = mp_ExtINT;
intsrc.srcbusirq = 0;
intsrc.dstirq = 0; /* 8259A to INTIN0 */
- MP_intsrc_info(&intsrc);
+ mp_save_irq(&intsrc);
}
@@ -784,11 +692,11 @@ static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
int i;
apic_printk(APIC_VERBOSE, "OLD ");
- print_MP_intsrc_info(m);
+ print_mp_irq_info(m);
i = get_MP_intsrc_index(m);
if (i > 0) {
- assign_to_mpc_intsrc(&mp_irqs[i], m);
+ memcpy(m, &mp_irqs[i], sizeof(*m));
apic_printk(APIC_VERBOSE, "NEW ");
print_mp_irq_info(&mp_irqs[i]);
return;
@@ -875,14 +783,14 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
if (nr_m_spare > 0) {
apic_printk(APIC_VERBOSE, "*NEW* found\n");
nr_m_spare--;
- assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
+ memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i]));
m_spare[nr_m_spare] = NULL;
} else {
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
count += sizeof(struct mpc_intsrc);
if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
goto out;
- assign_to_mpc_intsrc(&mp_irqs[i], m);
+ memcpy(m, &mp_irqs[i], sizeof(*m));
mpc->length = count;
mpt += sizeof(struct mpc_intsrc);
}
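
The deleted assign_to_mp_irq()/assign_to_mpc_intsrc() helpers copied struct mpc_intsrc field by field; since source and destination have the same type, the replacement memcpy(m, &mp_irqs[i], sizeof(*m)) is exactly equivalent. A minimal demonstration with a stand-in struct:

#include <string.h>

/* Stand-in carrying the same fields the helpers used to copy. */
struct intsrc {
	unsigned char type, irqtype, srcbus, srcbusirq, dstapic, dstirq;
	unsigned short irqflag;
};

static void copy_intsrc(struct intsrc *dst, const struct intsrc *src)
{
	/* One memcpy() of the whole struct replaces seven assignments. */
	memcpy(dst, src, sizeof(*dst));
}

int main(void)
{
	struct intsrc a = { .type = 3 }, b;
	copy_intsrc(&b, &a);
	return b.type == 3 ? 0 : 1;
}
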
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index ba0f0ca..c01ffa5 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -143,7 +143,7 @@ static void flush_gart(void)
spin_lock_irqsave(&iommu_bitmap_lock, flags);
if (need_flush) {
- k8_flush_garts();
+ amd_flush_garts();
need_flush = false;
}
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
@@ -561,17 +561,17 @@ static void enable_gart_translations(void)
{
int i;
- if (!k8_northbridges.gart_supported)
+ if (!amd_nb_has_feature(AMD_NB_GART))
return;
- for (i = 0; i < k8_northbridges.num; i++) {
- struct pci_dev *dev = k8_northbridges.nb_misc[i];
+ for (i = 0; i < amd_nb_num(); i++) {
+ struct pci_dev *dev = node_to_amd_nb(i)->misc;
enable_gart_translation(dev, __pa(agp_gatt_table));
}
/* Flush the GART-TLB to remove stale entries */
- k8_flush_garts();
+ amd_flush_garts();
}
/*
@@ -596,13 +596,13 @@ static void gart_fixup_northbridges(struct sys_device *dev)
if (!fix_up_north_bridges)
return;
- if (!k8_northbridges.gart_supported)
+ if (!amd_nb_has_feature(AMD_NB_GART))
return;
pr_info("PCI-DMA: Restoring GART aperture settings\n");
- for (i = 0; i < k8_northbridges.num; i++) {
- struct pci_dev *dev = k8_northbridges.nb_misc[i];
+ for (i = 0; i < amd_nb_num(); i++) {
+ struct pci_dev *dev = node_to_amd_nb(i)->misc;
/*
* Don't enable translations just yet. That is the next
@@ -644,7 +644,7 @@ static struct sys_device device_gart = {
* Private Northbridge GATT initialization in case we cannot use the
* AGP driver for some reason.
*/
-static __init int init_k8_gatt(struct agp_kern_info *info)
+static __init int init_amd_gatt(struct agp_kern_info *info)
{
unsigned aper_size, gatt_size, new_aper_size;
unsigned aper_base, new_aper_base;
@@ -656,8 +656,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
aper_size = aper_base = info->aper_size = 0;
dev = NULL;
- for (i = 0; i < k8_northbridges.num; i++) {
- dev = k8_northbridges.nb_misc[i];
+ for (i = 0; i < amd_nb_num(); i++) {
+ dev = node_to_amd_nb(i)->misc;
new_aper_base = read_aperture(dev, &new_aper_size);
if (!new_aper_base)
goto nommu;
@@ -725,13 +725,13 @@ static void gart_iommu_shutdown(void)
if (!no_agp)
return;
- if (!k8_northbridges.gart_supported)
+ if (!amd_nb_has_feature(AMD_NB_GART))
return;
- for (i = 0; i < k8_northbridges.num; i++) {
+ for (i = 0; i < amd_nb_num(); i++) {
u32 ctl;
- dev = k8_northbridges.nb_misc[i];
+ dev = node_to_amd_nb(i)->misc;
pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl);
ctl &= ~GARTEN;
@@ -749,14 +749,14 @@ int __init gart_iommu_init(void)
unsigned long scratch;
long i;
- if (!k8_northbridges.gart_supported)
+ if (!amd_nb_has_feature(AMD_NB_GART))
return 0;
#ifndef CONFIG_AGP_AMD64
no_agp = 1;
#else
/* Makefile puts PCI initialization via subsys_initcall first. */
- /* Add other K8 AGP bridge drivers here */
+ /* Add other AMD AGP bridge drivers here */
no_agp = no_agp ||
(agp_amd64_init() < 0) ||
(agp_copy_info(agp_bridge, &info) < 0);
@@ -765,7 +765,7 @@ int __init gart_iommu_init(void)
if (no_iommu ||
(!force_iommu && max_pfn <= MAX_DMA32_PFN) ||
!gart_iommu_aperture ||
- (no_agp && init_k8_gatt(&info) < 0)) {
+ (no_agp && init_amd_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
pr_warning("falling back to iommu=soft.\n");
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index b647215..7c23a0c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -86,8 +86,7 @@ void exit_thread(void)
void show_regs(struct pt_regs *regs)
{
show_registers(regs);
- show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
- regs->bp);
+ show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs));
}
void show_regs_common(void)
@@ -369,6 +368,7 @@ void default_idle(void)
{
if (hlt_use_halt()) {
trace_power_start(POWER_CSTATE, 1, smp_processor_id());
+ trace_cpu_idle(1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -439,8 +439,9 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
+ trace_cpu_idle((ax>>4)+1, smp_processor_id());
if (!need_resched()) {
- if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -455,7 +456,8 @@ static void mwait_idle(void)
{
if (!need_resched()) {
trace_power_start(POWER_CSTATE, 1, smp_processor_id());
- if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
+ trace_cpu_idle(1, smp_processor_id());
+ if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -476,10 +478,12 @@ static void mwait_idle(void)
static void poll_idle(void)
{
trace_power_start(POWER_CSTATE, 0, smp_processor_id());
+ trace_cpu_idle(0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(0);
+ trace_power_end(smp_processor_id());
+ trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
}
/*
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 96586c3..4b9befa 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -113,8 +113,8 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
-
trace_power_end(smp_processor_id());
+ trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id());
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3d7a3a..4c818a7 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -142,6 +142,8 @@ void cpu_idle(void)
start_critical_timings();
trace_power_end(smp_processor_id());
+ trace_cpu_idle(PWR_EVENT_EXIT,
+ smp_processor_id());
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index c495aa8d..fc7aae1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
#include <asm/pci_x86.h>
#include <asm/virtext.h>
#include <asm/cpu.h>
+#include <asm/nmi.h>
#ifdef CONFIG_X86_32
# include <linux/ctype.h>
@@ -747,7 +748,7 @@ static int crash_nmi_callback(struct notifier_block *self,
{
int cpu;
- if (val != DIE_NMI_IPI)
+ if (val != DIE_NMI)
return NOTIFY_OK;
cpu = raw_smp_processor_id();
@@ -778,6 +779,8 @@ static void smp_send_nmi_allbutself(void)
static struct notifier_block crash_nmi_nb = {
.notifier_call = crash_nmi_callback,
+ /* we want to be the first one called */
+ .priority = NMI_LOCAL_HIGH_PRIOR+1,
};
/* Halt all other CPUs, calling the specified function on each of them
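
This file and kgdb.c above both move from ad-hoc priorities to the named NMI_LOCAL_*_PRIOR constants: notifier chains invoke callbacks in descending priority order, so NMI_LOCAL_HIGH_PRIOR+1 puts the crash handler ahead of every regular local-NMI user, while NMI_LOCAL_LOW_PRIOR leaves kgdb last. A sketch of that ordering rule with the notifier struct trimmed down and the priority values invented for illustration:

#include <stdio.h>

struct notifier_block {
	int priority;
	struct notifier_block *next;
};

/* Sorted insert, higher priority first; notifier_chain_register()
 * orders entries the same way. */
static void chain_register(struct notifier_block **nl, struct notifier_block *n)
{
	while (*nl && (*nl)->priority >= n->priority)
		nl = &(*nl)->next;
	n->next = *nl;
	*nl = n;
}

int main(void)
{
	struct notifier_block *chain = NULL;
	struct notifier_block kgdb  = { .priority = 0 };	/* ~NMI_LOCAL_LOW_PRIOR      */
	struct notifier_block crash = { .priority = 10 };	/* ~NMI_LOCAL_HIGH_PRIOR + 1 */

	chain_register(&chain, &kgdb);
	chain_register(&chain, &crash);

	/* The crash callback now sits first in the chain. */
	printf("first priority: %d\n", chain->priority);
	return 0;
}
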
diff --git a/arch/x86/kernel/reboot_fixups_32.c b/arch/x86/kernel/reboot_fixups_32.c
index fda313e..c8e41e9 100644
--- a/arch/x86/kernel/reboot_fixups_32.c
+++ b/arch/x86/kernel/reboot_fixups_32.c
@@ -43,17 +43,33 @@ static void rdc321x_reset(struct pci_dev *dev)
outb(1, 0x92);
}
+static void ce4100_reset(struct pci_dev *dev)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ outb(0x2, 0xcf9);
+ udelay(50);
+ }
+}
+
struct device_fixup {
unsigned int vendor;
unsigned int device;
void (*reboot_fixup)(struct pci_dev *);
};
+/*
+ * PCI IDs used solely for fixups_table go here
+ */
+#define PCI_DEVICE_ID_INTEL_CE4100 0x0708
+
static const struct device_fixup fixups_table[] = {
{ PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, cs5530a_warm_reset },
{ PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CS5536_ISA, cs5536_warm_reset },
{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_SC1100_BRIDGE, cs5530a_warm_reset },
{ PCI_VENDOR_ID_RDC, PCI_DEVICE_ID_RDC_R6030, rdc321x_reset },
+{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CE4100, ce4100_reset },
};
/*
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a0f52af..d3cfe26c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -705,7 +705,7 @@ static u64 __init get_max_mapped(void)
void __init setup_arch(char **cmdline_p)
{
int acpi = 0;
- int k8 = 0;
+ int amd = 0;
unsigned long flags;
#ifdef CONFIG_X86_32
@@ -991,12 +991,12 @@ void __init setup_arch(char **cmdline_p)
acpi = acpi_numa_init();
#endif
-#ifdef CONFIG_K8_NUMA
+#ifdef CONFIG_AMD_NUMA
if (!acpi)
- k8 = !k8_numa_init(0, max_pfn);
+ amd = !amd_numa_init(0, max_pfn);
#endif
- initmem_init(0, max_pfn, acpi, k8);
+ initmem_init(0, max_pfn, acpi, amd);
memblock_find_dma_reserve();
dma32_reserve_bootmem();
@@ -1045,10 +1045,7 @@ void __init setup_arch(char **cmdline_p)
#endif
init_apic_mappings();
- ioapic_init_mappings();
-
- /* need to wait for io_apic is mapped */
- probe_nr_irqs_gsi();
+ ioapic_and_gsi_init();
kvm_guest_init();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 083e99d..763df77 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -97,12 +97,12 @@ static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
*/
static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex);
-void cpu_hotplug_driver_lock()
+void cpu_hotplug_driver_lock(void)
{
mutex_lock(&x86_cpu_hotplug_driver_mutex);
}
-void cpu_hotplug_driver_unlock()
+void cpu_hotplug_driver_unlock(void)
{
mutex_unlock(&x86_cpu_hotplug_driver_mutex);
}
@@ -281,6 +281,13 @@ static void __cpuinit smp_callin(void)
*/
smp_store_cpu_info(cpuid);
+ /*
+ * This must be done before setting cpu_online_mask
+ * or calling notify_cpu_starting.
+ */
+ set_cpu_sibling_map(raw_smp_processor_id());
+ wmb();
+
notify_cpu_starting(cpuid);
/*
@@ -316,16 +323,6 @@ notrace static void __cpuinit start_secondary(void *unused)
*/
check_tsc_sync_target();
- if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->mask(0);
- enable_NMI_through_LVT0();
- legacy_pic->unmask(0);
- }
-
- /* This must be done before setting cpu_online_mask */
- set_cpu_sibling_map(raw_smp_processor_id());
- wmb();
-
/*
* We need to hold call_lock, so there is no inconsistency
* between the time smp_call_function() determines number of
@@ -430,7 +427,7 @@ void __cpuinit set_cpu_sibling_map(int cpu)
cpumask_set_cpu(cpu, c->llc_shared_map);
- if (current_cpu_data.x86_max_cores == 1) {
+ if (__this_cpu_read(cpu_info.x86_max_cores) == 1) {
cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu));
c->booted_cores = 1;
return;
@@ -1061,8 +1058,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
printk(KERN_INFO "SMP mode deactivated.\n");
smpboot_clear_io_apic();
- localise_nmi_watchdog();
-
connect_bsp_APIC();
setup_local_APIC();
end_local_APIC_setup();
@@ -1094,7 +1089,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
preempt_disable();
smp_cpu_index_default();
- current_cpu_data = boot_cpu_data;
+ memcpy(__this_cpu_ptr(&cpu_info), &boot_cpu_data, sizeof(cpu_info));
cpumask_copy(cpu_callin_mask, cpumask_of(0));
mb();
/*
@@ -1166,6 +1161,20 @@ out:
preempt_enable();
}
+void arch_disable_nonboot_cpus_begin(void)
+{
+ /*
+	 * Avoid the smp alternatives switch during disable_nonboot_cpus().
+	 * In the suspend path, we will be back in SMP mode shortly anyway.
+ */
+ skip_smp_alternatives = true;
+}
+
+void arch_disable_nonboot_cpus_end(void)
+{
+ skip_smp_alternatives = false;
+}
+
void arch_enable_nonboot_cpus_begin(void)
{
set_mtrr_aps_delayed_init();
@@ -1196,7 +1205,6 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
#ifdef CONFIG_X86_IO_APIC
setup_ioapic_dest();
#endif
- check_nmi_watchdog();
mtrr_aps_init();
}
@@ -1341,8 +1349,6 @@ int native_cpu_disable(void)
if (cpu == 0)
return -EBUSY;
- if (nmi_watchdog == NMI_LOCAL_APIC)
- stop_apic_nmi_watchdog(NULL);
clear_local_APIC();
cpu_disable_common();
@@ -1377,7 +1383,7 @@ void play_dead_common(void)
mb();
/* Ack it */
- __get_cpu_var(cpu_state) = CPU_DEAD;
+ __this_cpu_write(cpu_state, CPU_DEAD);
/*
* With physical CPU hotplug, we should halt the cpu
@@ -1397,11 +1403,11 @@ static inline void mwait_play_dead(void)
int i;
void *mwait_ptr;
- if (!cpu_has(&current_cpu_data, X86_FEATURE_MWAIT))
+ if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_MWAIT))
return;
- if (!cpu_has(&current_cpu_data, X86_FEATURE_CLFLSH))
+ if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
return;
- if (current_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
return;
eax = CPUID_MWAIT_LEAF;
@@ -1452,7 +1458,7 @@ static inline void mwait_play_dead(void)
static inline void hlt_play_dead(void)
{
- if (current_cpu_data.x86 >= 4)
+ if (__this_cpu_read(cpu_info.x86) >= 4)
wbinvd();
while (1) {
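
The smp_callin() hunk moves set_cpu_sibling_map() plus a wmb() ahead of notify_cpu_starting(), i.e. ahead of anything that can make the CPU visible: fill in the data first, then announce. The same publish-before-announce pattern in portable C11 atomics, offered as an analogy for the ordering argument rather than the kernel's actual primitives:

#include <stdatomic.h>
#include <stdbool.h>

static int sibling_map;			/* stands in for the sibling masks   */
static atomic_bool cpu_online;		/* stands in for cpu_online_mask bit */

/* Booting CPU: publish the data, then announce with release ordering
 * (the wmb() before notify_cpu_starting() plays this role). */
static void bring_up(int siblings)
{
	sibling_map = siblings;
	atomic_store_explicit(&cpu_online, true, memory_order_release);
}

/* Any observer that sees the CPU online also sees its sibling map. */
static bool observe(int *out)
{
	if (!atomic_load_explicit(&cpu_online, memory_order_acquire))
		return false;
	*out = sibling_map;
	return true;
}

int main(void)
{
	int map = 0;
	bring_up(0x3);
	return observe(&map) && map == 0x3 ? 0 : 1;
}
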
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index b53c525..938c8e1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -73,22 +73,22 @@ static const struct stacktrace_ops save_stack_ops_nosched = {
*/
void save_stack_trace(struct stack_trace *trace)
{
- dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
+ dump_trace(current, NULL, NULL, &save_stack_ops, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace);
-void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp)
+void save_stack_trace_regs(struct stack_trace *trace, struct pt_regs *regs)
{
- dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace);
+ dump_trace(current, regs, NULL, &save_stack_ops, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
+ dump_trace(tsk, NULL, NULL, &save_stack_ops_nosched, trace);
if (trace->nr_entries < trace->max_entries)
trace->entries[trace->nr_entries++] = ULONG_MAX;
}
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index fb5cc5e1..25a28a2 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -22,10 +22,6 @@
#include <asm/hpet.h>
#include <asm/time.h>
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC)
-int timer_ack;
-#endif
-
#ifdef CONFIG_X86_64
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
#endif
@@ -63,20 +59,6 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
/* Keep nmi watchdog up to date */
inc_irq_stat(irq0_irqs);
- /* Optimized out for !IO_APIC and x86_64 */
- if (timer_ack) {
- /*
- * Subtle, when I/O APICs are used we have to ack timer IRQ
- * manually to deassert NMI lines for the watchdog if run
- * on an 82489DX-based system.
- */
- raw_spin_lock(&i8259A_lock);
- outb(0x0c, PIC_MASTER_OCW3);
- /* Ack the IRQ; AEOI will end it automatically. */
- inb(PIC_MASTER_POLL);
- raw_spin_unlock(&i8259A_lock);
- }
-
global_clock_event->event_handler(global_clock_event);
/* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 3af2dff..075d130 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -127,7 +127,7 @@ startup_64:
no_longmode:
hlt
jmp no_longmode
-#include "verify_cpu_64.S"
+#include "verify_cpu.S"
# Careful these need to be in the same 64K segment as the above;
tidt:
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index cb838ca..b9b6716 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,13 @@ EXPORT_SYMBOL_GPL(used_vectors);
static int ignore_nmis;
+int unknown_nmi_panic;
+/*
+ * Prevent the NMI reason port (0x61) from being accessed simultaneously;
+ * it can only be used in the NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
+
static inline void conditional_sti(struct pt_regs *regs)
{
if (regs->flags & X86_EFLAGS_IF)
@@ -300,16 +307,23 @@ gp_in_kernel:
die("general protection fault", regs, error_code);
}
-static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+static int __init setup_unknown_nmi_panic(char *str)
{
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ unknown_nmi_panic = 1;
+ return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
- printk(KERN_EMERG
- "You have some hardware problem, likely on the PCI bus.\n");
+static notrace __kprobes void
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
+{
+ pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+ /*
+ * On some machines, PCI SERR line is used to report memory
+ * errors. EDAC makes use of it.
+ */
#if defined(CONFIG_EDAC)
if (edac_handler_set()) {
edac_atomic_assert_error();
@@ -320,11 +334,11 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs)
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
+ /* Clear and disable the PCI SERR error line. */
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -332,22 +346,26 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
{
unsigned long i;
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ pr_emerg(
+ "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
show_registers(regs);
if (panic_on_io_nmi)
panic("NMI IOCK error: Not continuing");
/* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
- i = 2000;
- while (--i)
- udelay(1000);
+ i = 20000;
+ while (--i) {
+ touch_nmi_watchdog();
+ udelay(100);
+ }
- reason &= ~8;
- outb(reason, 0x61);
+ reason &= ~NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -366,69 +384,50 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
return;
}
#endif
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
- if (panic_on_unrecovered_nmi)
+ pr_emerg("Do you have a strange power saving mode enabled?\n");
+ if (unknown_nmi_panic || panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
}
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
{
unsigned char reason = 0;
- int cpu;
- cpu = smp_processor_id();
-
- /* Only the BSP gets external NMIs from the system. */
- if (!cpu)
- reason = get_nmi_reason();
-
- if (!(reason & 0xc0)) {
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
+ /*
+	 * A CPU-specific NMI must be processed before a non-CPU-specific
+	 * NMI; otherwise we may lose it, because a CPU-specific
+	 * NMI cannot be detected/processed on other CPUs.
+ */
+ if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+ return;
-#ifdef CONFIG_X86_LOCAL_APIC
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP)
- return;
+ /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+ raw_spin_lock(&nmi_reason_lock);
+ reason = get_nmi_reason();
-#ifndef CONFIG_LOCKUP_DETECTOR
+ if (reason & NMI_REASON_MASK) {
+ if (reason & NMI_REASON_SERR)
+ pci_serr_error(reason, regs);
+ else if (reason & NMI_REASON_IOCHK)
+ io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
/*
- * Ok, so this is none of the documented NMI sources,
- * so it must be the NMI watchdog.
+ * Reassert NMI in case it became active
+ * meanwhile as it's edge-triggered:
*/
- if (nmi_watchdog_tick(regs, reason))
- return;
- if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
- unknown_nmi_error(reason, regs);
-#else
- unknown_nmi_error(reason, regs);
+ reassert_nmi();
#endif
-
+ raw_spin_unlock(&nmi_reason_lock);
return;
}
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
- return;
+ raw_spin_unlock(&nmi_reason_lock);
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
- io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
- /*
- * Reassert NMI in case it became active meanwhile
- * as it's edge-triggered:
- */
- reassert_nmi();
-#endif
+ unknown_nmi_error(reason, regs);
}
dotraplinkage notrace __kprobes void
@@ -446,14 +445,12 @@ do_nmi(struct pt_regs *regs, long error_code)
void stop_nmi(void)
{
- acpi_nmi_disable();
ignore_nmis++;
}
void restart_nmi(void)
{
ignore_nmis--;
- acpi_nmi_enable();
}
/* May run on IST stack. */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 0c40d8b..823f79a 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -659,7 +659,7 @@ void restore_sched_clock_state(void)
local_irq_save(flags);
- __get_cpu_var(cyc2ns_offset) = 0;
+ __this_cpu_write(cyc2ns_offset, 0);
offset = cyc2ns_suspend - sched_clock();
for_each_possible_cpu(cpu)
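The per-CPU accessor change is mechanical but worth noting: __this_cpu_write() lets the compiler emit a single %gs-segment-relative store instead of first computing this CPU's copy of the variable's address. A two-line comparison, in a hypothetical helper and assuming the per-CPU declaration already present in tsc.c:

	DECLARE_PER_CPU(unsigned long long, cyc2ns_offset);

	static void reset_cyc2ns_offset(void)	/* hypothetical helper */
	{
		__get_cpu_var(cyc2ns_offset) = 0;	/* old: compute address, then store */
		__this_cpu_write(cyc2ns_offset, 0);	/* new: one %gs-prefixed mov */
	}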
@@ -872,6 +872,9 @@ __cpuinit int unsynchronized_tsc(void)
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
+
+ if (tsc_clocksource_reliable)
+ return 0;
/*
* Intel systems are normally all synchronized.
* Exceptions must mark TSC as unstable:
@@ -879,14 +882,92 @@ __cpuinit int unsynchronized_tsc(void)
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
/* assume multi socket systems are not synchronized: */
if (num_possible_cpus() > 1)
- tsc_unstable = 1;
+ return 1;
}
- return tsc_unstable;
+ return 0;
+}
+
+
+static void tsc_refine_calibration_work(struct work_struct *work);
+static DECLARE_DELAYED_WORK(tsc_irqwork, tsc_refine_calibration_work);
+/**
+ * tsc_refine_calibration_work - Further refine tsc freq calibration
+ * @work: ignored
+ *
+ * This function uses delayed work over a period of a
+ * second to further refine the TSC freq value. Since this is
+ * timer based, instead of loop based, we don't block the boot
+ * process while this longer calibration is done.
+ *
+ * If there are any calibration anomalies (too many SMIs, etc),
+ * or the refined calibration differs from the fast early
+ * calibration by more than 1%, we throw out the new calibration
+ * and use the early calibration.
+ */
+static void tsc_refine_calibration_work(struct work_struct *work)
+{
+ static u64 tsc_start = -1, ref_start;
+ static int hpet;
+ u64 tsc_stop, ref_stop, delta;
+ unsigned long freq;
+
+ /* Don't bother refining TSC on unstable systems */
+ if (check_tsc_unstable())
+ goto out;
+
+ /*
+ * Since the work is started early in boot, we may be
+ * delayed the first time we expire. So set the workqueue
+ * again once we know timers are working.
+ */
+ if (tsc_start == -1) {
+ /*
+ * Only set hpet once, to avoid mixing hardware
+ * if the hpet becomes enabled later.
+ */
+ hpet = is_hpet_enabled();
+ schedule_delayed_work(&tsc_irqwork, HZ);
+ tsc_start = tsc_read_refs(&ref_start, hpet);
+ return;
+ }
+
+ tsc_stop = tsc_read_refs(&ref_stop, hpet);
+
+ /* hpet or pmtimer available? */
+ if (!hpet && !ref_start && !ref_stop)
+ goto out;
+
+ /* Check whether the sampling was disturbed by an SMI */
+ if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)
+ goto out;
+
+ delta = tsc_stop - tsc_start;
+ delta *= 1000000LL;
+ if (hpet)
+ freq = calc_hpet_ref(delta, ref_start, ref_stop);
+ else
+ freq = calc_pmtimer_ref(delta, ref_start, ref_stop);
+
+ /* Make sure we're within 1% */
+ if (abs(tsc_khz - freq) > tsc_khz/100)
+ goto out;
+
+ tsc_khz = freq;
+ printk(KERN_INFO "Refined TSC clocksource calibration: "
+ "%lu.%03lu MHz.\n", (unsigned long)tsc_khz / 1000,
+ (unsigned long)tsc_khz % 1000);
+
+out:
+ clocksource_register_khz(&clocksource_tsc, tsc_khz);
}
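Worked numbers for the 1% guard above, purely illustrative: with an early calibration of tsc_khz = 2,933,437, the tolerance tsc_khz/100 comes to 29,334 kHz, so a refined result of 2,933,687 kHz (off by 250 kHz) is accepted and becomes the new tsc_khz, while anything beyond roughly +/-29.3 MHz is discarded in favor of the early value. Either way, the function falls through to the out label and registers the TSC clocksource with whatever tsc_khz survived.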
-static void __init init_tsc_clocksource(void)
+
+static int __init init_tsc_clocksource(void)
{
+ if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+ return 0;
+
if (tsc_clocksource_reliable)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
/* lower the rating if we already know its unstable: */
@@ -894,8 +975,14 @@ static void __init init_tsc_clocksource(void)
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
}
- clocksource_register_khz(&clocksource_tsc, tsc_khz);
+ schedule_delayed_work(&tsc_irqwork, 0);
+ return 0;
}
+/*
+ * We use device_initcall here to ensure we run after the HPET
+ * is fully initialized, which may not happen until fs_initcall time.
+ */
+device_initcall(init_tsc_clocksource);
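Initcall levels run in a fixed order, so the comment above is load-bearing. A sketch of the relevant ordering, assuming hpet_late_init() is still an fs_initcall in this tree:

	/*
	 * early -> core -> postcore -> arch -> subsys -> fs -> device -> late
	 *
	 * hpet_late_init() runs at fs_initcall level;
	 * init_tsc_clocksource() now runs one level later, at
	 * device_initcall, so is_hpet_enabled() already reports the
	 * final HPET state when tsc_refine_calibration_work() latches
	 * its reference timer.
	 */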
void __init tsc_init(void)
{
@@ -949,6 +1036,5 @@ void __init tsc_init(void)
mark_tsc_unstable("TSCs unsynchronized");
check_system_tsc_reliable();
- init_tsc_clocksource();
}
diff --git a/arch/x86/kernel/verify_cpu_64.S b/arch/x86/kernel/verify_cpu.S
index 56a8c2a..0edefc1 100644
--- a/arch/x86/kernel/verify_cpu_64.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -7,6 +7,7 @@
* Copyright (c) 2007 Andi Kleen (ak@suse.de)
* Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com)
* Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com)
+ * Copyright (c) 2010 Kees Cook (kees.cook@canonical.com)
*
* This source code is licensed under the GNU General Public License,
* Version 2. See the file COPYING for more details.
@@ -14,18 +15,17 @@
* This is a common code for verification whether CPU supports
* long mode and SSE or not. It is not called directly instead this
* file is included at various places and compiled in that context.
- * Following are the current usage.
+ * This file is expected to run in 32-bit code. Current users:
*
- * This file is included by both 16bit and 32bit code.
+ * arch/x86/boot/compressed/head_64.S: Boot cpu verification
+ * arch/x86/kernel/trampoline_64.S: secondary processor verification
+ * arch/x86/kernel/head_32.S: processor startup
*
- * arch/x86_64/boot/setup.S : Boot cpu verification (16bit)
- * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit)
- * arch/x86_64/kernel/trampoline.S: secondary processor verfication (16bit)
- * arch/x86_64/kernel/acpi/wakeup.S:Verfication at resume (16bit)
- *
- * verify_cpu, returns the status of cpu check in register %eax.
+ * verify_cpu returns the status of long mode and SSE in register %eax.
* 0: Success 1: Failure
*
+ * On Intel, the XD_DISABLE flag will be cleared as a side-effect.
+ *
* The caller needs to check for the error code and take the action
* appropriately. Either display a message or halt.
*/
@@ -62,8 +62,41 @@ verify_cpu:
cmpl $0x444d4163,%ecx
jnz verify_cpu_noamd
mov $1,%di # cpu is from AMD
+ jmp verify_cpu_check
verify_cpu_noamd:
+ cmpl $0x756e6547,%ebx # GenuineIntel?
+ jnz verify_cpu_check
+ cmpl $0x49656e69,%edx
+ jnz verify_cpu_check
+ cmpl $0x6c65746e,%ecx
+ jnz verify_cpu_check
+
+ # only touch MSR_IA32_MISC_ENABLE when:
+ # family > 6 || (family == 6 && model >= 0xd)
+ movl $0x1, %eax # check CPU family and model
+ cpuid
+ movl %eax, %ecx
+
+ andl $0x0ff00f00, %eax # mask family and extended family
+ shrl $8, %eax
+ cmpl $6, %eax
+ ja verify_cpu_clear_xd # family > 6, ok
+ jb verify_cpu_check # family < 6, skip
+
+ andl $0x000f00f0, %ecx # mask model and extended model
+ shrl $4, %ecx
+ cmpl $0xd, %ecx
+ jb verify_cpu_check # family == 6, model < 0xd, skip
+
+verify_cpu_clear_xd:
+ movl $MSR_IA32_MISC_ENABLE, %ecx
+ rdmsr
+ btrl $2, %edx # clear MSR_IA32_MISC_ENABLE_XD_DISABLE
+ jnc verify_cpu_check # only write MSR if bit was changed
+ wrmsr
+
+verify_cpu_check:
movl $0x1,%eax # Does the cpu have what it takes
cpuid
andl $REQUIRED_MASK0,%edx
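The family/model gate above is easier to audit in C. Here is a sketch of the equivalent logic in a hypothetical helper, simplified to ignore the extended-family bits (harmless here, since any nonzero extended family already means "family > 6"); cpuid_eax(), rdmsrl() and wrmsrl() are the standard kernel helpers, and btrl $2,%edx on the MSR's high half corresponds to MSR bit 34, MSR_IA32_MISC_ENABLE_XD_DISABLE:

	/*
	 * CPUID.1:EAX layout: [3:0] stepping, [7:4] model, [11:8] family,
	 *                     [19:16] extended model, [27:20] extended family
	 */
	static void clear_xd_disable_if_set(void)	/* hypothetical helper */
	{
		unsigned int eax = cpuid_eax(1);
		unsigned int family = (eax >> 8) & 0xf;
		unsigned int model  = ((eax >> 12) & 0xf0) | ((eax >> 4) & 0xf);

		if (family > 6 || (family == 6 && model >= 0xd)) {
			u64 misc;

			rdmsrl(MSR_IA32_MISC_ENABLE, misc);
			if (misc & (1ULL << 34)) {	/* XD_DISABLE set? */
				misc &= ~(1ULL << 34);
				wrmsrl(MSR_IA32_MISC_ENABLE, misc); /* re-enable NX */
			}
		}
	}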
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e03530a..bf47007 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -69,7 +69,7 @@ jiffies_64 = jiffies;
PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
- data PT_LOAD FLAGS(7); /* RWE */
+ data PT_LOAD FLAGS(6); /* RW_ */
#ifdef CONFIG_X86_64
user PT_LOAD FLAGS(5); /* R_E */
#ifdef CONFIG_SMP
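FLAGS() takes the raw ELF p_flags bitmask, so this change flips exactly one bit: PF_X = 1, PF_W = 2, PF_R = 4. A quick reference, checkable against the segment flags column of `readelf -l vmlinux`:

	FLAGS(5)	/* R_E = PF_R | PF_X : text */
	FLAGS(6)	/* RW_ = PF_R | PF_W : data, no longer executable */
	FLAGS(7)	/* RWE = PF_R | PF_W | PF_X : the old data flags */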
@@ -116,6 +116,10 @@ SECTIONS
EXCEPTION_TABLE(16) :text = 0x9090
+#if defined(CONFIG_DEBUG_RODATA)
+ /* .text should occupy whole number of pages */
+ . = ALIGN(PAGE_SIZE);
+#endif
X64_ALIGN_DEBUG_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
X64_ALIGN_DEBUG_RODATA_END
@@ -335,7 +339,7 @@ SECTIONS
__bss_start = .;
*(.bss..page_aligned)
*(.bss)
- . = ALIGN(4);
+ . = ALIGN(PAGE_SIZE);
__bss_stop = .;
}
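Bumping the .bss tail from 4-byte to page alignment keeps every section boundary page-aligned, which the RW_ data segment and the DEBUG_RODATA alignment above depend on: page-granular protections cannot be applied cleanly if a section ends mid-page and shares that page with whatever follows. The result is easy to confirm with `readelf -S vmlinux`, where .bss should now end on a 0x1000 boundary.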