Diffstat (limited to 'arch/x86')
27 files changed, 188 insertions, 125 deletions
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 8aa1b59..e8c8881 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -74,7 +74,7 @@ endif ifdef CONFIG_CC_STACKPROTECTOR cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh - ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(biarch)),y) + ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y) stackp-y := -fstack-protector KBUILD_CFLAGS += $(stackp-y) else diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index b86feab..518bb99 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -50,7 +50,12 @@ /* * Reload arg registers from stack in case ptrace changed them. * We don't reload %eax because syscall_trace_enter() returned - * the value it wants us to use in the table lookup. + * the %rax value we should see. Instead, we just truncate that + * value to 32 bits again as we did on entry from user mode. + * If it's a new value set by user_regset during entry tracing, + * this matches the normal truncation of the user-mode value. + * If it's -1 to make us punt the syscall, then (u32)-1 is still + * an appropriately invalid value. */ .macro LOAD_ARGS32 offset, _r9=0 .if \_r9 @@ -60,6 +65,7 @@ movl \offset+48(%rsp),%edx movl \offset+56(%rsp),%esi movl \offset+64(%rsp),%edi + movl %eax,%eax /* zero extension */ .endm .macro CFI_STARTPROC32 simple @@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz sysenter_tracesys - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys sysenter_do_call: IA32_ARG_FIXUP @@ -195,7 +201,7 @@ sysexit_from_sys_call: movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ call audit_syscall_entry movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys movl %ebx,%edi /* reload 1st syscall arg */ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ @@ -248,7 +254,7 @@ sysenter_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */ jmp sysenter_do_call CFI_ENDPROC @@ -314,7 +320,7 @@ ENTRY(ia32_cstar_target) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) CFI_REMEMBER_STATE jnz cstar_tracesys - cmpl $IA32_NR_syscalls-1,%eax + cmpq $IA32_NR_syscalls-1,%rax ja ia32_badsys cstar_do_call: IA32_ARG_FIXUP 1 @@ -367,7 +373,7 @@ cstar_tracesys: LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */ RESTORE_REST xchgl %ebp,%r9d - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */ jmp cstar_do_call END(ia32_cstar_target) @@ -425,7 +431,7 @@ ENTRY(ia32_syscall) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) jnz ia32_tracesys - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja ia32_badsys ia32_do_call: IA32_ARG_FIXUP @@ -444,7 +450,7 @@ ia32_tracesys: call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - cmpl $(IA32_NR_syscalls-1),%eax + cmpq $(IA32_NR_syscalls-1),%rax ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */ jmp ia32_do_call END(ia32_syscall) diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 
306160e..1d9cd27 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 781a50b..c6fbb7b 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -296,6 +296,7 @@ extern const char * const x86_power_flags[32]; #endif /* CONFIG_X86_64 */ +#if __GNUC__ >= 4 /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These are only valid after alternatives have run, but will statically @@ -304,7 +305,7 @@ extern const char * const x86_power_flags[32]; */ static __always_inline __pure bool __static_cpu_has(u16 bit) { -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +#if __GNUC__ > 4 || __GNUC_MINOR__ >= 5 asm goto("1: jmp %l[t_no]\n" "2:\n" ".section .altinstructions,\"a\"\n" @@ -345,7 +346,6 @@ static __always_inline __pure bool __static_cpu_has(u16 bit) #endif } -#if __GNUC__ >= 4 #define static_cpu_has(bit) \ ( \ __builtin_constant_p(boot_cpu_has(bit)) ? \ diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h index 004e6e2..1d5c08a 100644 --- a/arch/x86/include/asm/hpet.h +++ b/arch/x86/include/asm/hpet.h @@ -68,7 +68,6 @@ extern unsigned long force_hpet_address; extern u8 hpet_blockid; extern int hpet_force_user; extern u8 hpet_msi_disable; -extern u8 hpet_readback_cmp; extern int is_hpet_enabled(void); extern int hpet_enable(void); extern void hpet_disable(void); diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 528a11e..824ca07 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -20,7 +20,7 @@ struct arch_hw_breakpoint { #include <linux/list.h> /* Available HW breakpoint length encodings */ -#define X86_BREAKPOINT_LEN_X 0x00 +#define X86_BREAKPOINT_LEN_X 0x40 #define X86_BREAKPOINT_LEN_1 0x40 #define X86_BREAKPOINT_LEN_2 0x44 #define X86_BREAKPOINT_LEN_4 0x4c diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index f35eb45..c4191b3 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -26,11 +26,11 @@ #include <asm/pgtable.h> #include <asm/tlbflush.h> -void * +void __iomem * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); void -iounmap_atomic(void *kvaddr, enum km_type type); +iounmap_atomic(void __iomem *kvaddr, enum km_type type); int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot); diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 51cfd73..1f99ecf 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -152,9 +152,14 @@ struct x86_emulate_ops { struct operand { enum { OP_REG, OP_MEM, OP_IMM, OP_NONE } type; unsigned int bytes; - unsigned long orig_val, *ptr; + union { + unsigned long orig_val; + u64 orig_val64; + }; + unsigned long *ptr; union { unsigned long val; + u64 val64; char valptr[sizeof(unsigned long) + 2]; }; }; diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 404a880..d395540 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -27,6 +27,9 @@ extern struct 
pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node); extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); +#ifdef CONFIG_PCI + +#ifdef CONFIG_PCI_DOMAINS static inline int pci_domain_nr(struct pci_bus *bus) { struct pci_sysdata *sd = bus->sysdata; @@ -37,13 +40,12 @@ static inline int pci_proc_domain(struct pci_bus *bus) { return pci_domain_nr(bus); } - +#endif /* Can be used to override the logic in pci_scan_bus for skipping already-configured bus numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the loader */ -#ifdef CONFIG_PCI extern unsigned int pcibios_assign_all_busses(void); extern int pci_legacy_init(void); # ifdef CONFIG_ACPI diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0925676..fedf32a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -11,6 +11,8 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_pvclock.o = -pg +CFLAGS_REMOVE_kvmclock.o = -pg CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg endif diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 7b598b8..f744f54 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -698,9 +698,11 @@ void __init uv_system_init(void) for (j = 0; j < 64; j++) { if (!test_bit(j, &present)) continue; - uv_blade_info[blade].pnode = (i * 64 + j); + pnode = (i * 64 + j); + uv_blade_info[blade].pnode = pnode; uv_blade_info[blade].nr_possible_cpus = 0; uv_blade_info[blade].nr_online_cpus = 0; + max_pnode = max(pnode, max_pnode); blade++; } } @@ -738,7 +740,6 @@ void __init uv_system_init(void) uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid); uv_node_to_blade[nid] = blade; uv_cpu_to_blade[cpu] = blade; - max_pnode = max(pnode, max_pnode); } /* Add blade/pnode info for nodes without cpus */ @@ -750,7 +751,6 @@ void __init uv_system_init(void) pnode = (paddr >> m_val) & pnode_mask; blade = boot_pnode_to_blade(pnode); uv_node_to_blade[nid] = blade; - max_pnode = max(pnode, max_pnode); } map_gru_high(max_pnode); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 224392d..5e97529 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -530,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) err = -ENOMEM; goto out; } - if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { + if (!zalloc_cpumask_var(&b->cpus, GFP_KERNEL)) { kfree(b); err = -ENOMEM; goto out; @@ -543,7 +543,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) #ifndef CONFIG_SMP cpumask_setall(b->cpus); #else - cpumask_copy(b->cpus, c->llc_shared_map); + cpumask_set_cpu(cpu, b->cpus); #endif per_cpu(threshold_banks, cpu)[bank] = b; diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index c2a8b26..d9368ee 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -202,10 +202,11 @@ static int therm_throt_process(bool new_event, int event, int level) #ifdef CONFIG_SYSFS /* Add/Remove thermal_throttle interface for CPU device: */ -static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) +static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev, + unsigned int cpu) { int err; - struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); + 
struct cpuinfo_x86 *c = &cpu_data(cpu); err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); if (err) @@ -251,7 +252,7 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: mutex_lock(&therm_cpu_lock); - err = thermal_throttle_add_dev(sys_dev); + err = thermal_throttle_add_dev(sys_dev, cpu); mutex_unlock(&therm_cpu_lock); WARN_ON(err); break; @@ -287,7 +288,7 @@ static __init int thermal_throttle_init_device(void) #endif /* connect live CPUs to sysfs */ for_each_online_cpu(cpu) { - err = thermal_throttle_add_dev(get_cpu_sysdev(cpu)); + err = thermal_throttle_add_dev(get_cpu_sysdev(cpu), cpu); WARN_ON(err); } #ifdef CONFIG_HOTPLUG_CPU diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f2da20f..03a5b03 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -102,6 +102,7 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; int enabled; int n_events; @@ -1010,6 +1011,7 @@ static int x86_pmu_start(struct perf_event *event) x86_perf_event_set_period(event); cpuc->events[idx] = event; __set_bit(idx, cpuc->active_mask); + __set_bit(idx, cpuc->running); x86_pmu.enable(event); perf_event_update_userpage(event); @@ -1141,8 +1143,16 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) + if (!test_bit(idx, cpuc->active_mask)) { + /* + * Though we deactivated the counter some cpus + * might still deliver spurious interrupts still + * in flight. Catch them: + */ + if (__test_and_clear_bit(idx, cpuc->running)) + handled++; continue; + } event = cpuc->events[idx]; hwc = &event->hw; @@ -1154,7 +1164,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) /* * event overflow */ - handled = 1; + handled++; data.period = event->hw.last_period; if (!x86_perf_event_set_period(event)) @@ -1200,12 +1210,20 @@ void perf_events_lapic_init(void) apic_write(APIC_LVTPC, APIC_DM_NMI); } +struct pmu_nmi_state { + unsigned int marked; + int handled; +}; + +static DEFINE_PER_CPU(struct pmu_nmi_state, pmu_nmi); + static int __kprobes perf_event_nmi_handler(struct notifier_block *self, unsigned long cmd, void *__args) { struct die_args *args = __args; - struct pt_regs *regs; + unsigned int this_nmi; + int handled; if (!atomic_read(&active_events)) return NOTIFY_DONE; @@ -1214,22 +1232,47 @@ perf_event_nmi_handler(struct notifier_block *self, case DIE_NMI: case DIE_NMI_IPI: break; - + case DIE_NMIUNKNOWN: + this_nmi = percpu_read(irq_stat.__nmi_count); + if (this_nmi != __get_cpu_var(pmu_nmi).marked) + /* let the kernel handle the unknown nmi */ + return NOTIFY_DONE; + /* + * This one is a PMU back-to-back nmi. Two events + * trigger 'simultaneously' raising two back-to-back + * NMIs. If the first NMI handles both, the latter + * will be empty and daze the CPU. So, we drop it to + * avoid false-positive 'unknown nmi' messages. + */ + return NOTIFY_STOP; default: return NOTIFY_DONE; } - regs = args->regs; - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* - * Can't rely on the handled return value to say it was our NMI, two - * events could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. 
- */ - x86_pmu.handle_irq(regs); + + handled = x86_pmu.handle_irq(args->regs); + if (!handled) + return NOTIFY_DONE; + + this_nmi = percpu_read(irq_stat.__nmi_count); + if ((handled > 1) || + /* the next nmi could be a back-to-back nmi */ + ((__get_cpu_var(pmu_nmi).marked == this_nmi) && + (__get_cpu_var(pmu_nmi).handled > 1))) { + /* + * We could have two subsequent back-to-back nmis: The + * first handles more than one counter, the 2nd + * handles only one counter and the 3rd handles no + * counter. + * + * This is the 2nd nmi because the previous was + * handling more than one counter. We will mark the + * next (3rd) and then drop it if unhandled. + */ + __get_cpu_var(pmu_nmi).marked = this_nmi + 1; + __get_cpu_var(pmu_nmi).handled = handled; + } return NOTIFY_STOP; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d8d86d0..ee05c90 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -712,7 +712,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) struct perf_sample_data data; struct cpu_hw_events *cpuc; int bit, loops; - u64 ack, status; + u64 status; + int handled = 0; perf_sample_data_init(&data, 0); @@ -728,6 +729,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) loops = 0; again: + intel_pmu_ack_status(status); if (++loops > 100) { WARN_ONCE(1, "perfevents: irq loop stuck!\n"); perf_event_print_debug(); @@ -736,19 +738,22 @@ again: } inc_irq_stat(apic_perf_irqs); - ack = status; intel_pmu_lbr_read(); /* * PEBS overflow sets bit 62 in the global status register */ - if (__test_and_clear_bit(62, (unsigned long *)&status)) + if (__test_and_clear_bit(62, (unsigned long *)&status)) { + handled++; x86_pmu.drain_pebs(regs); + } for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; + handled++; + if (!test_bit(bit, cpuc->active_mask)) continue; @@ -761,8 +766,6 @@ again: x86_pmu_stop(event); } - intel_pmu_ack_status(ack); - /* * Repeat if there is more work to be done: */ @@ -772,7 +775,7 @@ again: done: intel_pmu_enable_all(0); - return 1; + return handled; } static struct event_constraint * diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 7e578e9..b560db3 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -692,7 +692,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) inc_irq_stat(apic_perf_irqs); } - return handled > 0; + return handled; } /* diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index e5cc7e8..ebdb85c 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -18,7 +18,6 @@ #include <asm/apic.h> #include <asm/iommu.h> #include <asm/gart.h> -#include <asm/hpet.h> static void __init fix_hypertransport_config(int num, int slot, int func) { @@ -192,21 +191,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -/* - * Force the read back of the CMP register in hpet_next_event() - * to work around the problem that the CMP register write seems to be - * delayed. See hpet_next_event() for details. - * - * We do this on all SMBUS incarnations for now until we have more - * information about the affected chipsets. 
- */ -static void __init ati_hpet_bugs(int num, int slot, int func) -{ -#ifdef CONFIG_HPET_TIMER - hpet_readback_cmp = 1; -#endif -} - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -236,8 +220,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, - { PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_hpet_bugs }, {} }; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 351f9c0..410fdb3 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -35,7 +35,6 @@ unsigned long hpet_address; u8 hpet_blockid; /* OS timer block num */ u8 hpet_msi_disable; -u8 hpet_readback_cmp; #ifdef CONFIG_PCI_MSI static unsigned long hpet_num_timers; @@ -395,23 +394,27 @@ static int hpet_next_event(unsigned long delta, * at that point and we would wait for the next hpet interrupt * forever. We found out that reading the CMP register back * forces the transfer so we can rely on the comparison with - * the counter register below. + * the counter register below. If the read back from the + * compare register does not match the value we programmed + * then we might have a real hardware problem. We can not do + * much about it here, but at least alert the user/admin with + * a prominent warning. * - * That works fine on those ATI chipsets, but on newer Intel - * chipsets (ICH9...) this triggers due to an erratum: Reading - * the comparator immediately following a write is returning - * the old value. + * An erratum on some chipsets (ICH9,..), results in + * comparator read immediately following a write returning old + * value. Workaround for this is to read this value second + * time, when first read returns old value. * - * We restrict the read back to the affected ATI chipsets (set - * by quirks) and also run it with hpet=verbose for debugging - * purposes. + * In fact the write to the comparator register is delayed up + * to two HPET cycles so the workaround we tried to restrict + * the readback to those known to be borked ATI chipsets + * failed miserably. So we give up on optimizations forever + * and penalize all HPET incarnations unconditionally. */ - if (hpet_readback_cmp || hpet_verbose) { - u32 cmp = hpet_readl(HPET_Tn_CMP(timer)); - - if (cmp != cnt) + if (unlikely((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt)) { + if (hpet_readl(HPET_Tn_CMP(timer)) != cnt) printk_once(KERN_WARNING - "hpet: compare register read back failed.\n"); + "hpet: compare register read back failed.\n"); } return (s32)(hpet_readl(HPET_COUNTER) - cnt) >= 0 ? 
-ETIME : 0; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index a474ec3..ff15c9d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -206,11 +206,27 @@ int arch_check_bp_in_kernelspace(struct perf_event *bp) int arch_bp_generic_fields(int x86_len, int x86_type, int *gen_len, int *gen_type) { - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_X: + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + if (x86_len != X86_BREAKPOINT_LEN_X) + return -EINVAL; + + *gen_type = HW_BREAKPOINT_X; *gen_len = sizeof(long); + return 0; + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; + break; + default: + return -EINVAL; + } + + /* Len */ + switch (x86_len) { case X86_BREAKPOINT_LEN_1: *gen_len = HW_BREAKPOINT_LEN_1; break; @@ -229,21 +245,6 @@ int arch_bp_generic_fields(int x86_len, int x86_type, return -EINVAL; } - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - return 0; } @@ -316,9 +317,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) ret = -EINVAL; switch (info->len) { - case X86_BREAKPOINT_LEN_X: - align = sizeof(long) -1; - break; case X86_BREAKPOINT_LEN_1: align = 0; break; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index a874495..e2a5952 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -45,8 +45,7 @@ void __init setup_trampoline_page_table(void) /* Copy kernel address range */ clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, - KERNEL_PGD_BOUNDARY)); + KERNEL_PGD_PTRS); /* Initialize low mappings */ clone_pgd_range(trampoline_pg_dir, diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index d632934..26a863a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -655,7 +655,7 @@ void restore_sched_clock_state(void) local_irq_save(flags); - get_cpu_var(cyc2ns_offset) = 0; + __get_cpu_var(cyc2ns_offset) = 0; offset = cyc2ns_suspend - sched_clock(); for_each_possible_cpu(cpu) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b38bd8b..66ca98a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1870,17 +1870,16 @@ static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) { struct decode_cache *c = &ctxt->decode; - u64 old = c->dst.orig_val; + u64 old = c->dst.orig_val64; if (((u32) (old >> 0) != (u32) c->regs[VCPU_REGS_RAX]) || ((u32) (old >> 32) != (u32) c->regs[VCPU_REGS_RDX])) { - c->regs[VCPU_REGS_RAX] = (u32) (old >> 0); c->regs[VCPU_REGS_RDX] = (u32) (old >> 32); ctxt->eflags &= ~EFLG_ZF; } else { - c->dst.val = ((u64)c->regs[VCPU_REGS_RCX] << 32) | - (u32) c->regs[VCPU_REGS_RBX]; + c->dst.val64 = ((u64)c->regs[VCPU_REGS_RCX] << 32) | + (u32) c->regs[VCPU_REGS_RBX]; ctxt->eflags |= EFLG_ZF; } @@ -2616,7 +2615,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) c->src.valptr, c->src.bytes); if (rc != X86EMUL_CONTINUE) goto done; - c->src.orig_val = c->src.val; + c->src.orig_val64 = c->src.val64; } if (c->src2.type == OP_MEM) { diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 8d10c06..4b7b73c 
100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -64,6 +64,9 @@ static void pic_unlock(struct kvm_pic *s) if (!found) found = s->kvm->bsp_vcpu; + if (!found) + return; + kvm_vcpu_kick(found); } } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index ffed068..63c3145 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -43,7 +43,6 @@ struct kvm_kpic_state { u8 irr; /* interrupt request register */ u8 imr; /* interrupt mask register */ u8 isr; /* interrupt service register */ - u8 isr_ack; /* interrupt ack detection */ u8 priority_add; /* highest irq priority */ u8 irq_base; u8 read_reg_select; @@ -56,6 +55,7 @@ struct kvm_kpic_state { u8 init4; /* true if 4 byte init */ u8 elcr; /* PIIX edge/trigger selection */ u8 elcr_mask; + u8 isr_ack; /* interrupt ack detection */ struct kvm_pic *pics_state; }; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 9257510..9d5f558 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -324,9 +324,8 @@ static void lguest_load_gdt(const struct desc_ptr *desc) } /* - * For a single GDT entry which changes, we do the lazy thing: alter our GDT, - * then tell the Host to reload the entire thing. This operation is so rare - * that this naive implementation is reasonable. + * For a single GDT entry which changes, we simply change our copy and + * then tell the host about it. */ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, const void *desc, int type) @@ -338,9 +337,13 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, int entrynum, } /* - * OK, I lied. There are three "thread local storage" GDT entries which change + * There are three "thread local storage" GDT entries which change * on every context switch (these three entries are how glibc implements - * __thread variables). So we have a hypercall specifically for this case. + * __thread variables). As an optimization, we have a hypercall + * specifically for this case. + * + * Wouldn't it be nicer to have a general LOAD_GDT_ENTRIES hypercall + * which took a range of entries? 
*/ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 84e236c..72fc70c 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -74,7 +74,7 @@ void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) /* * Map 'pfn' using fixed map 'type' and protections 'prot' */ -void * +void __iomem * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) { /* @@ -86,12 +86,12 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) prot = PAGE_KERNEL_UC_MINUS; - return kmap_atomic_prot_pfn(pfn, type, prot); + return (void __force __iomem *) kmap_atomic_prot_pfn(pfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); void -iounmap_atomic(void *kvaddr, enum km_type type) +iounmap_atomic(void __iomem *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index f6b48f6..f1575c9 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -568,8 +568,13 @@ static int __init init_sysfs(void) int error; error = sysdev_class_register(&oprofile_sysclass); - if (!error) - error = sysdev_register(&device_oprofile); + if (error) + return error; + + error = sysdev_register(&device_oprofile); + if (error) + sysdev_class_unregister(&oprofile_sysclass); + return error; } @@ -580,8 +585,10 @@ static void exit_sysfs(void) } #else -#define init_sysfs() do { } while (0) -#define exit_sysfs() do { } while (0) + +static inline int init_sysfs(void) { return 0; } +static inline void exit_sysfs(void) { } + #endif /* CONFIG_PM */ static int __init p4_init(char **cpu_type) @@ -664,7 +671,10 @@ static int __init ppro_init(char **cpu_type) case 14: *cpu_type = "i386/core"; break; - case 15: case 23: + case 0x0f: + case 0x16: + case 0x17: + case 0x1d: *cpu_type = "i386/core_2"; break; case 0x1a: @@ -695,6 +705,8 @@ int __init op_nmi_init(struct oprofile_operations *ops) char *cpu_type = NULL; int ret = 0; + using_nmi = 0; + if (!cpu_has_apic) return -ENODEV; @@ -774,7 +786,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) mux_init(ops); - init_sysfs(); + ret = init_sysfs(); + if (ret) + return ret; + using_nmi = 1; printk(KERN_INFO "oprofile: using NMI interrupt.\n"); return 0; |
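Note on the ia32entry.S hunks above: the added movl %eax,%eax zero-extension and the switch from cmpl to cmpq make the bounds check cover the full 64-bit %rax that is later used to index the syscall table, rather than only its low 32 bits. The following user-space sketch is illustrative only and not from the patch; NR_ENTRIES and the helper names are placeholders. It shows why a 32-bit compare is not a bounds check for a 64-bit index.

/*
 * Illustrative only -- not kernel code.  A value such as 0x100000001
 * looks like syscall 1 to a 32-bit compare, but as a 64-bit table
 * index it points far past the end of the table.
 */
#include <stdint.h>
#include <stdio.h>

#define NR_ENTRIES 341			/* placeholder for IA32_NR_syscalls */

static int in_range_32(uint64_t rax)	/* mimics: cmpl $(NR-1),%eax ; ja badsys */
{
	return (uint32_t)rax <= NR_ENTRIES - 1;
}

static int in_range_64(uint64_t rax)	/* mimics: cmpq $(NR-1),%rax ; ja badsys */
{
	return rax <= NR_ENTRIES - 1;
}

int main(void)
{
	uint64_t rax = 0x100000001ULL;

	printf("32-bit check passes: %d\n", in_range_32(rax));	/* prints 1 */
	printf("64-bit check passes: %d\n", in_range_64(rax));	/* prints 0 */
	return 0;
}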
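Note on the iomap.h / iomap_32.c hunks above: returning void __iomem * from iomap_atomic_prot_pfn() and taking void __iomem * in iounmap_atomic() lets sparse catch callers that mix I/O mappings with plain kernel pointers, and the (void __force __iomem *) cast is what converts the kmap_atomic_prot_pfn() return value across that boundary. Below is a minimal sketch of the annotation pattern; the function names are standalone stand-ins, not kernel code, and the __CHECKER__ definitions are assumed to mirror the kernel's compiler.h.

/*
 * Illustrative stand-ins, not kernel code.  Under sparse (__CHECKER__)
 * the attributes put __iomem pointers in a separate address space, so
 * converting a plain pointer requires an explicit __force cast; with a
 * normal compiler the macros expand to nothing.
 */
#ifdef __CHECKER__
# define __iomem	__attribute__((noderef, address_space(2)))
# define __force	__attribute__((force))
#else
# define __iomem
# define __force
#endif

static void *backing_map(unsigned long pfn)	/* stand-in for kmap_atomic_prot_pfn() */
{
	(void)pfn;
	return (void *)0;
}

void __iomem *map_io_pfn(unsigned long pfn)	/* stand-in for iomap_atomic_prot_pfn() */
{
	/* plain pointer -> __iomem pointer: make the conversion explicit */
	return (void __force __iomem *)backing_map(pfn);
}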