summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/ia32/ia32entry.S2
-rw-r--r--arch/x86/include/asm/desc.h4
-rw-r--r--arch/x86/include/asm/io.h3
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h6
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/ptrace.h19
-rw-r--r--arch/x86/include/asm/pvclock.h2
-rw-r--r--arch/x86/include/asm/traps.h2
-rw-r--r--arch/x86/include/asm/unistd_64.h4
-rw-r--r--arch/x86/include/asm/vsyscall.h6
-rw-r--r--arch/x86/include/asm/xen/page.h4
-rw-r--r--arch/x86/kernel/Makefile13
-rw-r--r--arch/x86/kernel/acpi/cstate.c23
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c3
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c1
-rw-r--r--arch/x86/kernel/entry_32.S8
-rw-r--r--arch/x86/kernel/entry_64.S1
-rw-r--r--arch/x86/kernel/paravirt.c4
-rw-r--r--arch/x86/kernel/process.c23
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c4
-rw-r--r--arch/x86/kernel/step.c2
-rw-r--r--arch/x86/kernel/syscall_table_32.S2
-rw-r--r--arch/x86/kernel/traps.c6
-rw-r--r--arch/x86/kernel/vmlinux.lds.S41
-rw-r--r--arch/x86/kernel/vsyscall_64.c90
-rw-r--r--arch/x86/kernel/vsyscall_emu_64.S36
-rw-r--r--arch/x86/kernel/vsyscall_trace.h29
-rw-r--r--arch/x86/kvm/Kconfig3
-rw-r--r--arch/x86/mm/fault.c15
-rw-r--r--arch/x86/pci/acpi.c14
-rw-r--r--arch/x86/platform/mrst/Makefile1
-rw-r--r--arch/x86/platform/mrst/mrst.c4
-rw-r--r--arch/x86/platform/mrst/pmu.c817
-rw-r--r--arch/x86/platform/mrst/pmu.h234
-rw-r--r--arch/x86/platform/olpc/olpc.c4
-rw-r--r--arch/x86/vdso/vdso.S1
-rw-r--r--arch/x86/vdso/vdso32/sysenter.S2
-rw-r--r--arch/x86/xen/Makefile2
-rw-r--r--arch/x86/xen/enlighten.c8
-rw-r--r--arch/x86/xen/mmu.c16
-rw-r--r--arch/x86/xen/setup.c32
-rw-r--r--arch/x86/xen/smp.c14
-rw-r--r--arch/x86/xen/trace.c1
-rw-r--r--arch/x86/xen/xen-asm_32.S8
49 files changed, 1346 insertions, 182 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7cf916f..6a47bb2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -72,6 +72,7 @@ config X86
select USE_GENERIC_SMP_HELPERS if SMP
select HAVE_BPF_JIT if (X86_64 && NET)
select CLKEVT_I8253
+ select ARCH_HAVE_NMI_SAFE_CMPXCHG
config INSTRUCTION_DECODER
def_bool (KPROBES || PERF_EVENTS)
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index a0e866d..54edb207 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -672,7 +672,7 @@ ia32_sys_call_table:
.quad sys32_vm86_warning /* vm86 */
.quad quiet_ni_syscall /* query_module */
.quad sys_poll
- .quad compat_sys_nfsservctl
+ .quad quiet_ni_syscall /* old nfsservctl */
.quad sys_setresgid16 /* 170 */
.quad sys_getresgid16
.quad sys_prctl
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 7b439d9..41935fa 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -27,8 +27,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
desc->base2 = (info->base_addr & 0xff000000) >> 24;
/*
- * Don't allow setting of the lm bit. It is useless anyway
- * because 64bit system calls require __USER_CS:
+ * Don't allow setting of the lm bit. It would confuse
+ * user_64bit_mode and would get overridden by sysret anyway.
*/
desc->l = 0;
}
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d02804d..d8e8eef 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -40,8 +40,6 @@
#include <linux/compiler.h>
#include <asm/page.h>
-#include <xen/xen.h>
-
#define build_mmio_read(name, size, type, reg, barrier) \
static inline type name(const volatile void __iomem *addr) \
{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
@@ -334,6 +332,7 @@ extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
#ifdef CONFIG_XEN
+#include <xen/xen.h>
struct bio_vec;
extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f9a3209..7e50f06 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -17,7 +17,6 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vector 204 : legacy x86_64 vsyscall emulation
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
*
@@ -51,9 +50,6 @@
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
#endif
-#ifdef CONFIG_X86_64
-# define VSYSCALL_EMU_VECTOR 0xcc
-#endif
/*
* Vectors 0x30-0x3f are used for ISA interrupts.
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 2c76521..8e8b9a4 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -41,6 +41,7 @@
#include <asm/desc_defs.h>
#include <asm/kmap_types.h>
+#include <asm/pgtable_types.h>
struct page;
struct thread_struct;
@@ -63,6 +64,11 @@ struct paravirt_callee_save {
struct pv_info {
unsigned int kernel_rpl;
int shared_kernel_pmd;
+
+#ifdef CONFIG_X86_64
+ u16 extra_user_64bit_cs; /* __USER_CS if none */
+#endif
+
int paravirt_enabled;
const char *name;
};
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2193715..0d1171c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -751,8 +751,6 @@ static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
-extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
-
extern void select_idle_routine(const struct cpuinfo_x86 *c);
extern void init_amd_e400_c1e_mask(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 94e7618..3566454 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -131,6 +131,9 @@ struct pt_regs {
#ifdef __KERNEL__
#include <linux/init.h>
+#ifdef CONFIG_PARAVIRT
+#include <asm/paravirt_types.h>
+#endif
struct cpuinfo_x86;
struct task_struct;
@@ -187,6 +190,22 @@ static inline int v8086_mode(struct pt_regs *regs)
#endif
}
+#ifdef CONFIG_X86_64
+static inline bool user_64bit_mode(struct pt_regs *regs)
+{
+#ifndef CONFIG_PARAVIRT
+ /*
+ * On non-paravirt systems, this is the only long mode CPL 3
+ * selector. We do not allow long mode selectors in the LDT.
+ */
+ return regs->cs == __USER_CS;
+#else
+ /* Headers are too twisted for this to go in paravirt.h. */
+ return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
+#endif
+}
+#endif
+
/*
* X86_32 CPUs don't save ss and esp if the CPU is already in kernel mode
* when it traps. The previous stack will be directly underneath the saved
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index a518c0a..c59cc97 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -44,7 +44,7 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif defined(__x86_64__)
__asm__ (
- "mul %[mul_frac] ; shrd $32, %[hi], %[lo]"
+ "mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
: [lo]"=a"(product),
[hi]"=d"(tmp)
: "0"(delta),
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 2bae0a5..0012d09 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -40,7 +40,6 @@ asmlinkage void alignment_check(void);
asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);
-asmlinkage void emulate_vsyscall(void);
dotraplinkage void do_divide_error(struct pt_regs *, long);
dotraplinkage void do_debug(struct pt_regs *, long);
@@ -67,7 +66,6 @@ dotraplinkage void do_alignment_check(struct pt_regs *, long);
dotraplinkage void do_machine_check(struct pt_regs *, long);
#endif
dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
-dotraplinkage void do_emulate_vsyscall(struct pt_regs *, long);
#ifdef CONFIG_X86_32
dotraplinkage void do_iret_error(struct pt_regs *, long);
#endif
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 705bf13..2010405 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -414,7 +414,7 @@ __SYSCALL(__NR_query_module, sys_ni_syscall)
__SYSCALL(__NR_quotactl, sys_quotactl)
#define __NR_nfsservctl 180
-__SYSCALL(__NR_nfsservctl, sys_nfsservctl)
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
/* reserved for LiS/STREAMS */
#define __NR_getpmsg 181
@@ -681,6 +681,8 @@ __SYSCALL(__NR_syncfs, sys_syncfs)
__SYSCALL(__NR_sendmmsg, sys_sendmmsg)
#define __NR_setns 308
__SYSCALL(__NR_setns, sys_setns)
+#define __NR_getcpu 309
+__SYSCALL(__NR_getcpu, sys_getcpu)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 6010707..eaea1d3 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -27,6 +27,12 @@ extern struct timezone sys_tz;
extern void map_vsyscall(void);
+/*
+ * Called on instruction fetch fault in vsyscall page.
+ * Returns true if handled.
+ */
+extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_VSYSCALL_H */
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 64a619d..7ff4669 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -39,7 +39,7 @@ typedef struct xpaddr {
((unsigned long)((u64)CONFIG_XEN_MAX_DOMAIN_MEMORY * 1024 * 1024 * 1024 / PAGE_SIZE))
extern unsigned long *machine_to_phys_mapping;
-extern unsigned int machine_to_phys_order;
+extern unsigned long machine_to_phys_nr;
extern unsigned long get_phys_to_machine(unsigned long pfn);
extern bool set_phys_to_machine(unsigned long pfn, unsigned long mfn);
@@ -87,7 +87,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn)
if (xen_feature(XENFEAT_auto_translated_physmap))
return mfn;
- if (unlikely((mfn >> machine_to_phys_order) != 0)) {
+ if (unlikely(mfn >= machine_to_phys_nr)) {
pfn = ~0;
goto try_override;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0410557..82f2912 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -17,19 +17,6 @@ CFLAGS_REMOVE_ftrace.o = -pg
CFLAGS_REMOVE_early_printk.o = -pg
endif
-#
-# vsyscalls (which work on the user stack) should have
-# no stack-protector checks:
-#
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
-CFLAGS_hpet.o := $(nostackp)
-CFLAGS_paravirt.o := $(nostackp)
-GCOV_PROFILE_vsyscall_64.o := n
-GCOV_PROFILE_hpet.o := n
-GCOV_PROFILE_tsc.o := n
-GCOV_PROFILE_paravirt.o := n
-
obj-y := process_$(BITS).o signal.o entry_$(BITS).o
obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
obj-y += time.o ioport.o ldt.o dumpstack.o
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 5812404..f50e7fb 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -149,6 +149,29 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu,
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
+/*
+ * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
+ * which can obviate IPI to trigger checking of need_resched.
+ * We execute MONITOR against need_resched and enter optimized wait state
+ * through MWAIT. Whenever someone changes need_resched, we would be woken
+ * up from MWAIT (without an IPI).
+ *
+ * New with Core Duo processors, MWAIT can take some hints based on CPU
+ * capability.
+ */
+void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
+{
+ if (!need_resched()) {
+ if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
+ clflush((void *)&current_thread_info()->flags);
+
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ smp_mb();
+ if (!need_resched())
+ __mwait(ax, cx);
+ }
+}
+
void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
unsigned int cpu = smp_processor_id();
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index adc66c3..34b1859 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -207,7 +207,6 @@ static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_ri
((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
APIC_DM_INIT;
uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
- mdelay(10);
val = (1UL << UVH_IPI_INT_SEND_SHFT) |
(phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 08119a3..6b96110 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -149,7 +149,6 @@ struct set_mtrr_data {
*/
static int mtrr_rendezvous_handler(void *info)
{
-#ifdef CONFIG_SMP
struct set_mtrr_data *data = info;
/*
@@ -171,7 +170,6 @@ static int mtrr_rendezvous_handler(void *info)
} else if (mtrr_aps_delayed_init || !cpu_online(smp_processor_id())) {
mtrr_if->set_all();
}
-#endif
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4ee3abf..cfa62ec 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1900,6 +1900,9 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
perf_callchain_store(entry, regs->ip);
+ if (!current->mm)
+ return;
+
if (perf_callchain_user32(regs, entry))
return;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 45fbb8f..f88af2c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1590,6 +1590,7 @@ static __init int intel_pmu_init(void)
break;
case 42: /* SandyBridge */
+ case 45: /* SandyBridge, "Romely-EP" */
memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
sizeof(hw_cache_event_ids));
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5c1a9197..f3f6f53 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -54,6 +54,7 @@
#include <asm/ftrace.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeature.h>
+#include <asm/alternative-asm.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
#include <linux/elf-em.h>
@@ -873,12 +874,7 @@ ENTRY(simd_coprocessor_error)
661: pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
- .balign 4
- .long 661b
- .long 663f
- .word X86_FEATURE_XMM
- .byte 662b-661b
- .byte 664f-663f
+ altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
.previous
.section .altinstr_replacement,"ax"
663: pushl $do_simd_coprocessor_error
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e13329d..6419bb0 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -1111,7 +1111,6 @@ zeroentry spurious_interrupt_bug do_spurious_interrupt_bug
zeroentry coprocessor_error do_coprocessor_error
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
-zeroentry emulate_vsyscall do_emulate_vsyscall
/* Reload gs selector with exception handling */
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 613a793..d90272e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -307,6 +307,10 @@ struct pv_info pv_info = {
.paravirt_enabled = 0,
.kernel_rpl = 0,
.shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */
+
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = __USER_CS,
+#endif
};
struct pv_init_ops pv_init_ops = {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e1ba8cb..e7e3b01 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -438,29 +438,6 @@ void cpu_idle_wait(void)
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
-/*
- * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
- * which can obviate IPI to trigger checking of need_resched.
- * We execute MONITOR against need_resched and enter optimized wait state
- * through MWAIT. Whenever someone changes need_resched, we would be woken
- * up from MWAIT (without an IPI).
- *
- * New with Core Duo processors, MWAIT can take some hints based on CPU
- * capability.
- */
-void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
-{
- if (!need_resched()) {
- if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
- clflush((void *)&current_thread_info()->flags);
-
- __monitor((void *)&current_thread_info()->flags, 0, 0);
- smp_mb();
- if (!need_resched())
- __mwait(ax, cx);
- }
-}
-
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a3d0dc5..7a3b651 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -38,6 +38,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -109,7 +110,8 @@ void cpu_idle(void)
local_irq_disable();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_idle_call())
+ pm_idle();
start_critical_timings();
}
tick_nohz_restart_sched_tick();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ca6f7ab..f693e44 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/cpuidle.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -136,7 +137,8 @@ void cpu_idle(void)
enter_idle();
/* Don't trace irqs off for idle */
stop_critical_timings();
- pm_idle();
+ if (cpuidle_idle_call())
+ pm_idle();
start_critical_timings();
/* In many cases the interrupt that ended idle
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7977f0c..c346d11 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -74,7 +74,7 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
#ifdef CONFIG_X86_64
case 0x40 ... 0x4f:
- if (regs->cs != __USER_CS)
+ if (!user_64bit_mode(regs))
/* 32-bit mode: register increment */
return 0;
/* 64-bit mode: REX prefix */
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index fbb0a04..bc19be3 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -168,7 +168,7 @@ ENTRY(sys_call_table)
.long ptregs_vm86
.long sys_ni_syscall /* Old sys_query_module */
.long sys_poll
- .long sys_nfsservctl
+ .long sys_ni_syscall /* Old nfsservctl */
.long sys_setresgid16 /* 170 */
.long sys_getresgid16
.long sys_prctl
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9682ec5..6913369 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -872,12 +872,6 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors);
#endif
-#ifdef CONFIG_X86_64
- BUG_ON(test_bit(VSYSCALL_EMU_VECTOR, used_vectors));
- set_system_intr_gate(VSYSCALL_EMU_VECTOR, &emulate_vsyscall);
- set_bit(VSYSCALL_EMU_VECTOR, used_vectors);
-#endif
-
/*
* Should be a barrier for any external CPU state:
*/
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 4aa9c54..0f703f1 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -71,7 +71,6 @@ PHDRS {
text PT_LOAD FLAGS(5); /* R_E */
data PT_LOAD FLAGS(6); /* RW_ */
#ifdef CONFIG_X86_64
- user PT_LOAD FLAGS(5); /* R_E */
#ifdef CONFIG_SMP
percpu PT_LOAD FLAGS(6); /* RW_ */
#endif
@@ -154,44 +153,16 @@ SECTIONS
#ifdef CONFIG_X86_64
-#define VSYSCALL_ADDR (-10*1024*1024)
-
-#define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)
-#define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)
-
-#define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)
-#define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)
-
- . = ALIGN(4096);
- __vsyscall_0 = .;
-
- . = VSYSCALL_ADDR;
- .vsyscall : AT(VLOAD(.vsyscall)) {
- *(.vsyscall_0)
-
- . = 1024;
- *(.vsyscall_1)
-
- . = 2048;
- *(.vsyscall_2)
-
- . = 4096; /* Pad the whole page. */
- } :user =0xcc
- . = ALIGN(__vsyscall_0 + PAGE_SIZE, PAGE_SIZE);
-
-#undef VSYSCALL_ADDR
-#undef VLOAD_OFFSET
-#undef VLOAD
-#undef VVIRT_OFFSET
-#undef VVIRT
-
+ . = ALIGN(PAGE_SIZE);
__vvar_page = .;
.vvar : AT(ADDR(.vvar) - LOAD_OFFSET) {
+ /* work around gold bug 13023 */
+ __vvar_beginning_hack = .;
- /* Place all vvars at the offsets in asm/vvar.h. */
-#define EMIT_VVAR(name, offset) \
- . = offset; \
+ /* Place all vvars at the offsets in asm/vvar.h. */
+#define EMIT_VVAR(name, offset) \
+ . = __vvar_beginning_hack + offset; \
*(.vvar_ ## name)
#define __VVAR_KERNEL_LDS
#include <asm/vvar.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index dda7dff..18ae83d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -18,9 +18,6 @@
* use the vDSO.
*/
-/* Disable profiling for userspace code: */
-#define DISABLE_BRANCH_PROFILING
-
#include <linux/time.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -50,12 +47,36 @@
#include <asm/vgtod.h>
#include <asm/traps.h>
+#define CREATE_TRACE_POINTS
+#include "vsyscall_trace.h"
+
DEFINE_VVAR(int, vgetcpu_mode);
DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
{
.lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
};
+static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
+
+static int __init vsyscall_setup(char *str)
+{
+ if (str) {
+ if (!strcmp("emulate", str))
+ vsyscall_mode = EMULATE;
+ else if (!strcmp("native", str))
+ vsyscall_mode = NATIVE;
+ else if (!strcmp("none", str))
+ vsyscall_mode = NONE;
+ else
+ return -EINVAL;
+
+ return 0;
+ }
+
+ return -EINVAL;
+}
+early_param("vsyscall", vsyscall_setup);
+
void update_vsyscall_tz(void)
{
unsigned long flags;
@@ -100,7 +121,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
printk("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, tsk->comm, task_pid_nr(tsk),
- message, regs->ip - 2, regs->cs,
+ message, regs->ip, regs->cs,
regs->sp, regs->ax, regs->si, regs->di);
}
@@ -118,46 +139,39 @@ static int addr_to_vsyscall_nr(unsigned long addr)
return nr;
}
-void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
+bool emulate_vsyscall(struct pt_regs *regs, unsigned long address)
{
struct task_struct *tsk;
unsigned long caller;
int vsyscall_nr;
long ret;
- local_irq_enable();
-
/*
- * Real 64-bit user mode code has cs == __USER_CS. Anything else
- * is bogus.
+ * No point in checking CS -- the only way to get here is a user mode
+ * trap to a high address, which means that we're in 64-bit user code.
*/
- if (regs->cs != __USER_CS) {
- /*
- * If we trapped from kernel mode, we might as well OOPS now
- * instead of returning to some random address and OOPSing
- * then.
- */
- BUG_ON(!user_mode(regs));
- /* Compat mode and non-compat 32-bit CS should both segfault. */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc from 32-bit mode");
- goto sigsegv;
+ WARN_ON_ONCE(address != regs->ip);
+
+ if (vsyscall_mode == NONE) {
+ warn_bad_vsyscall(KERN_INFO, regs,
+ "vsyscall attempted with vsyscall=none");
+ return false;
}
- /*
- * x86-ism here: regs->ip points to the instruction after the int 0xcc,
- * and int 0xcc is two bytes long.
- */
- vsyscall_nr = addr_to_vsyscall_nr(regs->ip - 2);
+ vsyscall_nr = addr_to_vsyscall_nr(address);
+
+ trace_emulate_vsyscall(vsyscall_nr);
+
if (vsyscall_nr < 0) {
warn_bad_vsyscall(KERN_WARNING, regs,
- "illegal int 0xcc (exploit attempt?)");
+ "misaligned vsyscall (exploit attempt or buggy program) -- look up the vsyscall kernel parameter if you need a workaround");
goto sigsegv;
}
if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
- warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
+ warn_bad_vsyscall(KERN_WARNING, regs,
+ "vsyscall with bad stack (exploit attempt?)");
goto sigsegv;
}
@@ -202,13 +216,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
regs->ip = caller;
regs->sp += 8;
- local_irq_disable();
- return;
+ return true;
sigsegv:
- regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
force_sig(SIGSEGV, current);
- local_irq_disable();
+ return true;
}
/*
@@ -256,15 +268,21 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
void __init map_vsyscall(void)
{
- extern char __vsyscall_0;
- unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
+ extern char __vsyscall_page;
+ unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
extern char __vvar_page;
unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
- /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
- __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
+ __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall,
+ vsyscall_mode == NATIVE
+ ? PAGE_KERNEL_VSYSCALL
+ : PAGE_KERNEL_VVAR);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_FIRST_PAGE) !=
+ (unsigned long)VSYSCALL_START);
+
__set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
- BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) !=
+ (unsigned long)VVAR_ADDRESS);
}
static int __init vsyscall_init(void)
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index ffa845e..c9596a9 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,21 +7,31 @@
*/
#include <linux/linkage.h>
+
#include <asm/irq_vectors.h>
+#include <asm/page_types.h>
+#include <asm/unistd_64.h>
+
+__PAGE_ALIGNED_DATA
+ .globl __vsyscall_page
+ .balign PAGE_SIZE, 0xcc
+ .type __vsyscall_page, @object
+__vsyscall_page:
+
+ mov $__NR_gettimeofday, %rax
+ syscall
+ ret
-/* The unused parts of the page are filled with 0xcc by the linker script. */
+ .balign 1024, 0xcc
+ mov $__NR_time, %rax
+ syscall
+ ret
-.section .vsyscall_0, "a"
-ENTRY(vsyscall_0)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_0)
+ .balign 1024, 0xcc
+ mov $__NR_getcpu, %rax
+ syscall
+ ret
-.section .vsyscall_1, "a"
-ENTRY(vsyscall_1)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_1)
+ .balign 4096, 0xcc
-.section .vsyscall_2, "a"
-ENTRY(vsyscall_2)
- int $VSYSCALL_EMU_VECTOR
-END(vsyscall_2)
+ .size __vsyscall_page, 4096
diff --git a/arch/x86/kernel/vsyscall_trace.h b/arch/x86/kernel/vsyscall_trace.h
new file mode 100644
index 0000000..a8b2ede
--- /dev/null
+++ b/arch/x86/kernel/vsyscall_trace.h
@@ -0,0 +1,29 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vsyscall
+
+#if !defined(__VSYSCALL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define __VSYSCALL_TRACE_H
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(emulate_vsyscall,
+
+ TP_PROTO(int nr),
+
+ TP_ARGS(nr),
+
+ TP_STRUCT__entry(__field(int, nr)),
+
+ TP_fast_assign(
+ __entry->nr = nr;
+ ),
+
+ TP_printk("nr = %d", __entry->nr)
+);
+
+#endif
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../arch/x86/kernel
+#define TRACE_INCLUDE_FILE vsyscall_trace
+#include <trace/define_trace.h>
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 988724b..ff5790d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -22,6 +22,8 @@ config KVM
depends on HAVE_KVM
# for device assignment:
depends on PCI
+ # for TASKSTATS/TASK_DELAY_ACCT:
+ depends on NET
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
@@ -31,6 +33,7 @@ config KVM
select KVM_ASYNC_PF
select USER_RETURN_NOTIFIER
select KVM_MMIO
+ select TASKSTATS
select TASK_DELAY_ACCT
---help---
Support hosting fully virtualized guest machines using hardware
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 4d09df0..0d17c8c 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -17,6 +17,7 @@
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
+#include <asm/vsyscall.h>
/*
* Page fault error code bits:
@@ -105,7 +106,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
* but for now it's good enough to assume that long
* mode only uses well known segments or kernel.
*/
- return (!user_mode(regs)) || (regs->cs == __USER_CS);
+ return (!user_mode(regs) || user_64bit_mode(regs));
#endif
case 0x60:
/* 0x64 thru 0x67 are valid prefixes in all modes. */
@@ -720,6 +721,18 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
if (is_errata100(regs, address))
return;
+#ifdef CONFIG_X86_64
+ /*
+ * Instruction fetch faults in the vsyscall page might need
+ * emulation.
+ */
+ if (unlikely((error_code & PF_INSTR) &&
+ ((address & ~0xfff) == VSYSCALL_START))) {
+ if (emulate_vsyscall(regs, address))
+ return;
+ }
+#endif
+
if (unlikely(show_unhandled_signals))
show_signal_msg(regs, error_code, address, tsk);
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index ae3cb23..039d913 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -360,6 +360,20 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_pci_root *root)
}
}
+ /* After the PCI-E bus has been walked and all devices discovered,
+ * configure any settings of the fabric that might be necessary.
+ */
+ if (bus) {
+ struct pci_bus *child;
+ list_for_each_entry(child, &bus->children, node) {
+ struct pci_dev *self = child->self;
+ if (!self)
+ continue;
+
+ pcie_bus_configure_settings(child, self->pcie_mpss);
+ }
+ }
+
if (!bus)
kfree(sd);
diff --git a/arch/x86/platform/mrst/Makefile b/arch/x86/platform/mrst/Makefile
index f61ccdd..1ea3877 100644
--- a/arch/x86/platform/mrst/Makefile
+++ b/arch/x86/platform/mrst/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_X86_MRST) += mrst.o
obj-$(CONFIG_X86_MRST) += vrtc.o
obj-$(CONFIG_EARLY_PRINTK_MRST) += early_printk_mrst.o
+obj-$(CONFIG_X86_MRST) += pmu.o
diff --git a/arch/x86/platform/mrst/mrst.c b/arch/x86/platform/mrst/mrst.c
index 7000e74..58425ad 100644
--- a/arch/x86/platform/mrst/mrst.c
+++ b/arch/x86/platform/mrst/mrst.c
@@ -689,7 +689,9 @@ static int __init sfi_parse_devs(struct sfi_table_header *table)
irq_attr.trigger = 1;
irq_attr.polarity = 1;
io_apic_set_pci_routing(NULL, pentry->irq, &irq_attr);
- }
+ } else
+ pentry->irq = 0; /* No irq */
+
switch (pentry->type) {
case SFI_DEV_TYPE_IPC:
/* ID as IRQ is a hack that will go away */
diff --git a/arch/x86/platform/mrst/pmu.c b/arch/x86/platform/mrst/pmu.c
new file mode 100644
index 0000000..9281da7
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.c
@@ -0,0 +1,817 @@
+/*
+ * mrst/pmu.c - driver for MRST Power Management Unit
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/cpuidle.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <linux/sfi.h>
+#include <asm/intel_scu_ipc.h>
+#include "pmu.h"
+
+#define IPCMSG_FW_REVISION 0xF4
+
+struct mrst_device {
+ u16 pci_dev_num; /* DEBUG only */
+ u16 lss;
+ u16 latest_request;
+ unsigned int pci_state_counts[PCI_D3cold + 1]; /* DEBUG only */
+};
+
+/*
+ * comlete list of MRST PCI devices
+ */
+static struct mrst_device mrst_devs[] = {
+/* 0 */ { 0x0800, LSS_SPI0 }, /* Moorestown SPI Ctrl 0 */
+/* 1 */ { 0x0801, LSS_SPI1 }, /* Moorestown SPI Ctrl 1 */
+/* 2 */ { 0x0802, LSS_I2C0 }, /* Moorestown I2C 0 */
+/* 3 */ { 0x0803, LSS_I2C1 }, /* Moorestown I2C 1 */
+/* 4 */ { 0x0804, LSS_I2C2 }, /* Moorestown I2C 2 */
+/* 5 */ { 0x0805, LSS_KBD }, /* Moorestown Keyboard Ctrl */
+/* 6 */ { 0x0806, LSS_USB_HC }, /* Moorestown USB Ctrl */
+/* 7 */ { 0x0807, LSS_SD_HC0 }, /* Moorestown SD Host Ctrl 0 */
+/* 8 */ { 0x0808, LSS_SD_HC1 }, /* Moorestown SD Host Ctrl 1 */
+/* 9 */ { 0x0809, LSS_NAND }, /* Moorestown NAND Ctrl */
+/* 10 */ { 0x080a, LSS_AUDIO }, /* Moorestown Audio Ctrl */
+/* 11 */ { 0x080b, LSS_IMAGING }, /* Moorestown ISP */
+/* 12 */ { 0x080c, LSS_SECURITY }, /* Moorestown Security Controller */
+/* 13 */ { 0x080d, LSS_DISPLAY }, /* Moorestown External Displays */
+/* 14 */ { 0x080e, 0 }, /* Moorestown SCU IPC */
+/* 15 */ { 0x080f, LSS_GPIO }, /* Moorestown GPIO Controller */
+/* 16 */ { 0x0810, 0 }, /* Moorestown Power Management Unit */
+/* 17 */ { 0x0811, LSS_USB_OTG }, /* Moorestown OTG Ctrl */
+/* 18 */ { 0x0812, LSS_SPI2 }, /* Moorestown SPI Ctrl 2 */
+/* 19 */ { 0x0813, 0 }, /* Moorestown SC DMA */
+/* 20 */ { 0x0814, LSS_AUDIO_LPE }, /* Moorestown LPE DMA */
+/* 21 */ { 0x0815, LSS_AUDIO_SSP }, /* Moorestown SSP0 */
+
+/* 22 */ { 0x084F, LSS_SD_HC2 }, /* Moorestown SD Host Ctrl 2 */
+
+/* 23 */ { 0x4102, 0 }, /* Lincroft */
+/* 24 */ { 0x4110, 0 }, /* Lincroft */
+};
+
+/* n.b. We ignore PCI-id 0x815 in LSS9 b/c MeeGo has no driver for it */
+static u16 mrst_lss9_pci_ids[] = {0x080a, 0x0814, 0};
+static u16 mrst_lss10_pci_ids[] = {0x0800, 0x0801, 0x0802, 0x0803,
+ 0x0804, 0x0805, 0x080f, 0};
+
+/* handle concurrent SMP invokations of pmu_pci_set_power_state() */
+static spinlock_t mrst_pmu_power_state_lock;
+
+static unsigned int wake_counters[MRST_NUM_LSS]; /* DEBUG only */
+static unsigned int pmu_irq_stats[INT_INVALID + 1]; /* DEBUG only */
+
+static int graphics_is_off;
+static int lss_s0i3_enabled;
+static bool mrst_pmu_s0i3_enable;
+
+/* debug counters */
+static u32 pmu_wait_ready_calls;
+static u32 pmu_wait_ready_udelays;
+static u32 pmu_wait_ready_udelays_max;
+static u32 pmu_wait_done_calls;
+static u32 pmu_wait_done_udelays;
+static u32 pmu_wait_done_udelays_max;
+static u32 pmu_set_power_state_entry;
+static u32 pmu_set_power_state_send_cmd;
+
+static struct mrst_device *pci_id_2_mrst_dev(u16 pci_dev_num)
+{
+ int index = 0;
+
+ if ((pci_dev_num >= 0x0800) && (pci_dev_num <= 0x815))
+ index = pci_dev_num - 0x800;
+ else if (pci_dev_num == 0x084F)
+ index = 22;
+ else if (pci_dev_num == 0x4102)
+ index = 23;
+ else if (pci_dev_num == 0x4110)
+ index = 24;
+
+ if (pci_dev_num != mrst_devs[index].pci_dev_num) {
+ WARN_ONCE(1, FW_BUG "Unknown PCI device 0x%04X\n", pci_dev_num);
+ return 0;
+ }
+
+ return &mrst_devs[index];
+}
+
+/**
+ * mrst_pmu_validate_cstates
+ * @dev: cpuidle_device
+ *
+ * Certain states are not appropriate for governor to pick in some cases.
+ * This function will be called as cpuidle_device's prepare callback and
+ * thus tells governor to ignore such states when selecting the next state
+ * to enter.
+ */
+
+#define IDLE_STATE4_IS_C6 4
+#define IDLE_STATE5_IS_S0I3 5
+
+int mrst_pmu_invalid_cstates(void)
+{
+ int cpu = smp_processor_id();
+
+ /*
+ * Demote to C4 if the PMU is busy.
+ * Since LSS changes leave the busy bit clear...
+ * busy means either the PMU is waiting for an ACK-C6 that
+ * isn't coming due to an MWAIT that returned immediately;
+ * or we returned from S0i3 successfully, and the PMU
+ * is not done sending us interrupts.
+ */
+ if (pmu_read_busy_status())
+ return 1 << IDLE_STATE4_IS_C6 | 1 << IDLE_STATE5_IS_S0I3;
+
+ /*
+ * Disallow S0i3 if: PMU is not initialized, or CPU1 is active,
+ * or if device LSS is insufficient, or the GPU is active,
+ * or if it has been explicitly disabled.
+ */
+ if (!pmu_reg || !cpumask_equal(cpu_online_mask, cpumask_of(cpu)) ||
+ !lss_s0i3_enabled || !graphics_is_off || !mrst_pmu_s0i3_enable)
+ return 1 << IDLE_STATE5_IS_S0I3;
+ else
+ return 0;
+}
+
+/*
+ * pmu_update_wake_counters(): read PM_WKS, update wake_counters[]
+ * DEBUG only.
+ */
+static void pmu_update_wake_counters(void)
+{
+ int lss;
+ u32 wake_status;
+
+ wake_status = pmu_read_wks();
+
+ for (lss = 0; lss < MRST_NUM_LSS; ++lss) {
+ if (wake_status & (1 << lss))
+ wake_counters[lss]++;
+ }
+}
+
+int mrst_pmu_s0i3_entry(void)
+{
+ int status;
+
+ /* Clear any possible error conditions */
+ pmu_write_ics(0x300);
+
+ /* set wake control to current D-states */
+ pmu_write_wssc(S0I3_SSS_TARGET);
+
+ status = mrst_s0i3_entry(PM_S0I3_COMMAND, &pmu_reg->pm_cmd);
+ pmu_update_wake_counters();
+ return status;
+}
+
+/* poll for maximum of 5ms for busy bit to clear */
+static int pmu_wait_ready(void)
+{
+ int udelays;
+
+ pmu_wait_ready_calls++;
+
+ for (udelays = 0; udelays < 500; ++udelays) {
+ if (udelays > pmu_wait_ready_udelays_max)
+ pmu_wait_ready_udelays_max = udelays;
+
+ if (pmu_read_busy_status() == 0)
+ return 0;
+
+ udelay(10);
+ pmu_wait_ready_udelays++;
+ }
+
+ /*
+ * if this fires, observe
+ * /sys/kernel/debug/mrst_pmu_wait_ready_calls
+ * /sys/kernel/debug/mrst_pmu_wait_ready_udelays
+ */
+ WARN_ONCE(1, "SCU not ready for 5ms");
+ return -EBUSY;
+}
+/* poll for maximum of 50ms us for busy bit to clear */
+static int pmu_wait_done(void)
+{
+ int udelays;
+
+ pmu_wait_done_calls++;
+
+ for (udelays = 0; udelays < 500; ++udelays) {
+ if (udelays > pmu_wait_done_udelays_max)
+ pmu_wait_done_udelays_max = udelays;
+
+ if (pmu_read_busy_status() == 0)
+ return 0;
+
+ udelay(100);
+ pmu_wait_done_udelays++;
+ }
+
+ /*
+ * if this fires, observe
+ * /sys/kernel/debug/mrst_pmu_wait_done_calls
+ * /sys/kernel/debug/mrst_pmu_wait_done_udelays
+ */
+ WARN_ONCE(1, "SCU not done for 50ms");
+ return -EBUSY;
+}
+
+u32 mrst_pmu_msi_is_disabled(void)
+{
+ return pmu_msi_is_disabled();
+}
+
+void mrst_pmu_enable_msi(void)
+{
+ pmu_msi_enable();
+}
+
+/**
+ * pmu_irq - pmu driver interrupt handler
+ * Context: interrupt context
+ */
+static irqreturn_t pmu_irq(int irq, void *dummy)
+{
+ union pmu_pm_ics pmu_ics;
+
+ pmu_ics.value = pmu_read_ics();
+
+ if (!pmu_ics.bits.pending)
+ return IRQ_NONE;
+
+ switch (pmu_ics.bits.cause) {
+ case INT_SPURIOUS:
+ case INT_CMD_DONE:
+ case INT_CMD_ERR:
+ case INT_WAKE_RX:
+ case INT_SS_ERROR:
+ case INT_S0IX_MISS:
+ case INT_NO_ACKC6:
+ pmu_irq_stats[pmu_ics.bits.cause]++;
+ break;
+ default:
+ pmu_irq_stats[INT_INVALID]++;
+ }
+
+ pmu_write_ics(pmu_ics.value); /* Clear pending interrupt */
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * Translate PCI power management to MRST LSS D-states
+ */
+static int pci_2_mrst_state(int lss, pci_power_t pci_state)
+{
+ switch (pci_state) {
+ case PCI_D0:
+ if (SSMSK(D0i1, lss) & D0I1_ACG_SSS_TARGET)
+ return D0i1;
+ else
+ return D0;
+ case PCI_D1:
+ return D0i1;
+ case PCI_D2:
+ return D0i2;
+ case PCI_D3hot:
+ case PCI_D3cold:
+ return D0i3;
+ default:
+ WARN(1, "pci_state %d\n", pci_state);
+ return 0;
+ }
+}
+
+static int pmu_issue_command(u32 pm_ssc)
+{
+ union pmu_pm_set_cfg_cmd_t command;
+
+ if (pmu_read_busy_status()) {
+ pr_debug("pmu is busy, Operation not permitted\n");
+ return -1;
+ }
+
+ /*
+ * enable interrupts in PMU so that interrupts are
+ * propagated when ioc bit for a particular set
+ * command is set
+ */
+
+ pmu_irq_enable();
+
+ /* Configure the sub systems for pmu2 */
+
+ pmu_write_ssc(pm_ssc);
+
+ /*
+ * Send the set config command for pmu its configured
+ * for mode CM_IMMEDIATE & hence with No Trigger
+ */
+
+ command.pmu2_params.d_param.cfg_mode = CM_IMMEDIATE;
+ command.pmu2_params.d_param.cfg_delay = 0;
+ command.pmu2_params.d_param.rsvd = 0;
+
+ /* construct the command to send SET_CFG to particular PMU */
+ command.pmu2_params.d_param.cmd = SET_CFG_CMD;
+ command.pmu2_params.d_param.ioc = 0;
+ command.pmu2_params.d_param.mode_id = 0;
+ command.pmu2_params.d_param.sys_state = SYS_STATE_S0I0;
+
+ /* write the value of PM_CMD into particular PMU */
+ pr_debug("pmu command being written %x\n",
+ command.pmu_pm_set_cfg_cmd_value);
+
+ pmu_write_cmd(command.pmu_pm_set_cfg_cmd_value);
+
+ return 0;
+}
+
+static u16 pmu_min_lss_pci_req(u16 *ids, u16 pci_state)
+{
+ u16 existing_request;
+ int i;
+
+ for (i = 0; ids[i]; ++i) {
+ struct mrst_device *mrst_dev;
+
+ mrst_dev = pci_id_2_mrst_dev(ids[i]);
+ if (unlikely(!mrst_dev))
+ continue;
+
+ existing_request = mrst_dev->latest_request;
+ if (existing_request < pci_state)
+ pci_state = existing_request;
+ }
+ return pci_state;
+}
+
+/**
+ * pmu_pci_set_power_state - Callback function is used by all the PCI devices
+ * for a platform specific device power on/shutdown.
+ */
+
+int pmu_pci_set_power_state(struct pci_dev *pdev, pci_power_t pci_state)
+{
+ u32 old_sss, new_sss;
+ int status = 0;
+ struct mrst_device *mrst_dev;
+
+ pmu_set_power_state_entry++;
+
+ BUG_ON(pdev->vendor != PCI_VENDOR_ID_INTEL);
+ BUG_ON(pci_state < PCI_D0 || pci_state > PCI_D3cold);
+
+ mrst_dev = pci_id_2_mrst_dev(pdev->device);
+ if (unlikely(!mrst_dev))
+ return -ENODEV;
+
+ mrst_dev->pci_state_counts[pci_state]++; /* count invocations */
+
+ /* PMU driver calls self as part of PCI initialization, ignore */
+ if (pdev->device == PCI_DEV_ID_MRST_PMU)
+ return 0;
+
+ BUG_ON(!pmu_reg); /* SW bug if called before initialized */
+
+ spin_lock(&mrst_pmu_power_state_lock);
+
+ if (pdev->d3_delay) {
+ dev_dbg(&pdev->dev, "d3_delay %d, should be 0\n",
+ pdev->d3_delay);
+ pdev->d3_delay = 0;
+ }
+ /*
+ * If Lincroft graphics, simply remember state
+ */
+ if ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY
+ && !((pdev->class & PCI_SUB_CLASS_MASK) >> 8)) {
+ if (pci_state == PCI_D0)
+ graphics_is_off = 0;
+ else
+ graphics_is_off = 1;
+ goto ret;
+ }
+
+ if (!mrst_dev->lss)
+ goto ret; /* device with no LSS */
+
+ if (mrst_dev->latest_request == pci_state)
+ goto ret; /* no change */
+
+ mrst_dev->latest_request = pci_state; /* record latest request */
+
+ /*
+ * LSS9 and LSS10 contain multiple PCI devices.
+ * Use the lowest numbered (highest power) state in the LSS
+ */
+ if (mrst_dev->lss == 9)
+ pci_state = pmu_min_lss_pci_req(mrst_lss9_pci_ids, pci_state);
+ else if (mrst_dev->lss == 10)
+ pci_state = pmu_min_lss_pci_req(mrst_lss10_pci_ids, pci_state);
+
+ status = pmu_wait_ready();
+ if (status)
+ goto ret;
+
+ old_sss = pmu_read_sss();
+ new_sss = old_sss & ~SSMSK(3, mrst_dev->lss);
+ new_sss |= SSMSK(pci_2_mrst_state(mrst_dev->lss, pci_state),
+ mrst_dev->lss);
+
+ if (new_sss == old_sss)
+ goto ret; /* nothing to do */
+
+ pmu_set_power_state_send_cmd++;
+
+ status = pmu_issue_command(new_sss);
+
+ if (unlikely(status != 0)) {
+ dev_err(&pdev->dev, "Failed to Issue a PM command\n");
+ goto ret;
+ }
+
+ if (pmu_wait_done())
+ goto ret;
+
+ lss_s0i3_enabled =
+ ((pmu_read_sss() & S0I3_SSS_TARGET) == S0I3_SSS_TARGET);
+ret:
+ spin_unlock(&mrst_pmu_power_state_lock);
+ return status;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static char *d0ix_names[] = {"D0", "D0i1", "D0i2", "D0i3"};
+
+static inline const char *d0ix_name(int state)
+{
+ return d0ix_names[(int) state];
+}
+
+static int debug_mrst_pmu_show(struct seq_file *s, void *unused)
+{
+ struct pci_dev *pdev = NULL;
+ u32 cur_pmsss;
+ int lss;
+
+ seq_printf(s, "0x%08X D0I1_ACG_SSS_TARGET\n", D0I1_ACG_SSS_TARGET);
+
+ cur_pmsss = pmu_read_sss();
+
+ seq_printf(s, "0x%08X S0I3_SSS_TARGET\n", S0I3_SSS_TARGET);
+
+ seq_printf(s, "0x%08X Current SSS ", cur_pmsss);
+ seq_printf(s, lss_s0i3_enabled ? "\n" : "[BLOCKS s0i3]\n");
+
+ if (cpumask_equal(cpu_online_mask, cpumask_of(0)))
+ seq_printf(s, "cpu0 is only cpu online\n");
+ else
+ seq_printf(s, "cpu0 is NOT only cpu online [BLOCKS S0i3]\n");
+
+ seq_printf(s, "GFX: %s\n", graphics_is_off ? "" : "[BLOCKS s0i3]");
+
+
+ for_each_pci_dev(pdev) {
+ int pos;
+ u16 pmcsr;
+ struct mrst_device *mrst_dev;
+ int i;
+
+ mrst_dev = pci_id_2_mrst_dev(pdev->device);
+
+ seq_printf(s, "%s %04x/%04X %-16.16s ",
+ dev_name(&pdev->dev),
+ pdev->vendor, pdev->device,
+ dev_driver_string(&pdev->dev));
+
+ if (unlikely (!mrst_dev)) {
+ seq_printf(s, " UNKNOWN\n");
+ continue;
+ }
+
+ if (mrst_dev->lss)
+ seq_printf(s, "LSS %2d %-4s ", mrst_dev->lss,
+ d0ix_name(((cur_pmsss >>
+ (mrst_dev->lss * 2)) & 0x3)));
+ else
+ seq_printf(s, " ");
+
+ /* PCI PM config space setting */
+ pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
+ if (pos != 0) {
+ pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
+ seq_printf(s, "PCI-%-4s",
+ pci_power_name(pmcsr & PCI_PM_CTRL_STATE_MASK));
+ } else {
+ seq_printf(s, " ");
+ }
+
+ seq_printf(s, " %s ", pci_power_name(mrst_dev->latest_request));
+ for (i = 0; i <= PCI_D3cold; ++i)
+ seq_printf(s, "%d ", mrst_dev->pci_state_counts[i]);
+
+ if (mrst_dev->lss) {
+ unsigned int lssmask;
+
+ lssmask = SSMSK(D0i3, mrst_dev->lss);
+
+ if ((lssmask & S0I3_SSS_TARGET) &&
+ ((lssmask & cur_pmsss) !=
+ (lssmask & S0I3_SSS_TARGET)))
+ seq_printf(s , "[BLOCKS s0i3]");
+ }
+
+ seq_printf(s, "\n");
+ }
+ seq_printf(s, "Wake Counters:\n");
+ for (lss = 0; lss < MRST_NUM_LSS; ++lss)
+ seq_printf(s, "LSS%d %d\n", lss, wake_counters[lss]);
+
+ seq_printf(s, "Interrupt Counters:\n");
+ seq_printf(s,
+ "INT_SPURIOUS \t%8u\n" "INT_CMD_DONE \t%8u\n"
+ "INT_CMD_ERR \t%8u\n" "INT_WAKE_RX \t%8u\n"
+ "INT_SS_ERROR \t%8u\n" "INT_S0IX_MISS\t%8u\n"
+ "INT_NO_ACKC6 \t%8u\n" "INT_INVALID \t%8u\n",
+ pmu_irq_stats[INT_SPURIOUS], pmu_irq_stats[INT_CMD_DONE],
+ pmu_irq_stats[INT_CMD_ERR], pmu_irq_stats[INT_WAKE_RX],
+ pmu_irq_stats[INT_SS_ERROR], pmu_irq_stats[INT_S0IX_MISS],
+ pmu_irq_stats[INT_NO_ACKC6], pmu_irq_stats[INT_INVALID]);
+
+ seq_printf(s, "mrst_pmu_wait_ready_calls %8d\n",
+ pmu_wait_ready_calls);
+ seq_printf(s, "mrst_pmu_wait_ready_udelays %8d\n",
+ pmu_wait_ready_udelays);
+ seq_printf(s, "mrst_pmu_wait_ready_udelays_max %8d\n",
+ pmu_wait_ready_udelays_max);
+ seq_printf(s, "mrst_pmu_wait_done_calls %8d\n",
+ pmu_wait_done_calls);
+ seq_printf(s, "mrst_pmu_wait_done_udelays %8d\n",
+ pmu_wait_done_udelays);
+ seq_printf(s, "mrst_pmu_wait_done_udelays_max %8d\n",
+ pmu_wait_done_udelays_max);
+ seq_printf(s, "mrst_pmu_set_power_state_entry %8d\n",
+ pmu_set_power_state_entry);
+ seq_printf(s, "mrst_pmu_set_power_state_send_cmd %8d\n",
+ pmu_set_power_state_send_cmd);
+ seq_printf(s, "SCU busy: %d\n", pmu_read_busy_status());
+
+ return 0;
+}
+
+static int debug_mrst_pmu_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, debug_mrst_pmu_show, NULL);
+}
+
+static const struct file_operations devices_state_operations = {
+ .open = debug_mrst_pmu_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+#endif /* DEBUG_FS */
+
+/*
+ * Validate SCU PCI shim PCI vendor capability byte
+ * against LSS hard-coded in mrst_devs[] above.
+ * DEBUG only.
+ */
+static void pmu_scu_firmware_debug(void)
+{
+ struct pci_dev *pdev = NULL;
+
+ for_each_pci_dev(pdev) {
+ struct mrst_device *mrst_dev;
+ u8 pci_config_lss;
+ int pos;
+
+ mrst_dev = pci_id_2_mrst_dev(pdev->device);
+ if (unlikely(!mrst_dev)) {
+ printk(KERN_ERR FW_BUG "pmu: Unknown "
+ "PCI device 0x%04X\n", pdev->device);
+ continue;
+ }
+
+ if (mrst_dev->lss == 0)
+ continue; /* no LSS in our table */
+
+ pos = pci_find_capability(pdev, PCI_CAP_ID_VNDR);
+ if (!pos != 0) {
+ printk(KERN_ERR FW_BUG "pmu: 0x%04X "
+ "missing PCI Vendor Capability\n",
+ pdev->device);
+ continue;
+ }
+ pci_read_config_byte(pdev, pos + 4, &pci_config_lss);
+ if (!(pci_config_lss & PCI_VENDOR_CAP_LOG_SS_MASK)) {
+ printk(KERN_ERR FW_BUG "pmu: 0x%04X "
+ "invalid PCI Vendor Capability 0x%x "
+ " expected LSS 0x%X\n",
+ pdev->device, pci_config_lss, mrst_dev->lss);
+ continue;
+ }
+ pci_config_lss &= PCI_VENDOR_CAP_LOG_ID_MASK;
+
+ if (mrst_dev->lss == pci_config_lss)
+ continue;
+
+ printk(KERN_ERR FW_BUG "pmu: 0x%04X LSS = %d, expected %d\n",
+ pdev->device, pci_config_lss, mrst_dev->lss);
+ }
+}
+
+/**
+ * pmu_probe
+ */
+static int __devinit pmu_probe(struct pci_dev *pdev,
+ const struct pci_device_id *pci_id)
+{
+ int ret;
+ struct mrst_pmu_reg *pmu;
+
+ /* Init the device */
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Unable to Enable PCI device\n");
+ return ret;
+ }
+
+ ret = pci_request_regions(pdev, MRST_PMU_DRV_NAME);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Cannot obtain PCI resources, aborting\n");
+ goto out_err1;
+ }
+
+ /* Map the memory of PMU reg base */
+ pmu = pci_iomap(pdev, 0, 0);
+ if (!pmu) {
+ dev_err(&pdev->dev, "Unable to map the PMU address space\n");
+ ret = -ENOMEM;
+ goto out_err2;
+ }
+
+#ifdef CONFIG_DEBUG_FS
+ /* /sys/kernel/debug/mrst_pmu */
+ (void) debugfs_create_file("mrst_pmu", S_IFREG | S_IRUGO,
+ NULL, NULL, &devices_state_operations);
+#endif
+ pmu_reg = pmu; /* success */
+
+ if (request_irq(pdev->irq, pmu_irq, 0, MRST_PMU_DRV_NAME, NULL)) {
+ dev_err(&pdev->dev, "Registering isr has failed\n");
+ ret = -1;
+ goto out_err3;
+ }
+
+ pmu_scu_firmware_debug();
+
+ pmu_write_wkc(S0I3_WAKE_SOURCES); /* Enable S0i3 wakeup sources */
+
+ pmu_wait_ready();
+
+ pmu_write_ssc(D0I1_ACG_SSS_TARGET); /* Enable Auto-Clock_Gating */
+ pmu_write_cmd(0x201);
+
+ spin_lock_init(&mrst_pmu_power_state_lock);
+
+ /* Enable the hardware interrupt */
+ pmu_irq_enable();
+ return 0;
+
+out_err3:
+ free_irq(pdev->irq, NULL);
+ pci_iounmap(pdev, pmu_reg);
+ pmu_reg = NULL;
+out_err2:
+ pci_release_region(pdev, 0);
+out_err1:
+ pci_disable_device(pdev);
+ return ret;
+}
+
+static void __devexit pmu_remove(struct pci_dev *pdev)
+{
+ dev_err(&pdev->dev, "Mid PM pmu_remove called\n");
+
+ /* Freeing up the irq */
+ free_irq(pdev->irq, NULL);
+
+ pci_iounmap(pdev, pmu_reg);
+ pmu_reg = NULL;
+
+ /* disable the current PCI device */
+ pci_release_region(pdev, 0);
+ pci_disable_device(pdev);
+}
+
+static DEFINE_PCI_DEVICE_TABLE(pmu_pci_ids) = {
+ { PCI_VDEVICE(INTEL, PCI_DEV_ID_MRST_PMU), 0 },
+ { }
+};
+
+MODULE_DEVICE_TABLE(pci, pmu_pci_ids);
+
+static struct pci_driver driver = {
+ .name = MRST_PMU_DRV_NAME,
+ .id_table = pmu_pci_ids,
+ .probe = pmu_probe,
+ .remove = __devexit_p(pmu_remove),
+};
+
+/**
+ * pmu_pci_register - register the PMU driver as PCI device
+ */
+static int __init pmu_pci_register(void)
+{
+ return pci_register_driver(&driver);
+}
+
+/* Register and probe via fs_initcall() to preceed device_initcall() */
+fs_initcall(pmu_pci_register);
+
+static void __exit mid_pci_cleanup(void)
+{
+ pci_unregister_driver(&driver);
+}
+
+static int ia_major;
+static int ia_minor;
+
+static int pmu_sfi_parse_oem(struct sfi_table_header *table)
+{
+ struct sfi_table_simple *sb;
+
+ sb = (struct sfi_table_simple *)table;
+ ia_major = (sb->pentry[1] >> 0) & 0xFFFF;
+ ia_minor = (sb->pentry[1] >> 16) & 0xFFFF;
+ printk(KERN_INFO "mrst_pmu: IA FW version v%x.%x\n",
+ ia_major, ia_minor);
+
+ return 0;
+}
+
+static int __init scu_fw_check(void)
+{
+ int ret;
+ u32 fw_version;
+
+ if (!pmu_reg)
+ return 0; /* this driver didn't probe-out */
+
+ sfi_table_parse("OEMB", NULL, NULL, pmu_sfi_parse_oem);
+
+ if (ia_major < 0x6005 || ia_minor < 0x1525) {
+ WARN(1, "mrst_pmu: IA FW version too old\n");
+ return -1;
+ }
+
+ ret = intel_scu_ipc_command(IPCMSG_FW_REVISION, 0, NULL, 0,
+ &fw_version, 1);
+
+ if (ret) {
+ WARN(1, "mrst_pmu: IPC FW version? %d\n", ret);
+ } else {
+ int scu_major = (fw_version >> 8) & 0xFF;
+ int scu_minor = (fw_version >> 0) & 0xFF;
+
+ printk(KERN_INFO "mrst_pmu: firmware v%x\n", fw_version);
+
+ if ((scu_major >= 0xC0) && (scu_minor >= 0x49)) {
+ printk(KERN_INFO "mrst_pmu: enabling S0i3\n");
+ mrst_pmu_s0i3_enable = true;
+ } else {
+ WARN(1, "mrst_pmu: S0i3 disabled, old firmware %X.%X",
+ scu_major, scu_minor);
+ }
+ }
+ return 0;
+}
+late_initcall(scu_fw_check);
+module_exit(mid_pci_cleanup);
diff --git a/arch/x86/platform/mrst/pmu.h b/arch/x86/platform/mrst/pmu.h
new file mode 100644
index 0000000..bfbfe64
--- /dev/null
+++ b/arch/x86/platform/mrst/pmu.h
@@ -0,0 +1,234 @@
+/*
+ * mrst/pmu.h - private definitions for MRST Power Management Unit mrst/pmu.c
+ *
+ * Copyright (c) 2011, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef _MRST_PMU_H_
+#define _MRST_PMU_H_
+
+#define PCI_DEV_ID_MRST_PMU 0x0810
+#define MRST_PMU_DRV_NAME "mrst_pmu"
+#define PCI_SUB_CLASS_MASK 0xFF00
+
+#define PCI_VENDOR_CAP_LOG_ID_MASK 0x7F
+#define PCI_VENDOR_CAP_LOG_SS_MASK 0x80
+
+#define SUB_SYS_ALL_D0I1 0x01155555
+#define S0I3_WAKE_SOURCES 0x00001FFF
+
+#define PM_S0I3_COMMAND \
+ ((0 << 31) | /* Reserved */ \
+ (0 << 30) | /* Core must be idle */ \
+ (0xc2 << 22) | /* ACK C6 trigger */ \
+ (3 << 19) | /* Trigger on DMI message */ \
+ (3 << 16) | /* Enter S0i3 */ \
+ (0 << 13) | /* Numeric mode ID (sw) */ \
+ (3 << 9) | /* Trigger mode */ \
+ (0 << 8) | /* Do not interrupt */ \
+ (1 << 0)) /* Set configuration */
+
+#define LSS_DMI 0
+#define LSS_SD_HC0 1
+#define LSS_SD_HC1 2
+#define LSS_NAND 3
+#define LSS_IMAGING 4
+#define LSS_SECURITY 5
+#define LSS_DISPLAY 6
+#define LSS_USB_HC 7
+#define LSS_USB_OTG 8
+#define LSS_AUDIO 9
+#define LSS_AUDIO_LPE 9
+#define LSS_AUDIO_SSP 9
+#define LSS_I2C0 10
+#define LSS_I2C1 10
+#define LSS_I2C2 10
+#define LSS_KBD 10
+#define LSS_SPI0 10
+#define LSS_SPI1 10
+#define LSS_SPI2 10
+#define LSS_GPIO 10
+#define LSS_SRAM 11 /* used by SCU, do not touch */
+#define LSS_SD_HC2 12
+/* LSS hardware bits 15,14,13 are hardwired to 0, thus unusable */
+#define MRST_NUM_LSS 13
+
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+#define SSMSK(mask, lss) ((mask) << ((lss) * 2))
+#define D0 0
+#define D0i1 1
+#define D0i2 2
+#define D0i3 3
+
+#define S0I3_SSS_TARGET ( \
+ SSMSK(D0i1, LSS_DMI) | \
+ SSMSK(D0i3, LSS_SD_HC0) | \
+ SSMSK(D0i3, LSS_SD_HC1) | \
+ SSMSK(D0i3, LSS_NAND) | \
+ SSMSK(D0i3, LSS_SD_HC2) | \
+ SSMSK(D0i3, LSS_IMAGING) | \
+ SSMSK(D0i3, LSS_SECURITY) | \
+ SSMSK(D0i3, LSS_DISPLAY) | \
+ SSMSK(D0i3, LSS_USB_HC) | \
+ SSMSK(D0i3, LSS_USB_OTG) | \
+ SSMSK(D0i3, LSS_AUDIO) | \
+ SSMSK(D0i1, LSS_I2C0))
+
+/*
+ * D0i1 on Langwell is Autonomous Clock Gating (ACG).
+ * Enable ACG on every LSS except camera and audio
+ */
+#define D0I1_ACG_SSS_TARGET \
+ (SUB_SYS_ALL_D0I1 & ~SSMSK(D0i1, LSS_IMAGING) & ~SSMSK(D0i1, LSS_AUDIO))
+
+enum cm_mode {
+ CM_NOP, /* ignore the config mode value */
+ CM_IMMEDIATE,
+ CM_DELAY,
+ CM_TRIGGER,
+ CM_INVALID
+};
+
+enum sys_state {
+ SYS_STATE_S0I0,
+ SYS_STATE_S0I1,
+ SYS_STATE_S0I2,
+ SYS_STATE_S0I3,
+ SYS_STATE_S3,
+ SYS_STATE_S5
+};
+
+#define SET_CFG_CMD 1
+
+enum int_status {
+ INT_SPURIOUS = 0,
+ INT_CMD_DONE = 1,
+ INT_CMD_ERR = 2,
+ INT_WAKE_RX = 3,
+ INT_SS_ERROR = 4,
+ INT_S0IX_MISS = 5,
+ INT_NO_ACKC6 = 6,
+ INT_INVALID = 7,
+};
+
+/* PMU register interface */
+static struct mrst_pmu_reg {
+ u32 pm_sts; /* 0x00 */
+ u32 pm_cmd; /* 0x04 */
+ u32 pm_ics; /* 0x08 */
+ u32 _resv1; /* 0x0C */
+ u32 pm_wkc[2]; /* 0x10 */
+ u32 pm_wks[2]; /* 0x18 */
+ u32 pm_ssc[4]; /* 0x20 */
+ u32 pm_sss[4]; /* 0x30 */
+ u32 pm_wssc[4]; /* 0x40 */
+ u32 pm_c3c4; /* 0x50 */
+ u32 pm_c5c6; /* 0x54 */
+ u32 pm_msi_disable; /* 0x58 */
+} *pmu_reg;
+
+static inline u32 pmu_read_sts(void) { return readl(&pmu_reg->pm_sts); }
+static inline u32 pmu_read_ics(void) { return readl(&pmu_reg->pm_ics); }
+static inline u32 pmu_read_wks(void) { return readl(&pmu_reg->pm_wks[0]); }
+static inline u32 pmu_read_sss(void) { return readl(&pmu_reg->pm_sss[0]); }
+
+static inline void pmu_write_cmd(u32 arg) { writel(arg, &pmu_reg->pm_cmd); }
+static inline void pmu_write_ics(u32 arg) { writel(arg, &pmu_reg->pm_ics); }
+static inline void pmu_write_wkc(u32 arg) { writel(arg, &pmu_reg->pm_wkc[0]); }
+static inline void pmu_write_ssc(u32 arg) { writel(arg, &pmu_reg->pm_ssc[0]); }
+static inline void pmu_write_wssc(u32 arg)
+ { writel(arg, &pmu_reg->pm_wssc[0]); }
+
+static inline void pmu_msi_enable(void) { writel(0, &pmu_reg->pm_msi_disable); }
+static inline u32 pmu_msi_is_disabled(void)
+ { return readl(&pmu_reg->pm_msi_disable); }
+
+union pmu_pm_ics {
+ struct {
+ u32 cause:8;
+ u32 enable:1;
+ u32 pending:1;
+ u32 reserved:22;
+ } bits;
+ u32 value;
+};
+
+static inline void pmu_irq_enable(void)
+{
+ union pmu_pm_ics pmu_ics;
+
+ pmu_ics.value = pmu_read_ics();
+ pmu_ics.bits.enable = 1;
+ pmu_write_ics(pmu_ics.value);
+}
+
+union pmu_pm_status {
+ struct {
+ u32 pmu_rev:8;
+ u32 pmu_busy:1;
+ u32 mode_id:4;
+ u32 Reserved:19;
+ } pmu_status_parts;
+ u32 pmu_status_value;
+};
+
+static inline int pmu_read_busy_status(void)
+{
+ union pmu_pm_status result;
+
+ result.pmu_status_value = pmu_read_sts();
+
+ return result.pmu_status_parts.pmu_busy;
+}
+
+/* pmu set config parameters */
+struct cfg_delay_param_t {
+ u32 cmd:8;
+ u32 ioc:1;
+ u32 cfg_mode:4;
+ u32 mode_id:3;
+ u32 sys_state:3;
+ u32 cfg_delay:8;
+ u32 rsvd:5;
+};
+
+struct cfg_trig_param_t {
+ u32 cmd:8;
+ u32 ioc:1;
+ u32 cfg_mode:4;
+ u32 mode_id:3;
+ u32 sys_state:3;
+ u32 cfg_trig_type:3;
+ u32 cfg_trig_val:8;
+ u32 cmbi:1;
+ u32 rsvd1:1;
+};
+
+union pmu_pm_set_cfg_cmd_t {
+ union {
+ struct cfg_delay_param_t d_param;
+ struct cfg_trig_param_t t_param;
+ } pmu2_params;
+ u32 pmu_pm_set_cfg_cmd_value;
+};
+
+#ifdef FUTURE_PATCH
+extern int mrst_s0i3_entry(u32 regval, u32 *regaddr);
+#else
+static inline int mrst_s0i3_entry(u32 regval, u32 *regaddr) { return -1; }
+#endif
+#endif
diff --git a/arch/x86/platform/olpc/olpc.c b/arch/x86/platform/olpc/olpc.c
index 8b9940e..7cce722 100644
--- a/arch/x86/platform/olpc/olpc.c
+++ b/arch/x86/platform/olpc/olpc.c
@@ -161,13 +161,13 @@ restart:
if (inbuf && inlen) {
/* write data to EC */
for (i = 0; i < inlen; i++) {
+ pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
+ outb(inbuf[i], 0x68);
if (wait_on_ibf(0x6c, 0)) {
printk(KERN_ERR "olpc-ec: timeout waiting for"
" EC accept data!\n");
goto err;
}
- pr_devel("olpc-ec: sending cmd arg 0x%x\n", inbuf[i]);
- outb(inbuf[i], 0x68);
}
}
if (outbuf && outlen) {
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S
index 1b979c1..01f5e3b 100644
--- a/arch/x86/vdso/vdso.S
+++ b/arch/x86/vdso/vdso.S
@@ -9,6 +9,7 @@ __PAGE_ALIGNED_DATA
vdso_start:
.incbin "arch/x86/vdso/vdso.so"
vdso_end:
+ .align PAGE_SIZE /* extra data here leaks to userspace. */
.previous
diff --git a/arch/x86/vdso/vdso32/sysenter.S b/arch/x86/vdso/vdso32/sysenter.S
index e2800af..e354bce 100644
--- a/arch/x86/vdso/vdso32/sysenter.S
+++ b/arch/x86/vdso/vdso32/sysenter.S
@@ -43,7 +43,7 @@ __kernel_vsyscall:
.space 7,0x90
/* 14: System call restart point is here! (SYSENTER_RETURN-2) */
- jmp .Lenter_kernel
+ int $0x80
/* 16: System call normal return point is here! */
VDSO32_SYSENTER_RETURN: /* Symbol used by sysenter.c via vdso32-syms.h */
pop %ebp
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 45e94ac..add2c2d 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -15,7 +15,7 @@ obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \
grant-table.o suspend.o platform-pci-unplug.o \
p2m.o
-obj-$(CONFIG_FUNCTION_TRACER) += trace.o
+obj-$(CONFIG_EVENT_TRACING) += trace.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 974a528..2d69617 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(xen_domain_type);
unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
EXPORT_SYMBOL(machine_to_phys_mapping);
-unsigned int machine_to_phys_order;
-EXPORT_SYMBOL(machine_to_phys_order);
+unsigned long machine_to_phys_nr;
+EXPORT_SYMBOL(machine_to_phys_nr);
struct start_info *xen_start_info;
EXPORT_SYMBOL_GPL(xen_start_info);
@@ -951,6 +951,10 @@ static const struct pv_info xen_info __initconst = {
.paravirt_enabled = 1,
.shared_kernel_pmd = 0,
+#ifdef CONFIG_X86_64
+ .extra_user_64bit_cs = FLAT_USER_CS64,
+#endif
+
.name = "Xen",
};
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index f987bde..20a6142 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1713,15 +1713,19 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
void __init xen_setup_machphys_mapping(void)
{
struct xen_machphys_mapping mapping;
- unsigned long machine_to_phys_nr_ents;
if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
machine_to_phys_mapping = (unsigned long *)mapping.v_start;
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
+ machine_to_phys_nr = mapping.max_mfn + 1;
} else {
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
+ machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
}
- machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
+#ifdef CONFIG_X86_32
+ if ((machine_to_phys_mapping + machine_to_phys_nr)
+ < machine_to_phys_mapping)
+ machine_to_phys_nr = (unsigned long *)NULL
+ - machine_to_phys_mapping;
+#endif
}
#ifdef CONFIG_X86_64
@@ -1916,6 +1920,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
# endif
#else
case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
+ case VVAR_PAGE:
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
@@ -1956,7 +1961,8 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#ifdef CONFIG_X86_64
/* Replicate changes to map the vsyscall page into the user
pagetable vsyscall mapping. */
- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) {
+ if ((idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) ||
+ idx == VVAR_PAGE) {
unsigned long vaddr = __fix_to_virt(idx);
set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 60aeeb5..c3b8d44 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/pm.h>
#include <linux/memblock.h>
+#include <linux/cpuidle.h>
#include <asm/elf.h>
#include <asm/vdso.h>
@@ -92,8 +93,6 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
if (end <= start)
return 0;
- printk(KERN_INFO "xen_release_chunk: looking at area pfn %lx-%lx: ",
- start, end);
for(pfn = start; pfn < end; pfn++) {
unsigned long mfn = pfn_to_mfn(pfn);
@@ -106,14 +105,14 @@ static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
&reservation);
- WARN(ret != 1, "Failed to release memory %lx-%lx err=%d\n",
- start, end, ret);
+ WARN(ret != 1, "Failed to release pfn %lx err=%d\n", pfn, ret);
if (ret == 1) {
__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
len++;
}
}
- printk(KERN_CONT "%ld pages freed\n", len);
+ printk(KERN_INFO "Freeing %lx-%lx pfn range: %lu pages freed\n",
+ start, end, len);
return len;
}
@@ -139,7 +138,7 @@ static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
if (last_end < max_addr)
released += xen_release_chunk(last_end, max_addr);
- printk(KERN_INFO "released %ld pages of unused memory\n", released);
+ printk(KERN_INFO "released %lu pages of unused memory\n", released);
return released;
}
@@ -185,6 +184,19 @@ static unsigned long __init xen_set_identity(const struct e820entry *list,
PFN_UP(start_pci), PFN_DOWN(last));
return identity;
}
+
+static unsigned long __init xen_get_max_pages(void)
+{
+ unsigned long max_pages = MAX_DOMAIN_PAGES;
+ domid_t domid = DOMID_SELF;
+ int ret;
+
+ ret = HYPERVISOR_memory_op(XENMEM_maximum_reservation, &domid);
+ if (ret > 0)
+ max_pages = ret;
+ return min(max_pages, MAX_DOMAIN_PAGES);
+}
+
/**
* machine_specific_memory_setup - Hook for machine specific memory setup.
**/
@@ -293,6 +305,12 @@ char * __init xen_memory_setup(void)
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+ extra_limit = xen_get_max_pages();
+ if (extra_limit >= max_pfn)
+ extra_pages = extra_limit - max_pfn;
+ else
+ extra_pages = 0;
+
extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
/*
@@ -426,7 +444,7 @@ void __init xen_arch_setup(void)
#ifdef CONFIG_X86_32
boot_cpu_data.hlt_works_ok = 1;
#endif
- pm_idle = default_idle;
+ disable_cpuidle();
boot_option_idle_override = IDLE_HALT;
fiddle_vdso();
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index b4533a8..d4fc6d4 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -32,6 +32,7 @@
#include <xen/page.h>
#include <xen/events.h>
+#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
@@ -207,6 +208,15 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus)
unsigned cpu;
unsigned int i;
+ if (skip_ioapic_setup) {
+ char *m = (max_cpus == 0) ?
+ "The nosmp parameter is incompatible with Xen; " \
+ "use Xen dom0_max_vcpus=1 parameter" :
+ "The noapic parameter is incompatible with Xen";
+
+ xen_raw_printk(m);
+ panic(m);
+ }
xen_init_lock_cpu(0);
smp_store_cpu_info(0);
@@ -521,8 +531,6 @@ static void __init xen_hvm_smp_prepare_cpus(unsigned int max_cpus)
native_smp_prepare_cpus(max_cpus);
WARN_ON(xen_smp_intr_init(0));
- if (!xen_have_vector_callback)
- return;
xen_init_lock_cpu(0);
xen_init_spinlocks();
}
@@ -546,6 +554,8 @@ static void xen_hvm_cpu_die(unsigned int cpu)
void __init xen_hvm_smp_init(void)
{
+ if (!xen_have_vector_callback)
+ return;
smp_ops.smp_prepare_cpus = xen_hvm_smp_prepare_cpus;
smp_ops.smp_send_reschedule = xen_smp_send_reschedule;
smp_ops.cpu_up = xen_hvm_cpu_up;
diff --git a/arch/x86/xen/trace.c b/arch/x86/xen/trace.c
index 734beba..520022d 100644
--- a/arch/x86/xen/trace.c
+++ b/arch/x86/xen/trace.c
@@ -1,4 +1,5 @@
#include <linux/ftrace.h>
+#include <xen/interface/xen.h>
#define N(x) [__HYPERVISOR_##x] = "("#x")"
static const char *xen_hypercall_names[] = {
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 22a2093..b040b0e 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -113,11 +113,13 @@ xen_iret_start_crit:
/*
* If there's something pending, mask events again so we can
- * jump back into xen_hypervisor_callback
+ * jump back into xen_hypervisor_callback. Otherwise do not
+ * touch XEN_vcpu_info_mask.
*/
- sete XEN_vcpu_info_mask(%eax)
+ jne 1f
+ movb $1, XEN_vcpu_info_mask(%eax)
- popl %eax
+1: popl %eax
/*
* From this point on the registers are restored and the stack