diff options
Diffstat (limited to 'arch/x86')
102 files changed, 733 insertions, 399 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index bacac55..e795b5b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,7 +28,7 @@ config X86 select HAVE_KRETPROBES select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE + select HAVE_FUNCTION_TRACER select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) select HAVE_ARCH_KGDB if !X86_VOYAGER select HAVE_ARCH_TRACEHOOK @@ -167,9 +167,12 @@ config GENERIC_PENDING_IRQ config X86_SMP bool depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) - select USE_GENERIC_SMP_HELPERS default y +config USE_GENERIC_SMP_HELPERS + def_bool y + depends on SMP + config X86_32_SMP def_bool y depends on X86_32 && SMP @@ -231,6 +234,10 @@ config SMP If you don't know what to do here, say N. +config X86_HAS_BOOT_CPU_ID + def_bool y + depends on X86_VOYAGER + config X86_FIND_SMP_CONFIG def_bool y depends on X86_MPPARSE || X86_VOYAGER @@ -1494,7 +1501,7 @@ config HAVE_ARCH_EARLY_PFN_TO_NID def_bool X86_64 depends on NUMA -menu "Power management options" +menu "Power management and ACPI options" depends on !X86_VOYAGER config ARCH_HIBERNATION_HEADER @@ -1894,6 +1901,10 @@ config SYSVIPC_COMPAT endmenu +config HAVE_ATOMIC_IOMAP + def_bool y + depends on X86_32 + source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 0b7c4a3..b815664 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -513,19 +513,19 @@ config CPU_SUP_UMC_32 If unsure, say N. config X86_DS - bool "Debug Store support" - default y - help - Add support for Debug Store. - This allows the kernel to provide a memory buffer to the hardware - to store various profiling and tracing events. + def_bool X86_PTRACE_BTS + depends on X86_DEBUGCTLMSR config X86_PTRACE_BTS - bool "ptrace interface to Branch Trace Store" + bool "Branch Trace Store" default y - depends on (X86_DS && X86_DEBUGCTLMSR) + depends on X86_DEBUGCTLMSR help - Add a ptrace interface to allow collecting an execution trace - of the traced task. - This collects control flow changes in a (cyclic) buffer and allows - debuggers to fill in the gaps and show an execution trace of the debuggee. + This adds a ptrace interface to the hardware's branch trace store. + + Debuggers may use it to collect an execution trace of the debugged + application in order to answer the question 'how did I get here?'. + Debuggers may trace user mode as well as kernel mode. + + Say Y unless there is no application development on this machine + and you want to save a small amount of code size. diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore index be0ed06..63eff3b 100644 --- a/arch/x86/boot/compressed/.gitignore +++ b/arch/x86/boot/compressed/.gitignore @@ -1 +1,3 @@ relocs +vmlinux.bin.all +vmlinux.relocs diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 0be77b3..7e8e8b2 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -74,7 +74,7 @@ static int kbd_pending(void) { u8 pending; asm volatile("int $0x16; setnz %0" - : "=rm" (pending) + : "=qm" (pending) : "a" (0x0100)); return pending; } diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8d676d8..9830681 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void) acpi_pci_disabled = 1; acpi_noirq_set(); } -extern int acpi_irq_balance_set(char *str); /* routines for saving/restoring kernel state */ extern int acpi_save_state_mem(void); diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 1a30c04..ac302a2 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -251,13 +251,6 @@ struct amd_iommu { /* Pointer to PCI device of this IOMMU */ struct pci_dev *dev; - /* - * Capability pointer. There could be more than one IOMMU per PCI - * device function if there are more than one AMD IOMMU capability - * pointers. - */ - u16 cap_ptr; - /* physical address of MMIO space */ u64 mmio_phys; /* virtual address of MMIO space */ @@ -266,6 +259,13 @@ struct amd_iommu { /* capabilities of that IOMMU read from ACPI */ u32 cap; + /* + * Capability pointer. There could be more than one IOMMU per PCI + * device function if there are more than one AMD IOMMU capability + * pointers. + */ + u16 cap_ptr; + /* pci domain of this IOMMU */ u16 pci_seg; @@ -284,19 +284,19 @@ struct amd_iommu { /* size of command buffer */ u32 cmd_buf_size; - /* event buffer virtual address */ - u8 *evt_buf; /* size of event buffer */ u32 evt_buf_size; + /* event buffer virtual address */ + u8 *evt_buf; /* MSI number for event interrupt */ u16 evt_msi_num; - /* if one, we need to send a completion wait command */ - int need_sync; - /* true if interrupts for this IOMMU are already enabled */ bool int_enabled; + /* if one, we need to send a completion wait command */ + int need_sync; + /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; }; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index f73e95d..cfdf8c2 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -91,7 +91,7 @@ #define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */ #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ -#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */ +#define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 4a5397b..097794f 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) /* Make sure we keep the same behaviour */ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { -#ifdef CONFIG_X86_32 - return 0; -#else +#ifdef CONFIG_X86_64 struct dma_mapping_ops *ops = get_dma_ops(dev); if (ops->mapping_error) return ops->mapping_error(dev, dma_addr); - return (dma_addr == bad_dma_address); #endif + return (dma_addr == bad_dma_address); } #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) @@ -255,9 +253,11 @@ static inline unsigned long dma_alloc_coherent_mask(struct device *dev, static inline gfp_t dma_alloc_coherent_gfp_flags(struct device *dev, gfp_t gfp) { -#ifdef CONFIG_X86_64 unsigned long dma_mask = dma_alloc_coherent_mask(dev, gfp); + if (dma_mask <= DMA_24BIT_MASK) + gfp |= GFP_DMA; +#ifdef CONFIG_X86_64 if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) gfp |= GFP_DMA32; #endif diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index 72c5a19..a950084 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h @@ -23,12 +23,13 @@ #ifndef _ASM_X86_DS_H #define _ASM_X86_DS_H -#ifdef CONFIG_X86_DS #include <linux/types.h> #include <linux/init.h> +#ifdef CONFIG_X86_DS + struct task_struct; /* @@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context); #else /* CONFIG_X86_DS */ -#define ds_init_intel(config) do {} while (0) +struct cpuinfo_x86; +static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} #endif /* CONFIG_X86_DS */ #endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 3ffc5a7..3984934 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h @@ -50,10 +50,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -#if APIC_DEBUG - #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) -#else - #define inquire_remote_apic(apicid) {} -#endif +#define inquire_remote_apic(apicid) do { \ + if (apic_verbosity >= APIC_DEBUG) \ + __inquire_remote_apic(apicid); \ + } while (0) #endif /* __ASM_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index 8668a94..23696d4 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -9,6 +9,10 @@ extern int fixmaps_set; +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; +extern pte_t *pkmap_page_table; + void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags); diff --git a/arch/x86/include/asm/fixmap_32.h b/arch/x86/include/asm/fixmap_32.h index 09f29ab..c7115c1 100644 --- a/arch/x86/include/asm/fixmap_32.h +++ b/arch/x86/include/asm/fixmap_32.h @@ -28,10 +28,8 @@ extern unsigned long __FIXADDR_TOP; #include <asm/acpi.h> #include <asm/apicdef.h> #include <asm/page.h> -#ifdef CONFIG_HIGHMEM #include <linux/threads.h> #include <asm/kmap_types.h> -#endif /* * Here we define all the compile-time 'special' virtual @@ -75,10 +73,8 @@ enum fixed_addresses { #ifdef CONFIG_X86_CYCLONE_TIMER FIX_CYCLONE_TIMER, /*cyclone timer register*/ #endif -#ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, -#endif #ifdef CONFIG_PCI_MMCONFIG FIX_PCIE_MCFG, #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 47f7e65..9e8bc29 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_FTRACE_H #define _ASM_X86_FTRACE_H -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #define MCOUNT_ADDR ((long)(mcount)) #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ @@ -19,6 +19,6 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) } #endif -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #endif /* _ASM_X86_FTRACE_H */ diff --git a/arch/x86/include/asm/highmem.h b/arch/x86/include/asm/highmem.h index a3b3b7c..bf9276b 100644 --- a/arch/x86/include/asm/highmem.h +++ b/arch/x86/include/asm/highmem.h @@ -25,14 +25,11 @@ #include <asm/kmap_types.h> #include <asm/tlbflush.h> #include <asm/paravirt.h> +#include <asm/fixmap.h> /* declarations for highmem.c */ extern unsigned long highstart_pfn, highend_pfn; -extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; -extern pte_t *pkmap_page_table; - /* * Right now we initialize only a single pte table. It can be extended * easily, subsequent pte tables have to be allocated in one physical diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 5618a10..ac2abc8 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -82,9 +82,9 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); extern void early_ioremap_init(void); extern void early_ioremap_clear(void); extern void early_ioremap_reset(void); -extern void *early_ioremap(unsigned long offset, unsigned long size); -extern void *early_memremap(unsigned long offset, unsigned long size); -extern void early_iounmap(void *addr, unsigned long size); +extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); +extern void __iomem *early_memremap(unsigned long offset, unsigned long size); +extern void early_iounmap(void __iomem *addr, unsigned long size); extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h new file mode 100644 index 0000000..c1f0628 --- /dev/null +++ b/arch/x86/include/asm/iomap.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> + +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); + +void +iounmap_atomic(void *kvaddr, enum km_type type); diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 98e28ea..0b500c5 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -6,8 +6,6 @@ extern void no_iommu_init(void); extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; -extern int dmar_disabled; -extern int forbid_dac; extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index d843ed0..0005adb 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -101,30 +101,22 @@ #define LAST_VM86_IRQ 15 #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) -#ifdef CONFIG_X86_64 +#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) # if NR_CPUS < MAX_IO_APICS # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) # else # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) # endif -#elif !defined(CONFIG_X86_VOYAGER) +#elif defined(CONFIG_X86_VOYAGER) -# if defined(CONFIG_X86_IO_APIC) || defined(CONFIG_PARAVIRT) || defined(CONFIG_X86_VISWS) - -# define NR_IRQS 224 - -# else /* IO_APIC || PARAVIRT */ - -# define NR_IRQS 16 - -# endif +# define NR_IRQS 224 -#else /* !VISWS && !VOYAGER */ +#else /* IO_APIC || VOYAGER */ -# define NR_IRQS 224 +# define NR_IRQS 16 -#endif /* VISWS */ +#endif /* Voyager specific defines */ /* These define the CPIs we use in linux */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 65679d0..8346be8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -364,6 +364,9 @@ struct kvm_arch{ struct page *ept_identity_pagetable; bool ept_identity_pagetable_done; + + unsigned long irq_sources_bitmap; + unsigned long irq_states[KVM_IOAPIC_NUM_PINS]; }; struct kvm_vm_stat { diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index d5c0b82..9d80db9 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h @@ -33,10 +33,9 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) { } -#if APIC_DEBUG - #define inquire_remote_apic(apicid) __inquire_remote_apic(apicid) -#else - #define inquire_remote_apic(apicid) {} -#endif +#define inquire_remote_apic(apicid) do { \ + if (apic_verbosity >= APIC_DEBUG) \ + __inquire_remote_apic(apicid); \ + } while (0) #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 485bdf0..07f1af4 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void) extern int early_pfn_to_nid(unsigned long pfn); +extern void resume_map_numa_kva(pgd_t *pgd); + #else /* !CONFIG_NUMA */ #define get_memcfg_numa get_memcfg_numa_flat +static inline void resume_map_numa_kva(pgd_t *pgd) {} + #endif /* CONFIG_NUMA */ #ifdef CONFIG_DISCONTIGMEM diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 478a924..397efa3 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -108,9 +108,7 @@ static __always_inline unsigned long long __native_read_tsc(void) { DECLARE_ARGS(val, low, high); - rdtsc_barrier(); asm volatile("rdtsc" : EAX_EDX_RET(val, low, high)); - rdtsc_barrier(); return EAX_EDX_VAL(val, low, high); } diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index 5b28995..d02d936 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h @@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void); */ #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) -#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU) - #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ dma_addr_t ADDR_NAME; #define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ @@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void); #define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ (((PTR)->LEN_NAME) = (VAL)) -#else -/* No IOMMU */ - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) -#define pci_unmap_addr(PTR, ADDR_NAME) (0) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) -#define pci_unmap_len(PTR, LEN_NAME) (0) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) - -#endif - #endif /* __KERNEL__ */ #endif /* _ASM_X86_PCI_64_H */ diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index fb16cec..52597ae 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -120,13 +120,13 @@ static inline void pud_clear(pud_t *pudp) write_cr3(pgd); } -#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK)) +#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) #define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) /* Find an entry in the second-level page table.. */ -#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \ +#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ pmd_index(address)) #ifdef CONFIG_SMP diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d1531c8..eefb059 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#define __ARCH_WANT_COMPAT_SYS_PTRACE - #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 2766021..d12811c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -225,5 +225,11 @@ static inline int hard_smp_processor_id(void) #endif /* CONFIG_X86_LOCAL_APIC */ +#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +extern unsigned char boot_cpu_id; +#else +#define boot_cpu_id 0 +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_SMP_H */ diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 90ac771..ff386ff 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -154,7 +154,7 @@ extern unsigned long node_remap_size[]; #endif -/* sched_domains SD_NODE_INIT for NUMAQ machines */ +/* sched_domains SD_NODE_INIT for NUMA machines */ #define SD_NODE_INIT (struct sched_domain) { \ .min_interval = 8, \ .max_interval = 32, \ @@ -169,8 +169,9 @@ extern unsigned long node_remap_size[]; .flags = SD_LOAD_BALANCE \ | SD_BALANCE_EXEC \ | SD_BALANCE_FORK \ - | SD_SERIALIZE \ - | SD_WAKE_BALANCE, \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE \ + | SD_SERIALIZE, \ .last_balance = jiffies, \ .balance_interval = 1, \ } @@ -238,7 +239,7 @@ struct pci_bus; void set_pci_bus_resources_arch_default(struct pci_bus *b); #ifdef CONFIG_SMP -#define mc_capable() (boot_cpu_data.x86_max_cores > 1) +#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) #define smt_capable() (smp_num_siblings > 1) #endif diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 38ae163..9cd83a8 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -34,6 +34,8 @@ static inline cycles_t get_cycles(void) static __always_inline cycles_t vget_cycles(void) { + cycles_t cycles; + /* * We only do VDSOs on TSC capable CPUs, so this shouldnt * access boot_cpu_data (which is not VDSO-safe): @@ -42,7 +44,11 @@ static __always_inline cycles_t vget_cycles(void) if (!cpu_has_tsc) return 0; #endif - return (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + cycles = (cycles_t)__native_read_tsc(); + rdtsc_barrier(); + + return cycles; } extern void tsc_init(void); diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 664f152..f8cfd00 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -46,7 +46,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size) return ret; case 10: __get_user_asm(*(u64 *)dst, (u64 __user *)src, - ret, "q", "", "=r", 16); + ret, "q", "", "=r", 10); if (unlikely(ret)) return ret; __get_user_asm(*(u16 *)(8 + (char *)dst), diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index 834b2c1..d2e415e 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h @@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate) __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime) #define __NR_timerfd_gettime 287 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime) -#define __NR_paccept 288 -__SYSCALL(__NR_paccept, sys_paccept) +#define __NR_accept4 288 +__SYSCALL(__NR_accept4, sys_accept4) #define __NR_signalfd4 289 __SYSCALL(__NR_signalfd4, sys_signalfd4) #define __NR_eventfd2 290 diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index c6ad93e..7a57826 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -13,6 +13,7 @@ #include <linux/numa.h> #include <linux/percpu.h> +#include <linux/timer.h> #include <asm/types.h> #include <asm/percpu.h> diff --git a/arch/x86/include/asm/voyager.h b/arch/x86/include/asm/voyager.h index 9c811d2..b3e6473 100644 --- a/arch/x86/include/asm/voyager.h +++ b/arch/x86/include/asm/voyager.h @@ -520,6 +520,7 @@ extern void voyager_restart(void); extern void voyager_cat_power_off(void); extern void voyager_cat_do_common_interrupt(void); extern void voyager_handle_nmi(void); +extern void voyager_smp_intr_init(void); /* Commands for the following are */ #define VOYAGER_PSI_READ 0 #define VOYAGER_PSI_WRITE 1 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index d7e5a58..b62a766 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -6,11 +6,12 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg endif # @@ -40,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o -obj-y += ds.o +obj-$(CONFIG_X86_DS) += ds.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8c1f76a..4c51a2f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void) error = acpi_parse_madt_ioapic_entries(); if (!error) { acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC; - acpi_irq_balance_set(NULL); acpi_ioapic = 1; smp_found_config = 1; diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a8fd9eb..a7b6dec 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -50,7 +50,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, /* returns !0 if the IOMMU is caching non-present entries in its TLB */ static int iommu_has_npcache(struct amd_iommu *iommu) { - return iommu->cap & IOMMU_CAP_NPCACHE; + return iommu->cap & (1UL << IOMMU_CAP_NPCACHE); } /**************************************************************************** @@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) spin_lock_irqsave(&iommu->lock, flags); ret = __iommu_queue_command(iommu, cmd); + if (!ret) + iommu->need_sync = 1; spin_unlock_irqrestore(&iommu->lock, flags); return ret; @@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu) cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); - iommu->need_sync = 0; - spin_lock_irqsave(&iommu->lock, flags); + if (!iommu->need_sync) + goto out; + + iommu->need_sync = 0; + ret = __iommu_queue_command(iommu, &cmd); if (ret) @@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, ret = iommu_queue_command(iommu, &cmd); - iommu->need_sync = 1; - return ret; } @@ -343,7 +344,7 @@ static int iommu_map(struct protection_domain *dom, u64 __pte, *pte, *page; bus_addr = PAGE_ALIGN(bus_addr); - phys_addr = PAGE_ALIGN(bus_addr); + phys_addr = PAGE_ALIGN(phys_addr); /* only support 512GB address spaces for now */ if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) @@ -536,6 +537,9 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, { address >>= PAGE_SHIFT; iommu_area_free(dom->bitmap, address, pages); + + if (address >= dom->next_bit) + dom->need_flush = true; } /**************************************************************************** @@ -596,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) continue; p2 = IOMMU_PTE_PAGE(p1[i]); - for (j = 0; j < 512; ++i) { + for (j = 0; j < 512; ++j) { if (!IOMMU_PTE_PRESENT(p2[j])) continue; p3 = IOMMU_PTE_PAGE(p2[j]); @@ -759,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu, write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); iommu_queue_inv_dev_entry(iommu, devid); - - iommu->need_sync = 1; } /***************************************************************************** @@ -855,6 +857,9 @@ static int get_device_resources(struct device *dev, print_devid(_bdf, 1); } + if (domain_for_device(_bdf) == NULL) + set_device_domain(*iommu, *domain, _bdf); + return 1; } @@ -905,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, if (address >= dom->aperture_size) return; - WARN_ON(address & 0xfffULL || address > dom->aperture_size); + WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; pte += IOMMU_PTE_L0_INDEX(address); @@ -917,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, /* * This function contains common code for mapping of a physically - * contiguous memory region into DMA address space. It is uses by all - * mapping functions provided by this IOMMU driver. + * contiguous memory region into DMA address space. It is used by all + * mapping functions provided with this IOMMU driver. * Must be called with the domain lock held. */ static dma_addr_t __map_single(struct device *dev, @@ -978,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu, dma_addr_t i, start; unsigned int pages; - if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) + if ((dma_addr == bad_dma_address) || + (dma_addr + size > dma_dom->aperture_size)) return; pages = iommu_num_pages(dma_addr, size, PAGE_SIZE); @@ -992,8 +998,10 @@ static void __unmap_single(struct amd_iommu *iommu, dma_ops_free_addresses(dma_dom, dma_addr, pages); - if (amd_iommu_unmap_flush) + if (amd_iommu_unmap_flush || dma_dom->need_flush) { iommu_flush_pages(iommu, dma_dom->domain.id, dma_addr, size); + dma_dom->need_flush = false; + } } /* @@ -1026,8 +1034,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, if (addr == bad_dma_address) goto out; - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1055,8 +1062,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, __unmap_single(iommu, domain->priv, dma_addr, size, dir); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1122,8 +1128,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, goto unmap; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1168,8 +1173,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, s->dma_address = s->dma_length = 0; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1220,8 +1224,7 @@ static void *alloc_coherent(struct device *dev, size_t size, goto out; } - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); out: spin_unlock_irqrestore(&domain->lock, flags); @@ -1252,8 +1255,7 @@ static void free_coherent(struct device *dev, size_t size, __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); - if (unlikely(iommu->need_sync)) - iommu_completion_wait(iommu); + iommu_completion_wait(iommu); spin_unlock_irqrestore(&domain->lock, flags); diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 0cdcda3..30ae270 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings we find in ACPI */ unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ -int amd_iommu_isolate; /* if 1, device isolation is enabled */ +int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the @@ -1213,7 +1213,9 @@ static int __init parse_amd_iommu_options(char *str) for (; *str; ++str) { if (strncmp(str, "isolate", 7) == 0) amd_iommu_isolate = 1; - if (strncmp(str, "fullflush", 11) == 0) + if (strncmp(str, "share", 5) == 0) + amd_iommu_isolate = 0; + if (strncmp(str, "fullflush", 9) == 0) amd_iommu_unmap_flush = true; } diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 04a7f96..16f9487 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1315,7 +1315,7 @@ void enable_x2apic(void) } } -void enable_IR_x2apic(void) +void __init enable_IR_x2apic(void) { #ifdef CONFIG_INTR_REMAP int ret; diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 0d9c993..ef8f831 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) */ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_SMP unsigned int eax, ebx, ecx, edx, sub_index; unsigned int ht_mask_width, core_plus_mask_width; unsigned int core_select_mask, core_level_siblings; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 25581dc..b9c9ea0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -20,6 +20,7 @@ #include <asm/pat.h> #include <asm/asm.h> #include <asm/numa.h> +#include <asm/smp.h> #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> @@ -549,6 +550,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_early_init(c); validate_pat_support(c); + +#ifdef CONFIG_SMP + c->cpu_index = boot_cpu_id; +#endif } void __init early_cpu_init(void) @@ -1134,7 +1139,7 @@ void __cpuinit cpu_init(void) /* * Boot processor to setup the FP and extended state context info. */ - if (!smp_processor_id()) + if (smp_processor_id() == boot_cpu_id) init_thread_xstate(); xsave_init(); diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index d3dcd58..7f05f44 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; if (cpu_family == CPU_HW_PSTATE) { - rdmsr(MSR_PSTATE_STATUS, lo, hi); - i = lo & HW_PSTATE_MASK; - data->currpstate = i; + if (data->currpstate == HW_PSTATE_INVALID) { + /* read (initial) hw pstate if not yet set */ + rdmsr(MSR_PSTATE_STATUS, lo, hi); + i = lo & HW_PSTATE_MASK; + + /* + * a workaround for family 11h erratum 311 might cause + * an "out-of-range Pstate if the core is in Pstate-0 + */ + if (i >= data->numps) + data->currpstate = HW_PSTATE_0; + else + data->currpstate = i; + } return 0; } do { @@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) } data->cpu = pol->cpu; + data->currpstate = HW_PSTATE_INVALID; if (powernow_k8_cpu_init_acpi(data)) { /* diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index ab48cfe..65cfb5d 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h @@ -5,6 +5,19 @@ * http://www.gnu.org/licenses/gpl.html */ + +enum pstate { + HW_PSTATE_INVALID = 0xff, + HW_PSTATE_0 = 0, + HW_PSTATE_1 = 1, + HW_PSTATE_2 = 2, + HW_PSTATE_3 = 3, + HW_PSTATE_4 = 4, + HW_PSTATE_5 = 5, + HW_PSTATE_6 = 6, + HW_PSTATE_7 = 7, +}; + struct powernow_k8_data { unsigned int cpu; @@ -23,7 +36,9 @@ struct powernow_k8_data { u32 exttype; /* extended interface = 1 */ /* keep track of the current fid / vid or pstate */ - u32 currvid, currfid, currpstate; + u32 currvid; + u32 currfid; + enum pstate currpstate; /* the powernow_table includes all frequency and vid/fid pairings: * fid are the lower 8 bits of the index, vid are the upper 8 bits. diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 2b69994..a2d1176 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c @@ -21,8 +21,6 @@ */ -#ifdef CONFIG_X86_DS - #include <asm/ds.h> #include <linux/errno.h> @@ -211,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context); static inline struct ds_context *ds_get_context(struct task_struct *task) { struct ds_context *context; + unsigned long irq; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); context = (task ? task->thread.ds_ctx : this_system_context); if (context) context->count++; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); return context; } @@ -226,18 +225,16 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) /* * Same as ds_get_context, but allocates the context and it's DS * structure, if necessary; returns NULL; if out of memory. - * - * pre: requires ds_lock to be held */ static inline struct ds_context *ds_alloc_context(struct task_struct *task) { struct ds_context **p_context = (task ? &task->thread.ds_ctx : &this_system_context); struct ds_context *context = *p_context; + unsigned long irq; if (!context) { context = kzalloc(sizeof(*context), GFP_KERNEL); - if (!context) return NULL; @@ -247,18 +244,27 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) return NULL; } - *p_context = context; + spin_lock_irqsave(&ds_lock, irq); - context->this = p_context; - context->task = task; + if (*p_context) { + kfree(context->ds); + kfree(context); + + context = *p_context; + } else { + *p_context = context; - if (task) - set_tsk_thread_flag(task, TIF_DS_AREA_MSR); + context->this = p_context; + context->task = task; - if (!task || (task == current)) - wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0); + if (task) + set_tsk_thread_flag(task, TIF_DS_AREA_MSR); - get_tracer(task); + if (!task || (task == current)) + wrmsrl(MSR_IA32_DS_AREA, + (unsigned long)context->ds); + } + spin_unlock_irqrestore(&ds_lock, irq); } context->count++; @@ -272,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task) */ static inline void ds_put_context(struct ds_context *context) { + unsigned long irq; + if (!context) return; - spin_lock(&ds_lock); + spin_lock_irqsave(&ds_lock, irq); if (--context->count) goto out; @@ -297,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context) kfree(context->ds); kfree(context); out: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); } @@ -368,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size, struct ds_context *context; unsigned long buffer, adj; const unsigned long alignment = (1 << 3); + unsigned long irq; int error = 0; if (!ds_cfg.sizeof_ds) @@ -382,25 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size, return -EOPNOTSUPP; - spin_lock(&ds_lock); - - if (!check_tracer(task)) - return -EPERM; - - error = -ENOMEM; context = ds_alloc_context(task); if (!context) + return -ENOMEM; + + spin_lock_irqsave(&ds_lock, irq); + + error = -EPERM; + if (!check_tracer(task)) goto out_unlock; + get_tracer(task); + error = -EALREADY; if (context->owner[qual] == current) - goto out_unlock; + goto out_put_tracer; error = -EPERM; if (context->owner[qual] != NULL) - goto out_unlock; + goto out_put_tracer; context->owner[qual] = current; - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); error = -ENOMEM; @@ -448,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size, out_release: context->owner[qual] = NULL; ds_put_context(context); + put_tracer(task); + return error; + + out_put_tracer: + spin_unlock_irqrestore(&ds_lock, irq); + ds_put_context(context); + put_tracer(task); return error; out_unlock: - spin_unlock(&ds_lock); + spin_unlock_irqrestore(&ds_lock, irq); ds_put_context(context); return error; } @@ -801,13 +819,21 @@ static const struct ds_configuration ds_cfg_var = { .sizeof_ds = sizeof(long) * 12, .sizeof_field = sizeof(long), .sizeof_rec[ds_bts] = sizeof(long) * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = sizeof(long) * 10 +#else + .sizeof_rec[ds_pebs] = sizeof(long) * 18 +#endif }; static const struct ds_configuration ds_cfg_64 = { .sizeof_ds = 8 * 12, .sizeof_field = 8, .sizeof_rec[ds_bts] = 8 * 3, +#ifdef __i386__ .sizeof_rec[ds_pebs] = 8 * 10 +#else + .sizeof_rec[ds_pebs] = 8 * 18 +#endif }; static inline void @@ -861,4 +887,3 @@ void ds_free(struct ds_context *context) while (leftovers--) ds_put_context(context); } -#endif /* CONFIG_X86_DS */ diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index ce97bf3..7aafeb5 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -1290,15 +1290,17 @@ void __init e820_reserve_resources(void) res->start = e820.map[i].addr; res->end = end; - res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + res->flags = IORESOURCE_MEM; /* * don't register the region that could be conflicted with * pci device BAR resource and insert them later in * pcibios_resource_survey() */ - if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) + if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) { + res->flags |= IORESOURCE_BUSY; insert_resource(&iomem_resource, res); + } res++; } @@ -1318,7 +1320,7 @@ void __init e820_reserve_resources_late(void) res = e820_res; for (i = 0; i < e820.nr_map; i++) { if (!res->parent && res->end) - reserve_region_with_split(&iomem_resource, res->start, res->end, res->name); + insert_resource_expand_to_fit(&iomem_resource, res); res++; } } diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 3ce029f..1b894b7 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func) } #endif -#ifdef CONFIG_DMAR -static void __init intel_g33_dmar(int num, int slot, int func) -{ - struct acpi_table_header *dmar_tbl; - acpi_status status; - - status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl); - if (ACPI_SUCCESS(status)) { - printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n"); - dmar_disabled = 1; - } -} -#endif - #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs }, { PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS, PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd }, -#ifdef CONFIG_DMAR - { PCI_VENDOR_ID_INTEL, 0x29c0, - PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar }, -#endif {} }; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dd65143..28b597e 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1149,7 +1149,7 @@ ENDPROC(xen_failsafe_callback) #endif /* CONFIG_XEN */ -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -1204,7 +1204,7 @@ trace: jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ .section .rodata,"a" #include "syscall_table_32.S" diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 09e7145..b86f332 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -61,7 +61,7 @@ .code64 -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) retq @@ -138,7 +138,7 @@ trace: jmp ftrace_stub END(mcount) #endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FTRACE */ +#endif /* CONFIG_FUNCTION_TRACER */ #ifndef CONFIG_PREEMPT #define retint_kernel retint_restore_args diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index f454c78f..0aa2c44 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c @@ -250,31 +250,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) { struct acpi_table_header *header = NULL; int i = 0; - acpi_size tbl_size; - while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) { + while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { struct oem_table *t = (struct oem_table *)header; oem_addrX = t->OEMTableAddr; oem_size = t->OEMTableSize; - early_acpi_os_unmap_memory(header, tbl_size); *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); return 0; } - early_acpi_os_unmap_memory(header, tbl_size); } return -1; } void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) { - if (!oem_addr) - return; - - __acpi_unmap_table((char *)oem_addr, oem_size); } #endif diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d073d98..50ea0ac 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -21,8 +21,7 @@ #include <asm/nops.h> -/* Long is fine, even if it is only 4 bytes ;-) */ -static unsigned long *ftrace_nop; +static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; union ftrace_code_union { char code[MCOUNT_INSN_SIZE]; @@ -33,17 +32,17 @@ union ftrace_code_union { }; -static int notrace ftrace_calc_offset(long ip, long addr) +static int ftrace_calc_offset(long ip, long addr) { return (int)(addr - ip); } -notrace unsigned char *ftrace_nop_replace(void) +unsigned char *ftrace_nop_replace(void) { - return (char *)ftrace_nop; + return ftrace_nop; } -notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -57,7 +56,7 @@ notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) return calc.code; } -notrace int +int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -66,26 +65,31 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, /* * Note: Due to modules and __init, code can * disappear and change, we need to protect against faulting - * as well as code changing. + * as well as code changing. We do this by using the + * probe_kernel_* functions. * * No real locking needed, this code is run through * kstop_machine, or before SMP starts. */ - if (__copy_from_user_inatomic(replaced, (char __user *)ip, MCOUNT_INSN_SIZE)) - return 1; + /* read the text we want to modify */ + if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) - return 2; + return -EINVAL; - WARN_ON_ONCE(__copy_to_user_inatomic((char __user *)ip, new_code, - MCOUNT_INSN_SIZE)); + /* replace the text with the new text */ + if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) + return -EPERM; sync_core(); return 0; } -notrace int ftrace_update_ftrace_func(ftrace_func_t func) +int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); unsigned char old[MCOUNT_INSN_SIZE], *new; @@ -98,13 +102,6 @@ notrace int ftrace_update_ftrace_func(ftrace_func_t func) return ret; } -notrace int ftrace_mcount_set(unsigned long *data) -{ - /* mcount is initialized as a nop */ - *data = 0; - return 0; -} - int __init ftrace_dyn_arch_init(void *data) { extern const unsigned char ftrace_test_p6nop[]; @@ -127,9 +124,6 @@ int __init ftrace_dyn_arch_init(void *data) * TODO: check the cpuid to determine the best nop. */ asm volatile ( - "jmp ftrace_test_jmp\n" - /* This code needs to stay around */ - ".section .text, \"ax\"\n" "ftrace_test_jmp:" "jmp ftrace_test_p6nop\n" "nop\n" @@ -140,8 +134,6 @@ int __init ftrace_dyn_arch_init(void *data) "jmp 1f\n" "ftrace_test_nop5:" ".byte 0x66,0x66,0x66,0x66,0x90\n" - "jmp 1f\n" - ".previous\n" "1:" ".section .fixup, \"ax\"\n" "2: movl $1, %0\n" @@ -156,15 +148,15 @@ int __init ftrace_dyn_arch_init(void *data) switch (faulted) { case 0: pr_info("ftrace: converting mcount calls to 0f 1f 44 00 00\n"); - ftrace_nop = (unsigned long *)ftrace_test_p6nop; + memcpy(ftrace_nop, ftrace_test_p6nop, MCOUNT_INSN_SIZE); break; case 1: pr_info("ftrace: converting mcount calls to 66 66 66 66 90\n"); - ftrace_nop = (unsigned long *)ftrace_test_nop5; + memcpy(ftrace_nop, ftrace_test_nop5, MCOUNT_INSN_SIZE); break; case 2: pr_info("ftrace: converting mcount calls to jmp . + 5\n"); - ftrace_nop = (unsigned long *)ftrace_test_jmp; + memcpy(ftrace_nop, ftrace_test_jmp, MCOUNT_INSN_SIZE); break; } diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 680a065..2c7dbdb 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -15,7 +15,6 @@ #include <linux/ctype.h> #include <linux/init.h> #include <linux/sched.h> -#include <linux/bootmem.h> #include <linux/module.h> #include <linux/hardirq.h> #include <asm/smp.h> @@ -398,16 +397,16 @@ void __init uv_system_init(void) printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); - uv_blade_info = alloc_bootmem_pages(bytes); + uv_blade_info = kmalloc(bytes, GFP_KERNEL); get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); - uv_node_to_blade = alloc_bootmem_pages(bytes); + uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); memset(uv_node_to_blade, 255, bytes); bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); - uv_cpu_to_blade = alloc_bootmem_pages(bytes); + uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); memset(uv_cpu_to_blade, 255, bytes); blade = 0; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 77017e8..067d8de 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta, * what we wrote hit the chip before we compare it to the * counter. */ - WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); + WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt); return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; } @@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) { if (request_irq(dev->irq, hpet_interrupt_handler, - IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev)) + IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) return -1; disable_irq(dev->irq); diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index dd7ebee..43cec6b 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -5,7 +5,7 @@ #include <asm/desc.h> #include <asm/ftrace.h> -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* mcount is defined in assembly */ EXPORT_SYMBOL(mcount); #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f20608..b0f61f0 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void) stts(); } -void __init init_thread_xstate(void) +void __cpuinit init_thread_xstate(void) { if (!HAVE_HWFP) { xstate_size = sizeof(struct i387_soft_struct); diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index b764d74..9043251 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -1140,6 +1140,20 @@ static void __clear_irq_vector(int irq) cfg->vector = 0; cpus_clear(cfg->domain); + + if (likely(!cfg->move_in_progress)) + return; + cpus_and(mask, cfg->old_domain, cpu_online_map); + for_each_cpu_mask_nr(cpu, mask) { + for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; + vector++) { + if (per_cpu(vector_irq, cpu)[vector] != irq) + continue; + per_cpu(vector_irq, cpu)[vector] = -1; + break; + } + } + cfg->move_in_progress = 0; } void __setup_vector_irq(int cpu) @@ -3594,25 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic) int __init probe_nr_irqs(void) { - int idx; - int nr = 0; -#ifndef CONFIG_XEN - int nr_min = 32; -#else - int nr_min = NR_IRQS; -#endif - - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - /* double it for hotplug and msi and nmi */ - nr <<= 1; - - /* something wrong ? */ - if (nr < nr_min) - nr = nr_min; - - return nr; + return NR_IRQS; } /* -------------------------------------------------------------------------- @@ -3759,7 +3755,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) void __init setup_ioapic_dest(void) { int pin, ioapic, irq, irq_entry; + struct irq_desc *desc; struct irq_cfg *cfg; + cpumask_t mask; if (skip_ioapic_setup == 1) return; @@ -3776,16 +3774,30 @@ void __init setup_ioapic_dest(void) * cpu is online. */ cfg = irq_cfg(irq); - if (!cfg->vector) + if (!cfg->vector) { setup_IO_APIC_irq(ioapic, pin, irq, irq_trigger(irq_entry), irq_polarity(irq_entry)); + continue; + + } + + /* + * Honour affinities which have been set in early boot + */ + desc = irq_to_desc(irq); + if (desc->status & + (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) + mask = desc->affinity; + else + mask = TARGET_CPUS; + #ifdef CONFIG_INTR_REMAP - else if (intr_remapping_enabled) - set_ir_ioapic_affinity_irq(irq, TARGET_CPUS); -#endif + if (intr_remapping_enabled) + set_ir_ioapic_affinity_irq(irq, mask); else - set_ioapic_affinity_irq(irq, TARGET_CPUS); +#endif + set_ioapic_affinity_irq(irq, mask); } } diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 304d8bad..cbc4332 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -18,7 +18,6 @@ static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) }, {} }; EXPORT_SYMBOL(k8_nb_ids); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 774ac49..e169ae9 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt) } #ifdef CONFIG_X86_LOCAL_APIC -static void kvm_setup_secondary_clock(void) +static void __cpuinit kvm_setup_secondary_clock(void) { /* * Now that the first cpu already had this clocksource initialized, diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 0732adb..7a38574 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -162,7 +162,10 @@ void machine_kexec(struct kimage *image) page_list[VA_PTE_0] = (unsigned long)kexec_pte0; page_list[PA_PTE_1] = __pa(kexec_pte1); page_list[VA_PTE_1] = (unsigned long)kexec_pte1; - page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); + + if (image->type == KEXEC_TYPE_DEFAULT) + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) + << PAGE_SHIFT); /* The segment registers are funny things, they have both a * visible and an invisible part. Whenever the visible part is diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 7a1f8ee..5f8e5d7 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c @@ -39,7 +39,7 @@ #include <asm/microcode.h> MODULE_DESCRIPTION("AMD Microcode Update Driver"); -MODULE_AUTHOR("Peter Oruba <peter.oruba@amd.com>"); +MODULE_AUTHOR("Peter Oruba"); MODULE_LICENSE("GPL v2"); #define UCODE_MAGIC 0x00414d44 diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 936d8d5..82fb280 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c @@ -480,8 +480,8 @@ static int __init microcode_init(void) printk(KERN_INFO "Microcode Update Driver: v" MICROCODE_VERSION - " <tigran@aivazian.fsnet.co.uk>" - " <peter.oruba@amd.com>\n"); + " <tigran@aivazian.fsnet.co.uk>," + " Peter Oruba\n"); return 0; } diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index f98f4e1..0f4c1fd 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early) printk(KERN_INFO "Using ACPI for processor (LAPIC) " "configuration information\n"); + if (!mpf) + return; + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification); #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 0e9f198..95777b0 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -7,7 +7,8 @@ #include <asm/paravirt.h> -static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags) +static inline void +default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) { __raw_spin_lock(lock); } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index e1e731d..d28bbdc 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p) ++p; if (*p == '\0') break; - bridge = simple_strtol(p, &endp, 0); + bridge = simple_strtoul(p, &endp, 0); if (p == endp) break; diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1972266..1926248 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -9,6 +9,8 @@ #include <asm/calgary.h> #include <asm/amd_iommu.h> +static int forbid_dac __read_mostly; + struct dma_mapping_ops *dma_ops; EXPORT_SYMBOL(dma_ops); @@ -291,3 +293,17 @@ void pci_iommu_shutdown(void) } /* Must execute after PCI subsystem */ fs_initcall(pci_iommu_init); + +#ifdef CONFIG_PCI +/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ + +static __devinit void via_no_dac(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { + printk(KERN_INFO "PCI: VIA PCI bridge detected." + "Disabling DAC.\n"); + forbid_dac = 1; + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); +#endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index e3f75bb..ba7ad83 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size) spin_lock_irqsave(&iommu_bitmap_lock, flags); iommu_area_free(iommu_gart_bitmap, offset, size); + if (offset >= next_bit) + next_bit = offset + size; spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } @@ -744,7 +746,7 @@ void __init gart_iommu_init(void) long i; if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { - printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); + printk(KERN_INFO "PCI-GART: No AMD GART found.\n"); return; } diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index c4ce033..3c539d1 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -18,9 +18,21 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); } +static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t flags) +{ + void *vaddr; + + vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); + if (vaddr) + return vaddr; + + return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); +} + struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, - .alloc_coherent = swiotlb_alloc_coherent, + .alloc_coherent = x86_swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, .map_single = swiotlb_map_single_phys, .unmap_single = swiotlb_unmap_single, diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f4c93f1..cc5a254 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -29,11 +29,7 @@ EXPORT_SYMBOL(pm_power_off); static const struct desc_ptr no_idt = {}; static int reboot_mode; -/* - * Keyboard reset and triple fault may result in INIT, not RESET, which - * doesn't work when we're in vmx root mode. Try ACPI first. - */ -enum reboot_type reboot_type = BOOT_ACPI; +enum reboot_type reboot_type = BOOT_KBD; int reboot_force; #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) @@ -173,6 +169,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "0KW626"), }, }, + { /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */ + .callback = set_bios_reboot, + .ident = "Dell OptiPlex 330", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"), + DMI_MATCH(DMI_BOARD_NAME, "0KP561"), + }, + }, { /* Handle problems with rebooting on Dell 2400's */ .callback = set_bios_reboot, .ident = "Dell PowerEdge 2400", diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0fa6790..9d5674f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -764,7 +764,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = { .callback = dmi_low_memory_corruption, .ident = "Phoenix BIOS", .matches = { - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"), + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"), }, }, #endif diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index e00534b..f4049f3 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -154,6 +154,12 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, flush_mm = mm; flush_va = va; cpus_or(flush_cpumask, cpumask, flush_cpumask); + + /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); /* * We have to send the IPI only to * CPUs affected. diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index dcbf7a1..8f919ca 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -183,6 +183,11 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); /* + * Make the above memory operations globally visible before + * sending the IPI. + */ + smp_mb(); + /* * We have to send the IPI only to * CPUs affected. */ diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 161bb85..424093b 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -55,7 +55,7 @@ u64 native_sched_clock(void) rdtscll(this_offset); /* return the value in ns */ - return cycles_2_ns(this_offset); + return __cycles_2_ns(this_offset); } /* We need to define a real function for sched_clock, to override the @@ -759,7 +759,7 @@ __cpuinit int unsynchronized_tsc(void) if (!cpu_has_tsc || tsc_unstable) return 1; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_SMP if (apic_is_clustered_box()) return 1; #endif @@ -813,10 +813,6 @@ void __init tsc_init(void) cpu_khz = calibrate_cpu(); #endif - lpj = ((u64)tsc_khz * 1000); - do_div(lpj, HZ); - lpj_fine = lpj; - printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); @@ -836,6 +832,10 @@ void __init tsc_init(void) /* now allow native_sched_clock() to use rdtsc */ tsc_disabled = 0; + lpj = ((u64)tsc_khz * 1000); + do_div(lpj, HZ); + lpj_fine = lpj; + use_tsc_delay(); /* Check and install the TSC clocksource */ dmi_check_system(bad_tsc_dmi_table); diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 9ffb01c..1c0dfbc 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void) cycles_t start, now, prev, end; int i; + rdtsc_barrier(); start = get_cycles(); + rdtsc_barrier(); /* * The measurement runs for 20 msecs: */ @@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void) */ __raw_spin_lock(&sync_lock); prev = last_tsc; + rdtsc_barrier(); now = get_cycles(); + rdtsc_barrier(); last_tsc = now; __raw_spin_unlock(&sync_lock); diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 7766d36..a688f3b 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c @@ -78,7 +78,7 @@ static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, static void __init set_vsmp_pv_ops(void) { - void *address; + void __iomem *address; unsigned int cap, ctl, cfg; /* set vSMP magic bits to indicate vSMP capable kernel */ diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index b545f37..695e426 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c @@ -12,7 +12,7 @@ #include <asm/desc.h> #include <asm/ftrace.h> -#ifdef CONFIG_FTRACE +#ifdef CONFIG_FUNCTION_TRACER /* mcount is defined in assembly */ EXPORT_SYMBOL(mcount); #endif diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index b13acb7..15c3e69 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -310,7 +310,7 @@ static void __init setup_xstate_init(void) /* * Enable and initialize the xsave feature. */ -void __init xsave_cntxt_init(void) +void __ref xsave_cntxt_init(void) { unsigned int eax, ebx, ecx, edx; diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index ce3251c..b81125f 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -20,6 +20,8 @@ if VIRTUALIZATION config KVM tristate "Kernel-based Virtual Machine (KVM) support" depends on HAVE_KVM + # for device assignment: + depends on PCI select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 11c6725..59ebd37 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -545,6 +545,14 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm) if (!pit) return NULL; + mutex_lock(&kvm->lock); + pit->irq_source_id = kvm_request_irq_source_id(kvm); + mutex_unlock(&kvm->lock); + if (pit->irq_source_id < 0) { + kfree(pit); + return NULL; + } + mutex_init(&pit->pit_state.lock); mutex_lock(&pit->pit_state.lock); spin_lock_init(&pit->pit_state.inject_lock); @@ -587,6 +595,7 @@ void kvm_free_pit(struct kvm *kvm) mutex_lock(&kvm->arch.vpit->pit_state.lock); timer = &kvm->arch.vpit->pit_state.pit_timer.timer; hrtimer_cancel(timer); + kvm_free_irq_source_id(kvm, kvm->arch.vpit->irq_source_id); mutex_unlock(&kvm->arch.vpit->pit_state.lock); kfree(kvm->arch.vpit); } @@ -595,8 +604,8 @@ void kvm_free_pit(struct kvm *kvm) static void __inject_pit_timer_intr(struct kvm *kvm) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, 0, 1); - kvm_set_irq(kvm, 0, 0); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); + kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); mutex_unlock(&kvm->lock); } diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index e436d49..4178022 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -44,6 +44,7 @@ struct kvm_pit { struct kvm_io_device speaker_dev; struct kvm *kvm; struct kvm_kpit_state pit_state; + int irq_source_id; }; #define KVM_PIT_BASE_ADDRESS 0x40 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 99c239c..410ddbc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) if (r) goto out; r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache, - rmap_desc_cache, 1); + rmap_desc_cache, 4); if (r) goto out; r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8); @@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) } rmap_write_protect(vcpu->kvm, sp->gfn); + kvm_unlink_unsync_page(vcpu->kvm, sp); if (vcpu->arch.mmu.sync_page(vcpu, sp)) { kvm_mmu_zap_page(vcpu->kvm, sp); return 1; } kvm_mmu_flush_tlb(vcpu); - kvm_unlink_unsync_page(vcpu->kvm, sp); return 0; } @@ -2634,6 +2634,7 @@ static int kvm_pv_mmu_write(struct kvm_vcpu *vcpu, static int kvm_pv_mmu_flush_tlb(struct kvm_vcpu *vcpu) { kvm_x86_ops->tlb_flush(vcpu); + set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests); return 1; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 613ec9a..84eee43 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2], &curr_pte, sizeof(curr_pte)); if (r || curr_pte != gw->ptes[level - 2]) { + kvm_mmu_put_page(shadow_page, sptep); kvm_release_pfn_clean(sw->pfn); sw->sptep = NULL; return 1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2643b43..a4018b0 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) if (cpu_has_virtual_nmis()) { if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { - if (vmx_nmi_enabled(vcpu)) { + if (vcpu->arch.interrupt.pending) { + enable_nmi_window(vcpu); + } else if (vmx_nmi_enabled(vcpu)) { vcpu->arch.nmi_pending = false; vcpu->arch.nmi_injected = true; } else { @@ -3564,7 +3566,8 @@ static int __init vmx_init(void) bypass_guest_pf = 0; kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | VMX_EPT_WRITABLE_MASK | - VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); + VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | + VMX_EPT_IGMT_BIT); kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK); kvm_enable_tdp(); diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 3e010d2..ec5edc3 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h @@ -352,6 +352,7 @@ enum vmcs_field { #define VMX_EPT_READABLE_MASK 0x1ull #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull +#define VMX_EPT_IGMT_BIT (1ull << 6) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4f0677d..f1f8ff2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1742,7 +1742,8 @@ long kvm_arch_vm_ioctl(struct file *filp, goto out; if (irqchip_in_kernel(kvm)) { mutex_lock(&kvm->lock); - kvm_set_irq(kvm, irq_event.irq, irq_event.level); + kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event.irq, irq_event.level); mutex_unlock(&kvm->lock); r = 0; } @@ -4013,6 +4014,9 @@ struct kvm *kvm_arch_create_vm(void) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ + set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap); + return kvm; } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 48ee4f9..a5d8e1a 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -367,10 +367,9 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, * lazily after a task switch, and Linux uses that gratefully, but wouldn't a * name like "FPUTRAP bit" be a little less cryptic? * - * We store cr0 (and cr3) locally, because the Host never changes it. The - * Guest sometimes wants to read it and we'd prefer not to bother the Host - * unnecessarily. */ -static unsigned long current_cr0, current_cr3; + * We store cr0 locally because the Host never changes it. The Guest sometimes + * wants to read it and we'd prefer not to bother the Host unnecessarily. */ +static unsigned long current_cr0; static void lguest_write_cr0(unsigned long val) { lazy_hcall(LHCALL_TS, val & X86_CR0_TS, 0, 0); @@ -399,17 +398,23 @@ static unsigned long lguest_read_cr2(void) return lguest_data.cr2; } +/* See lguest_set_pte() below. */ +static bool cr3_changed = false; + /* cr3 is the current toplevel pagetable page: the principle is the same as - * cr0. Keep a local copy, and tell the Host when it changes. */ + * cr0. Keep a local copy, and tell the Host when it changes. The only + * difference is that our local copy is in lguest_data because the Host needs + * to set it upon our initial hypercall. */ static void lguest_write_cr3(unsigned long cr3) { + lguest_data.pgdir = cr3; lazy_hcall(LHCALL_NEW_PGTABLE, cr3, 0, 0); - current_cr3 = cr3; + cr3_changed = true; } static unsigned long lguest_read_cr3(void) { - return current_cr3; + return lguest_data.pgdir; } /* cr4 is used to enable and disable PGE, but we don't care. */ @@ -498,13 +503,13 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) * to forget all of them. Fortunately, this is very rare. * * ... except in early boot when the kernel sets up the initial pagetables, - * which makes booting astonishingly slow. So we don't even tell the Host - * anything changed until we've done the first page table switch. */ + * which makes booting astonishingly slow: 1.83 seconds! So we don't even tell + * the Host anything changed until we've done the first page table switch, + * which brings boot back to 0.25 seconds. */ static void lguest_set_pte(pte_t *ptep, pte_t pteval) { *ptep = pteval; - /* Don't bother with hypercall before initial setup. */ - if (current_cr3) + if (cr3_changed) lazy_hcall(LHCALL_FLUSH_TLB, 1, 0, 0); } @@ -521,7 +526,7 @@ static void lguest_set_pte(pte_t *ptep, pte_t pteval) static void lguest_flush_tlb_single(unsigned long addr) { /* Simply set it to zero: if it was not, it will fault back in. */ - lazy_hcall(LHCALL_SET_PTE, current_cr3, addr, 0); + lazy_hcall(LHCALL_SET_PTE, lguest_data.pgdir, addr, 0); } /* This is what happens after the Guest has removed a large number of entries. @@ -581,6 +586,9 @@ static void __init lguest_init_IRQ(void) for (i = 0; i < LGUEST_IRQS; i++) { int vector = FIRST_EXTERNAL_VECTOR + i; + /* Some systems map "vectors" to interrupts weirdly. Lguest has + * a straightforward 1 to 1 mapping, so force that here. */ + __get_cpu_var(vector_irq)[vector] = i; if (vector != SYSCALL_VECTOR) { set_intr_gate(vector, interrupt[vector]); set_irq_chip_and_handler_name(i, &lguest_irq_controller, diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c index 6bbdd63..a580b95 100644 --- a/arch/x86/mach-voyager/setup.c +++ b/arch/x86/mach-voyager/setup.c @@ -27,7 +27,7 @@ static struct irqaction irq2 = { void __init intr_init_hook(void) { #ifdef CONFIG_SMP - smp_intr_init(); + voyager_smp_intr_init(); #endif setup_irq(2, &irq2); diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 0f6e8a6..5214500 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -7,6 +7,7 @@ * This file provides all the same external entries as smp.c but uses * the voyager hal to provide the functionality */ +#include <linux/cpu.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/kernel_stat.h> @@ -90,6 +91,7 @@ static void ack_vic_irq(unsigned int irq); static void vic_enable_cpi(void); static void do_boot_cpu(__u8 cpuid); static void do_quad_bootstrap(void); +static void initialize_secondary(void); int hard_smp_processor_id(void); int safe_smp_processor_id(void); @@ -344,6 +346,12 @@ static void do_quad_bootstrap(void) } } +void prefill_possible_map(void) +{ + /* This is empty on voyager because we need a much + * earlier detection which is done in find_smp_config */ +} + /* Set up all the basic stuff: read the SMP config and make all the * SMP information reflect only the boot cpu. All others will be * brought on-line later. */ @@ -413,6 +421,7 @@ void __init smp_store_cpu_info(int id) struct cpuinfo_x86 *c = &cpu_data(id); *c = boot_cpu_data; + c->cpu_index = id; identify_secondary_cpu(c); } @@ -650,6 +659,8 @@ void __init smp_boot_cpus(void) smp_tune_scheduling(); */ smp_store_cpu_info(boot_cpu_id); + /* setup the jump vector */ + initial_code = (unsigned long)initialize_secondary; printk("CPU%d: ", boot_cpu_id); print_cpu_info(&cpu_data(boot_cpu_id)); @@ -702,7 +713,7 @@ void __init smp_boot_cpus(void) /* Reload the secondary CPUs task structure (this function does not * return ) */ -void __init initialize_secondary(void) +static void __init initialize_secondary(void) { #if 0 // AC kernels only @@ -1248,7 +1259,7 @@ static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) #define QIC_SET_GATE(cpi, vector) \ set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) -void __init smp_intr_init(void) +void __init voyager_smp_intr_init(void) { int i; @@ -1780,6 +1791,17 @@ void __init smp_setup_processor_id(void) x86_write_percpu(cpu_number, hard_smp_processor_id()); } +static void voyager_send_call_func(cpumask_t callmask) +{ + __u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id()); + send_CPI(mask, VIC_CALL_FUNCTION_CPI); +} + +static void voyager_send_call_func_single(int cpu) +{ + send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI); +} + struct smp_ops smp_ops = { .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, .smp_prepare_cpus = voyager_smp_prepare_cpus, @@ -1789,6 +1811,6 @@ struct smp_ops smp_ops = { .smp_send_stop = voyager_smp_send_stop, .smp_send_reschedule = voyager_smp_send_reschedule, - .send_call_func_ipi = native_send_call_func_ipi, - .send_call_func_single_ipi = native_send_call_func_single_ipi, + .send_call_func_ipi = voyager_send_call_func, + .send_call_func_single_ipi = voyager_send_call_func_single, }; diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 59f89b4..fea4565 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -1,7 +1,7 @@ obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ pat.o pgtable.o gup.o -obj-$(CONFIG_X86_32) += pgtable_32.o +obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 4ba373c..be54176 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -233,7 +233,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, len = (unsigned long) nr_pages << PAGE_SHIFT; end = start + len; if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, - start, len))) + (void __user *)start, len))) goto slow_irqon; /* diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8396868..c483f42 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -334,7 +334,6 @@ int devmem_is_allowed(unsigned long pagenr) return 0; } -#ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; @@ -357,6 +356,7 @@ static void __init kmap_init(void) kmap_prot = PAGE_KERNEL; } +#ifdef CONFIG_HIGHMEM static void __init permanent_kmaps_init(pgd_t *pgd_base) { unsigned long vaddr; @@ -436,7 +436,6 @@ static void __init set_highmem_pages_init(void) #endif /* !CONFIG_NUMA */ #else -# define kmap_init() do { } while (0) # define permanent_kmaps_init(pgd_base) do { } while (0) # define set_highmem_pages_init() do { } while (0) #endif /* CONFIG_HIGHMEM */ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index b8e461d..9db01db 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -350,8 +350,10 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end, * pagetable pages as RO. So assume someone who pre-setup * these mappings are more intelligent. */ - if (pte_val(*pte)) + if (pte_val(*pte)) { + pages++; continue; + } if (0) printk(" pte=%p addr=%lx pte=%016lx\n", @@ -418,8 +420,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, * not differ with respect to page frame and * attributes. */ - if (page_size_mask & (1 << PG_LEVEL_2M)) + if (page_size_mask & (1 << PG_LEVEL_2M)) { + pages++; continue; + } new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd)); } @@ -499,8 +503,10 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, * not differ with respect to page frame and * attributes. */ - if (page_size_mask & (1 << PG_LEVEL_1G)) + if (page_size_mask & (1 << PG_LEVEL_1G)) { + pages++; continue; + } prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud)); } @@ -665,12 +671,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long last_map_addr = 0; unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; + unsigned long pos; struct map_range mr[NR_RANGE_MR]; int nr_range, i; int use_pse, use_gbpages; - printk(KERN_INFO "init_memory_mapping\n"); + printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end); /* * Find space for the kernel direct mapping tables. @@ -704,35 +711,50 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, /* head if not big page alignment ?*/ start_pfn = start >> PAGE_SHIFT; - end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) + pos = start_pfn << PAGE_SHIFT; + end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); + pos = end_pfn << PAGE_SHIFT; + } /* big page (2M) range*/ - start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) + end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) - end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<<PG_LEVEL_2M)); + if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) + end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<<PG_LEVEL_2M)); + pos = end_pfn << PAGE_SHIFT; + } /* big page (1G) range */ - start_pfn = end_pfn; - end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) + << (PUD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, page_size_mask & ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); + pos = end_pfn << PAGE_SHIFT; + } /* tail is not big page (1G) alignment */ - start_pfn = end_pfn; - end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); - nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, - page_size_mask & (1<<PG_LEVEL_2M)); + start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) + << (PMD_SHIFT - PAGE_SHIFT); + end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); + if (start_pfn < end_pfn) { + nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, + page_size_mask & (1<<PG_LEVEL_2M)); + pos = end_pfn << PAGE_SHIFT; + } /* tail is not big page (2M) alignment */ - start_pfn = end_pfn; + start_pfn = pos>>PAGE_SHIFT; end_pfn = end>>PAGE_SHIFT; nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); @@ -831,12 +853,12 @@ int arch_add_memory(int nid, u64 start, u64 size) unsigned long nr_pages = size >> PAGE_SHIFT; int ret; - last_mapped_pfn = init_memory_mapping(start, start + size-1); + last_mapped_pfn = init_memory_mapping(start, start + size); if (last_mapped_pfn > max_pfn_mapped) max_pfn_mapped = last_mapped_pfn; ret = __add_pages(zone, start_pfn, nr_pages); - WARN_ON(1); + WARN_ON_ONCE(ret); return ret; } @@ -878,6 +900,7 @@ static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, void __init mem_init(void) { long codesize, reservedpages, datasize, initsize; + unsigned long absent_pages; start_periodic_check_for_corruption(); @@ -893,8 +916,9 @@ void __init mem_init(void) #else totalram_pages = free_all_bootmem(); #endif - reservedpages = max_pfn - totalram_pages - - absent_pages_in_range(0, max_pfn); + + absent_pages = absent_pages_in_range(0, max_pfn); + reservedpages = max_pfn - totalram_pages - absent_pages; after_bootmem = 1; codesize = (unsigned long) &_etext - (unsigned long) &_text; @@ -911,10 +935,11 @@ void __init mem_init(void) VSYSCALL_END - VSYSCALL_START); printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, " - "%ldk reserved, %ldk data, %ldk init)\n", + "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), max_pfn << (PAGE_SHIFT-10), codesize >> 10, + absent_pages << (PAGE_SHIFT-10), reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c new file mode 100644 index 0000000..d0151d8 --- /dev/null +++ b/arch/x86/mm/iomap_32.c @@ -0,0 +1,59 @@ +/* + * Copyright © 2008 Ingo Molnar + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + */ + +#include <asm/iomap.h> +#include <linux/module.h> + +/* Map 'pfn' using fixed map 'type' and protections 'prot' + */ +void * +iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) +{ + enum fixed_addresses idx; + unsigned long vaddr; + + pagefault_disable(); + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); + set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); + arch_flush_lazy_mmu_mode(); + + return (void*) vaddr; +} +EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); + +void +iounmap_atomic(void *kvaddr, enum km_type type) +{ + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); + + /* + * Force other mappings to Oops if they'll try to access this pte + * without first remap it. Keeping stale mappings around is a bad idea + * also, in case the page changes cacheability attributes or becomes + * a protected page in a hypervisor. + */ + if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + kpte_clear_flush(kmap_pte-idx, vaddr); + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); +} +EXPORT_SYMBOL_GPL(iounmap_atomic); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index ae71e11..d4c4307 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -387,7 +387,7 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, unsigned long size) { unsigned long flags; - void *ret; + void __iomem *ret; int err; /* @@ -399,11 +399,11 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, if (err < 0) return NULL; - ret = (void *) __ioremap_caller(phys_addr, size, flags, - __builtin_return_address(0)); + ret = __ioremap_caller(phys_addr, size, flags, + __builtin_return_address(0)); free_memtype(phys_addr, phys_addr + size); - return (void __iomem *)ret; + return ret; } void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, @@ -622,7 +622,7 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) __early_set_fixmap(idx, 0, __pgprot(0)); } -static void *prev_map[FIX_BTMAPS_SLOTS] __initdata; +static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; static int __init check_early_ioremap_leak(void) { @@ -645,7 +645,7 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -713,23 +713,23 @@ static void __init *__early_ioremap(unsigned long phys_addr, unsigned long size, if (early_ioremap_debug) printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); - prev_map[slot] = (void *) (offset + fix_to_virt(idx0)); + prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); return prev_map[slot]; } /* Remap an IO device */ -void __init *early_ioremap(unsigned long phys_addr, unsigned long size) +void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) { return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); } /* Remap memory */ -void __init *early_memremap(unsigned long phys_addr, unsigned long size) +void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) { return __early_ioremap(phys_addr, size, PAGE_KERNEL); } -void __init early_iounmap(void *addr, unsigned long size) +void __init early_iounmap(void __iomem *addr, unsigned long size) { unsigned long virt_addr; unsigned long offset; @@ -779,7 +779,7 @@ void __init early_iounmap(void *addr, unsigned long size) --idx; --nrpages; } - prev_map[slot] = 0; + prev_map[slot] = NULL; } void __this_fixmap_does_not_exist(void) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 847c164..8518c67 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -222,6 +222,41 @@ static void __init remap_numa_kva(void) } } +#ifdef CONFIG_HIBERNATION +/** + * resume_map_numa_kva - add KVA mapping to the temporary page tables created + * during resume from hibernation + * @pgd_base - temporary resume page directory + */ +void resume_map_numa_kva(pgd_t *pgd_base) +{ + int node; + + for_each_online_node(node) { + unsigned long start_va, start_pfn, size, pfn; + + start_va = (unsigned long)node_remap_start_vaddr[node]; + start_pfn = node_remap_start_pfn[node]; + size = node_remap_size[node]; + + printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node); + + for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { + unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); + pgd_t *pgd = pgd_base + pgd_index(vaddr); + pud_t *pud = pud_offset(pgd, vaddr); + pmd_t *pmd = pmd_offset(pud, vaddr); + + set_pmd(pmd, pfn_pmd(start_pfn + pfn, + PAGE_KERNEL_LARGE_EXEC)); + + printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", + __FUNCTION__, vaddr, start_pfn + pfn); + } + } +} +#endif + static unsigned long calculate_numa_remap_pages(void) { int nid; diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f1dc1b7..e89d248 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -67,18 +67,18 @@ static void split_page_count(int level) void arch_report_meminfo(struct seq_file *m) { - seq_printf(m, "DirectMap4k: %8lu kB\n", + seq_printf(m, "DirectMap4k: %8lu kB\n", direct_pages_count[PG_LEVEL_4K] << 2); #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) - seq_printf(m, "DirectMap2M: %8lu kB\n", + seq_printf(m, "DirectMap2M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 11); #else - seq_printf(m, "DirectMap4M: %8lu kB\n", + seq_printf(m, "DirectMap4M: %8lu kB\n", direct_pages_count[PG_LEVEL_2M] << 12); #endif #ifdef CONFIG_X86_64 if (direct_gbpages) - seq_printf(m, "DirectMap1G: %8lu kB\n", + seq_printf(m, "DirectMap1G: %8lu kB\n", direct_pages_count[PG_LEVEL_1G] << 20); #endif } diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 738fd0f2..eb1bf00 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -481,12 +481,16 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) return 1; } #else +/* This check is needed to avoid cache aliasing when PAT is enabled */ static inline int range_is_allowed(unsigned long pfn, unsigned long size) { u64 from = ((u64)pfn) << PAGE_SHIFT; u64 to = from + size; u64 cursor = from; + if (!pat_enabled) + return 1; + while (cursor < to) { if (!devmem_is_allowed(pfn)) { printk(KERN_INFO diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 022cd41..202864a 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type) *cpu_type = "i386/pii"; break; case 6 ... 8: + case 10 ... 11: *cpu_type = "i386/piii"; break; case 9: + case 13: *cpu_type = "i386/p6_mobile"; break; - case 10 ... 13: - *cpu_type = "i386/p6"; - break; case 14: *cpu_type = "i386/core"; break; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 0620d6d..e9f80c7 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -27,8 +27,7 @@ static int num_counters = 2; static int counter_width = 32; #define CTR_IS_RESERVED(msrs, c) (msrs->counters[(c)].addr ? 1 : 0) -#define CTR_READ(l, h, msrs, c) do {rdmsr(msrs->counters[(c)].addr, (l), (h)); } while (0) -#define CTR_OVERFLOWED(n) (!((n) & (1U<<(counter_width-1)))) +#define CTR_OVERFLOWED(n) (!((n) & (1ULL<<(counter_width-1)))) #define CTRL_IS_RESERVED(msrs, c) (msrs->controls[(c)].addr ? 1 : 0) #define CTRL_READ(l, h, msrs, c) do {rdmsr((msrs->controls[(c)].addr), (l), (h)); } while (0) @@ -70,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) int i; if (!reset_value) { - reset_value = kmalloc(sizeof(unsigned) * num_counters, + reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; @@ -124,14 +123,14 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs) static int ppro_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { - unsigned int low, high; + u64 val; int i; for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; - CTR_READ(low, high, msrs, i); - if (CTR_OVERFLOWED(low)) { + rdmsrl(msrs->counters[i].addr, val); + if (CTR_OVERFLOWED(val)) { oprofile_add_sample(regs, i); wrmsrl(msrs->counters[i].addr, -reset_value[i]); } @@ -157,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { CTRL_READ(low, high, msrs, i); @@ -172,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs) unsigned int low, high; int i; + if (!reset_value) + return; for (i = 0; i < num_counters; ++i) { if (!reset_value[i]) continue; diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 3c27a80..2051dc9 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015, pci_siemens_interrupt_controller); /* - * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config - * have 4096 bytes. Even if the device is capable, that doesn't mean we can - * access it. Maybe we don't have a way to generate extended config space - * accesses. So check it + * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have + * 4096 bytes configuration space for each function of their processor + * configuration space. */ -static void fam10h_pci_cfg_space_size(struct pci_dev *dev) +static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev) { dev->cfg_size = pci_cfg_space_size_ext(dev); } - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size); -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size); +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size); /* * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c index f2b6e3f..81197c6 100644 --- a/arch/x86/power/hibernate_32.c +++ b/arch/x86/power/hibernate_32.c @@ -12,6 +12,7 @@ #include <asm/system.h> #include <asm/page.h> #include <asm/pgtable.h> +#include <asm/mmzone.h> /* Defined in hibernate_asm_32.S */ extern int restore_image(void); @@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base) } } } + + resume_map_numa_kva(pgd_base); + return 0; } diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 3139479..6dcefba 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -1,4 +1,4 @@ -ifdef CONFIG_FTRACE +ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities CFLAGS_REMOVE_spinlock.o = -pg CFLAGS_REMOVE_time.o = -pg diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index b61534c..5e4686d 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -863,15 +863,16 @@ static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned l if (PagePinned(virt_to_page(mm->pgd))) { SetPagePinned(page); + vm_unmap_aliases(); if (!PageHighMem(page)) { make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); if (level == PT_PTE && USE_SPLIT_PTLOCKS) pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); - } else + } else { /* make sure there are no stray mappings of this page */ kmap_flush_unused(); - vm_unmap_aliases(); + } } } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d4d52f5..636ef4c 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -246,11 +246,21 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) { unsigned long address = (unsigned long)vaddr; unsigned int level; - pte_t *pte = lookup_address(address, &level); - unsigned offset = address & ~PAGE_MASK; + pte_t *pte; + unsigned offset; - BUG_ON(pte == NULL); + /* + * if the PFN is in the linear mapped vaddr range, we can just use + * the (quick) virt_to_machine() p2m lookup + */ + if (virt_addr_valid(vaddr)) + return virt_to_machine(vaddr); + /* otherwise we have to do a (slower) full page-table walk */ + + pte = lookup_address(address, &level); + BUG_ON(pte == NULL); + offset = address & ~PAGE_MASK; return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); } @@ -410,7 +420,7 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, xen_mc_batch(); - u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; + u.ptr = arbitrary_virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD; u.val = pte_val_ma(pte); xen_extend_mmu_update(&u); @@ -651,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val) * For 64-bit, we must skip the Xen hole in the middle of the address * space, just after the big x86-64 virtual hole. */ -static int xen_pgd_walk(struct mm_struct *mm, - int (*func)(struct mm_struct *mm, struct page *, - enum pt_level), - unsigned long limit) +static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) { - pgd_t *pgd = mm->pgd; int flush = 0; unsigned hole_low, hole_high; unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; @@ -743,6 +752,14 @@ out: return flush; } +static int xen_pgd_walk(struct mm_struct *mm, + int (*func)(struct mm_struct *mm, struct page *, + enum pt_level), + unsigned long limit) +{ + return __xen_pgd_walk(mm, mm->pgd, func, limit); +} + /* If we're using split pte locks, then take the page's lock and return a pointer to it. Otherwise return NULL. */ static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm) @@ -840,13 +857,16 @@ static int xen_pin_page(struct mm_struct *mm, struct page *page, read-only, and can be pinned. */ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) { + vm_unmap_aliases(); + xen_mc_batch(); - if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) { - /* re-enable interrupts for kmap_flush_unused */ + if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) { + /* re-enable interrupts for flushing */ xen_mc_issue(0); + kmap_flush_unused(); - vm_unmap_aliases(); + xen_mc_batch(); } @@ -864,7 +884,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) #else /* CONFIG_X86_32 */ #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is pinnable */ - xen_pin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), + xen_pin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), PT_PMD); #endif xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); @@ -981,11 +1001,11 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) #ifdef CONFIG_X86_PAE /* Need to make sure unshared kernel PMD is unpinned */ - xen_unpin_page(mm, virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), + xen_unpin_page(mm, pgd_page(pgd[pgd_index(TASK_SIZE)]), PT_PMD); #endif - xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT); + __xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT); xen_mc_issue(0); } diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d77da61..acd9b67 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu) alternatives_smp_switch(0); } -static void xen_play_dead(void) +static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */ { play_dead_common(); HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL); diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index d7422dc..9e1afae 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu); void xen_mark_init_mm_pinned(void); -void __init xen_setup_vcpu_info_placement(void); +void xen_setup_vcpu_info_placement(void); #ifdef CONFIG_SMP void xen_smp_init(void); |