From 8a7c5ef3ba8cca8f9c4f91ddef3d081f517914ae Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Tue, 19 Aug 2008 02:13:55 +0200 Subject: x86 iommu: remove unneeded parenthesis The parenthesis in __iommu_queue_command() are not needed when assigning into 'target' variable. Signed-off-by: Jiri Kosina Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index de39e1f..69b4d06 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -65,7 +65,7 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) u8 *target; tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); - target = (iommu->cmd_buf + tail); + target = iommu->cmd_buf + tail; memcpy_toio(target, cmd, sizeof(*cmd)); tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); -- cgit v0.10.2 From c6744955d0ec0cb485c28c51eeb7185e260f6172 Mon Sep 17 00:00:00 2001 From: Samuel Sieb Date: Wed, 6 Aug 2008 22:06:29 -0700 Subject: x86: fix "kernel won't boot on a Cyrix MediaGXm (Geode)" Cyrix MediaGXm/Cx5530 Unicorn Revision 1.19.3B has stopped booting starting at v2.6.22. The reason is this commit: > commit f25f64ed5bd3c2932493681bdfdb483ea707da0a > Author: Juergen Beisert > Date: Sun Jul 22 11:12:38 2007 +0200 > > x86: Replace NSC/Cyrix specific chipset access macros by inlined functions. this commit activated a macro which was dormant before due to (buggy) macro side-effects. I've looked through various datasheets and found that the GXm and GXLV Geode processors don't have an incrementor. Remove the incrementor setup entirely. As the incrementor value differs according to clock speed and we would hope that the BIOS configures it correctly, it is probably the right solution. Cc: Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67..e710a21 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -134,23 +134,6 @@ static void __cpuinit set_cx86_memwb(void) setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } -static void __cpuinit set_cx86_inc(void) -{ - unsigned char ccr3; - - printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); - - ccr3 = getCx86(CX86_CCR3); - setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ - /* PCR1 -- Performance Control */ - /* Incrementor on, whatever that is */ - setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); - /* PCR0 -- Performance Control */ - /* Incrementor Margin 10 */ - setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); - setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ -} - /* * Configure later MediaGX and/or Geode processor. */ @@ -174,7 +157,6 @@ static void __cpuinit geode_configure(void) set_cx86_memwb(); set_cx86_reorder(); - set_cx86_inc(); local_irq_restore(flags); } -- cgit v0.10.2 From 80c5e73d6028e0f03ab0c70e7c4cbf98ac2e0c43 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Tue, 19 Aug 2008 16:28:01 -0700 Subject: x86: fix Xorg startup/shutdown slowdown with PAT Rene Herman reported significant Xorg startup/shutdown slowdown due to PAT. It turns out that the memtype list has thousands of entries. Add cached_entry to list add routine, in order to speed up the lookup for sequential reserve_memtype calls. Reported-by: Rene Herman Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 2fe3091..bb6e8a2 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -207,6 +207,9 @@ static int chk_conflict(struct memtype *new, struct memtype *entry, return -EBUSY; } +static struct memtype *cached_entry; +static u64 cached_start; + /* * req_type typically has one of the: * - _PAGE_CACHE_WB @@ -280,11 +283,17 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, spin_lock(&memtype_lock); + if (cached_entry && start >= cached_start) + entry = cached_entry; + else + entry = list_entry(&memtype_list, struct memtype, nd); + /* Search for existing mapping that overlaps the current range */ where = NULL; - list_for_each_entry(entry, &memtype_list, nd) { + list_for_each_entry_continue(entry, &memtype_list, nd) { if (end <= entry->start) { where = entry->nd.prev; + cached_entry = list_entry(where, struct memtype, nd); break; } else if (start <= entry->start) { /* end > entry->start */ err = chk_conflict(new, entry, new_type); @@ -292,6 +301,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); where = entry->nd.prev; + cached_entry = list_entry(where, + struct memtype, nd); } break; } else if (start < entry->end) { /* start > entry->start */ @@ -299,7 +310,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (!err) { dprintk("Overlap at 0x%Lx-0x%Lx\n", entry->start, entry->end); - where = &entry->nd; + cached_entry = list_entry(entry->nd.prev, + struct memtype, nd); + + /* + * Move to right position in the linked + * list to add this new entry + */ + list_for_each_entry_continue(entry, + &memtype_list, nd) { + if (start <= entry->start) { + where = entry->nd.prev; + break; + } + } } break; } @@ -314,6 +338,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return err; } + cached_start = start; + if (where) list_add(&new->nd, where); else @@ -343,6 +369,9 @@ int free_memtype(u64 start, u64 end) spin_lock(&memtype_lock); list_for_each_entry(entry, &memtype_list, nd) { if (entry->start == start && entry->end == end) { + if (cached_entry == entry || cached_start == start) + cached_entry = NULL; + list_del(&entry->nd); kfree(entry); err = 0; -- cgit v0.10.2 From 99dd8713306a89f3e106143581244e550e00a644 Mon Sep 17 00:00:00 2001 From: Cliff Wickman Date: Tue, 19 Aug 2008 12:51:59 -0500 Subject: x86, SGI UV: hardcode the TLB flush interrupt system vector The UV TLB shootdown mechanism needs a system interrupt vector. Its vector had been hardcoded as 200, but needs to moved to the reserved system vector range so that it does not collide with some device vector. This is still temporary until dynamic system IRQ allocation is provided. But it will be needed when real UV hardware becomes available and runs 2.6.27. Signed-off-by: Cliff Wickman Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb77..8b8c0d6 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -783,7 +784,7 @@ static int __init uv_bau_init(void) uv_init_blade(blade, node, cur_cpu); cur_cpu += uv_blade_nr_possible_cpus(blade); } - set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); + alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); uv_enable_timeouts(); return 0; diff --git a/include/asm-x86/irq_vectors.h b/include/asm-x86/irq_vectors.h index b95d167..a48c7f2 100644 --- a/include/asm-x86/irq_vectors.h +++ b/include/asm-x86/irq_vectors.h @@ -76,6 +76,7 @@ #define CALL_FUNCTION_SINGLE_VECTOR 0xfb #define THERMAL_APIC_VECTOR 0xfa #define THRESHOLD_APIC_VECTOR 0xf9 +#define UV_BAU_MESSAGE 0xf8 #define INVALIDATE_TLB_VECTOR_END 0xf7 #define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h index 91ac0df..610b6b3 100644 --- a/include/asm-x86/uv/uv_bau.h +++ b/include/asm-x86/uv/uv_bau.h @@ -40,11 +40,6 @@ #define UV_ACTIVATION_DESCRIPTOR_SIZE 32 #define UV_DISTRIBUTION_SIZE 256 #define UV_SW_ACK_NPENDING 8 -#define UV_BAU_MESSAGE 200 -/* - * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h - * To be dynamically allocated in the future - */ #define UV_NET_ENDPOINT_INTD 0x38 #define UV_DESC_BASE_PNODE_SHIFT 49 #define UV_PAYLOADQ_PNODE_SHIFT 49 -- cgit v0.10.2 From 80a8c9fffa78f57d7d4351af2f15a56386805ceb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 19 Aug 2008 03:13:38 +0200 Subject: x86: fix oprofile + hibernation badness Vegard Nossum reported oprofile + hibernation problems: > Now some warnings: > > ------------[ cut here ]------------ > WARNING: at /uio/arkimedes/s29/vegardno/git-working/linux-2.6/kernel/smp.c:328 s > mp_call_function_mask+0x194/0x1a0() The usual problem: the suspend function when interrupts are already disabled calls smp_call_function which is not allowed with interrupt off. But at this point all the other CPUs should be already down anyways, so it should be enough to just drop that. This patch should fix that problem at least by fixing cpu hotplug& suspend support. [ mingo@elte.hu: fixed 5 coding style errors. ] Signed-off-by: Andi Kleen Tested-by: Vegard Nossum Signed-off-by: Ingo Molnar diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3f90289..0227694 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -28,23 +29,48 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); static int nmi_start(void); static void nmi_stop(void); +static void nmi_cpu_start(void *dummy); +static void nmi_cpu_stop(void *dummy); /* 0 == registered but off, 1 == registered and on */ static int nmi_enabled = 0; +#ifdef CONFIG_SMP +static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); + break; + case CPU_DOWN_PREPARE: + smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block oprofile_cpu_nb = { + .notifier_call = oprofile_cpu_notifier +}; +#endif + #ifdef CONFIG_PM static int nmi_suspend(struct sys_device *dev, pm_message_t state) { + /* Only one CPU left, just stop that one */ if (nmi_enabled == 1) - nmi_stop(); + nmi_cpu_stop(NULL); return 0; } static int nmi_resume(struct sys_device *dev) { if (nmi_enabled == 1) - nmi_start(); + nmi_cpu_start(NULL); return 0; } @@ -463,6 +489,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) } init_sysfs(); +#ifdef CONFIG_SMP + register_cpu_notifier(&oprofile_cpu_nb); +#endif using_nmi = 1; ops->create_files = nmi_create_files; ops->setup = nmi_setup; @@ -476,6 +505,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) void op_nmi_exit(void) { - if (using_nmi) + if (using_nmi) { exit_sysfs(); +#ifdef CONFIG_SMP + unregister_cpu_notifier(&oprofile_cpu_nb); +#endif + } } -- cgit v0.10.2 From 85d577979a8a94a32ca1d828a91c2eb8300bc9f2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 18 Aug 2008 11:58:17 +0200 Subject: werror: fix pci calgary Fix an integer comparison always false warning in the PCI Calgary 64 driver. A u8 is being compared to something that's 512 by default, resulting in the following warning: arch/x86/kernel/pci-calgary_64.c:1285: warning: comparison is always false due to limited range of data type This was introduced by patch b34e90b8f0f30151349134f87b5dc6ef75a5218c. Signed-off-by: David Howells Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 218d783..363a74e 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1269,13 +1269,15 @@ static inline int __init determine_tce_table_size(u64 ram) static int __init build_detail_arrays(void) { unsigned long ptr; - int i, scal_detail_size, rio_detail_size; + unsigned numnodes, i; + int scal_detail_size, rio_detail_size; - if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ + numnodes = rio_table_hdr->num_scal_dev; + if (numnodes > MAX_NUMNODES){ printk(KERN_WARNING "Calgary: MAX_NUMNODES too low! Defined as %d, " "but system has %d nodes.\n", - MAX_NUMNODES, rio_table_hdr->num_scal_dev); + MAX_NUMNODES, numnodes); return -ENODEV; } @@ -1296,8 +1298,7 @@ static int __init build_detail_arrays(void) } ptr = ((unsigned long)rio_table_hdr) + 3; - for (i = 0; i < rio_table_hdr->num_scal_dev; - i++, ptr += scal_detail_size) + for (i = 0; i < numnodes; i++, ptr += scal_detail_size) scal_devs[i] = (struct scal_detail *)ptr; for (i = 0; i < rio_table_hdr->num_rio_dev; -- cgit v0.10.2 From 0c072bb452f0cfd4959bc01ff3627d6385255b20 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 8 Jul 2008 09:50:22 -0700 Subject: x86: use WARN() in arch/x86/mm/ioremap.c Use WARN() instead of a printk+WARN_ON() pair; this way the message becomes part of the warning section for better reporting/collection. Signed-off-by: Arjan van de Ven Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 6ba6f88..d4b6e6a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -553,13 +553,11 @@ static int __init check_early_ioremap_leak(void) { if (!early_ioremap_nested) return 0; - - printk(KERN_WARNING + WARN(1, KERN_WARNING "Debug warning: early ioremap leak of %d areas detected.\n", - early_ioremap_nested); + early_ioremap_nested); printk(KERN_WARNING - "please boot with early_ioremap_debug and report the dmesg.\n"); - WARN_ON(1); + "please boot with early_ioremap_debug and report the dmesg.\n"); return 1; } -- cgit v0.10.2 From bde78a79a6eb015f33aa4660df1b06f5135def20 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Tue, 8 Jul 2008 09:51:56 -0700 Subject: x86: use WARN() in arch/x86/kernel Use WARN() instead of a printk+WARN_ON() pair; this way the message becomes part of the warning section for better reporting/collection. This also allowed the folding of some if()'s into the WARN() Signed-off-by: Arjan van de Ven Cc: akpm@linux-foundation.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969..b117d7f 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) /* kvm/qemu doesn't have mtrr set right, don't trim them all */ if (!highest_pfn) { - if (!kvm_para_available()) { - printk(KERN_WARNING + WARN(!kvm_para_available(), KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); - WARN_ON(1); - } return 0; } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 363a74e..dcdac6c 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -343,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { - printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " + WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); - WARN_ON(1); return; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825..9ffb01c 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) __raw_spin_unlock(&sync_lock); } } - if (!(now-start)) { - printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", + WARN(!(now-start), + "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", now-start, end-start); - WARN_ON(1); - } } /* -- cgit v0.10.2 From 7701e8c59b0e7cbcf21c1bc49d8b8c6db43027a4 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Wed, 20 Aug 2008 21:07:47 +0200 Subject: x86, mmiotrace: silence section mismatch warning - leave_uniprocessor WARNING: vmlinux.o(.text+0x180af): Section mismatch in reference from the function leave_uniprocessor() to the function .cpuinit.text:cpu_up() The function leave_uniprocessor() references the function __cpuinit cpu_up(). This is often because leave_uniprocessor lacks a __cpuinit annotation or the annotation of cpu_up is wrong. leave_uniprocessor calls cpu_up only when CONFIG_HOTPLUG_CPU is set, so it can be safely annotated as __ref Signed-off-by: Marcin Slusarz Cc: Pekka Paalanen Signed-off-by: Ingo Molnar Cc: Ingo Molnar Cc: Pekka Paalanen diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index e7397e1..635b50e 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -430,7 +430,9 @@ static void enter_uniprocessor(void) "may miss events.\n"); } -static void leave_uniprocessor(void) +/* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, + but this whole function is ifdefed CONFIG_HOTPLUG_CPU */ +static void __ref leave_uniprocessor(void) { int cpu; int err; -- cgit v0.10.2 From 7946612de2087e163308e26034286fc2dc9dacf1 Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Wed, 20 Aug 2008 11:31:07 -0700 Subject: x86: export pv_lock_ops non-GPL None of the spinlock API is exported GPL, so there's no reason for pv_lock_ops to be. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: drago01 diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 94da4d5..300da17 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -471,7 +471,7 @@ struct pv_lock_ops pv_lock_ops = { .spin_unlock = __ticket_spin_unlock, #endif }; -EXPORT_SYMBOL_GPL(pv_lock_ops); +EXPORT_SYMBOL(pv_lock_ops); EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); -- cgit v0.10.2 From b2a6a58ca6a3ddf4e278a53199c5b8fd39adbc0e Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Wed, 20 Aug 2008 18:18:26 +0200 Subject: x86: fix BUG: unable to handle kernel paging request (numaq_tsc_disable) This section mismatch: >> Seems to be a section mismatch; init_intel() is __cpuinit while >> numaq_tsc_disable() is __init. Seems to be introduced in: >> >> commit 64898a8bad8c94ad7a4bd5cc86b66edfbb081f4a >> Author: Yinghai Lu >> Date: Sat Jul 19 18:01:16 2008 -0700 >> >> x86: extend and use x86_quirks to clean up NUMAQ code > > Oops, I am wrong about numaq_tsc_disable() being __init. Still, I > believe that Yinghai might be able to say what's really wrong :-) Would lead to this crash: BUG: unable to handle kernel paging request at c08a45f0 IP: [] numaq_tsc_disable+0x0/0x40 Fixed by the patch below. Signed-off-by: Vegard Nossum Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index b8c4561..eecc8c1 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c @@ -73,7 +73,7 @@ static void __init smp_dump_qct(void) } -void __init numaq_tsc_disable(void) +void __cpuinit numaq_tsc_disable(void) { if (!found_numaq) return; -- cgit v0.10.2 From c15238df3b65e34fadb1021b0fb0d5aebc7c42c6 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 20 Aug 2008 16:45:51 -0700 Subject: x86: PAT proper tracking of set_memory_uc and friends Big thinko in pat memtype tracking code. reserve_memtype should be called with physical address and not virtual address. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index f5f5154..43e2f84 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -849,7 +849,7 @@ int set_memory_uc(unsigned long addr, int numpages) /* * for now UC MINUS. see comments in ioremap_nocache() */ - if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_UC_MINUS, NULL)) return -EINVAL; @@ -868,7 +868,7 @@ int set_memory_wc(unsigned long addr, int numpages) if (!pat_enabled) return set_memory_uc(addr, numpages); - if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, + if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_WC, NULL)) return -EINVAL; @@ -884,7 +884,7 @@ int _set_memory_wb(unsigned long addr, int numpages) int set_memory_wb(unsigned long addr, int numpages) { - free_memtype(addr, addr + numpages * PAGE_SIZE); + free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); return _set_memory_wb(addr, numpages); } -- cgit v0.10.2 From 28df82ebab79c6a2b4295dd94fd8de88196a49df Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 20 Aug 2008 16:45:52 -0700 Subject: devmem, x86: PAT Change /dev/mem mmap with O_SYNC to use UC_MINUS All kernel mappings like ioremap(), etc uses UC_MINUS as the type. /dev/mem mappings with /dev/mem being opened with O_SYNC however was using UC, resulting in a conflict with /dev/mem mmap failing. This seems to be affecting some apps (one being flashrom) which are using O_SYNC and which were working before. Switch /dev/mem with O_SYNC also to UC_MINUS. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index bb6e8a2..2a50e0f 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -390,14 +390,6 @@ int free_memtype(u64 start, u64 end) } -/* - * /dev/mem mmap interface. The memtype used for mapping varies: - * - Use UC for mappings with O_SYNC flag - * - Without O_SYNC flag, if there is any conflict in reserve_memtype, - * inherit the memtype from existing mapping. - * - Else use UC_MINUS memtype (for backward compatibility with existing - * X drivers. - */ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -435,14 +427,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { u64 offset = ((u64) pfn) << PAGE_SHIFT; - unsigned long flags = _PAGE_CACHE_UC_MINUS; + unsigned long flags = -1; int retval; if (!range_is_allowed(pfn, size)) return 0; if (file->f_flags & O_SYNC) { - flags = _PAGE_CACHE_UC; + flags = _PAGE_CACHE_UC_MINUS; } #ifdef CONFIG_X86_32 @@ -465,13 +457,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, #endif /* - * With O_SYNC, we can only take UC mapping. Fail if we cannot. + * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. + * * Without O_SYNC, we want to get * - WB for WB-able memory and no other conflicting mappings * - UC_MINUS for non-WB-able memory with no other conflicting mappings * - Inherit from confliting mappings otherwise */ - if (flags != _PAGE_CACHE_UC_MINUS) { + if (flags != -1) { retval = reserve_memtype(offset, offset + size, flags, NULL); } else { retval = reserve_memtype(offset, offset + size, -1, &flags); -- cgit v0.10.2 From 8323444b5dba3fe55e56a95d20d8f55c1d6745af Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Wed, 20 Aug 2008 16:45:53 -0700 Subject: x86: PAT Update validate_pat_support for intel CPUs Pentium III and Core Solo/Duo CPUs have an erratum " Page with PAT set to WC while associated MTRR is UC may consolidate to UC " which can result in WC setting in PAT to be ineffective. We will disable PAT on such CPUs, so that we can continue to use MTRR WC setting. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220..a6ef672 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c @@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) switch (c->x86_vendor) { case X86_VENDOR_INTEL: - if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) + /* + * There is a known erratum on Pentium III and Core Solo + * and Core Duo CPUs. + * " Page with PAT set to WC while associated MTRR is UC + * may consolidate to UC " + * Because of this erratum, it is better to stick with + * setting WC in MTRR rather than using PAT on these CPUs. + * + * Enable PAT WC only on P4, Core 2 or later CPUs. + */ + if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) return; - break; + + pat_disable("PAT WC disabled due to known CPU erratum."); + return; + case X86_VENDOR_AMD: case X86_VENDOR_CENTAUR: case X86_VENDOR_TRANSMETA: -- cgit v0.10.2 From 38cc1c3df77c1bb739a4766788eb9fa49f16ffdf Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 21 Aug 2008 20:24:24 -0700 Subject: x86: work around MTRR mask setting Joshua Hoblitt reported that only 3 GB of his 16 GB of RAM is usable. Booting with mtrr_show showed us the BIOS-initialized MTRR settings - which are all wrong. So the root cause is that the BIOS has not set the mask correctly: > [ 0.429971] MSR00000200: 00000000d0000000 > [ 0.433305] MSR00000201: 0000000ff0000800 > should be ==> [ 0.433305] MSR00000201: 0000003ff0000800 > > [ 0.436638] MSR00000202: 00000000e0000000 > [ 0.439971] MSR00000203: 0000000fe0000800 > should be ==> [ 0.439971] MSR00000203: 0000003fe0000800 > > [ 0.443304] MSR00000204: 0000000000000006 > [ 0.446637] MSR00000205: 0000000c00000800 > should be ==> [ 0.446637] MSR00000205: 0000003c00000800 > > [ 0.449970] MSR00000206: 0000000400000006 > [ 0.453303] MSR00000207: 0000000fe0000800 > should be ==> [ 0.453303] MSR00000207: 0000003fe0000800 > > [ 0.456636] MSR00000208: 0000000420000006 > [ 0.459970] MSR00000209: 0000000ff0000800 > should be ==> [ 0.459970] MSR00000209: 0000003ff0000800 So detect this borkage and add the prefix 111. Signed-off-by: Yinghai Lu Cc: Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d..43102e0 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, unsigned long *size, mtrr_type *type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; + unsigned int tmp, hi; rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); if ((mask_lo & 0x800) == 0) { @@ -392,8 +393,18 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); /* Work out the shifted address mask. */ - mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) - | mask_lo >> PAGE_SHIFT; + tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | tmp; + /* Expand tmp with high bits to all 1s*/ + hi = fls(tmp); + if (hi > 0) { + tmp |= ~((1<<(hi - 1)) - 1); + + if (tmp != mask_lo) { + WARN_ON("mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + mask_lo = tmp; + } + } /* This works correctly if size is a power of two, i.e. a contiguous range. */ -- cgit v0.10.2 From 8ae3a5a8dff2c92bd1087bb97c4a3bb61174303e Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 21 Aug 2008 14:27:22 +0100 Subject: x86: fix 1:1 mapping init on 64-bit (memory hotplug case) While I don't have a hotplug capable system at hand, I think two issues need fixing: - pud_phys (in kernel_physical_ampping_init()) would remain uninitialized in the after_bootmem case - the locking done just around phys_pmd_{init,update}() would leave out pgd updates, and it was needlessly covering code portions that do allocations (perhaps using a more friendly gfp value in alloc_low_page() would then be possible) Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a87ea0e..8f48770 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -336,9 +336,12 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, } if (pmd_val(*pmd)) { - if (!pmd_large(*pmd)) + if (!pmd_large(*pmd)) { + spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pte_update(pmd, address, - end); + end); + spin_unlock(&init_mm.page_table_lock); + } /* Count entries we're using from level2_ident_pgt */ if (start == 0) pages++; @@ -347,8 +350,10 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + spin_unlock(&init_mm.page_table_lock); last_map_addr = (address & PMD_MASK) + PMD_SIZE; continue; } @@ -357,7 +362,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, last_map_addr = phys_pte_init(pte, address, end); unmap_low_page(pte); + spin_lock(&init_mm.page_table_lock); pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); + spin_unlock(&init_mm.page_table_lock); } update_page_count(PG_LEVEL_2M, pages); return last_map_addr; @@ -370,9 +377,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, pmd_t *pmd = pmd_offset(pud, 0); unsigned long last_map_addr; - spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); - spin_unlock(&init_mm.page_table_lock); __flush_tlb_all(); return last_map_addr; } @@ -408,20 +413,21 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, if (page_size_mask & (1<> PAGE_SHIFT, PAGE_KERNEL_LARGE)); + spin_unlock(&init_mm.page_table_lock); last_map_addr = (addr & PUD_MASK) + PUD_SIZE; continue; } pmd = alloc_low_page(&pmd_phys); - - spin_lock(&init_mm.page_table_lock); last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); unmap_low_page(pmd); + + spin_lock(&init_mm.page_table_lock); pud_populate(&init_mm, pud, __va(pmd_phys)); spin_unlock(&init_mm.page_table_lock); - } __flush_tlb_all(); update_page_count(PG_LEVEL_1G, pages); @@ -513,16 +519,14 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, continue; } - if (after_bootmem) - pud = pud_offset(pgd, start & PGDIR_MASK); - else - pud = alloc_low_page(&pud_phys); - + pud = alloc_low_page(&pud_phys); last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), page_size_mask); unmap_low_page(pud); - pgd_populate(&init_mm, pgd_offset_k(start), - __va(pud_phys)); + + spin_lock(&init_mm.page_table_lock); + pgd_populate(&init_mm, pgd, __va(pud_phys)); + spin_unlock(&init_mm.page_table_lock); } return last_map_addr; -- cgit v0.10.2 From 9482ac6e34dd1890a9a956d460a135bf992cb54a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 21 Aug 2008 14:28:42 +0100 Subject: x86: fix two modpost warnings in mm/init_64.c early_io{re,un}map() are __init and hence can't be called from __meminit functions. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8f48770..d3746ef 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -241,7 +241,7 @@ static unsigned long __initdata table_start; static unsigned long __meminitdata table_end; static unsigned long __meminitdata table_top; -static __meminit void *alloc_low_page(unsigned long *phys) +static __ref void *alloc_low_page(unsigned long *phys) { unsigned long pfn = table_end++; void *adr; @@ -262,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) return adr; } -static __meminit void unmap_low_page(void *adr) +static __ref void unmap_low_page(void *adr) { if (after_bootmem) return; -- cgit v0.10.2 From 3a6ddd5f18405ca92e004416af8ed44b9c9783d7 Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Thu, 21 Aug 2008 11:32:26 -0700 Subject: x86: fix VMI for early params while fixing a different bug i moved the call to vmi_init before early params could be parsed. This broke the vmi specific commandline parameters. Fix that, by moving vmi initialization after kernel has got a chance to parse early parameters. Signed-off-by: Alok N Kataria Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a4656ad..362d4e7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -604,14 +604,6 @@ void __init setup_arch(char **cmdline_p) early_cpu_init(); early_ioremap_init(); -#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) - /* - * Must be before kernel pagetables are setup - * or fixmap area is touched. - */ - vmi_init(); -#endif - ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); screen_info = boot_params.screen_info; edid_info = boot_params.edid_info; @@ -678,6 +670,14 @@ void __init setup_arch(char **cmdline_p) parse_early_param(); +#if defined(CONFIG_VMI) && defined(CONFIG_X86_32) + /* + * Must be before kernel pagetables are setup + * or fixmap area is touched. + */ + vmi_init(); +#endif + /* after early param, so could get panic from serial */ reserve_early_setup_data(); -- cgit v0.10.2 From c4bd1fdab0deec0f69aeabab22075cb22ac8ad44 Mon Sep 17 00:00:00 2001 From: Marcin Slusarz Date: Thu, 21 Aug 2008 20:49:05 +0200 Subject: x86: fix section mismatch warning - uv_cpu_init WARNING: vmlinux.o(.cpuinit.text+0x3cc4): Section mismatch in reference from the function uv_cpu_init() to the function .init.text:uv_system_init() The function __cpuinit uv_cpu_init() references a function __init uv_system_init(). If uv_system_init is only used by uv_cpu_init then annotate uv_system_init with a matching annotation. uv_system_init was ment to be called only once, so do it from codepath (native_smp_prepare_cpus) which is called once, right before activation of other cpus (smp_init). Note: old code relied on uv_node_to_blade being initialized to 0, but it'a not initialized from anywhere. Signed-off-by: Marcin Slusarz Acked-by: Jack Steiner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2d7e307..bfa837c 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -293,7 +293,9 @@ static __init void uv_rtc_init(void) sn_rtc_cycles_per_second = ticks_per_sec; } -static __init void uv_system_init(void) +static bool uv_system_inited; + +void __init uv_system_init(void) { union uvh_si_addr_map_config_u m_n_config; union uvh_node_id_u node_id; @@ -383,6 +385,7 @@ static __init void uv_system_init(void) map_mmr_high(max_pnode); map_config_high(max_pnode); map_mmioh_high(max_pnode); + uv_system_inited = true; } /* @@ -391,8 +394,7 @@ static __init void uv_system_init(void) */ void __cpuinit uv_cpu_init(void) { - if (!uv_node_to_blade) - uv_system_init(); + BUG_ON(!uv_system_inited); uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e139e61..7985c5b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1221,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) printk(KERN_INFO "CPU%d: ", 0); print_cpu_info(&cpu_data(0)); setup_boot_clock(); + + if (is_uv_system()) + uv_system_init(); out: preempt_enable(); } diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h index b02ea6e..754d635 100644 --- a/include/asm-x86/genapic_32.h +++ b/include/asm-x86/genapic_32.h @@ -118,6 +118,7 @@ enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define get_uv_system_type() UV_NONE #define is_uv_system() 0 #define uv_wakeup_secondary(a, b) 1 +#define uv_system_init() do {} while (0) #endif diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h index 0f85046..a47d631 100644 --- a/include/asm-x86/genapic_64.h +++ b/include/asm-x86/genapic_64.h @@ -42,6 +42,7 @@ extern int is_uv_system(void); extern struct genapic apic_x2apic_uv_x; DECLARE_PER_CPU(int, x2apic_extra_bits); extern void uv_cpu_init(void); +extern void uv_system_init(void); extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); extern void setup_apic_routing(void); -- cgit v0.10.2 From 9754a5b840a209bc1f192d59f63e81b698a55ac8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 Aug 2008 08:22:23 +0200 Subject: x86: work around MTRR mask setting, v2 improve the debug printout: - make it actually display something - print it only once would be nice to have a WARN_ONCE() facility, to feed such things to kerneloops.org. Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 43102e0..cb7d3b6 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -401,7 +401,12 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, tmp |= ~((1<<(hi - 1)) - 1); if (tmp != mask_lo) { - WARN_ON("mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + static int once = 1; + + if (once) { + printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); + once = 0; + } mask_lo = tmp; } } -- cgit v0.10.2 From 9b4e27b52853c5da77e61a4e36fbc40688b7a829 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 22 Aug 2008 20:23:37 +0200 Subject: x86: fix: do not run code in amd_bus.c on non-AMD CPUs Jan Beulich wrote: > Even worse - this would even try to access the MSR on non-AMD CPUs > (currently probably prevented just by the fact that only AMD ones use > family values of 0x10 or higher). This patch adds cpu vendor check to the postcore_initcalls. Reported-by: Jan Beulich Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index dbf5323..4a6f1a6 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -555,9 +555,11 @@ static int __init early_fill_mp_bus_info(void) return 0; } -postcore_initcall(early_fill_mp_bus_info); +#else /* !CONFIG_X86_64 */ -#endif +static int __init early_fill_mp_bus_info(void) { return 0; } + +#endif /* !CONFIG_X86_64 */ /* common 32/64 bit code */ @@ -583,4 +585,15 @@ static int __init enable_pci_io_ecs(void) return 0; } -postcore_initcall(enable_pci_io_ecs); +static int __init amd_postcore_init(void) +{ + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return 0; + + early_fill_mp_bus_info(); + enable_pci_io_ecs(); + + return 0; +} + +postcore_initcall(amd_postcore_init); -- cgit v0.10.2 From 91ede005d72df60d6b3f252be177a4743a6aa46a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 22 Aug 2008 20:23:38 +0200 Subject: x86: fix: make PCI ECS for AMD CPUs hotplug capable Until now, PCI ECS setup was performed at boot time only and for cpus that are enabled then. This patch fixes this and adds cpu hotplug. Tests sequence (check if ECS bit is set when bringing cpu online again): # ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr 00000008 00404010 # ( perl -e 'sysseek(STDOUT, 0xC001001F, 0); print pack "l*", 8, 0x00400010' ) > /dev/cpu/1/msr # ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr 00000008 00400010 # echo 0 > /sys/devices/system/cpu/cpu1/online # echo 1 > /sys/devices/system/cpu/cpu1/online # ( perl -e 'sysseek(STDIN, 0xC001001F, 0)'; hexdump -n 8 -e '2/4 "%08x " "\n"' ) < /dev/cpu/1/msr 00000008 00404010 Reported-by: Yinghai Lu Signed-off-by: Robert Richter Signed-off-by: Ingo Molnar diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 4a6f1a6..6a0fca7 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "pci.h" #ifdef CONFIG_X86_64 @@ -565,7 +566,7 @@ static int __init early_fill_mp_bus_info(void) { return 0; } #define ENABLE_CF8_EXT_CFG (1ULL << 46) -static void enable_pci_io_ecs_per_cpu(void *unused) +static void enable_pci_io_ecs(void *unused) { u64 reg; rdmsrl(MSR_AMD64_NB_CFG, reg); @@ -575,13 +576,39 @@ static void enable_pci_io_ecs_per_cpu(void *unused) } } -static int __init enable_pci_io_ecs(void) +static int __cpuinit amd_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { + int cpu = (long)hcpu; + switch(action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata amd_cpu_notifier = { + .notifier_call = amd_cpu_notify, +}; + +static int __init pci_io_ecs_init(void) +{ + int cpu; + /* assume all cpus from fam10h have IO ECS */ if (boot_cpu_data.x86 < 0x10) return 0; - on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1); + + register_cpu_notifier(&amd_cpu_notifier); + for_each_online_cpu(cpu) + amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, + (void *)(long)cpu); pci_probe |= PCI_HAS_IO_ECS; + return 0; } @@ -591,7 +618,7 @@ static int __init amd_postcore_init(void) return 0; early_fill_mp_bus_info(); - enable_pci_io_ecs(); + pci_io_ecs_init(); return 0; } -- cgit v0.10.2 From 8735728ef8dc935c4fb351f913758fdbb62c308d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 22 Aug 2008 22:23:09 +0200 Subject: x86 MCE: Fix CPU hotplug problem with multiple multicore AMD CPUs During CPU hot-remove the sysfs directory created by threshold_create_bank(), defined in arch/x86/kernel/cpu/mcheck/mce_amd_64.c, has to be removed before its parent directory, created by mce_create_device(), defined in arch/x86/kernel/cpu/mcheck/mce_64.c . Moreover, when the CPU in question is hotplugged again, obviously the latter has to be created before the former. At present, the right ordering is not enforced, because all of these operations are carried out by CPU hotplug notifiers which are not appropriately ordered with respect to each other. This leads to serious problems on systems with two or more multicore AMD CPUs, among other things during suspend and hibernation. Fix the problem by placing threshold bank CPU hotplug callbacks in mce_cpu_callback(), so that they are invoked at the right places, if defined. Additionally, use kobject_del() to remove the sysfs directory associated with the kobject created by kobject_create_and_add() in threshold_create_bank(), to prevent the kernel from crashing during CPU hotplug operations on systems with two or more multicore AMD CPUs. This patch fixes bug #11337. Signed-off-by: Rafael J. Wysocki Acked-by: Andi Kleen Tested-by: Mark Langsdorf Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 65a3396..726a5fc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -759,6 +759,7 @@ static struct sysdev_class mce_sysclass = { }; DEFINE_PER_CPU(struct sys_device, device_mce); +void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; /* Why are there no generic functions for this? */ #define ACCESSOR(name, var, start) \ @@ -883,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, case CPU_ONLINE: case CPU_ONLINE_FROZEN: mce_create_device(cpu); + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); break; case CPU_DEAD: case CPU_DEAD_FROZEN: + if (threshold_cpu_callback) + threshold_cpu_callback(action, cpu); mce_remove_device(cpu); break; } diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 88736ca..5eb390a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) deallocate_threshold_block(cpu, bank); free_out: + kobject_del(b->kobj); kobject_put(b->kobj); kfree(b); per_cpu(threshold_banks, cpu)[bank] = NULL; @@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) } /* get notified when a cpu comes on/off */ -static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, - unsigned long action, void *hcpu) +static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, + unsigned int cpu) { - /* cpu was unsigned int to begin with */ - unsigned int cpu = (unsigned long)hcpu; - if (cpu >= NR_CPUS) - goto out; + return; switch (action) { case CPU_ONLINE: @@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, default: break; } - out: - return NOTIFY_OK; } -static struct notifier_block threshold_cpu_notifier __cpuinitdata = { - .notifier_call = threshold_cpu_callback, -}; - static __init int threshold_init_device(void) { unsigned lcpu = 0; @@ -684,7 +676,7 @@ static __init int threshold_init_device(void) if (err) return err; } - register_hotcpu_notifier(&threshold_cpu_notifier); + threshold_cpu_callback = amd_64_threshold_cpu_callback; return 0; } diff --git a/include/asm-x86/mce.h b/include/asm-x86/mce.h index 94f1fd7..531eaa5 100644 --- a/include/asm-x86/mce.h +++ b/include/asm-x86/mce.h @@ -92,6 +92,7 @@ extern int mce_disabled; void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, device_mce); +extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); #ifdef CONFIG_X86_MCE_INTEL void mce_intel_feature_init(struct cpuinfo_x86 *c); -- cgit v0.10.2 From 060700b571717c997a2ea5e2049b848fa248ee13 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Aug 2008 11:52:06 -0700 Subject: x86: do not enable TSC notifier if we don't need it Impact: crash on non-TSC-equipped CPUs Don't enable the TSC notifier if we *either*: 1. don't have a CPU, or 2. have a CPU with constant TSC. In either of those cases, the notifier is either damaging (1) or useless(2). From: Linus Torvalds Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 46af716..9bed5ca 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -325,6 +325,10 @@ static struct notifier_block time_cpufreq_notifier_block = { static int __init cpufreq_tsc(void) { + if (!cpu_has_tsc) + return 0; + if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return 0; cpufreq_register_notifier(&time_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); return 0; -- cgit v0.10.2 From a2bd7274b47124d2fc4dfdb8c0591f545ba749dd Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 25 Aug 2008 00:56:08 -0700 Subject: x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet against BAR, v3 David Witbrodt tracked down (and bisected) a hpet bootup hang on his system to the following problem: a BIOS bug made the hpet device visible as a generic PCI device. If e820 reserved entries happen to be registered first in the resource tree [which v2.6.26 started doing], then the PCI code will reallocate that device's BAR to some other address - breaking timer IRQs and hanging the system. ( Normally hpet devices are hidden by the BIOS from the OS's PCI discovery via chipset magic. Sometimes the hpet is not a PCI device at all. ) Solve this fundamental fragility by making non-PCI platform drivers insert resources into the resource tree even if it overlaps the e820 reserved entry, to keep the resource manager from updating the BAR. Also do these checks for the ioapic and mmconfig addresses, and emit a warning if this happens. Bisected-by: David Witbrodt Signed-off-by: Yinghai Lu Tested-by: David Witbrodt Signed-off-by: Ingo Molnar diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5807d1b..d765da9 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -31,8 +31,11 @@ #include #include #include +#include #include +#include +#include #include "pci.h" @@ -77,6 +80,77 @@ pcibios_align_resource(void *data, struct resource *res, } EXPORT_SYMBOL(pcibios_align_resource); +static int check_res_with_valid(struct pci_dev *dev, struct resource *res) +{ + unsigned long base; + unsigned long size; + int i; + + base = res->start; + size = (res->start == 0 && res->end == res->start) ? 0 : + (res->end - res->start + 1); + + if (!base || !size) + return 0; + +#ifdef CONFIG_HPET_TIMER + /* for hpet */ + if (base == hpet_address && (res->flags & IORESOURCE_MEM)) { + dev_info(&dev->dev, "BAR has HPET at %08lx-%08lx\n", + base, base + size - 1); + return 1; + } +#endif + +#ifdef CONFIG_X86_IO_APIC + for (i = 0; i < nr_ioapics; i++) { + unsigned long ioapic_phys = mp_ioapics[i].mp_apicaddr; + + if (base == ioapic_phys && (res->flags & IORESOURCE_MEM)) { + dev_info(&dev->dev, "BAR has ioapic at %08lx-%08lx\n", + base, base + size - 1); + return 1; + } + } +#endif + +#ifdef CONFIG_PCI_MMCONFIG + for (i = 0; i < pci_mmcfg_config_num; i++) { + unsigned long addr; + + addr = pci_mmcfg_config[i].address; + if (base == addr && (res->flags & IORESOURCE_MEM)) { + dev_info(&dev->dev, "BAR has MMCONFIG at %08lx-%08lx\n", + base, base + size - 1); + return 1; + } + } +#endif + + return 0; +} + +static int check_platform(struct pci_dev *dev, struct resource *res) +{ + struct resource *root = NULL; + + /* + * forcibly insert it into the + * resource tree + */ + if (res->flags & IORESOURCE_MEM) + root = &iomem_resource; + else if (res->flags & IORESOURCE_IO) + root = &ioport_resource; + + if (root && check_res_with_valid(dev, res)) { + insert_resource(root, res); + + return 1; + } + + return 0; +} /* * Handle resources of PCI devices. If the world were perfect, we could * just allocate all the resource regions and do nothing more. It isn't. @@ -128,6 +202,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) pr = pci_find_parent_resource(dev, r); if (!r->start || !pr || request_resource(pr, r) < 0) { + if (check_platform(dev, r)) + continue; dev_err(&dev->dev, "BAR %d: can't " "allocate resource\n", idx); /* @@ -171,6 +247,8 @@ static void __init pcibios_allocate_resources(int pass) r->flags, disabled, pass); pr = pci_find_parent_resource(dev, r); if (!pr || request_resource(pr, r) < 0) { + if (check_platform(dev, r)) + continue; dev_err(&dev->dev, "BAR %d: can't " "allocate resource\n", idx); /* We'll assign a new address later */ -- cgit v0.10.2