From acf01734b1747b1ec4be6f159aff579ea5f7f8e2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Aug 2010 18:28:23 +0200 Subject: x86, tsc: Remove CPU frequency calibration on AMD 6b37f5a20c0e5c334c010a587058354215433e92 introduced the CPU frequency calibration code for AMD CPUs whose TSCs didn't increment with the core's P0 frequency. From F10h, revB onward, however, the TSC increment rate is denoted by MSRC001_0015[24] and when this bit is set (which should be done by the BIOS) the TSC increments with the P0 frequency so the calibration is not needed and booting can be a couple of msecs faster on those machines. Besides, there should be virtually no machines out there which don't have this bit set, therefore this calibration can be safely removed. It is a shaky hack anyway since it assumes implicitly that the core is in P0 when BIOS hands off to the OS, which might not always be the case. Signed-off-by: Borislav Petkov LKML-Reference: <20100825162823.GE26438@aftab> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index ba5f62f..fc563fa 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -412,6 +412,23 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_EXTD_APICID); } #endif + + /* We need to do the following only once */ + if (c != &boot_cpu_data) + return; + + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { + + if (c->x86 > 0x10 || + (c->x86 == 0x10 && c->x86_model >= 0x2)) { + u64 val; + + rdmsrl(MSR_K7_HWCR, val); + if (!(val & BIT(24))) + printk(KERN_WARNING FW_BUG "TSC doesn't count " + "with P0 frequency!\n"); + } + } } static void __cpuinit init_amd(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index ce8e502..13b6a6c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -854,60 +854,6 @@ static void __init init_tsc_clocksource(void) clocksource_register_khz(&clocksource_tsc, tsc_khz); } -#ifdef CONFIG_X86_64 -/* - * calibrate_cpu is used on systems with fixed rate TSCs to determine - * processor frequency - */ -#define TICK_COUNT 100000000 -static unsigned long __init calibrate_cpu(void) -{ - int tsc_start, tsc_now; - int i, no_ctr_free; - unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; - unsigned long flags; - - for (i = 0; i < 4; i++) - if (avail_to_resrv_perfctr_nmi_bit(i)) - break; - no_ctr_free = (i == 4); - if (no_ctr_free) { - WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... 
" - "cpu_khz value may be incorrect.\n"); - i = 3; - rdmsrl(MSR_K7_EVNTSEL3, evntsel3); - wrmsrl(MSR_K7_EVNTSEL3, 0); - rdmsrl(MSR_K7_PERFCTR3, pmc3); - } else { - reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); - reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } - local_irq_save(flags); - /* start measuring cycles, incrementing from 0 */ - wrmsrl(MSR_K7_PERFCTR0 + i, 0); - wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); - rdtscl(tsc_start); - do { - rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); - tsc_now = get_cycles(); - } while ((tsc_now - tsc_start) < TICK_COUNT); - - local_irq_restore(flags); - if (no_ctr_free) { - wrmsrl(MSR_K7_EVNTSEL3, 0); - wrmsrl(MSR_K7_PERFCTR3, pmc3); - wrmsrl(MSR_K7_EVNTSEL3, evntsel3); - } else { - release_perfctr_nmi(MSR_K7_PERFCTR0 + i); - release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); - } - - return pmc_now * tsc_khz / (tsc_now - tsc_start); -} -#else -static inline unsigned long calibrate_cpu(void) { return cpu_khz; } -#endif - void __init tsc_init(void) { u64 lpj; @@ -926,10 +872,6 @@ void __init tsc_init(void) return; } - if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && - (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) - cpu_khz = calibrate_cpu(); - printk("Detected %lu.%03lu MHz processor.\n", (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); -- cgit v0.10.2 From d9fadd7ba99a67030783a212bcb17d11f0678433 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 2 Sep 2010 15:37:10 +0200 Subject: x86, AMD: Remove needless CPU family check (for L3 cache info) Old 32-bit AMD CPUs (all w/o L3 cache) should always return 0 for cpuid_edx(0x80000006). For unknown reason the 32-bit implementation differed from the 64-bit implementation. See commit 67cddd94799 ("i386: Add L3 cache support to AMD CPUID4 emulation"). The current check is the result of the x86 merge. 
Signed-off-by: Andreas Herrmann Cc: Andi Kleen LKML-Reference: <20100902133710.GA5449@loge.amd.com> Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index fc563fa..0f0ace5d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -540,7 +540,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) #endif if (c->extended_cpuid_level >= 0x80000006) { - if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000)) + if (cpuid_edx(0x80000006) & 0xf000) num_cache_leaves = 4; else num_cache_leaves = 3; -- cgit v0.10.2 From 7ef8aa72ab176e0288f363d1247079732c5d5792 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 6 Sep 2010 15:14:17 +0200 Subject: x86, cpu: Fix renamed, not-yet-shipping AMD CPUID feature bit The AMD SSE5 feature set as-it has been replaced by some extensions to the AVX instruction set. Thus the bit formerly advertised as SSE5 is re-used for one of these extensions (XOP). Although this changes the /proc/cpuinfo output, it is not user visible, as there are no CPUs (yet) having this feature. To avoid confusion this should be added to the stable series, too. Cc: stable@kernel.org [.32.x .34.x, .35.x] Signed-off-by: Andre Przywara LKML-Reference: <1283778860-26843-2-git-send-email-andre.przywara@amd.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 781a50b..c9c73d8 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -152,7 +152,7 @@ #define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ #define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */ #define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ +#define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */ #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3a09c62..dd54779 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1996,7 +1996,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, const u32 kvm_supported_word6_x86_features = F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | - F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | + F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | 0 /* SKINIT */ | 0 /* WDT */; /* all calls to cpuid_count() should be made on the same cpu */ -- cgit v0.10.2 From 33ed82fb6c5f032151f7e9f1ac7b667f78f426b8 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 6 Sep 2010 15:14:18 +0200 Subject: x86, cpu: Update AMD CPUID feature bits AMD's public CPUID specification has been updated and some bits have got names. Add them to properly describe new CPU features. Signed-off-by: Andre Przywara LKML-Reference: <1283778860-26843-3-git-send-email-andre.przywara@amd.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index c9c73d8..341835df 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -155,7 +155,11 @@ #define X86_FEATURE_XOP (6*32+11) /* extended AVX instructions */ #define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */ #define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */ +#define X86_FEATURE_LWP (6*32+15) /* Light Weight Profiling */ +#define X86_FEATURE_FMA4 (6*32+16) /* 4 operands MAC instructions */ #define X86_FEATURE_NODEID_MSR (6*32+19) /* NodeId MSR */ +#define X86_FEATURE_TBM (6*32+21) /* trailing bit manipulations */ +#define X86_FEATURE_TOPOEXT (6*32+22) /* topology extensions CPUID leafs */ /* * Auxiliary flags: Linux defined - For features scattered in various -- cgit v0.10.2 From 6d886fd042634c0d3312bace63a5d0c541b721dc Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 6 Sep 2010 15:14:19 +0200 Subject: x86, cpu: Fix allowed CPUID bits for KVM guests The AMD extensions to AVX (FMA4, XOP) work on the same YMM register set as AVX, so they are safe for guests to use, as long as AVX itself is allowed. Add F16C and AES on the way for the same reasons. Signed-off-by: Andre Przywara LKML-Reference: <1283778860-26843-4-git-send-email-andre.przywara@amd.com> Acked-by: Avi Kivity Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index dd54779..6c2ecf0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1991,13 +1991,14 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | - 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); + 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | + F(F16C); /* cpuid 0x80000001.ecx */ const u32 kvm_supported_word6_x86_features = F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(XOP) | - 0 /* SKINIT */ | 0 /* WDT */; + 0 /* SKINIT, WDT, LWP */ | F(FMA4) | F(TBM); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); -- cgit v0.10.2 From aeb9c7d618264dcf6eea39142fefee096c3b09e2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 6 Sep 2010 15:14:20 +0200 Subject: x86, kvm: add new AMD SVM feature bits The recently updated CPUID specification names new SVM feature bits. Add them to the list of reported features. Signed-off-by: Andre Przywara LKML-Reference: <1283778860-26843-5-git-send-email-andre.przywara@amd.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 341835df..bffeab7 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -183,6 +183,13 @@ #define X86_FEATURE_LBRV (8*32+ 6) /* AMD LBR Virtualization support */ #define X86_FEATURE_SVML (8*32+ 7) /* "svm_lock" AMD SVM locking MSR */ #define X86_FEATURE_NRIPS (8*32+ 8) /* "nrip_save" AMD SVM next_rip save */ +#define X86_FEATURE_TSCRATEMSR (8*32+ 9) /* "tsc_scale" AMD TSC scaling support */ +#define X86_FEATURE_VMCBCLEAN (8*32+10) /* "vmcb_clean" AMD VMCB clean bits support */ +#define X86_FEATURE_FLUSHBYASID (8*32+11) /* AMD flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (8*32+12) /* AMD Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (8*32+13) /* AMD filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (8*32+14) /* AMD pause filter threshold */ + /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ #define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 34b4dad..2c77931 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -43,6 +43,12 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c) { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, { X86_FEATURE_NRIPS, CR_EDX, 3, 0x8000000a, 0 }, + { X86_FEATURE_TSCRATEMSR, CR_EDX, 4, 0x8000000a, 0 }, + { X86_FEATURE_VMCBCLEAN, CR_EDX, 5, 0x8000000a, 0 }, + { X86_FEATURE_FLUSHBYASID, CR_EDX, 6, 0x8000000a, 0 }, + { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, + { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, + { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, { 0, 0, 0, 0, 0 } }; -- cgit v0.10.2 From 3518dd14ca888085797ca8d3a9e11c8ef9e7ae68 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 17 Sep 2010 18:07:45 
+0200 Subject: x86, cacheinfo: Fix dependency of AMD L3 CID L3 cache index disable code uses PCI accesses to AMD northbridge functions. Currently the code is #ifdef CONFIG_CPU_SUP_AMD. But it should be #if (defined(CONFIG_CPU_SUP_AMD) && defined(CONFIG_PCI)) which in the end is a dependency to K8_NB. Signed-off-by: Andreas Herrmann LKML-Reference: <20100917160744.GF4958@loge.amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 898c2f4..2521cdc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -306,7 +306,7 @@ struct _cache_attr { ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); }; -#ifdef CONFIG_CPU_SUP_AMD +#ifdef CONFIG_K8_NB /* * L3 cache descriptors @@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); -#else /* CONFIG_CPU_SUP_AMD */ +#else /* CONFIG_K8_NB */ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) { }; -#endif /* CONFIG_CPU_SUP_AMD */ +#endif /* CONFIG_K8_NB */ static int __cpuinit cpuid4_cache_lookup_regs(int index, @@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = { static struct attribute *default_l3_attrs[] = { DEFAULT_SYSFS_CACHE_ATTRS, -#ifdef CONFIG_CPU_SUP_AMD +#ifdef CONFIG_K8_NB &cache_disable_0.attr, &cache_disable_1.attr, #endif -- cgit v0.10.2 From 900f9ac9f12dc3dd6fc8e33e16df172eafcaead6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 17 Sep 2010 18:02:54 +0200 Subject: x86, k8-gart: Decouple handling of garts and northbridges So far we only provide num_k8_northbridges. This is required in different areas (e.g. L3 cache index disable, GART). But not all AMD CPUs provide a GART. 
Thus it is useful to split off the GART handling from the generic caching of AMD northbridge misc devices. Signed-off-by: Andreas Herrmann LKML-Reference: <20100917160254.GC4958@loge.amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index af00bd1..9cee145 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h @@ -7,24 +7,27 @@ extern struct pci_device_id k8_nb_ids[]; struct bootnode; extern int early_is_k8_nb(u32 value); -extern struct pci_dev **k8_northbridges; -extern int num_k8_northbridges; extern int cache_k8_northbridges(void); extern void k8_flush_garts(void); extern int k8_get_nodes(struct bootnode *nodes); extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); extern int k8_scan_nodes(void); +struct k8_northbridge_info { + u16 num; + u8 gart_supported; + struct pci_dev **nb_misc; +}; +extern struct k8_northbridge_info k8_northbridges; + #ifdef CONFIG_K8_NB -extern int num_k8_northbridges; static inline struct pci_dev *node_to_k8_nb_misc(int node) { - return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; + return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; } #else -#define num_k8_northbridges 0 static inline struct pci_dev *node_to_k8_nb_misc(int node) { diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2521cdc..6fdfb0b2 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -369,7 +369,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, return; /* not in virtualized environments */ - if (num_k8_northbridges == 0) + if (k8_northbridges.num == 0) return; /* @@ -377,7 +377,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, * never freed but this is done only on shutdown so it doesn't matter. 
*/ if (!l3_caches) { - int size = num_k8_northbridges * sizeof(struct amd_l3_cache *); + int size = k8_northbridges.num * sizeof(struct amd_l3_cache *); l3_caches = kzalloc(size, GFP_ATOMIC); if (!l3_caches) diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c index 0f7bc20..5de1b6b 100644 --- a/arch/x86/kernel/k8.c +++ b/arch/x86/kernel/k8.c @@ -10,9 +10,6 @@ #include #include -int num_k8_northbridges; -EXPORT_SYMBOL(num_k8_northbridges); - static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { @@ -22,7 +19,7 @@ struct pci_device_id k8_nb_ids[] = { }; EXPORT_SYMBOL(k8_nb_ids); -struct pci_dev **k8_northbridges; +struct k8_northbridge_info k8_northbridges; EXPORT_SYMBOL(k8_northbridges); static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) @@ -40,36 +37,44 @@ int cache_k8_northbridges(void) int i; struct pci_dev *dev; - if (num_k8_northbridges) + if (k8_northbridges.num) return 0; dev = NULL; while ((dev = next_k8_northbridge(dev)) != NULL) - num_k8_northbridges++; + k8_northbridges.num++; + + /* some CPU families (e.g. 
family 0x11) do not support GART */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) + k8_northbridges.gart_supported = 1; - k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), - GFP_KERNEL); - if (!k8_northbridges) + k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * + sizeof(void *), GFP_KERNEL); + if (!k8_northbridges.nb_misc) return -ENOMEM; - if (!num_k8_northbridges) { - k8_northbridges[0] = NULL; + if (!k8_northbridges.num) { + k8_northbridges.nb_misc[0] = NULL; return 0; } - flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); - if (!flush_words) { - kfree(k8_northbridges); - return -ENOMEM; + if (k8_northbridges.gart_supported) { + flush_words = kmalloc(k8_northbridges.num * sizeof(u32), + GFP_KERNEL); + if (!flush_words) { + kfree(k8_northbridges.nb_misc); + return -ENOMEM; + } } dev = NULL; i = 0; while ((dev = next_k8_northbridge(dev)) != NULL) { - k8_northbridges[i] = dev; - pci_read_config_dword(dev, 0x9c, &flush_words[i++]); + k8_northbridges.nb_misc[i] = dev; + if (k8_northbridges.gart_supported) + pci_read_config_dword(dev, 0x9c, &flush_words[i++]); } - k8_northbridges[i] = NULL; + k8_northbridges.nb_misc[i] = NULL; return 0; } EXPORT_SYMBOL_GPL(cache_k8_northbridges); @@ -93,22 +98,25 @@ void k8_flush_garts(void) unsigned long flags; static DEFINE_SPINLOCK(gart_lock); + if (!k8_northbridges.gart_supported) + return; + /* Avoid races between AGP and IOMMU. In theory it's not needed but I'm not sure if the hardware won't lose flush requests when another is pending. This whole thing is so expensive anyways that it doesn't matter to serialize more. 
-AK */ spin_lock_irqsave(&gart_lock, flags); flushed = 0; - for (i = 0; i < num_k8_northbridges; i++) { - pci_write_config_dword(k8_northbridges[i], 0x9c, + for (i = 0; i < k8_northbridges.num; i++) { + pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, flush_words[i]|1); flushed++; } - for (i = 0; i < num_k8_northbridges; i++) { + for (i = 0; i < k8_northbridges.num; i++) { u32 w; /* Make sure the hardware actually executed the flush*/ for (;;) { - pci_read_config_dword(k8_northbridges[i], + pci_read_config_dword(k8_northbridges.nb_misc[i], 0x9c, &w); if (!(w & 1)) break; diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 0f7f130..8f214a2 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -560,8 +560,11 @@ static void enable_gart_translations(void) { int i; - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + if (!k8_northbridges.gart_supported) + return; + + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; enable_gart_translation(dev, __pa(agp_gatt_table)); } @@ -592,10 +595,13 @@ static void gart_fixup_northbridges(struct sys_device *dev) if (!fix_up_north_bridges) return; + if (!k8_northbridges.gart_supported) + return; + pr_info("PCI-DMA: Restoring GART aperture settings\n"); - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; /* * Don't enable translations just yet. 
That is the next @@ -649,8 +655,8 @@ static __init int init_k8_gatt(struct agp_kern_info *info) aper_size = aper_base = info->aper_size = 0; dev = NULL; - for (i = 0; i < num_k8_northbridges; i++) { - dev = k8_northbridges[i]; + for (i = 0; i < k8_northbridges.num; i++) { + dev = k8_northbridges.nb_misc[i]; new_aper_base = read_aperture(dev, &new_aper_size); if (!new_aper_base) goto nommu; @@ -718,10 +724,13 @@ static void gart_iommu_shutdown(void) if (!no_agp) return; - for (i = 0; i < num_k8_northbridges; i++) { + if (!k8_northbridges.gart_supported) + return; + + for (i = 0; i < k8_northbridges.num; i++) { u32 ctl; - dev = k8_northbridges[i]; + dev = k8_northbridges.nb_misc[i]; pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &ctl); ctl &= ~GARTEN; @@ -739,7 +748,7 @@ int __init gart_iommu_init(void) unsigned long scratch; long i; - if (num_k8_northbridges == 0) + if (!k8_northbridges.gart_supported) return 0; #ifndef CONFIG_AGP_AMD64 diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 70312da..bdf00d58 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -124,7 +124,7 @@ static int amd64_fetch_size(void) u32 temp; struct aper_size_info_32 *values; - dev = k8_northbridges[0]; + dev = k8_northbridges.nb_misc[0]; if (dev==NULL) return 0; @@ -181,10 +181,14 @@ static int amd_8151_configure(void) unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real); int i; + if (!k8_northbridges.gart_supported) + return 0; + /* Configure AGP regs in each x86-64 host bridge. 
*/ - for (i = 0; i < num_k8_northbridges; i++) { + for (i = 0; i < k8_northbridges.num; i++) { agp_bridge->gart_bus_addr = - amd64_configure(k8_northbridges[i], gatt_bus); + amd64_configure(k8_northbridges.nb_misc[i], + gatt_bus); } k8_flush_garts(); return 0; @@ -195,8 +199,12 @@ static void amd64_cleanup(void) { u32 tmp; int i; - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + + if (!k8_northbridges.gart_supported) + return; + + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; /* disable gart translation */ pci_read_config_dword(dev, AMD64_GARTAPERTURECTL, &tmp); tmp &= ~AMD64_GARTEN; @@ -319,16 +327,19 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, return 0; } -static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) +static __devinit int cache_nbs(struct pci_dev *pdev, u32 cap_ptr) { int i; if (cache_k8_northbridges() < 0) return -ENODEV; + if (!k8_northbridges.gart_supported) + return -ENODEV; + i = 0; - for (i = 0; i < num_k8_northbridges; i++) { - struct pci_dev *dev = k8_northbridges[i]; + for (i = 0; i < k8_northbridges.num; i++) { + struct pci_dev *dev = k8_northbridges.nb_misc[i]; if (fix_northbridge(dev, pdev, cap_ptr) < 0) { dev_err(&dev->dev, "no usable aperture found\n"); #ifdef __x86_64__ @@ -405,7 +416,8 @@ static int __devinit uli_agp_init(struct pci_dev *pdev) } /* shadow x86-64 registers into ULi registers */ - pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea); + pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, + &httfea); /* if x86-64 aperture base is beyond 4G, exit here */ if ((httfea & 0x7fff) >> (32 - 25)) { @@ -472,7 +484,8 @@ static int nforce3_agp_init(struct pci_dev *pdev) pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); /* shadow x86-64 registers into NVIDIA registers */ - pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, 
&apbase); + pci_read_config_dword (k8_northbridges.nb_misc[0], AMD64_GARTAPERTUREBASE, + &apbase); /* if x86-64 aperture base is beyond 4G, exit here */ if ( (apbase & 0x7fff) >> (32 - 25) ) { diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index e7d5d6b..5babf6f 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2927,7 +2927,7 @@ static int __init amd64_edac_init(void) * to finish initialization of the MC instances. */ err = -ENODEV; - for (nb = 0; nb < num_k8_northbridges; nb++) { + for (nb = 0; nb < k8_northbridges.num; nb++) { if (!pvt_lookup[nb]) continue; -- cgit v0.10.2 From 23ac4ae827e6264e21b898f2cd3f601450aa02a6 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Fri, 17 Sep 2010 18:03:43 +0200 Subject: x86, k8: Rename k8.[ch] to amd_nb.[ch] and CONFIG_K8_NB to CONFIG_AMD_NB The file names are somehow misleading as the code is not specific to AMD K8 CPUs anymore. The files accommodate code for other AMD CPU northbridges as well. Same is true for the config option which is valid for AMD CPU northbridges in general and not specific to K8. Signed-off-by: Andreas Herrmann LKML-Reference: <20100917160343.GD4958@loge.amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cea0cd9..7fd41f0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -670,7 +670,7 @@ config GART_IOMMU bool "GART IOMMU support" if EMBEDDED default y select SWIOTLB - depends on X86_64 && PCI && K8_NB + depends on X86_64 && PCI && AMD_NB ---help--- Support for full DMA access of devices with 32bit memory access only on systems with more than 3GB. 
This is usually needed for USB, @@ -2076,7 +2076,7 @@ config OLPC_OPENFIRMWARE endif # X86_32 -config K8_NB +config AMD_NB def_bool y depends on CPU_SUP_AMD && PCI diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h new file mode 100644 index 0000000..c8517f8 --- /dev/null +++ b/arch/x86/include/asm/amd_nb.h @@ -0,0 +1,39 @@ +#ifndef _ASM_X86_AMD_NB_H +#define _ASM_X86_AMD_NB_H + +#include + +extern struct pci_device_id k8_nb_ids[]; +struct bootnode; + +extern int early_is_k8_nb(u32 value); +extern int cache_k8_northbridges(void); +extern void k8_flush_garts(void); +extern int k8_get_nodes(struct bootnode *nodes); +extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); +extern int k8_scan_nodes(void); + +struct k8_northbridge_info { + u16 num; + u8 gart_supported; + struct pci_dev **nb_misc; +}; +extern struct k8_northbridge_info k8_northbridges; + +#ifdef CONFIG_AMD_NB + +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return (node < k8_northbridges.num) ? 
k8_northbridges.nb_misc[node] : NULL; +} + +#else + +static inline struct pci_dev *node_to_k8_nb_misc(int node) +{ + return NULL; +} +#endif + + +#endif /* _ASM_X86_AMD_NB_H */ diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h deleted file mode 100644 index 9cee145..0000000 --- a/arch/x86/include/asm/k8.h +++ /dev/null @@ -1,39 +0,0 @@ -#ifndef _ASM_X86_K8_H -#define _ASM_X86_K8_H - -#include - -extern struct pci_device_id k8_nb_ids[]; -struct bootnode; - -extern int early_is_k8_nb(u32 value); -extern int cache_k8_northbridges(void); -extern void k8_flush_garts(void); -extern int k8_get_nodes(struct bootnode *nodes); -extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn); -extern int k8_scan_nodes(void); - -struct k8_northbridge_info { - u16 num; - u8 gart_supported; - struct pci_dev **nb_misc; -}; -extern struct k8_northbridge_info k8_northbridges; - -#ifdef CONFIG_K8_NB - -static inline struct pci_dev *node_to_k8_nb_misc(int node) -{ - return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL; -} - -#else - -static inline struct pci_dev *node_to_k8_nb_misc(int node) -{ - return NULL; -} -#endif - - -#endif /* _ASM_X86_K8_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0925676..25dc82d 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -87,7 +87,7 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_APB_TIMER) += apb_timer.o -obj-$(CONFIG_K8_NB) += k8.o +obj-$(CONFIG_AMD_NB) += amd_nb.o obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c new file mode 100644 index 0000000..4ffc38d --- /dev/null +++ b/arch/x86/kernel/amd_nb.c @@ -0,0 +1,145 @@ +/* + * Shared support code for AMD K8 northbridges and derivates. + * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. 
+ */ +#include +#include +#include +#include +#include +#include +#include + +static u32 *flush_words; + +struct pci_device_id k8_nb_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, + {} +}; +EXPORT_SYMBOL(k8_nb_ids); + +struct k8_northbridge_info k8_northbridges; +EXPORT_SYMBOL(k8_northbridges); + +static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) +{ + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_ids[0], dev)); + return dev; +} + +int cache_k8_northbridges(void) +{ + int i; + struct pci_dev *dev; + + if (k8_northbridges.num) + return 0; + + dev = NULL; + while ((dev = next_k8_northbridge(dev)) != NULL) + k8_northbridges.num++; + + /* some CPU families (e.g. family 0x11) do not support GART */ + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) + k8_northbridges.gart_supported = 1; + + k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * + sizeof(void *), GFP_KERNEL); + if (!k8_northbridges.nb_misc) + return -ENOMEM; + + if (!k8_northbridges.num) { + k8_northbridges.nb_misc[0] = NULL; + return 0; + } + + if (k8_northbridges.gart_supported) { + flush_words = kmalloc(k8_northbridges.num * sizeof(u32), + GFP_KERNEL); + if (!flush_words) { + kfree(k8_northbridges.nb_misc); + return -ENOMEM; + } + } + + dev = NULL; + i = 0; + while ((dev = next_k8_northbridge(dev)) != NULL) { + k8_northbridges.nb_misc[i] = dev; + if (k8_northbridges.gart_supported) + pci_read_config_dword(dev, 0x9c, &flush_words[i++]); + } + k8_northbridges.nb_misc[i] = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(cache_k8_northbridges); + +/* Ignores subdevice/subvendor but as far as I can figure out + they're useless anyways */ +int __init early_is_k8_nb(u32 device) +{ + struct pci_device_id *id; + u32 vendor = device & 0xffff; + device >>= 16; + for (id = k8_nb_ids; id->vendor; id++) + if (vendor == id->vendor && 
device == id->device) + return 1; + return 0; +} + +void k8_flush_garts(void) +{ + int flushed, i; + unsigned long flags; + static DEFINE_SPINLOCK(gart_lock); + + if (!k8_northbridges.gart_supported) + return; + + /* Avoid races between AGP and IOMMU. In theory it's not needed + but I'm not sure if the hardware won't lose flush requests + when another is pending. This whole thing is so expensive anyways + that it doesn't matter to serialize more. -AK */ + spin_lock_irqsave(&gart_lock, flags); + flushed = 0; + for (i = 0; i < k8_northbridges.num; i++) { + pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, + flush_words[i]|1); + flushed++; + } + for (i = 0; i < k8_northbridges.num; i++) { + u32 w; + /* Make sure the hardware actually executed the flush*/ + for (;;) { + pci_read_config_dword(k8_northbridges.nb_misc[i], + 0x9c, &w); + if (!(w & 1)) + break; + cpu_relax(); + } + } + spin_unlock_irqrestore(&gart_lock, flags); + if (!flushed) + printk("nothing to flush?\n"); +} +EXPORT_SYMBOL_GPL(k8_flush_garts); + +static __init int init_k8_nbs(void) +{ + int err = 0; + + err = cache_k8_northbridges(); + + if (err < 0) + printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); + + return err; +} + +/* This has to go after the PCI subsystem */ +fs_initcall(init_k8_nbs); diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index a2e0caf..e91e042 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include int gart_iommu_aperture; diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6fdfb0b2..12cd823 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #define LVL_1_INST 1 @@ -306,7 +306,7 @@ struct _cache_attr { ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); }; -#ifdef 
CONFIG_K8_NB +#ifdef CONFIG_AMD_NB /* * L3 cache descriptors @@ -556,12 +556,12 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, show_cache_disable_1, store_cache_disable_1); -#else /* CONFIG_K8_NB */ +#else /* CONFIG_AMD_NB */ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index) { }; -#endif /* CONFIG_K8_NB */ +#endif /* CONFIG_AMD_NB */ static int __cpuinit cpuid4_cache_lookup_regs(int index, @@ -1000,7 +1000,7 @@ static struct attribute *default_attrs[] = { static struct attribute *default_l3_attrs[] = { DEFAULT_SYSFS_CACHE_ATTRS, -#ifdef CONFIG_K8_NB +#ifdef CONFIG_AMD_NB &cache_disable_0.attr, &cache_disable_1.attr, #endif diff --git a/arch/x86/kernel/k8.c b/arch/x86/kernel/k8.c deleted file mode 100644 index 5de1b6b..0000000 --- a/arch/x86/kernel/k8.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Shared support code for AMD K8 northbridges and derivates. - * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. - */ -#include -#include -#include -#include -#include -#include -#include - -static u32 *flush_words; - -struct pci_device_id k8_nb_ids[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, - {} -}; -EXPORT_SYMBOL(k8_nb_ids); - -struct k8_northbridge_info k8_northbridges; -EXPORT_SYMBOL(k8_northbridges); - -static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) -{ - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (!dev) - break; - } while (!pci_match_id(&k8_nb_ids[0], dev)); - return dev; -} - -int cache_k8_northbridges(void) -{ - int i; - struct pci_dev *dev; - - if (k8_northbridges.num) - return 0; - - dev = NULL; - while ((dev = next_k8_northbridge(dev)) != NULL) - k8_northbridges.num++; - - /* some CPU families (e.g. 
family 0x11) do not support GART */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) - k8_northbridges.gart_supported = 1; - - k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * - sizeof(void *), GFP_KERNEL); - if (!k8_northbridges.nb_misc) - return -ENOMEM; - - if (!k8_northbridges.num) { - k8_northbridges.nb_misc[0] = NULL; - return 0; - } - - if (k8_northbridges.gart_supported) { - flush_words = kmalloc(k8_northbridges.num * sizeof(u32), - GFP_KERNEL); - if (!flush_words) { - kfree(k8_northbridges.nb_misc); - return -ENOMEM; - } - } - - dev = NULL; - i = 0; - while ((dev = next_k8_northbridge(dev)) != NULL) { - k8_northbridges.nb_misc[i] = dev; - if (k8_northbridges.gart_supported) - pci_read_config_dword(dev, 0x9c, &flush_words[i++]); - } - k8_northbridges.nb_misc[i] = NULL; - return 0; -} -EXPORT_SYMBOL_GPL(cache_k8_northbridges); - -/* Ignores subdevice/subvendor but as far as I can figure out - they're useless anyways */ -int __init early_is_k8_nb(u32 device) -{ - struct pci_device_id *id; - u32 vendor = device & 0xffff; - device >>= 16; - for (id = k8_nb_ids; id->vendor; id++) - if (vendor == id->vendor && device == id->device) - return 1; - return 0; -} - -void k8_flush_garts(void) -{ - int flushed, i; - unsigned long flags; - static DEFINE_SPINLOCK(gart_lock); - - if (!k8_northbridges.gart_supported) - return; - - /* Avoid races between AGP and IOMMU. In theory it's not needed - but I'm not sure if the hardware won't lose flush requests - when another is pending. This whole thing is so expensive anyways - that it doesn't matter to serialize more. 
-AK */ - spin_lock_irqsave(&gart_lock, flags); - flushed = 0; - for (i = 0; i < k8_northbridges.num; i++) { - pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c, - flush_words[i]|1); - flushed++; - } - for (i = 0; i < k8_northbridges.num; i++) { - u32 w; - /* Make sure the hardware actually executed the flush*/ - for (;;) { - pci_read_config_dword(k8_northbridges.nb_misc[i], - 0x9c, &w); - if (!(w & 1)) - break; - cpu_relax(); - } - } - spin_unlock_irqrestore(&gart_lock, flags); - if (!flushed) - printk("nothing to flush?\n"); -} -EXPORT_SYMBOL_GPL(k8_flush_garts); - -static __init int init_k8_nbs(void) -{ - int err = 0; - - err = cache_k8_northbridges(); - - if (err < 0) - printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n"); - - return err; -} - -/* This has to go after the PCI subsystem */ -fs_initcall(init_k8_nbs); diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 8f214a2..67e5665 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index c3a4fbb..77eea03 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -107,7 +107,7 @@ #include #include #include -#include +#include #ifdef CONFIG_X86_64 #include #endif diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 970ed57..ab75b18 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include static struct bootnode __initdata nodes[8]; static nodemask_t __initdata nodes_parsed = NODE_MASK_NONE; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index a7bcc23..4962f1a 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include struct pglist_data 
*node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index 4b66c69..5ddf67e 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -57,7 +57,7 @@ config AGP_AMD config AGP_AMD64 tristate "AMD Opteron/Athlon64 on-CPU GART support" - depends on AGP && X86 && K8_NB + depends on AGP && X86 && AMD_NB help This option gives you AGP support for the GLX component of X using the on-CPU northbridge of the AMD Athlon64/Opteron CPUs. diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index bdf00d58..4d6087c 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -15,7 +15,7 @@ #include #include /* PAGE_SIZE */ #include -#include +#include #include #include "agp.h" diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 70bb350..734e2e0 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -66,7 +66,7 @@ config EDAC_MCE config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" - depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE + depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE help Support for error detection and correction on the AMD 64 Families of Memory Controllers (K8, F10h and F11h) diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5babf6f..09fcc52 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -1,5 +1,5 @@ #include "amd64_edac.h" -#include +#include static struct edac_pci_ctl_info *amd64_ctl_pci; -- cgit v0.10.2 From 3fdbf004c1706480a7c7fac3c9d836fa6df20d7d Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:32:35 +0200 Subject: x86, mtrr: Assume SYS_CFG[Tom2ForceMemTypeWB] exists on all future AMD CPUs Instead of adapting the CPU family check in amd_special_default_mtrr() for each new CPU family assume that all new AMD CPUs support the necessary bits in SYS_CFG MSR. 
Tom2Enabled is architectural (defined in APM Vol.2). Tom2ForceMemTypeWB is defined in all BKDGs starting with K8 NPT. In pre K8-NPT BKDG this bit is reserved (read as zero). W/o this adaption Linux would unnecessarily complain about bad MTRR settings on every new AMD CPU family, e.g. [ 0.000000] WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing 4863MB of RAM. Cc: stable@kernel.org # .32.x, .35.x Signed-off-by: Andreas Herrmann LKML-Reference: <20100930123235.GB20545@loge.amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index c5f59d0..ac140c7 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c @@ -827,7 +827,7 @@ int __init amd_special_default_mtrr(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) return 0; - if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) + if (boot_cpu_data.x86 < 0xf) return 0; /* In case some hypervisor doesn't pass SYSCFG through: */ if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) -- cgit v0.10.2 From 420b13b60a3e5c5dcc6ec290e131cf5fbc603d94 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:33:58 +0200 Subject: x86, nmi: Support NMI watchdog on newer AMD CPU families CPU families 0x12, 0x14 and 0x15 support this functionality. Signed-off-by: Andreas Herrmann LKML-Reference: <20100930123357.GC20545@loge.amd.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index fb329e9..d9f4ff8 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -700,11 +700,10 @@ static void probe_nmi_watchdog(void) { switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: - if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && - boot_cpu_data.x86 != 16 && boot_cpu_data.x86 != 17) - return; - wd_ops = &k7_wd_ops; - break; + if (boot_cpu_data.x86 == 6 || + (boot_cpu_data.x86 >= 0xf && boot_cpu_data.x86 <= 0x15)) + wd_ops = &k7_wd_ops; + return; case X86_VENDOR_INTEL: /* Work around where perfctr1 doesn't have a working enable * bit as described in the following errata: -- cgit v0.10.2 From 23588c38a84c9175c6668789b64ffba4651e5c6a Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:36:28 +0200 Subject: x86, amd: Add support for CPUID topology extension of AMD CPUs Node information (ID, number of internal nodes) is provided via CPUID Fn8000_001e_ECX. See AMD CPUID Specification (Publication # 25481, Revision 2.34, September 2010). Signed-off-by: Andreas Herrmann LKML-Reference: <20100930123628.GD20545@loge.amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 0f0ace5d..7e6a37d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -253,37 +253,41 @@ static int __cpuinit nearby_node(int apicid) #endif /* - * Fixup core topology information for AMD multi-node processors. - * Assumption: Number of cores in each internal node is the same. + * Fixup core topology information for + * (1) AMD multi-node processors + * Assumption: Number of cores in each internal node is the same. 
*/ #ifdef CONFIG_X86_HT -static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) +static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) { - unsigned long long value; u32 nodes, cores_per_node; + u8 node_id; + unsigned long long value; int cpu = smp_processor_id(); - if (!cpu_has(c, X86_FEATURE_NODEID_MSR)) - return; - - /* fixup topology information only once for a core */ - if (cpu_has(c, X86_FEATURE_AMD_DCM)) + /* get information required for multi-node processors */ + if (cpu_has(c, X86_FEATURE_TOPOEXT)) { + value = cpuid_ecx(0x8000001e); + nodes = ((value >> 8) & 7) + 1; + node_id = value & 7; + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { + rdmsrl(MSR_FAM10H_NODE_ID, value); + nodes = ((value >> 3) & 7) + 1; + node_id = value & 7; + } else return; - rdmsrl(MSR_FAM10H_NODE_ID, value); + /* fixup multi-node processor information */ + if (nodes > 1) { + set_cpu_cap(c, X86_FEATURE_AMD_DCM); + cores_per_node = c->x86_max_cores / nodes; - nodes = ((value >> 3) & 7) + 1; - if (nodes == 1) - return; - - set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes; + /* store NodeID, use llc_shared_map to store sibling info */ + per_cpu(cpu_llc_id, cpu) = node_id; - /* store NodeID, use llc_shared_map to store sibling info */ - per_cpu(cpu_llc_id, cpu) = value & 7; - - /* fixup core id to be in range from 0 to (cores_per_node - 1) */ - c->cpu_core_id = c->cpu_core_id % cores_per_node; + /* core id to be in range from 0 to (cores_per_node - 1) */ + c->cpu_core_id = c->cpu_core_id % cores_per_node; + } } #endif @@ -304,9 +308,7 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; - /* fixup topology information on multi-node processors */ - if ((c->x86 == 0x10) && (c->x86_model == 9)) - amd_fixup_dcm(c); + amd_get_topology(c); #endif } -- cgit v0.10.2 From 
6057b4d331f19a3ea51aec463ea7839c128b3227 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:38:57 +0200 Subject: x86, amd: Extract compute unit information for AMD CPUs Get compute unit information from CPUID Fn8000_001E_EBX. (See AMD CPUID Specification - publication # 25481, revision 2.34, September 2010.) Note that each core on a compute unit still has a core_id of its own. Signed-off-by: Andreas Herrmann LKML-Reference: <20100930123857.GE20545@loge.amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 325b7bd..69e80c2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -110,6 +110,8 @@ struct cpuinfo_x86 { u16 phys_proc_id; /* Core id: */ u16 cpu_core_id; + /* Compute unit id */ + u8 compute_unit_id; /* Index into per_cpu list: */ u16 cpu_index; #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7e6a37d..70168ab 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -256,21 +256,29 @@ static int __cpuinit nearby_node(int apicid) * Fixup core topology information for * (1) AMD multi-node processors * Assumption: Number of cores in each internal node is the same. 
+ * (2) AMD processors supporting compute units */ #ifdef CONFIG_X86_HT static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) { - u32 nodes, cores_per_node; + u32 nodes; u8 node_id; - unsigned long long value; int cpu = smp_processor_id(); /* get information required for multi-node processors */ if (cpu_has(c, X86_FEATURE_TOPOEXT)) { - value = cpuid_ecx(0x8000001e); - nodes = ((value >> 8) & 7) + 1; - node_id = value & 7; + u32 eax, ebx, ecx, edx; + + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + nodes = ((ecx >> 8) & 7) + 1; + node_id = ecx & 7; + + /* get compute unit information */ + smp_num_siblings = ((ebx >> 8) & 3) + 1; + c->compute_unit_id = ebx & 0xff; } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { + u64 value; + rdmsrl(MSR_FAM10H_NODE_ID, value); nodes = ((value >> 3) & 7) + 1; node_id = value & 7; @@ -279,6 +287,8 @@ static void __cpuinit amd_get_topology(struct cpuinfo_x86 *c) /* fixup multi-node processor information */ if (nodes > 1) { + u32 cores_per_node; + set_cpu_cap(c, X86_FEATURE_AMD_DCM); cores_per_node = c->x86_max_cores / nodes; -- cgit v0.10.2 From d4fbe4f03557e1fd4d9bbb3a1957aad560f39e96 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:41:56 +0200 Subject: x86, amd: Use compute unit information to determine thread siblings This information is vital for different load balancing policies. Signed-off-by: Andreas Herrmann LKML-Reference: <20100930124156.GF20545@loge.amd.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8b3bfc4..bc2cc44 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -397,6 +397,19 @@ void __cpuinit smp_store_cpu_info(int id) identify_secondary_cpu(c); } +static void __cpuinit link_thread_siblings(int cpu1, int cpu2) +{ + struct cpuinfo_x86 *c1 = &cpu_data(cpu1); + struct cpuinfo_x86 *c2 = &cpu_data(cpu2); + + cpumask_set_cpu(cpu1, cpu_sibling_mask(cpu2)); + cpumask_set_cpu(cpu2, cpu_sibling_mask(cpu1)); + cpumask_set_cpu(cpu1, cpu_core_mask(cpu2)); + cpumask_set_cpu(cpu2, cpu_core_mask(cpu1)); + cpumask_set_cpu(cpu1, c2->llc_shared_map); + cpumask_set_cpu(cpu2, c1->llc_shared_map); +} + void __cpuinit set_cpu_sibling_map(int cpu) { @@ -409,14 +422,13 @@ void __cpuinit set_cpu_sibling_map(int cpu) for_each_cpu(i, cpu_sibling_setup_mask) { struct cpuinfo_x86 *o = &cpu_data(i); - if (c->phys_proc_id == o->phys_proc_id && - c->cpu_core_id == o->cpu_core_id) { - cpumask_set_cpu(i, cpu_sibling_mask(cpu)); - cpumask_set_cpu(cpu, cpu_sibling_mask(i)); - cpumask_set_cpu(i, cpu_core_mask(cpu)); - cpumask_set_cpu(cpu, cpu_core_mask(i)); - cpumask_set_cpu(i, c->llc_shared_map); - cpumask_set_cpu(cpu, o->llc_shared_map); + if (cpu_has(c, X86_FEATURE_TOPOEXT)) { + if (c->phys_proc_id == o->phys_proc_id && + c->compute_unit_id == o->compute_unit_id) + link_thread_siblings(cpu, i); + } else if (c->phys_proc_id == o->phys_proc_id && + c->cpu_core_id == o->cpu_core_id) { + link_thread_siblings(cpu, i); } } } else { -- cgit v0.10.2 From 5c80cc78de46aef6cd5e714208da05c3f7f548f8 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Sep 2010 14:43:16 +0200 Subject: x86, amd_nb: Enable GART support for AMD family 0x15 CPUs AMD CPU family 0x15 still supports GART for compatibility reasons. Signed-off-by: Andreas Herrmann LKML-Reference: <20100930124316.GG20545@loge.amd.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 4ffc38d..8f6463d 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -15,6 +15,7 @@ static u32 *flush_words; struct pci_device_id k8_nb_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) }, {} }; EXPORT_SYMBOL(k8_nb_ids); @@ -45,7 +46,8 @@ int cache_k8_northbridges(void) k8_northbridges.num++; /* some CPU families (e.g. family 0x11) do not support GART */ - if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10) + if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 || + boot_cpu_data.x86 == 0x15) k8_northbridges.gart_supported = 1; k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) * diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 10d3330..edc0279 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -514,6 +514,7 @@ #define PCI_DEVICE_ID_AMD_11H_NB_DRAM 0x1302 #define PCI_DEVICE_ID_AMD_11H_NB_MISC 0x1303 #define PCI_DEVICE_ID_AMD_11H_NB_LINK 0x1304 +#define PCI_DEVICE_ID_AMD_15H_NB_MISC 0x1603 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 -- cgit v0.10.2