summaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-05-16 22:15:17 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-16 22:15:17 (GMT)
commit168f1a7163b37294a0ef33829e1ed54d41e33c42 (patch)
tree16fa34f24156c28f0a3060d984e98bf4df878f91 /arch/x86/kernel
parent825a3b2605c3aa193e0075d0f9c72e33c17ab16a (diff)
parent4afd0565552c87f23834db9121dd9cf6955d0b43 (diff)
downloadlinux-168f1a7163b37294a0ef33829e1ed54d41e33c42.tar.xz
Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 asm updates from Ingo Molnar: "The main changes in this cycle were: - MSR access API fixes and enhancements (Andy Lutomirski) - early exception handling improvements (Andy Lutomirski) - user-space FS/GS prctl usage fixes and improvements (Andy Lutomirski) - Remove the cpu_has_*() APIs and replace them with equivalents (Borislav Petkov) - task switch micro-optimization (Brian Gerst) - 32-bit entry code simplification (Denys Vlasenko) - enhance PAT handling in enumated CPUs (Toshi Kani) ... and lots of other cleanups/fixlets" * 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits) x86/arch_prctl/64: Restore accidentally removed put_cpu() in ARCH_SET_GS x86/entry/32: Remove asmlinkage_protect() x86/entry/32: Remove GET_THREAD_INFO() from entry code x86/entry, sched/x86: Don't save/restore EFLAGS on task switch x86/asm/entry/32: Simplify pushes of zeroed pt_regs->REGs selftests/x86/ldt_gdt: Test set_thread_area() deletion of an active segment x86/tls: Synchronize segment registers in set_thread_area() x86/asm/64: Rename thread_struct's fs and gs to fsbase and gsbase x86/arch_prctl/64: Remove FSBASE/GSBASE < 4G optimization x86/segments/64: When load_gs_index fails, clear the base x86/segments/64: When loadsegment(fs, ...) fails, clear the base x86/asm: Make asm/alternative.h safe from assembly x86/asm: Stop depending on ptrace.h in alternative.h x86/entry: Rename is_{ia32,x32}_task() to in_{ia32,x32}_syscall() x86/asm: Make sure verify_cpu() has a good stack x86/extable: Add a comment about early exception handlers x86/msr: Set the return value to zero when native_rdmsr_safe() fails x86/paravirt: Make "unsafe" MSR accesses unsafe even if PARAVIRT=y x86/paravirt: Add paravirt_{read,write}_msr() x86/msr: Carry on after a non-"safe" MSR access fails ...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/alternative.c1
-rw-r--r--arch/x86/kernel/apic/apic.c32
-rw-r--r--arch/x86/kernel/apic/apic_noop.c4
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/vector.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c20
-rw-r--r--arch/x86/kernel/cpu/common.c82
-rw-r--r--arch/x86/kernel/cpu/cyrix.c2
-rw-r--r--arch/x86/kernel/cpu/intel.c12
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c4
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c28
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h1
-rw-r--r--arch/x86/kernel/cpu/vmware.c2
-rw-r--r--arch/x86/kernel/devicetree.c2
-rw-r--r--arch/x86/kernel/fpu/bugs.c16
-rw-r--r--arch/x86/kernel/fpu/core.c50
-rw-r--r--arch/x86/kernel/fpu/init.c16
-rw-r--r--arch/x86/kernel/fpu/regset.c25
-rw-r--r--arch/x86/kernel/fpu/xstate.c18
-rw-r--r--arch/x86/kernel/head_32.S116
-rw-r--r--arch/x86/kernel/head_64.S103
-rw-r--r--arch/x86/kernel/hpet.c1
-rw-r--r--arch/x86/kernel/jump_label.c1
-rw-r--r--arch/x86/kernel/kgdb.c1
-rw-r--r--arch/x86/kernel/kprobes/core.c1
-rw-r--r--arch/x86/kernel/kprobes/opt.c1
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/paravirt.c6
-rw-r--r--arch/x86/kernel/process_64.c241
-rw-r--r--arch/x86/kernel/ptrace.c50
-rw-r--r--arch/x86/kernel/signal.c6
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/tce_64.c2
-rw-r--r--arch/x86/kernel/tls.c42
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kernel/tsc.c33
-rw-r--r--arch/x86/kernel/uprobes.c2
43 files changed, 468 insertions, 490 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c2f1ef..2522e56 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -136,7 +136,7 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
{
struct acpi_table_madt *madt = NULL;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -EINVAL;
madt = (struct acpi_table_madt *)table;
@@ -951,7 +951,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void)
{
int count;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
@@ -979,7 +979,7 @@ static int __init acpi_parse_madt_lapic_entries(void)
int ret;
struct acpi_subtable_proc madt_proc[2];
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
@@ -1125,7 +1125,7 @@ static int __init acpi_parse_madt_ioapic_entries(void)
if (acpi_disabled || acpi_noirq)
return -ENODEV;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return -ENODEV;
/*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 25f9093..5cb272a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -11,6 +11,7 @@
#include <linux/stop_machine.h>
#include <linux/slab.h>
#include <linux/kdebug.h>
+#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index d356987..60078a6 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -607,7 +607,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
long tapic = apic_read(APIC_TMCCT);
unsigned long pm = acpi_pm_read_early();
- if (cpu_has_tsc)
+ if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
switch (lapic_cal_loops++) {
@@ -668,7 +668,7 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
*delta = (long)res;
/* Correct the tsc counter value */
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
res = (((u64)(*deltatsc)) * pm_100ms);
do_div(res, deltapm);
apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
@@ -760,7 +760,7 @@ static int __init calibrate_APIC_clock(void)
apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
lapic_timer_frequency);
- if (cpu_has_tsc) {
+ if (boot_cpu_has(X86_FEATURE_TSC)) {
apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
"%ld.%04ld MHz.\n",
(deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
@@ -1085,7 +1085,7 @@ void lapic_shutdown(void)
{
unsigned long flags;
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return;
local_irq_save(flags);
@@ -1134,7 +1134,7 @@ void __init init_bsp_APIC(void)
* Don't do the setup now if we have a SMP BIOS as the
* through-I/O-APIC virtual wire mode might be active.
*/
- if (smp_found_config || !cpu_has_apic)
+ if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC))
return;
/*
@@ -1227,7 +1227,7 @@ void setup_local_APIC(void)
unsigned long long tsc = 0, ntsc;
long long max_loops = cpu_khz ? cpu_khz : 1000000;
- if (cpu_has_tsc)
+ if (boot_cpu_has(X86_FEATURE_TSC))
tsc = rdtsc();
if (disable_apic) {
@@ -1311,7 +1311,7 @@ void setup_local_APIC(void)
break;
}
if (queued) {
- if (cpu_has_tsc && cpu_khz) {
+ if (boot_cpu_has(X86_FEATURE_TSC) && cpu_khz) {
ntsc = rdtsc();
max_loops = (cpu_khz << 10) - (ntsc - tsc);
} else
@@ -1445,7 +1445,7 @@ static void __x2apic_disable(void)
{
u64 msr;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return;
rdmsrl(MSR_IA32_APICBASE, msr);
@@ -1561,7 +1561,7 @@ void __init check_x2apic(void)
pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
x2apic_mode = 1;
x2apic_state = X2APIC_ON;
- } else if (!cpu_has_x2apic) {
+ } else if (!boot_cpu_has(X86_FEATURE_X2APIC)) {
x2apic_state = X2APIC_DISABLED;
}
}
@@ -1632,7 +1632,7 @@ void __init enable_IR_x2apic(void)
*/
static int __init detect_init_APIC(void)
{
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
pr_info("No local APIC present\n");
return -1;
}
@@ -1711,14 +1711,14 @@ static int __init detect_init_APIC(void)
goto no_apic;
case X86_VENDOR_INTEL:
if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
- (boot_cpu_data.x86 == 5 && cpu_has_apic))
+ (boot_cpu_data.x86 == 5 && boot_cpu_has(X86_FEATURE_APIC)))
break;
goto no_apic;
default:
goto no_apic;
}
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
/*
* Over-ride BIOS and try to enable the local APIC only if
* "lapic" specified.
@@ -2233,19 +2233,19 @@ int __init APIC_init_uniprocessor(void)
return -1;
}
#ifdef CONFIG_X86_64
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
disable_apic = 1;
pr_info("Apic disabled by BIOS\n");
return -1;
}
#else
- if (!smp_found_config && !cpu_has_apic)
+ if (!smp_found_config && !boot_cpu_has(X86_FEATURE_APIC))
return -1;
/*
* Complain if the BIOS pretends there is one.
*/
- if (!cpu_has_apic &&
+ if (!boot_cpu_has(X86_FEATURE_APIC) &&
APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
boot_cpu_physical_apicid);
@@ -2426,7 +2426,7 @@ static void apic_pm_activate(void)
static int __init init_lapic_sysfs(void)
{
/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
- if (cpu_has_apic)
+ if (boot_cpu_has(X86_FEATURE_APIC))
register_syscore_ops(&lapic_syscore_ops);
return 0;
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
index 331a7a0..13d19ed 100644
--- a/arch/x86/kernel/apic/apic_noop.c
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -100,13 +100,13 @@ static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
static u32 noop_apic_read(u32 reg)
{
- WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
return 0;
}
static void noop_apic_write(u32 reg, u32 v)
{
- WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+ WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
}
struct apic apic_noop = {
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index fdb0fbf..84e33ff 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1454,7 +1454,7 @@ void native_disable_io_apic(void)
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
}
- if (cpu_has_apic || apic_from_smp_config())
+ if (boot_cpu_has(X86_FEATURE_APIC) || apic_from_smp_config())
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 28bde88..2a0f225 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -230,7 +230,7 @@ int safe_smp_processor_id(void)
{
int apicid, cpuid;
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index ef49551..a5e400a 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -944,7 +944,7 @@ static int __init print_ICs(void)
print_PIC();
/* don't print out if apic is not there */
- if (!cpu_has_apic && !apic_from_smp_config())
+ if (!boot_cpu_has(X86_FEATURE_APIC) && !apic_from_smp_config())
return 0;
print_local_APICs(show_lapic);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7b76eb6..c343a54 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -565,14 +565,17 @@ static void early_init_amd(struct cpuinfo_x86 *c)
* can safely set X86_FEATURE_EXTD_APICID unconditionally for families
* after 16h.
*/
- if (cpu_has_apic && c->x86 > 0x16) {
- set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
- } else if (cpu_has_apic && c->x86 >= 0xf) {
- /* check CPU config space for extended APIC ID */
- unsigned int val;
- val = read_pci_config(0, 24, 0, 0x68);
- if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18)))
+ if (boot_cpu_has(X86_FEATURE_APIC)) {
+ if (c->x86 > 0x16)
set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ else if (c->x86 >= 0xf) {
+ /* check CPU config space for extended APIC ID */
+ unsigned int val;
+
+ val = read_pci_config(0, 24, 0, 0x68);
+ if ((val >> 17 & 0x3) == 0x3)
+ set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
+ }
}
#endif
@@ -628,6 +631,7 @@ static void init_amd_k8(struct cpuinfo_x86 *c)
*/
msr_set_bit(MSR_K7_HWCR, 6);
#endif
+ set_cpu_bug(c, X86_BUG_SWAPGS_FENCE);
}
static void init_amd_gh(struct cpuinfo_x86 *c)
@@ -746,7 +750,7 @@ static void init_amd(struct cpuinfo_x86 *c)
if (c->x86 >= 0xf)
set_cpu_cap(c, X86_FEATURE_K8);
- if (cpu_has_xmm2) {
+ if (cpu_has(c, X86_FEATURE_XMM2)) {
/* MFENCE stops RDTSC speculation */
set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dbc6f06..6ef6ed9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -430,7 +430,7 @@ void load_percpu_segment(int cpu)
#ifdef CONFIG_X86_32
loadsegment(fs, __KERNEL_PERCPU);
#else
- loadsegment(gs, 0);
+ __loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
#endif
load_stack_canary_segment();
@@ -866,30 +866,34 @@ static void detect_nopl(struct cpuinfo_x86 *c)
#else
set_cpu_cap(c, X86_FEATURE_NOPL);
#endif
+}
+static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
/*
- * ESPFIX is a strange bug. All real CPUs have it. Paravirt
- * systems that run Linux at CPL > 0 may or may not have the
- * issue, but, even if they have the issue, there's absolutely
- * nothing we can do about it because we can't use the real IRET
- * instruction.
+ * Empirically, writing zero to a segment selector on AMD does
+ * not clear the base, whereas writing zero to a segment
+ * selector on Intel does clear the base. Intel's behavior
+ * allows slightly faster context switches in the common case
+ * where GS is unused by the prev and next threads.
*
- * NB: For the time being, only 32-bit kernels support
- * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
- * whether to apply espfix using paravirt hooks. If any
- * non-paravirt system ever shows up that does *not* have the
- * ESPFIX issue, we can change this.
+ * Since neither vendor documents this anywhere that I can see,
+ * detect it directly instead of hardcoding the choice by
+ * vendor.
+ *
+ * I've designated AMD's behavior as the "bug" because it's
+ * counterintuitive and less friendly.
*/
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_PARAVIRT
- do {
- extern void native_iret(void);
- if (pv_cpu_ops.iret == native_iret)
- set_cpu_bug(c, X86_BUG_ESPFIX);
- } while (0);
-#else
- set_cpu_bug(c, X86_BUG_ESPFIX);
-#endif
+
+ unsigned long old_base, tmp;
+ rdmsrl(MSR_FS_BASE, old_base);
+ wrmsrl(MSR_FS_BASE, 1);
+ loadsegment(fs, 0);
+ rdmsrl(MSR_FS_BASE, tmp);
+ if (tmp != 0)
+ set_cpu_bug(c, X86_BUG_NULL_SEG);
+ wrmsrl(MSR_FS_BASE, old_base);
#endif
}
@@ -925,6 +929,33 @@ static void generic_identify(struct cpuinfo_x86 *c)
get_model_name(c); /* Default name */
detect_nopl(c);
+
+ detect_null_seg_behavior(c);
+
+ /*
+ * ESPFIX is a strange bug. All real CPUs have it. Paravirt
+ * systems that run Linux at CPL > 0 may or may not have the
+ * issue, but, even if they have the issue, there's absolutely
+ * nothing we can do about it because we can't use the real IRET
+ * instruction.
+ *
+ * NB: For the time being, only 32-bit kernels support
+ * X86_BUG_ESPFIX as such. 64-bit kernels directly choose
+ * whether to apply espfix using paravirt hooks. If any
+ * non-paravirt system ever shows up that does *not* have the
+ * ESPFIX issue, we can change this.
+ */
+#ifdef CONFIG_X86_32
+# ifdef CONFIG_PARAVIRT
+ do {
+ extern void native_iret(void);
+ if (pv_cpu_ops.iret == native_iret)
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+ } while (0);
+# else
+ set_cpu_bug(c, X86_BUG_ESPFIX);
+# endif
+#endif
}
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
@@ -1080,12 +1111,12 @@ void enable_sep_cpu(void)
struct tss_struct *tss;
int cpu;
+ if (!boot_cpu_has(X86_FEATURE_SEP))
+ return;
+
cpu = get_cpu();
tss = &per_cpu(cpu_tss, cpu);
- if (!boot_cpu_has(X86_FEATURE_SEP))
- goto out;
-
/*
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
* see the big comment in struct x86_hw_tss's definition.
@@ -1100,7 +1131,6 @@ void enable_sep_cpu(void)
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
-out:
put_cpu();
}
#endif
@@ -1532,7 +1562,7 @@ void cpu_init(void)
pr_info("Initializing CPU#%d\n", cpu);
if (cpu_feature_enabled(X86_FEATURE_VME) ||
- cpu_has_tsc ||
+ boot_cpu_has(X86_FEATURE_TSC) ||
boot_cpu_has(X86_FEATURE_DE))
cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 6adef9c..bd9dcd6 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -333,7 +333,7 @@ static void init_cyrix(struct cpuinfo_x86 *c)
switch (dir0_lsn) {
case 0xd: /* either a 486SLC or DLC w/o DEVID */
dir0_msn = 0;
- p = Cx486_name[(cpu_has_fpu ? 1 : 0)];
+ p = Cx486_name[!!boot_cpu_has(X86_FEATURE_FPU)];
break;
case 0xe: /* a 486S A step */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index e4393bf..b18f470 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -152,9 +152,9 @@ static void early_init_intel(struct cpuinfo_x86 *c)
* the TLB when any changes are made to any of the page table entries.
* The operating system must reload CR3 to cause the TLB to be flushed"
*
- * As a result cpu_has_pge() in arch/x86/include/asm/tlbflush.h should
- * be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
- * to be modified
+ * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h
+ * should be false so that __flush_tlb_all() causes CR3 insted of CR4.PGE
+ * to be modified.
*/
if (c->x86 == 5 && c->x86_model == 9) {
pr_info("Disabling PGE capability bit\n");
@@ -281,7 +281,7 @@ static void intel_workarounds(struct cpuinfo_x86 *c)
* integrated APIC (see 11AP erratum in "Pentium Processor
* Specification Update").
*/
- if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
+ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
(c->x86_mask < 0x6 || c->x86_mask == 0xb))
set_cpu_bug(c, X86_BUG_11AP);
@@ -456,7 +456,7 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
- if (cpu_has_xmm2)
+ if (cpu_has(c, X86_FEATURE_XMM2))
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
@@ -468,7 +468,7 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_PEBS);
}
- if (c->x86 == 6 && cpu_has_clflush &&
+ if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
(c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 1e8bb6c..1defb8e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -84,7 +84,7 @@ static int cmci_supported(int *banks)
*/
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has_apic || lapic_get_maxlvt() < 6)
+ if (!boot_cpu_has(X86_FEATURE_APIC) || lapic_get_maxlvt() < 6)
return 0;
rdmsrl(MSR_IA32_MCG_CAP, cap);
*banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index ac780ca..6b9dc4d 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -450,7 +450,7 @@ asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs)
/* Thermal monitoring depends on APIC, ACPI and clock modulation */
static int intel_thermal_supported(struct cpuinfo_x86 *c)
{
- if (!cpu_has_apic)
+ if (!boot_cpu_has(X86_FEATURE_APIC))
return 0;
if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
return 0;
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index f8c81ba..b1086f7 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -137,7 +137,7 @@ static void prepare_set(void)
u32 cr0;
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -170,7 +170,7 @@ static void post_set(void)
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
}
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 19f5736..16e37a2 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -444,11 +444,24 @@ static void __init print_mtrr_state(void)
pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
}
+/* PAT setup for BP. We need to go through sync steps here */
+void __init mtrr_bp_pat_init(void)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ prepare_set();
+
+ pat_init();
+
+ post_set();
+ local_irq_restore(flags);
+}
+
/* Grab all of the MTRR state for this CPU into *state */
bool __init get_mtrr_state(void)
{
struct mtrr_var_range *vrs;
- unsigned long flags;
unsigned lo, dummy;
unsigned int i;
@@ -481,15 +494,6 @@ bool __init get_mtrr_state(void)
mtrr_state_set = 1;
- /* PAT setup for BP. We need to go through sync steps here */
- local_irq_save(flags);
- prepare_set();
-
- pat_init();
-
- post_set();
- local_irq_restore(flags);
-
return !!(mtrr_state.enabled & MTRR_STATE_MTRR_ENABLED);
}
@@ -741,7 +745,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
wbinvd();
/* Save value of CR4 and clear Page Global Enable (bit 7) */
- if (cpu_has_pge) {
+ if (boot_cpu_has(X86_FEATURE_PGE)) {
cr4 = __read_cr4();
__write_cr4(cr4 & ~X86_CR4_PGE);
}
@@ -771,7 +775,7 @@ static void post_set(void) __releases(set_atomicity_lock)
write_cr0(read_cr0() & ~X86_CR0_CD);
/* Restore value of CR4 */
- if (cpu_has_pge)
+ if (boot_cpu_has(X86_FEATURE_PGE))
__write_cr4(cr4);
raw_spin_unlock(&set_atomicity_lock);
}
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 10f8d47..7d393ec 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -752,6 +752,9 @@ void __init mtrr_bp_init(void)
/* BIOS may override */
__mtrr_enabled = get_mtrr_state();
+ if (mtrr_enabled())
+ mtrr_bp_pat_init();
+
if (mtrr_cleanup(phys_addr)) {
changed_by_mtrr_cleanup = 1;
mtrr_if->set_all();
@@ -759,8 +762,16 @@ void __init mtrr_bp_init(void)
}
}
- if (!mtrr_enabled())
+ if (!mtrr_enabled()) {
pr_info("MTRR: Disabled\n");
+
+ /*
+ * PAT initialization relies on MTRR's rendezvous handler.
+ * Skip PAT init until the handler can initialize both
+ * features independently.
+ */
+ pat_disable("MTRRs disabled, skipping PAT initialization too.");
+ }
}
void mtrr_ap_init(void)
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 951884d..6c7ced0 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -52,6 +52,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt);
void fill_mtrr_var_range(unsigned int index,
u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
bool get_mtrr_state(void);
+void mtrr_bp_pat_init(void);
extern void set_mtrr_ops(const struct mtrr_ops *ops);
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 364e583..8cac429 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -94,7 +94,7 @@ static void __init vmware_platform_setup(void)
*/
static uint32_t __init vmware_platform(void)
{
- if (cpu_has_hypervisor) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
unsigned int eax;
unsigned int hyper_vendor_id[3];
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 1f4acd6..3fe45f8 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -151,7 +151,7 @@ static void __init dtb_lapic_setup(void)
return;
/* Did the boot loader setup the local APIC ? */
- if (!cpu_has_apic) {
+ if (!boot_cpu_has(X86_FEATURE_APIC)) {
if (apic_force_enable(r.start))
return;
}
diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c
index dd9ca9b6..aad34aa 100644
--- a/arch/x86/kernel/fpu/bugs.c
+++ b/arch/x86/kernel/fpu/bugs.c
@@ -21,11 +21,15 @@ static double __initdata y = 3145727.0;
* We should really only care about bugs here
* anyway. Not features.
*/
-static void __init check_fpu(void)
+void __init fpu__init_check_bugs(void)
{
u32 cr0_saved;
s32 fdiv_bug;
+ /* kernel_fpu_begin/end() relies on patched alternative instructions. */
+ if (!boot_cpu_has(X86_FEATURE_FPU))
+ return;
+
/* We might have CR0::TS set already, clear it: */
cr0_saved = read_cr0();
write_cr0(cr0_saved & ~X86_CR0_TS);
@@ -59,13 +63,3 @@ static void __init check_fpu(void)
pr_warn("Hmm, FPU with FDIV bug\n");
}
}
-
-void __init fpu__init_check_bugs(void)
-{
- /*
- * kernel_fpu_begin/end() in check_fpu() relies on the patched
- * alternative instructions.
- */
- if (cpu_has_fpu)
- check_fpu();
-}
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 8e37cc8..9702754 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -217,14 +217,14 @@ static inline void fpstate_init_fstate(struct fregs_state *fp)
void fpstate_init(union fpregs_state *state)
{
- if (!cpu_has_fpu) {
+ if (!static_cpu_has(X86_FEATURE_FPU)) {
fpstate_init_soft(&state->soft);
return;
}
memset(state, 0, xstate_size);
- if (cpu_has_fxsr)
+ if (static_cpu_has(X86_FEATURE_FXSR))
fpstate_init_fxstate(&state->fxsave);
else
fpstate_init_fstate(&state->fsave);
@@ -237,7 +237,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
dst_fpu->fpregs_active = 0;
dst_fpu->last_cpu = -1;
- if (!src_fpu->fpstate_active || !cpu_has_fpu)
+ if (!src_fpu->fpstate_active || !static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -506,33 +506,6 @@ void fpu__clear(struct fpu *fpu)
* x87 math exception handling:
*/
-static inline unsigned short get_fpu_cwd(struct fpu *fpu)
-{
- if (cpu_has_fxsr) {
- return fpu->state.fxsave.cwd;
- } else {
- return (unsigned short)fpu->state.fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct fpu *fpu)
-{
- if (cpu_has_fxsr) {
- return fpu->state.fxsave.swd;
- } else {
- return (unsigned short)fpu->state.fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct fpu *fpu)
-{
- if (cpu_has_xmm) {
- return fpu->state.fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
int fpu__exception_code(struct fpu *fpu, int trap_nr)
{
int err;
@@ -547,10 +520,15 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
* so if this combination doesn't produce any single exception,
* then we have a bad program that isn't synchronizing its FPU usage
* and it will suffer the consequences since we won't be able to
- * fully reproduce the context of the exception
+ * fully reproduce the context of the exception.
*/
- cwd = get_fpu_cwd(fpu);
- swd = get_fpu_swd(fpu);
+ if (boot_cpu_has(X86_FEATURE_FXSR)) {
+ cwd = fpu->state.fxsave.cwd;
+ swd = fpu->state.fxsave.swd;
+ } else {
+ cwd = (unsigned short)fpu->state.fsave.cwd;
+ swd = (unsigned short)fpu->state.fsave.swd;
+ }
err = swd & ~cwd;
} else {
@@ -560,7 +538,11 @@ int fpu__exception_code(struct fpu *fpu, int trap_nr)
* unmasked exception was caught we must mask the exception mask bits
* at 0x1f80, and then use these to mask the exception bits at 0x3f.
*/
- unsigned short mxcsr = get_fpu_mxcsr(fpu);
+ unsigned short mxcsr = MXCSR_DEFAULT;
+
+ if (boot_cpu_has(X86_FEATURE_XMM))
+ mxcsr = fpu->state.fxsave.mxcsr;
+
err = ~(mxcsr >> 7) & mxcsr;
}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 54c86ff..aacfd7a 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -29,22 +29,22 @@ static void fpu__init_cpu_generic(void)
unsigned long cr0;
unsigned long cr4_mask = 0;
- if (cpu_has_fxsr)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
cr4_mask |= X86_CR4_OSFXSR;
- if (cpu_has_xmm)
+ if (boot_cpu_has(X86_FEATURE_XMM))
cr4_mask |= X86_CR4_OSXMMEXCPT;
if (cr4_mask)
cr4_set_bits(cr4_mask);
cr0 = read_cr0();
cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
- if (!cpu_has_fpu)
+ if (!boot_cpu_has(X86_FEATURE_FPU))
cr0 |= X86_CR0_EM;
write_cr0(cr0);
/* Flush out any pending x87 state: */
#ifdef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu)
+ if (!boot_cpu_has(X86_FEATURE_FPU))
fpstate_init_soft(&current->thread.fpu.state.soft);
else
#endif
@@ -89,7 +89,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
}
#ifndef CONFIG_MATH_EMULATION
- if (!cpu_has_fpu) {
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
for (;;)
asm volatile("hlt");
@@ -106,7 +106,7 @@ static void __init fpu__init_system_mxcsr(void)
{
unsigned int mask = 0;
- if (cpu_has_fxsr) {
+ if (boot_cpu_has(X86_FEATURE_FXSR)) {
/* Static because GCC does not get 16-byte stack alignment right: */
static struct fxregs_state fxregs __initdata;
@@ -212,7 +212,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
* fpu__init_system_xstate().
*/
- if (!cpu_has_fpu) {
+ if (!boot_cpu_has(X86_FEATURE_FPU)) {
/*
* Disable xsave as we do not support it if i387
* emulation is enabled.
@@ -221,7 +221,7 @@ static void __init fpu__init_system_xstate_size_legacy(void)
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
xstate_size = sizeof(struct swregs_state);
} else {
- if (cpu_has_fxsr)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
xstate_size = sizeof(struct fxregs_state);
else
xstate_size = sizeof(struct fregs_state);
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index 8bd1c00..81422df 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -21,7 +21,10 @@ int regset_xregset_fpregs_active(struct task_struct *target, const struct user_r
{
struct fpu *target_fpu = &target->thread.fpu;
- return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0;
+ if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->fpstate_active)
+ return regset->n;
+ else
+ return 0;
}
int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
@@ -30,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
{
struct fpu *fpu = &target->thread.fpu;
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
fpu__activate_fpstate_read(fpu);
@@ -47,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
struct fpu *fpu = &target->thread.fpu;
int ret;
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return -ENODEV;
fpu__activate_fpstate_write(fpu);
@@ -65,7 +68,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
* update the header bits in the xsave header, indicating the
* presence of FP and SSE state.
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
return ret;
@@ -79,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
struct xregs_state *xsave;
int ret;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
fpu__activate_fpstate_read(fpu);
@@ -108,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
struct xregs_state *xsave;
int ret;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return -ENODEV;
fpu__activate_fpstate_write(fpu);
@@ -275,10 +278,10 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset,
fpu__activate_fpstate_read(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
+ if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
@@ -306,10 +309,10 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
fpu__activate_fpstate_write(fpu);
fpstate_sanitize_xstate(fpu);
- if (!static_cpu_has(X86_FEATURE_FPU))
+ if (!boot_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf);
- if (!cpu_has_fxsr)
+ if (!boot_cpu_has(X86_FEATURE_FXSR))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&fpu->state.fsave, 0,
-1);
@@ -325,7 +328,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
* update the header bit in the xsave header, indicating the
* presence of FP.
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FP;
return ret;
}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index b48ef35..4ea2a59 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -190,7 +190,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu)
*/
void fpu__init_cpu_xstate(void)
{
- if (!cpu_has_xsave || !xfeatures_mask)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask)
return;
cr4_set_bits(X86_CR4_OSXSAVE);
@@ -280,7 +280,7 @@ static void __init setup_xstate_comp(void)
xstate_comp_offsets[0] = 0;
xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
- if (!cpu_has_xsaves) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (xfeature_enabled(i)) {
xstate_comp_offsets[i] = xstate_offsets[i];
@@ -316,13 +316,13 @@ static void __init setup_init_fpu_buf(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- if (!cpu_has_xsave)
+ if (!boot_cpu_has(X86_FEATURE_XSAVE))
return;
setup_xstate_features();
print_xstate_features();
- if (cpu_has_xsaves) {
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask;
init_fpstate.xsave.header.xfeatures = xfeatures_mask;
}
@@ -417,7 +417,7 @@ static int xfeature_size(int xfeature_nr)
*/
static int using_compacted_format(void)
{
- return cpu_has_xsaves;
+ return boot_cpu_has(X86_FEATURE_XSAVES);
}
static void __xstate_dump_leaves(void)
@@ -549,7 +549,7 @@ static unsigned int __init calculate_xstate_size(void)
unsigned int eax, ebx, ecx, edx;
unsigned int calculated_xstate_size;
- if (!cpu_has_xsaves) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
/*
* - CPUID function 0DH, sub-function 0:
* EBX enumerates the size (in bytes) required by
@@ -630,7 +630,7 @@ void __init fpu__init_system_xstate(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
- if (!cpu_has_xsave) {
+ if (!boot_cpu_has(X86_FEATURE_XSAVE)) {
pr_info("x86/fpu: Legacy x87 FPU detected.\n");
return;
}
@@ -667,7 +667,7 @@ void __init fpu__init_system_xstate(void)
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
xfeatures_mask,
xstate_size,
- cpu_has_xsaves ? "compacted" : "standard");
+ boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard");
}
/*
@@ -678,7 +678,7 @@ void fpu__resume_cpu(void)
/*
* Restore XCR0 on xsave capable CPUs:
*/
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index af11129..6f8902b 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -555,62 +555,53 @@ early_idt_handler_common:
*/
cld
- cmpl $2,(%esp) # X86_TRAP_NMI
- je .Lis_nmi # Ignore NMI
-
- cmpl $2,%ss:early_recursion_flag
- je hlt_loop
incl %ss:early_recursion_flag
- push %eax # 16(%esp)
- push %ecx # 12(%esp)
- push %edx # 8(%esp)
- push %ds # 4(%esp)
- push %es # 0(%esp)
- movl $(__KERNEL_DS),%eax
- movl %eax,%ds
- movl %eax,%es
-
- cmpl $(__KERNEL_CS),32(%esp)
- jne 10f
+ /* The vector number is in pt_regs->gs */
- leal 28(%esp),%eax # Pointer to %eip
- call early_fixup_exception
- andl %eax,%eax
- jnz ex_entry /* found an exception entry */
-
-10:
-#ifdef CONFIG_PRINTK
- xorl %eax,%eax
- movw %ax,2(%esp) /* clean up the segment values on some cpus */
- movw %ax,6(%esp)
- movw %ax,34(%esp)
- leal 40(%esp),%eax
- pushl %eax /* %esp before the exception */
- pushl %ebx
- pushl %ebp
- pushl %esi
- pushl %edi
- movl %cr2,%eax
- pushl %eax
- pushl (20+6*4)(%esp) /* trapno */
- pushl $fault_msg
- call printk
-#endif
- call dump_stack
-hlt_loop:
- hlt
- jmp hlt_loop
-
-ex_entry:
- pop %es
- pop %ds
- pop %edx
- pop %ecx
- pop %eax
- decl %ss:early_recursion_flag
-.Lis_nmi:
- addl $8,%esp /* drop vector number and error code */
+ cld
+ pushl %fs /* pt_regs->fs */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %es /* pt_regs->es */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %ds /* pt_regs->ds */
+ movw $0, 2(%esp) /* clear high bits (some CPUs leave garbage) */
+ pushl %eax /* pt_regs->ax */
+ pushl %ebp /* pt_regs->bp */
+ pushl %edi /* pt_regs->di */
+ pushl %esi /* pt_regs->si */
+ pushl %edx /* pt_regs->dx */
+ pushl %ecx /* pt_regs->cx */
+ pushl %ebx /* pt_regs->bx */
+
+ /* Fix up DS and ES */
+ movl $(__KERNEL_DS), %ecx
+ movl %ecx, %ds
+ movl %ecx, %es
+
+ /* Load the vector number into EDX */
+ movl PT_GS(%esp), %edx
+
+ /* Load GS into pt_regs->gs and clear high bits */
+ movw %gs, PT_GS(%esp)
+ movw $0, PT_GS+2(%esp)
+
+ movl %esp, %eax /* args are pt_regs (EAX), trapnr (EDX) */
+ call early_fixup_exception
+
+ popl %ebx /* pt_regs->bx */
+ popl %ecx /* pt_regs->cx */
+ popl %edx /* pt_regs->dx */
+ popl %esi /* pt_regs->si */
+ popl %edi /* pt_regs->di */
+ popl %ebp /* pt_regs->bp */
+ popl %eax /* pt_regs->ax */
+ popl %ds /* pt_regs->ds */
+ popl %es /* pt_regs->es */
+ popl %fs /* pt_regs->fs */
+ popl %gs /* pt_regs->gs */
+ decl %ss:early_recursion_flag
+ addl $4, %esp /* pop pt_regs->orig_ax */
iret
ENDPROC(early_idt_handler_common)
@@ -647,10 +638,14 @@ ignore_int:
popl %eax
#endif
iret
+
+hlt_loop:
+ hlt
+ jmp hlt_loop
ENDPROC(ignore_int)
__INITDATA
.align 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
.long 0
__REFDATA
@@ -715,19 +710,6 @@ __INITRODATA
int_msg:
.asciz "Unknown interrupt or fault at: %p %p %p\n"
-fault_msg:
-/* fault info: */
- .ascii "BUG: Int %d: CR2 %p\n"
-/* regs pushed in early_idt_handler: */
- .ascii " EDI %p ESI %p EBP %p EBX %p\n"
- .ascii " ESP %p ES %p DS %p\n"
- .ascii " EDX %p ECX %p EAX %p\n"
-/* fault frame: */
- .ascii " vec %p err %p EIP %p CS %p flg %p\n"
- .ascii "Stack: %p %p %p %p %p %p %p %p\n"
- .ascii " %p %p %p %p %p %p %p %p\n"
- .asciz " %p %p %p %p %p %p %p %p\n"
-
#include "../../x86/xen/xen-head.S"
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 22fbf9d..5df831e 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -20,6 +20,7 @@
#include <asm/processor-flags.h>
#include <asm/percpu.h>
#include <asm/nops.h>
+#include "../entry/calling.h"
#ifdef CONFIG_PARAVIRT
#include <asm/asm-offsets.h>
@@ -64,6 +65,14 @@ startup_64:
* tables and then reload them.
*/
+ /*
+ * Setup stack for verify_cpu(). "-8" because stack_start is defined
+ * this way, see below. Our best guess is a NULL ptr for stack
+ * termination heuristics and we don't want to break anything which
+ * might depend on it (kgdb, ...).
+ */
+ leaq (__end_init_task - 8)(%rip), %rsp
+
/* Sanitize CPU configuration */
call verify_cpu
@@ -350,90 +359,48 @@ early_idt_handler_common:
*/
cld
- cmpl $2,(%rsp) # X86_TRAP_NMI
- je .Lis_nmi # Ignore NMI
-
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
incl early_recursion_flag(%rip)
- pushq %rax # 64(%rsp)
- pushq %rcx # 56(%rsp)
- pushq %rdx # 48(%rsp)
- pushq %rsi # 40(%rsp)
- pushq %rdi # 32(%rsp)
- pushq %r8 # 24(%rsp)
- pushq %r9 # 16(%rsp)
- pushq %r10 # 8(%rsp)
- pushq %r11 # 0(%rsp)
-
- cmpl $__KERNEL_CS,96(%rsp)
- jne 11f
-
- cmpl $14,72(%rsp) # Page fault?
+ /* The vector number is currently in the pt_regs->di slot. */
+ pushq %rsi /* pt_regs->si */
+ movq 8(%rsp), %rsi /* RSI = vector number */
+ movq %rdi, 8(%rsp) /* pt_regs->di = RDI */
+ pushq %rdx /* pt_regs->dx */
+ pushq %rcx /* pt_regs->cx */
+ pushq %rax /* pt_regs->ax */
+ pushq %r8 /* pt_regs->r8 */
+ pushq %r9 /* pt_regs->r9 */
+ pushq %r10 /* pt_regs->r10 */
+ pushq %r11 /* pt_regs->r11 */
+ pushq %rbx /* pt_regs->bx */
+ pushq %rbp /* pt_regs->bp */
+ pushq %r12 /* pt_regs->r12 */
+ pushq %r13 /* pt_regs->r13 */
+ pushq %r14 /* pt_regs->r14 */
+ pushq %r15 /* pt_regs->r15 */
+
+ cmpq $14,%rsi /* Page fault? */
jnz 10f
- GET_CR2_INTO(%rdi) # can clobber any volatile register if pv
+ GET_CR2_INTO(%rdi) /* Can clobber any volatile register if pv */
call early_make_pgtable
andl %eax,%eax
- jz 20f # All good
+ jz 20f /* All good */
10:
- leaq 88(%rsp),%rdi # Pointer to %rip
+ movq %rsp,%rdi /* RDI = pt_regs; RSI is already trapnr */
call early_fixup_exception
- andl %eax,%eax
- jnz 20f # Found an exception entry
-
-11:
-#ifdef CONFIG_EARLY_PRINTK
- GET_CR2_INTO(%r9) # can clobber any volatile register if pv
- movl 80(%rsp),%r8d # error code
- movl 72(%rsp),%esi # vector number
- movl 96(%rsp),%edx # %cs
- movq 88(%rsp),%rcx # %rip
- xorl %eax,%eax
- leaq early_idt_msg(%rip),%rdi
- call early_printk
- cmpl $2,early_recursion_flag(%rip)
- jz 1f
- call dump_stack
-#ifdef CONFIG_KALLSYMS
- leaq early_idt_ripmsg(%rip),%rdi
- movq 40(%rsp),%rsi # %rip again
- call __print_symbol
-#endif
-#endif /* EARLY_PRINTK */
-1: hlt
- jmp 1b
-
-20: # Exception table entry found or page table generated
- popq %r11
- popq %r10
- popq %r9
- popq %r8
- popq %rdi
- popq %rsi
- popq %rdx
- popq %rcx
- popq %rax
+
+20:
decl early_recursion_flag(%rip)
-.Lis_nmi:
- addq $16,%rsp # drop vector number and error code
- INTERRUPT_RETURN
+ jmp restore_regs_and_iret
ENDPROC(early_idt_handler_common)
__INITDATA
.balign 4
-early_recursion_flag:
+GLOBAL(early_recursion_flag)
.long 0
-#ifdef CONFIG_EARLY_PRINTK
-early_idt_msg:
- .asciz "PANIC: early exception %02lx rip %lx:%lx error %lx cr2 %lx\n"
-early_idt_ripmsg:
- .asciz "RIP %s\n"
-#endif /* CONFIG_EARLY_PRINTK */
-
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
GLOBAL(name)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index a1f0e4a..7282c2e 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -773,7 +773,6 @@ static struct clocksource clocksource_hpet = {
.mask = HPET_MASK,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
.resume = hpet_resume_counter,
- .archdata = { .vclock_mode = VCLOCK_HPET },
};
static int hpet_clocksource_register(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index e565e0e..fc25f69 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -13,6 +13,7 @@
#include <linux/cpu.h>
#include <asm/kprobes.h>
#include <asm/alternative.h>
+#include <asm/text-patching.h>
#ifdef HAVE_JUMP_LABEL
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 2da6ee9..04cde52 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -45,6 +45,7 @@
#include <linux/uaccess.h>
#include <linux/memory.h>
+#include <asm/text-patching.h>
#include <asm/debugreg.h>
#include <asm/apicdef.h>
#include <asm/apic.h>
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index ae703ac..38cf7a7 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -51,6 +51,7 @@
#include <linux/ftrace.h>
#include <linux/frame.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 7b3b9d1..4425f59 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -29,6 +29,7 @@
#include <linux/kallsyms.h>
#include <linux/ftrace.h>
+#include <asm/text-patching.h>
#include <asm/cacheflush.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8079508..dc1207e 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -522,7 +522,7 @@ static noinline uint32_t __kvm_cpuid_base(void)
if (boot_cpu_data.cpuid_level < 0)
return 0; /* So we don't blow up on old processors */
- if (cpu_has_hypervisor)
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);
return 0;
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 005c03e..477ae80 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -31,6 +31,7 @@
#include <linux/jump_label.h>
#include <linux/random.h>
+#include <asm/text-patching.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index f08ac28..f958391 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -339,8 +339,10 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
.write_cr8 = native_write_cr8,
#endif
.wbinvd = native_wbinvd,
- .read_msr = native_read_msr_safe,
- .write_msr = native_write_msr_safe,
+ .read_msr = native_read_msr,
+ .write_msr = native_write_msr,
+ .read_msr_safe = native_read_msr_safe,
+ .write_msr_safe = native_write_msr_safe,
.read_pmc = native_read_pmc,
.load_tr_desc = native_load_tr_desc,
.set_ldt = native_set_ldt,
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6cbab31..6b16c36 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,25 +136,6 @@ void release_thread(struct task_struct *dead_task)
}
}
-static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
-{
- struct user_desc ud = {
- .base_addr = addr,
- .limit = 0xfffff,
- .seg_32bit = 1,
- .limit_in_pages = 1,
- .useable = 1,
- };
- struct desc_struct *desc = t->thread.tls_array;
- desc += tls;
- fill_ldt(desc, &ud);
-}
-
-static inline u32 read_32bit_tls(struct task_struct *t, int tls)
-{
- return get_desc_base(&t->thread.tls_array[tls]);
-}
-
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -169,9 +150,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
- p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
+ p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
savesegment(fs, p->thread.fsindex);
- p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
+ p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
@@ -210,7 +191,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
- if (is_ia32_task())
+ if (in_ia32_syscall())
err = do_set_thread_area(p, -1,
(struct user_desc __user *)tls, 0);
else
@@ -282,7 +263,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- unsigned fsindex, gsindex;
+ unsigned prev_fsindex, prev_gsindex;
fpu_switch_t fpu_switch;
fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -292,8 +273,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*
* (e.g. xen_load_tls())
*/
- savesegment(fs, fsindex);
- savesegment(gs, gsindex);
+ savesegment(fs, prev_fsindex);
+ savesegment(gs, prev_gsindex);
/*
* Load TLS before restoring any segments so that segment loads
@@ -336,66 +317,104 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Switch FS and GS.
*
* These are even more complicated than DS and ES: they have
- * 64-bit bases are that controlled by arch_prctl. Those bases
- * only differ from the values in the GDT or LDT if the selector
- * is 0.
- *
- * Loading the segment register resets the hidden base part of
- * the register to 0 or the value from the GDT / LDT. If the
- * next base address zero, writing 0 to the segment register is
- * much faster than using wrmsr to explicitly zero the base.
- *
- * The thread_struct.fs and thread_struct.gs values are 0
- * if the fs and gs bases respectively are not overridden
- * from the values implied by fsindex and gsindex. They
- * are nonzero, and store the nonzero base addresses, if
- * the bases are overridden.
- *
- * (fs != 0 && fsindex != 0) || (gs != 0 && gsindex != 0) should
- * be impossible.
- *
- * Therefore we need to reload the segment registers if either
- * the old or new selector is nonzero, and we need to override
- * the base address if next thread expects it to be overridden.
+ * 64-bit bases are that controlled by arch_prctl. The bases
+ * don't necessarily match the selectors, as user code can do
+ * any number of things to cause them to be inconsistent.
*
- * This code is unnecessarily slow in the case where the old and
- * new indexes are zero and the new base is nonzero -- it will
- * unnecessarily write 0 to the selector before writing the new
- * base address.
+ * We don't promise to preserve the bases if the selectors are
+ * nonzero. We also don't promise to preserve the base if the
+ * selector is zero and the base doesn't match whatever was
+ * most recently passed to ARCH_SET_FS/GS. (If/when the
+ * FSGSBASE instructions are enabled, we'll need to offer
+ * stronger guarantees.)
*
- * Note: This all depends on arch_prctl being the only way that
- * user code can override the segment base. Once wrfsbase and
- * wrgsbase are enabled, most of this code will need to change.
+ * As an invariant,
+ * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
+ * impossible.
*/
- if (unlikely(fsindex | next->fsindex | prev->fs)) {
+ if (next->fsindex) {
+ /* Loading a nonzero value into FS sets the index and base. */
loadsegment(fs, next->fsindex);
-
- /*
- * If user code wrote a nonzero value to FS, then it also
- * cleared the overridden base address.
- *
- * XXX: if user code wrote 0 to FS and cleared the base
- * address itself, we won't notice and we'll incorrectly
- * restore the prior base address next time we reschdule
- * the process.
- */
- if (fsindex)
- prev->fs = 0;
+ } else {
+ if (next->fsbase) {
+ /* Next index is zero but next base is nonzero. */
+ if (prev_fsindex)
+ loadsegment(fs, 0);
+ wrmsrl(MSR_FS_BASE, next->fsbase);
+ } else {
+ /* Next base and index are both zero. */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ /*
+ * We don't know the previous base and can't
+ * find out without RDMSR. Forcibly clear it.
+ */
+ loadsegment(fs, __USER_DS);
+ loadsegment(fs, 0);
+ } else {
+ /*
+ * If the previous index is zero and ARCH_SET_FS
+ * didn't change the base, then the base is
+ * also zero and we don't need to do anything.
+ */
+ if (prev->fsbase || prev_fsindex)
+ loadsegment(fs, 0);
+ }
+ }
}
- if (next->fs)
- wrmsrl(MSR_FS_BASE, next->fs);
- prev->fsindex = fsindex;
+ /*
+ * Save the old state and preserve the invariant.
+ * NB: if prev_fsindex == 0, then we can't reliably learn the base
+ * without RDMSR because Intel user code can zero it without telling
+ * us and AMD user code can program any 32-bit value without telling
+ * us.
+ */
+ if (prev_fsindex)
+ prev->fsbase = 0;
+ prev->fsindex = prev_fsindex;
- if (unlikely(gsindex | next->gsindex | prev->gs)) {
+ if (next->gsindex) {
+ /* Loading a nonzero value into GS sets the index and base. */
load_gs_index(next->gsindex);
-
- /* This works (and fails) the same way as fsindex above. */
- if (gsindex)
- prev->gs = 0;
+ } else {
+ if (next->gsbase) {
+ /* Next index is zero but next base is nonzero. */
+ if (prev_gsindex)
+ load_gs_index(0);
+ wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
+ } else {
+ /* Next base and index are both zero. */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ /*
+ * We don't know the previous base and can't
+ * find out without RDMSR. Forcibly clear it.
+ *
+ * This contains a pointless SWAPGS pair.
+ * Fixing it would involve an explicit check
+ * for Xen or a new pvop.
+ */
+ load_gs_index(__USER_DS);
+ load_gs_index(0);
+ } else {
+ /*
+ * If the previous index is zero and ARCH_SET_GS
+ * didn't change the base, then the base is
+ * also zero and we don't need to do anything.
+ */
+ if (prev->gsbase || prev_gsindex)
+ load_gs_index(0);
+ }
+ }
}
- if (next->gs)
- wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
- prev->gsindex = gsindex;
+ /*
+ * Save the old state and preserve the invariant.
+ * NB: if prev_gsindex == 0, then we can't reliably learn the base
+ * without RDMSR because Intel user code can zero it without telling
+ * us and AMD user code can program any 32-bit value without telling
+ * us.
+ */
+ if (prev_gsindex)
+ prev->gsbase = 0;
+ prev->gsindex = prev_gsindex;
switch_fpu_finish(next_fpu, fpu_switch);
@@ -516,23 +535,11 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, GS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- load_gs_index(GS_TLS_SEL);
- }
- task->thread.gsindex = GS_TLS_SEL;
- task->thread.gs = 0;
- } else {
- task->thread.gsindex = 0;
- task->thread.gs = addr;
- if (doit) {
- load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
- }
+ task->thread.gsindex = 0;
+ task->thread.gsbase = addr;
+ if (doit) {
+ load_gs_index(0);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
}
put_cpu();
break;
@@ -542,52 +549,30 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
if (addr >= TASK_SIZE_OF(task))
return -EPERM;
cpu = get_cpu();
- /* handle small bases via the GDT because that's faster to
- switch. */
- if (addr <= 0xffffffff) {
- set_32bit_tls(task, FS_TLS, addr);
- if (doit) {
- load_TLS(&task->thread, cpu);
- loadsegment(fs, FS_TLS_SEL);
- }
- task->thread.fsindex = FS_TLS_SEL;
- task->thread.fs = 0;
- } else {
- task->thread.fsindex = 0;
- task->thread.fs = addr;
- if (doit) {
- /* set the selector to 0 to not confuse
- __switch_to */
- loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
- }
+ task->thread.fsindex = 0;
+ task->thread.fsbase = addr;
+ if (doit) {
+ /* set the selector to 0 to not confuse __switch_to */
+ loadsegment(fs, 0);
+ ret = wrmsrl_safe(MSR_FS_BASE, addr);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
- if (task->thread.fsindex == FS_TLS_SEL)
- base = read_32bit_tls(task, FS_TLS);
- else if (doit)
+ if (doit)
rdmsrl(MSR_FS_BASE, base);
else
- base = task->thread.fs;
+ base = task->thread.fsbase;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
case ARCH_GET_GS: {
unsigned long base;
- unsigned gsindex;
- if (task->thread.gsindex == GS_TLS_SEL)
- base = read_32bit_tls(task, GS_TLS);
- else if (doit) {
- savesegment(gs, gsindex);
- if (gsindex)
- rdmsrl(MSR_KERNEL_GS_BASE, base);
- else
- base = task->thread.gs;
- } else
- base = task->thread.gs;
+ if (doit)
+ rdmsrl(MSR_KERNEL_GS_BASE, base);
+ else
+ base = task->thread.gsbase;
ret = put_user(base, (unsigned long __user *)addr);
break;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 32e9d9c..e60ef91 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -303,29 +303,11 @@ static int set_segment_reg(struct task_struct *task,
switch (offset) {
case offsetof(struct user_regs_struct,fs):
- /*
- * If this is setting fs as for normal 64-bit use but
- * setting fs_base has implicitly changed it, leave it.
- */
- if ((value == FS_TLS_SEL && task->thread.fsindex == 0 &&
- task->thread.fs != 0) ||
- (value == 0 && task->thread.fsindex == FS_TLS_SEL &&
- task->thread.fs == 0))
- break;
task->thread.fsindex = value;
if (task == current)
loadsegment(fs, task->thread.fsindex);
break;
case offsetof(struct user_regs_struct,gs):
- /*
- * If this is setting gs as for normal 64-bit use but
- * setting gs_base has implicitly changed it, leave it.
- */
- if ((value == GS_TLS_SEL && task->thread.gsindex == 0 &&
- task->thread.gs != 0) ||
- (value == 0 && task->thread.gsindex == GS_TLS_SEL &&
- task->thread.gs == 0))
- break;
task->thread.gsindex = value;
if (task == current)
load_gs_index(task->thread.gsindex);
@@ -417,7 +399,7 @@ static int putreg(struct task_struct *child,
* to set either thread.fs or thread.fsindex and the
* corresponding GDT slot.
*/
- if (child->thread.fs != value)
+ if (child->thread.fsbase != value)
return do_arch_prctl(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
@@ -426,7 +408,7 @@ static int putreg(struct task_struct *child,
*/
if (value >= TASK_SIZE_OF(child))
return -EIO;
- if (child->thread.gs != value)
+ if (child->thread.gsbase != value)
return do_arch_prctl(child, ARCH_SET_GS, value);
return 0;
#endif
@@ -453,31 +435,17 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
#ifdef CONFIG_X86_64
case offsetof(struct user_regs_struct, fs_base): {
/*
- * do_arch_prctl may have used a GDT slot instead of
- * the MSR. To userland, it appears the same either
- * way, except the %fs segment selector might not be 0.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.fsindex;
- if (task->thread.fs != 0)
- return task->thread.fs;
- if (task == current)
- asm("movl %%fs,%0" : "=r" (seg));
- if (seg != FS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[FS_TLS]);
+ return task->thread.fsbase;
}
case offsetof(struct user_regs_struct, gs_base): {
/*
- * Exactly the same here as the %fs handling above.
+ * XXX: This will not behave as expected if called on
+ * current or if fsindex != 0.
*/
- unsigned int seg = task->thread.gsindex;
- if (task->thread.gs != 0)
- return task->thread.gs;
- if (task == current)
- asm("movl %%gs,%0" : "=r" (seg));
- if (seg != GS_TLS_SEL)
- return 0;
- return get_desc_base(&task->thread.tls_array[GS_TLS]);
+ return task->thread.gsbase;
}
#endif
}
@@ -1266,7 +1234,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
compat_ulong_t caddr, compat_ulong_t cdata)
{
#ifdef CONFIG_X86_X32_ABI
- if (!is_ia32_task())
+ if (!in_ia32_syscall())
return x32_arch_ptrace(child, request, caddr, cdata);
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 3e84ef1..22cc2f9 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -390,7 +390,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
put_user_ex(&frame->uc, &frame->puc);
/* Create the ucontext. */
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
else
put_user_ex(0, &frame->uc.uc_flags);
@@ -441,7 +441,7 @@ static unsigned long frame_uc_flags(struct pt_regs *regs)
{
unsigned long flags;
- if (cpu_has_xsave)
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
else
flags = UC_SIGCONTEXT_SS;
@@ -761,7 +761,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
#ifdef CONFIG_X86_64
- if (is_ia32_task())
+ if (in_ia32_syscall())
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 0e4329e..fafe8b9 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1236,7 +1236,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
* If we couldn't find a local APIC, then get out of here now!
*/
if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) &&
- !cpu_has_apic) {
+ !boot_cpu_has(X86_FEATURE_APIC)) {
if (!disable_apic) {
pr_err("BIOS bug, local APIC #%d not detected!...\n",
boot_cpu_physical_apicid);
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
index ab40954..f386bad 100644
--- a/arch/x86/kernel/tce_64.c
+++ b/arch/x86/kernel/tce_64.c
@@ -40,7 +40,7 @@
static inline void flush_tce(void* tceaddr)
{
/* a single tce can't cross a cache line */
- if (cpu_has_clflush)
+ if (boot_cpu_has(X86_FEATURE_CLFLUSH))
clflush(tceaddr);
else
wbinvd();
diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 7fc5e84..9692a5e 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c
@@ -114,6 +114,7 @@ int do_set_thread_area(struct task_struct *p, int idx,
int can_allocate)
{
struct user_desc info;
+ unsigned short __maybe_unused sel, modified_sel;
if (copy_from_user(&info, u_info, sizeof(info)))
return -EFAULT;
@@ -141,6 +142,47 @@ int do_set_thread_area(struct task_struct *p, int idx,
set_tls_desc(p, idx, &info, 1);
+ /*
+ * If DS, ES, FS, or GS points to the modified segment, forcibly
+ * refresh it. Only needed on x86_64 because x86_32 reloads them
+ * on return to user mode.
+ */
+ modified_sel = (idx << 3) | 3;
+
+ if (p == current) {
+#ifdef CONFIG_X86_64
+ savesegment(ds, sel);
+ if (sel == modified_sel)
+ loadsegment(ds, sel);
+
+ savesegment(es, sel);
+ if (sel == modified_sel)
+ loadsegment(es, sel);
+
+ savesegment(fs, sel);
+ if (sel == modified_sel)
+ loadsegment(fs, sel);
+
+ savesegment(gs, sel);
+ if (sel == modified_sel)
+ load_gs_index(sel);
+#endif
+
+#ifdef CONFIG_X86_32_LAZY_GS
+ savesegment(gs, sel);
+ if (sel == modified_sel)
+ loadsegment(gs, sel);
+#endif
+ } else {
+#ifdef CONFIG_X86_64
+ if (p->thread.fsindex == modified_sel)
+ p->thread.fsbase = info.base_addr;
+
+ if (p->thread.gsindex == modified_sel)
+ p->thread.gsbase = info.base_addr;
+#endif
+ }
+
return 0;
}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 06cbe25..d159048 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -51,6 +51,7 @@
#include <asm/processor.h>
#include <asm/debugreg.h>
#include <linux/atomic.h>
+#include <asm/text-patching.h>
#include <asm/ftrace.h>
#include <asm/traps.h>
#include <asm/desc.h>
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index c9c4c7c..38ba6de 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -36,7 +36,7 @@ static int __read_mostly tsc_unstable;
/* native_sched_clock() is called before tsc_init(), so
we must start with the TSC soft disabled to prevent
- erroneous rdtsc usage on !cpu_has_tsc processors */
+ erroneous rdtsc usage on !boot_cpu_has(X86_FEATURE_TSC) processors */
static int __read_mostly tsc_disabled = -1;
static DEFINE_STATIC_KEY_FALSE(__use_tsc);
@@ -834,15 +834,15 @@ int recalibrate_cpu_khz(void)
#ifndef CONFIG_SMP
unsigned long cpu_khz_old = cpu_khz;
- if (cpu_has_tsc) {
- tsc_khz = x86_platform.calibrate_tsc();
- cpu_khz = tsc_khz;
- cpu_data(0).loops_per_jiffy =
- cpufreq_scale(cpu_data(0).loops_per_jiffy,
- cpu_khz_old, cpu_khz);
- return 0;
- } else
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return -ENODEV;
+
+ tsc_khz = x86_platform.calibrate_tsc();
+ cpu_khz = tsc_khz;
+ cpu_data(0).loops_per_jiffy = cpufreq_scale(cpu_data(0).loops_per_jiffy,
+ cpu_khz_old, cpu_khz);
+
+ return 0;
#else
return -ENODEV;
#endif
@@ -922,9 +922,6 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
struct cpufreq_freqs *freq = data;
unsigned long *lpj;
- if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
lpj = &boot_cpu_data.loops_per_jiffy;
#ifdef CONFIG_SMP
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -954,9 +951,9 @@ static struct notifier_block time_cpufreq_notifier_block = {
.notifier_call = time_cpufreq_notifier
};
-static int __init cpufreq_tsc(void)
+static int __init cpufreq_register_tsc_scaling(void)
{
- if (!cpu_has_tsc)
+ if (!boot_cpu_has(X86_FEATURE_TSC))
return 0;
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
return 0;
@@ -965,7 +962,7 @@ static int __init cpufreq_tsc(void)
return 0;
}
-core_initcall(cpufreq_tsc);
+core_initcall(cpufreq_register_tsc_scaling);
#endif /* CONFIG_CPU_FREQ */
@@ -1081,7 +1078,7 @@ static void __init check_system_tsc_reliable(void)
*/
int unsynchronized_tsc(void)
{
- if (!cpu_has_tsc || tsc_unstable)
+ if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_unstable)
return 1;
#ifdef CONFIG_SMP
@@ -1205,7 +1202,7 @@ out:
static int __init init_tsc_clocksource(void)
{
- if (!cpu_has_tsc || tsc_disabled > 0 || !tsc_khz)
+ if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0;
if (tsc_clocksource_reliable)
@@ -1242,7 +1239,7 @@ void __init tsc_init(void)
u64 lpj;
int cpu;
- if (!cpu_has_tsc) {
+ if (!boot_cpu_has(X86_FEATURE_TSC)) {
setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
return;
}
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index bd07415..6c1ff31 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -516,7 +516,7 @@ struct uprobe_xol_ops {
static inline int sizeof_long(void)
{
- return is_ia32_task() ? 4 : 8;
+ return in_ia32_syscall() ? 4 : 8;
}
static int default_pre_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)