summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-06-23 00:16:11 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2015-06-23 00:16:11 (GMT)
commite75c73ad64478c12b3a44b86a3e7f62a4f65b93e (patch)
tree9dbb1a2a4e53b480df86c49d478751b203cdccd4 /arch/x86/include
parentcfe3eceb7a2eb91284d5605c5315249bb165e9d3 (diff)
parenta8424003679e90b9952e20adcd1ff1560d9dd3e9 (diff)
downloadlinux-e75c73ad64478c12b3a44b86a3e7f62a4f65b93e.tar.xz
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FPU updates from Ingo Molnar: "This tree contains two main changes: - The big FPU code rewrite: wide reaching cleanups and reorganization that pulls all the FPU code together into a clean base in arch/x86/fpu/. The resulting code is leaner and faster, and much easier to understand. This enables future work to further simplify the FPU code (such as removing lazy FPU restores). By its nature these changes have a substantial regression risk: FPU code related bugs are long lived, because races are often subtle and bugs mask as user-space failures that are difficult to track back to kernel side backs. I'm aware of no unfixed (or even suspected) FPU related regression so far. - MPX support rework/fixes. As this is still not a released CPU feature, there were some buglets in the code - should be much more robust now (Dave Hansen)" * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (250 commits) x86/fpu: Fix double-increment in setup_xstate_features() x86/mpx: Allow 32-bit binaries on 64-bit kernels again x86/mpx: Do not count MPX VMAs as neighbors when unmapping x86/mpx: Rewrite the unmap code x86/mpx: Support 32-bit binaries on 64-bit kernels x86/mpx: Use 32-bit-only cmpxchg() for 32-bit apps x86/mpx: Introduce new 'directory entry' to 'addr' helper function x86/mpx: Add temporary variable to reduce masking x86: Make is_64bit_mm() widely available x86/mpx: Trace allocation of new bounds tables x86/mpx: Trace the attempts to find bounds tables x86/mpx: Trace entry to bounds exception paths x86/mpx: Trace #BR exceptions x86/mpx: Introduce a boot-time disable flag x86/mpx: Restrict the mmap() size check to bounds tables x86/mpx: Remove redundant MPX_BNDCFG_ADDR_MASK x86/mpx: Clean up the code by not passing a task pointer around when unnecessary x86/mpx: Use the new get_xsave_field_ptr()API x86/fpu/xstate: Wrap get_xsave_addr() to make it safer x86/fpu/xstate: Fix up bad get_xsave_addr() assumptions ...
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/crypto/glue_helper.h2
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/fpu-internal.h626
-rw-r--r--arch/x86/include/asm/fpu/api.h48
-rw-r--r--arch/x86/include/asm/fpu/internal.h694
-rw-r--r--arch/x86/include/asm/fpu/regset.h21
-rw-r--r--arch/x86/include/asm/fpu/signal.h33
-rw-r--r--arch/x86/include/asm/fpu/types.h293
-rw-r--r--arch/x86/include/asm/fpu/xstate.h46
-rw-r--r--arch/x86/include/asm/i387.h108
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h13
-rw-r--r--arch/x86/include/asm/mpx.h74
-rw-r--r--arch/x86/include/asm/processor.h160
-rw-r--r--arch/x86/include/asm/simd.h2
-rw-r--r--arch/x86/include/asm/stackprotector.h2
-rw-r--r--arch/x86/include/asm/suspend_32.h2
-rw-r--r--arch/x86/include/asm/suspend_64.h2
-rw-r--r--arch/x86/include/asm/trace/mpx.h132
-rw-r--r--arch/x86/include/asm/user.h12
-rw-r--r--arch/x86/include/asm/xcr.h49
-rw-r--r--arch/x86/include/asm/xor.h2
-rw-r--r--arch/x86/include/asm/xor_32.h2
-rw-r--r--arch/x86/include/asm/xor_avx.h2
-rw-r--r--arch/x86/include/asm/xsave.h257
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h8
27 files changed, 1357 insertions, 1243 deletions
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ba32af0..7bfc85b 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -52,6 +52,12 @@ struct alt_instr {
u8 padlen; /* length of build-time padding */
} __packed;
+/*
+ * Debug flag that can be tested to see whether alternative
+ * instructions were patched in already:
+ */
+extern int alternatives_patched;
+
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index 1eef555..03bb106 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -7,7 +7,7 @@
#include <linux/kernel.h>
#include <linux/crypto.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <crypto/b128ops.h>
typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src);
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 3738b13..155162e 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_EFI_H
#define _ASM_X86_EFI_H
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#include <asm/pgtable.h>
/*
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
deleted file mode 100644
index da5e967..0000000
--- a/arch/x86/include/asm/fpu-internal.h
+++ /dev/null
@@ -1,626 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _FPU_INTERNAL_H
-#define _FPU_INTERNAL_H
-
-#include <linux/kernel_stat.h>
-#include <linux/regset.h>
-#include <linux/compat.h>
-#include <linux/slab.h>
-#include <asm/asm.h>
-#include <asm/cpufeature.h>
-#include <asm/processor.h>
-#include <asm/sigcontext.h>
-#include <asm/user.h>
-#include <asm/uaccess.h>
-#include <asm/xsave.h>
-#include <asm/smap.h>
-
-#ifdef CONFIG_X86_64
-# include <asm/sigcontext32.h>
-# include <asm/user32.h>
-struct ksignal;
-int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
-int ia32_setup_frame(int sig, struct ksignal *ksig,
- compat_sigset_t *set, struct pt_regs *regs);
-#else
-# define user_i387_ia32_struct user_i387_struct
-# define user32_fxsr_struct user_fxsr_struct
-# define ia32_setup_frame __setup_frame
-# define ia32_setup_rt_frame __setup_rt_frame
-#endif
-
-extern unsigned int mxcsr_feature_mask;
-extern void fpu_init(void);
-extern void eager_fpu_init(void);
-
-DECLARE_PER_CPU(struct task_struct *, fpu_owner_task);
-
-extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
- struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
- const struct user_i387_ia32_struct *env);
-
-extern user_regset_active_fn fpregs_active, xfpregs_active;
-extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
- xstateregs_get;
-extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
- xstateregs_set;
-
-/*
- * xstateregs_active == fpregs_active. Please refer to the comment
- * at the definition of fpregs_active.
- */
-#define xstateregs_active fpregs_active
-
-#ifdef CONFIG_MATH_EMULATION
-extern void finit_soft_fpu(struct i387_soft_struct *soft);
-#else
-static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
-#endif
-
-/*
- * Must be run with preemption disabled: this clears the fpu_owner_task,
- * on this CPU.
- *
- * This will disable any lazy FPU state restore of the current FPU state,
- * but if the current thread owns the FPU, it will still be saved by.
- */
-static inline void __cpu_disable_lazy_restore(unsigned int cpu)
-{
- per_cpu(fpu_owner_task, cpu) = NULL;
-}
-
-/*
- * Used to indicate that the FPU state in memory is newer than the FPU
- * state in registers, and the FPU state should be reloaded next time the
- * task is run. Only safe on the current task, or non-running tasks.
- */
-static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
-{
- tsk->thread.fpu.last_cpu = ~0;
-}
-
-static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
-{
- return new == this_cpu_read_stable(fpu_owner_task) &&
- cpu == new->thread.fpu.last_cpu;
-}
-
-static inline int is_ia32_compat_frame(void)
-{
- return config_enabled(CONFIG_IA32_EMULATION) &&
- test_thread_flag(TIF_IA32);
-}
-
-static inline int is_ia32_frame(void)
-{
- return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame();
-}
-
-static inline int is_x32_frame(void)
-{
- return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32);
-}
-
-#define X87_FSW_ES (1 << 7) /* Exception Summary */
-
-static __always_inline __pure bool use_eager_fpu(void)
-{
- return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
-}
-
-static __always_inline __pure bool use_xsaveopt(void)
-{
- return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
-}
-
-static __always_inline __pure bool use_xsave(void)
-{
- return static_cpu_has_safe(X86_FEATURE_XSAVE);
-}
-
-static __always_inline __pure bool use_fxsr(void)
-{
- return static_cpu_has_safe(X86_FEATURE_FXSR);
-}
-
-static inline void fx_finit(struct i387_fxsave_struct *fx)
-{
- fx->cwd = 0x37f;
- fx->mxcsr = MXCSR_DEFAULT;
-}
-
-extern void __sanitize_i387_state(struct task_struct *);
-
-static inline void sanitize_i387_state(struct task_struct *tsk)
-{
- if (!use_xsaveopt())
- return;
- __sanitize_i387_state(tsk);
-}
-
-#define user_insn(insn, output, input...) \
-({ \
- int err; \
- asm volatile(ASM_STAC "\n" \
- "1:" #insn "\n\t" \
- "2: " ASM_CLAC "\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-#define check_insn(insn, output, input...) \
-({ \
- int err; \
- asm volatile("1:" #insn "\n\t" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err), output \
- : "0"(0), input); \
- err; \
-})
-
-static inline int fsave_user(struct i387_fsave_struct __user *fx)
-{
- return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
-}
-
-static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
-{
- if (config_enabled(CONFIG_X86_32))
- return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
-
- /* See comment in fpu_fxsave() below. */
- return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
-}
-
-static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
-{
- if (config_enabled(CONFIG_X86_32))
- return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in fpu_fxsave() below. */
- return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
-}
-
-static inline int fxrstor_user(struct i387_fxsave_struct __user *fx)
-{
- if (config_enabled(CONFIG_X86_32))
- return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
-
- /* See comment in fpu_fxsave() below. */
- return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
- "m" (*fx));
-}
-
-static inline int frstor_checking(struct i387_fsave_struct *fx)
-{
- return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline int frstor_user(struct i387_fsave_struct __user *fx)
-{
- return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
-}
-
-static inline void fpu_fxsave(struct fpu *fpu)
-{
- if (config_enabled(CONFIG_X86_32))
- asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave));
- else if (config_enabled(CONFIG_AS_FXSAVEQ))
- asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave));
- else {
- /* Using "rex64; fxsave %0" is broken because, if the memory
- * operand uses any extended registers for addressing, a second
- * REX prefix will be generated (to the assembler, rex64
- * followed by semicolon is a separate instruction), and hence
- * the 64-bitness is lost.
- *
- * Using "fxsaveq %0" would be the ideal choice, but is only
- * supported starting with gas 2.16.
- *
- * Using, as a workaround, the properly prefixed form below
- * isn't accepted by any binutils version so far released,
- * complaining that the same type of prefix is used twice if
- * an extended register is needed for addressing (fix submitted
- * to mainline 2005-11-21).
- *
- * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave));
- *
- * This, however, we can work around by forcing the compiler to
- * select an addressing mode that doesn't require extended
- * registers.
- */
- asm volatile( "rex64/fxsave (%[fx])"
- : "=m" (fpu->state->fxsave)
- : [fx] "R" (&fpu->state->fxsave));
- }
-}
-
-/*
- * These must be called with preempt disabled. Returns
- * 'true' if the FPU state is still intact.
- */
-static inline int fpu_save_init(struct fpu *fpu)
-{
- if (use_xsave()) {
- fpu_xsave(fpu);
-
- /*
- * xsave header may indicate the init state of the FP.
- */
- if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP))
- return 1;
- } else if (use_fxsr()) {
- fpu_fxsave(fpu);
- } else {
- asm volatile("fnsave %[fx]; fwait"
- : [fx] "=m" (fpu->state->fsave));
- return 0;
- }
-
- /*
- * If exceptions are pending, we need to clear them so
- * that we don't randomly get exceptions later.
- *
- * FIXME! Is this perhaps only true for the old-style
- * irq13 case? Maybe we could leave the x87 state
- * intact otherwise?
- */
- if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) {
- asm volatile("fnclex");
- return 0;
- }
- return 1;
-}
-
-static inline int __save_init_fpu(struct task_struct *tsk)
-{
- return fpu_save_init(&tsk->thread.fpu);
-}
-
-static inline int fpu_restore_checking(struct fpu *fpu)
-{
- if (use_xsave())
- return fpu_xrstor_checking(&fpu->state->xsave);
- else if (use_fxsr())
- return fxrstor_checking(&fpu->state->fxsave);
- else
- return frstor_checking(&fpu->state->fsave);
-}
-
-static inline int restore_fpu_checking(struct task_struct *tsk)
-{
- /*
- * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
- * pending. Clear the x87 state here by setting it to fixed values.
- * "m" is a random variable that should be in L1.
- */
- if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
- asm volatile(
- "fnclex\n\t"
- "emms\n\t"
- "fildl %P[addr]" /* set F?P to defined value */
- : : [addr] "m" (tsk->thread.fpu.has_fpu));
- }
-
- return fpu_restore_checking(&tsk->thread.fpu);
-}
-
-/*
- * Software FPU state helpers. Careful: these need to
- * be preemption protection *and* they need to be
- * properly paired with the CR0.TS changes!
- */
-static inline int __thread_has_fpu(struct task_struct *tsk)
-{
- return tsk->thread.fpu.has_fpu;
-}
-
-/* Must be paired with an 'stts' after! */
-static inline void __thread_clear_has_fpu(struct task_struct *tsk)
-{
- tsk->thread.fpu.has_fpu = 0;
- this_cpu_write(fpu_owner_task, NULL);
-}
-
-/* Must be paired with a 'clts' before! */
-static inline void __thread_set_has_fpu(struct task_struct *tsk)
-{
- tsk->thread.fpu.has_fpu = 1;
- this_cpu_write(fpu_owner_task, tsk);
-}
-
-/*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
- * These generally need preemption protection to work,
- * do try to avoid using these on their own.
- */
-static inline void __thread_fpu_end(struct task_struct *tsk)
-{
- __thread_clear_has_fpu(tsk);
- if (!use_eager_fpu())
- stts();
-}
-
-static inline void __thread_fpu_begin(struct task_struct *tsk)
-{
- if (!use_eager_fpu())
- clts();
- __thread_set_has_fpu(tsk);
-}
-
-static inline void drop_fpu(struct task_struct *tsk)
-{
- /*
- * Forget coprocessor state..
- */
- preempt_disable();
- tsk->thread.fpu_counter = 0;
-
- if (__thread_has_fpu(tsk)) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- __thread_fpu_end(tsk);
- }
-
- clear_stopped_child_used_math(tsk);
- preempt_enable();
-}
-
-static inline void restore_init_xstate(void)
-{
- if (use_xsave())
- xrstor_state(init_xstate_buf, -1);
- else
- fxrstor_checking(&init_xstate_buf->i387);
-}
-
-/*
- * Reset the FPU state in the eager case and drop it in the lazy case (later use
- * will reinit it).
- */
-static inline void fpu_reset_state(struct task_struct *tsk)
-{
- if (!use_eager_fpu())
- drop_fpu(tsk);
- else
- restore_init_xstate();
-}
-
-/*
- * FPU state switching for scheduling.
- *
- * This is a two-stage process:
- *
- * - switch_fpu_prepare() saves the old state and
- * sets the new state of the CR0.TS bit. This is
- * done within the context of the old process.
- *
- * - switch_fpu_finish() restores the new state as
- * necessary.
- */
-typedef struct { int preload; } fpu_switch_t;
-
-static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
-{
- fpu_switch_t fpu;
-
- /*
- * If the task has used the math, pre-load the FPU on xsave processors
- * or if the past 5 consecutive context-switches used math.
- */
- fpu.preload = tsk_used_math(new) &&
- (use_eager_fpu() || new->thread.fpu_counter > 5);
-
- if (__thread_has_fpu(old)) {
- if (!__save_init_fpu(old))
- task_disable_lazy_fpu_restore(old);
- else
- old->thread.fpu.last_cpu = cpu;
-
- /* But leave fpu_owner_task! */
- old->thread.fpu.has_fpu = 0;
-
- /* Don't change CR0.TS if we just switch! */
- if (fpu.preload) {
- new->thread.fpu_counter++;
- __thread_set_has_fpu(new);
- prefetch(new->thread.fpu.state);
- } else if (!use_eager_fpu())
- stts();
- } else {
- old->thread.fpu_counter = 0;
- task_disable_lazy_fpu_restore(old);
- if (fpu.preload) {
- new->thread.fpu_counter++;
- if (fpu_lazy_restore(new, cpu))
- fpu.preload = 0;
- else
- prefetch(new->thread.fpu.state);
- __thread_fpu_begin(new);
- }
- }
- return fpu;
-}
-
-/*
- * By the time this gets called, we've already cleared CR0.TS and
- * given the process the FPU if we are going to preload the FPU
- * state - all we need to do is to conditionally restore the register
- * state itself.
- */
-static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
-{
- if (fpu.preload) {
- if (unlikely(restore_fpu_checking(new)))
- fpu_reset_state(new);
- }
-}
-
-/*
- * Signal frame handlers...
- */
-extern int save_xstate_sig(void __user *buf, void __user *fx, int size);
-extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size);
-
-static inline int xstate_sigframe_size(void)
-{
- return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size;
-}
-
-static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
-{
- void __user *buf_fx = buf;
- int size = xstate_sigframe_size();
-
- if (ia32_frame && use_fxsr()) {
- buf_fx = buf + sizeof(struct i387_fsave_struct);
- size += sizeof(struct i387_fsave_struct);
- }
-
- return __restore_xstate_sig(buf, buf_fx, size);
-}
-
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
- preempt_disable();
- if (!user_has_fpu())
- __thread_fpu_begin(current);
- preempt_enable();
-}
-
-static inline void __save_fpu(struct task_struct *tsk)
-{
- if (use_xsave()) {
- if (unlikely(system_state == SYSTEM_BOOTING))
- xsave_state_booting(&tsk->thread.fpu.state->xsave, -1);
- else
- xsave_state(&tsk->thread.fpu.state->xsave, -1);
- } else
- fpu_fxsave(&tsk->thread.fpu);
-}
-
-/*
- * i387 state interaction
- */
-static inline unsigned short get_fpu_cwd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.cwd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.cwd;
- }
-}
-
-static inline unsigned short get_fpu_swd(struct task_struct *tsk)
-{
- if (cpu_has_fxsr) {
- return tsk->thread.fpu.state->fxsave.swd;
- } else {
- return (unsigned short)tsk->thread.fpu.state->fsave.swd;
- }
-}
-
-static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk)
-{
- if (cpu_has_xmm) {
- return tsk->thread.fpu.state->fxsave.mxcsr;
- } else {
- return MXCSR_DEFAULT;
- }
-}
-
-static bool fpu_allocated(struct fpu *fpu)
-{
- return fpu->state != NULL;
-}
-
-static inline int fpu_alloc(struct fpu *fpu)
-{
- if (fpu_allocated(fpu))
- return 0;
- fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL);
- if (!fpu->state)
- return -ENOMEM;
- WARN_ON((unsigned long)fpu->state & 15);
- return 0;
-}
-
-static inline void fpu_free(struct fpu *fpu)
-{
- if (fpu->state) {
- kmem_cache_free(task_xstate_cachep, fpu->state);
- fpu->state = NULL;
- }
-}
-
-static inline void fpu_copy(struct task_struct *dst, struct task_struct *src)
-{
- if (use_eager_fpu()) {
- memset(&dst->thread.fpu.state->xsave, 0, xstate_size);
- __save_fpu(dst);
- } else {
- struct fpu *dfpu = &dst->thread.fpu;
- struct fpu *sfpu = &src->thread.fpu;
-
- unlazy_fpu(src);
- memcpy(dfpu->state, sfpu->state, xstate_size);
- }
-}
-
-static inline unsigned long
-alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx,
- unsigned long *size)
-{
- unsigned long frame_size = xstate_sigframe_size();
-
- *buf_fx = sp = round_down(sp - frame_size, 64);
- if (ia32_frame && use_fxsr()) {
- frame_size += sizeof(struct i387_fsave_struct);
- sp -= sizeof(struct i387_fsave_struct);
- }
-
- *size = frame_size;
- return sp;
-}
-
-#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
new file mode 100644
index 0000000..1429a7c
--- /dev/null
+++ b/arch/x86/include/asm/fpu/api.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_FPU_API_H
+#define _ASM_X86_FPU_API_H
+
+/*
+ * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
+ * and they don't touch the preempt state on their own.
+ * If you enable preemption after __kernel_fpu_begin(), preempt notifier
+ * should call the __kernel_fpu_end() to prevent the kernel/user FPU
+ * state from getting corrupted. KVM for example uses this model.
+ *
+ * All other cases use kernel_fpu_begin/end() which disable preemption
+ * during kernel FPU usage.
+ */
+extern void __kernel_fpu_begin(void);
+extern void __kernel_fpu_end(void);
+extern void kernel_fpu_begin(void);
+extern void kernel_fpu_end(void);
+extern bool irq_fpu_usable(void);
+
+/*
+ * Some instructions like VIA's padlock instructions generate a spurious
+ * DNA fault but don't modify SSE registers. And these instructions
+ * get used from interrupt context as well. To prevent these kernel instructions
+ * in interrupt context interacting wrongly with other user/kernel fpu usage, we
+ * should use them only in the context of irq_ts_save/restore()
+ */
+extern int irq_ts_save(void);
+extern void irq_ts_restore(int TS_state);
+
+/*
+ * Query the presence of one or more xfeatures. Works on any legacy CPU as well.
+ *
+ * If 'feature_name' is set then put a human-readable description of
+ * the feature there as well - this can be used to print error (or success)
+ * messages.
+ */
+extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name);
+
+#endif /* _ASM_X86_FPU_API_H */
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
new file mode 100644
index 0000000..3c3550c
--- /dev/null
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -0,0 +1,694 @@
+/*
+ * Copyright (C) 1994 Linus Torvalds
+ *
+ * Pentium III FXSR, SSE support
+ * General FPU state handling cleanups
+ * Gareth Hughes <gareth@valinux.com>, May 2000
+ * x86-64 work by Andi Kleen 2002
+ */
+
+#ifndef _ASM_X86_FPU_INTERNAL_H
+#define _ASM_X86_FPU_INTERNAL_H
+
+#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/user.h>
+#include <asm/fpu/api.h>
+#include <asm/fpu/xstate.h>
+
+/*
+ * High level FPU state handling functions:
+ */
+extern void fpu__activate_curr(struct fpu *fpu);
+extern void fpu__activate_fpstate_read(struct fpu *fpu);
+extern void fpu__activate_fpstate_write(struct fpu *fpu);
+extern void fpu__save(struct fpu *fpu);
+extern void fpu__restore(struct fpu *fpu);
+extern int fpu__restore_sig(void __user *buf, int ia32_frame);
+extern void fpu__drop(struct fpu *fpu);
+extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
+extern void fpu__clear(struct fpu *fpu);
+extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
+extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
+
+/*
+ * Boot time FPU initialization functions:
+ */
+extern void fpu__init_cpu(void);
+extern void fpu__init_system_xstate(void);
+extern void fpu__init_cpu_xstate(void);
+extern void fpu__init_system(struct cpuinfo_x86 *c);
+extern void fpu__init_check_bugs(void);
+extern void fpu__resume_cpu(void);
+
+/*
+ * Debugging facility:
+ */
+#ifdef CONFIG_X86_DEBUG_FPU
+# define WARN_ON_FPU(x) WARN_ON_ONCE(x)
+#else
+# define WARN_ON_FPU(x) ({ (void)(x); 0; })
+#endif
+
+/*
+ * FPU related CPU feature flag helper routines:
+ */
+static __always_inline __pure bool use_eager_fpu(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_EAGER_FPU);
+}
+
+static __always_inline __pure bool use_xsaveopt(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_XSAVEOPT);
+}
+
+static __always_inline __pure bool use_xsave(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_XSAVE);
+}
+
+static __always_inline __pure bool use_fxsr(void)
+{
+ return static_cpu_has_safe(X86_FEATURE_FXSR);
+}
+
+/*
+ * fpstate handling functions:
+ */
+
+extern union fpregs_state init_fpstate;
+
+extern void fpstate_init(union fpregs_state *state);
+#ifdef CONFIG_MATH_EMULATION
+extern void fpstate_init_soft(struct swregs_state *soft);
+#else
+static inline void fpstate_init_soft(struct swregs_state *soft) {}
+#endif
+static inline void fpstate_init_fxstate(struct fxregs_state *fx)
+{
+ fx->cwd = 0x37f;
+ fx->mxcsr = MXCSR_DEFAULT;
+}
+extern void fpstate_sanitize_xstate(struct fpu *fpu);
+
+#define user_insn(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile(ASM_STAC "\n" \
+ "1:" #insn "\n\t" \
+ "2: " ASM_CLAC "\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
+#define check_insn(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
+static inline int copy_fregs_to_user(struct fregs_state __user *fx)
+{
+ return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx));
+}
+
+static inline int copy_fxregs_to_user(struct fxregs_state __user *fx)
+{
+ if (config_enabled(CONFIG_X86_32))
+ return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx));
+
+ /* See comment in copy_fxregs_to_kernel() below. */
+ return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx));
+}
+
+static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
+{
+ int err;
+
+ if (config_enabled(CONFIG_X86_32)) {
+ err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ } else {
+ if (config_enabled(CONFIG_AS_FXSAVEQ)) {
+ err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+ } else {
+ /* See comment in copy_fxregs_to_kernel() below. */
+ err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx));
+ }
+ }
+ /* Copying from a kernel buffer to FPU registers should never fail: */
+ WARN_ON_FPU(err);
+}
+
+static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
+{
+ if (config_enabled(CONFIG_X86_32))
+ return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+
+ /* See comment in copy_fxregs_to_kernel() below. */
+ return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx),
+ "m" (*fx));
+}
+
+static inline void copy_kernel_to_fregs(struct fregs_state *fx)
+{
+ int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+
+ WARN_ON_FPU(err);
+}
+
+static inline int copy_user_to_fregs(struct fregs_state __user *fx)
+{
+ return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
+static inline void copy_fxregs_to_kernel(struct fpu *fpu)
+{
+ if (config_enabled(CONFIG_X86_32))
+ asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave));
+ else if (config_enabled(CONFIG_AS_FXSAVEQ))
+ asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave));
+ else {
+ /* Using "rex64; fxsave %0" is broken because, if the memory
+ * operand uses any extended registers for addressing, a second
+ * REX prefix will be generated (to the assembler, rex64
+ * followed by semicolon is a separate instruction), and hence
+ * the 64-bitness is lost.
+ *
+ * Using "fxsaveq %0" would be the ideal choice, but is only
+ * supported starting with gas 2.16.
+ *
+ * Using, as a workaround, the properly prefixed form below
+ * isn't accepted by any binutils version so far released,
+ * complaining that the same type of prefix is used twice if
+ * an extended register is needed for addressing (fix submitted
+ * to mainline 2005-11-21).
+ *
+ * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave));
+ *
+ * This, however, we can work around by forcing the compiler to
+ * select an addressing mode that doesn't require extended
+ * registers.
+ */
+ asm volatile( "rex64/fxsave (%[fx])"
+ : "=m" (fpu->state.fxsave)
+ : [fx] "R" (&fpu->state.fxsave));
+ }
+}
+
+/* These macros all use (%edi)/(%rdi) as the single memory argument. */
+#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
+#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
+#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
+#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
+#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
+
+/* xstate instruction fault handler: */
+#define xstate_fault(__err) \
+ \
+ ".section .fixup,\"ax\"\n" \
+ \
+ "3: movl $-2,%[_err]\n" \
+ " jmp 2b\n" \
+ \
+ ".previous\n" \
+ \
+ _ASM_EXTABLE(1b, 3b) \
+ : [_err] "=r" (__err)
+
+/*
+ * This function is called only during boot time when x86 caps are not set
+ * up and alternative can not be used yet.
+ */
+static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
+{
+ u64 mask = -1;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ asm volatile("1:"XSAVES"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+ else
+ asm volatile("1:"XSAVE"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+
+ /* We should never fault when copying to a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * This function is called only during boot time when x86 caps are not set
+ * up and alternative can not be used yet.
+ */
+static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
+{
+ u64 mask = -1;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(system_state != SYSTEM_BOOTING);
+
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
+ asm volatile("1:"XRSTORS"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+ else
+ asm volatile("1:"XRSTOR"\n\t"
+ "2:\n\t"
+ xstate_fault(err)
+ : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory");
+
+ /* We should never fault when copying from a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Save processor xstate to xsave area.
+ */
+static inline void copy_xregs_to_kernel(struct xregs_state *xstate)
+{
+ u64 mask = -1;
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ WARN_ON(!alternatives_patched);
+
+ /*
+ * If xsaves is enabled, xsaves replaces xsaveopt because
+ * it supports compact format and supervisor states in addition to
+ * modified optimization in xsaveopt.
+ *
+ * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
+ * because xsaveopt supports modified optimization which is not
+ * supported by xsave.
+ *
+ * If none of xsaves and xsaveopt is enabled, use xsave.
+ */
+ alternative_input_2(
+ "1:"XSAVE,
+ XSAVEOPT,
+ X86_FEATURE_XSAVEOPT,
+ XSAVES,
+ X86_FEATURE_XSAVES,
+ [xstate] "D" (xstate), "a" (lmask), "d" (hmask) :
+ "memory");
+ asm volatile("2:\n\t"
+ xstate_fault(err)
+ : "0" (err)
+ : "memory");
+
+ /* We should never fault when copying to a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Restore processor xstate from xsave area.
+ */
+static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ /*
+ * Use xrstors to restore context if it is enabled. xrstors supports
+ * compacted format of xsave area which is not supported by xrstor.
+ */
+ alternative_input(
+ "1: " XRSTOR,
+ XRSTORS,
+ X86_FEATURE_XSAVES,
+ "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask)
+ : "memory");
+
+ asm volatile("2:\n"
+ xstate_fault(err)
+ : "0" (err)
+ : "memory");
+
+ /* We should never fault when copying from a kernel buffer: */
+ WARN_ON_FPU(err);
+}
+
+/*
+ * Save xstate to user space xsave area.
+ *
+ * We don't use modified optimization because xrstor/xrstors might track
+ * a different application.
+ *
+ * We don't use compacted format xsave area for
+ * backward compatibility for old applications which don't understand
+ * compacted format of xsave area.
+ */
+static inline int copy_xregs_to_user(struct xregs_state __user *buf)
+{
+ int err;
+
+ /*
+ * Clear the xsave header first, so that reserved fields are
+ * initialized to zero.
+ */
+ err = __clear_user(&buf->header, sizeof(buf->header));
+ if (unlikely(err))
+ return -EFAULT;
+
+ __asm__ __volatile__(ASM_STAC "\n"
+ "1:"XSAVE"\n"
+ "2: " ASM_CLAC "\n"
+ xstate_fault(err)
+ : "D" (buf), "a" (-1), "d" (-1), "0" (err)
+ : "memory");
+ return err;
+}
+
+/*
+ * Restore xstate from user space xsave area.
+ */
+static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
+{
+ struct xregs_state *xstate = ((__force struct xregs_state *)buf);
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err = 0;
+
+ __asm__ __volatile__(ASM_STAC "\n"
+ "1:"XRSTOR"\n"
+ "2: " ASM_CLAC "\n"
+ xstate_fault(err)
+ : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err)
+ : "memory"); /* memory required? */
+ return err;
+}
+
+/*
+ * These must be called with preempt disabled. Returns
+ * 'true' if the FPU state is still intact and we can
+ * keep registers active.
+ *
+ * The legacy FNSAVE instruction cleared all FPU state
+ * unconditionally, so registers are essentially destroyed.
+ * Modern FPU state can be kept in registers, if there are
+ * no pending FP exceptions.
+ */
+static inline int copy_fpregs_to_fpstate(struct fpu *fpu)
+{
+ if (likely(use_xsave())) {
+ copy_xregs_to_kernel(&fpu->state.xsave);
+ return 1;
+ }
+
+ if (likely(use_fxsr())) {
+ copy_fxregs_to_kernel(fpu);
+ return 1;
+ }
+
+ /*
+ * Legacy FPU register saving, FNSAVE always clears FPU registers,
+ * so we have to mark them inactive:
+ */
+ asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave));
+
+ return 0;
+}
+
+static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate)
+{
+ if (use_xsave()) {
+ copy_kernel_to_xregs(&fpstate->xsave, -1);
+ } else {
+ if (use_fxsr())
+ copy_kernel_to_fxregs(&fpstate->fxsave);
+ else
+ copy_kernel_to_fregs(&fpstate->fsave);
+ }
+}
+
+static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate)
+{
+ /*
+ * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is
+ * pending. Clear the x87 state here by setting it to fixed values.
+ * "m" is a random variable that should be in L1.
+ */
+ if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) {
+ asm volatile(
+ "fnclex\n\t"
+ "emms\n\t"
+ "fildl %P[addr]" /* set F?P to defined value */
+ : : [addr] "m" (fpstate));
+ }
+
+ __copy_kernel_to_fpregs(fpstate);
+}
+
+extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size);
+
+/*
+ * FPU context switch related helper methods:
+ */
+
+DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
+
+/*
+ * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx,
+ * on this CPU.
+ *
+ * This will disable any lazy FPU state restore of the current FPU state,
+ * but if the current thread owns the FPU, it will still be saved by.
+ */
+static inline void __cpu_disable_lazy_restore(unsigned int cpu)
+{
+ per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
+}
+
+static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
+{
+ return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu;
+}
+
+
+/*
+ * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
+ * idiom, which is then paired with the sw-flag (fpregs_active) later on:
+ */
+
+static inline void __fpregs_activate_hw(void)
+{
+ if (!use_eager_fpu())
+ clts();
+}
+
+static inline void __fpregs_deactivate_hw(void)
+{
+ if (!use_eager_fpu())
+ stts();
+}
+
+/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
+static inline void __fpregs_deactivate(struct fpu *fpu)
+{
+ WARN_ON_FPU(!fpu->fpregs_active);
+
+ fpu->fpregs_active = 0;
+ this_cpu_write(fpu_fpregs_owner_ctx, NULL);
+}
+
+/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
+static inline void __fpregs_activate(struct fpu *fpu)
+{
+ WARN_ON_FPU(fpu->fpregs_active);
+
+ fpu->fpregs_active = 1;
+ this_cpu_write(fpu_fpregs_owner_ctx, fpu);
+}
+
+/*
+ * The question "does this thread have fpu access?"
+ * is slightly racy, since preemption could come in
+ * and revoke it immediately after the test.
+ *
+ * However, even in that very unlikely scenario,
+ * we can just assume we have FPU access - typically
+ * to save the FP state - we'll just take a #NM
+ * fault and get the FPU access back.
+ */
+static inline int fpregs_active(void)
+{
+ return current->thread.fpu.fpregs_active;
+}
+
+/*
+ * Encapsulate the CR0.TS handling together with the
+ * software flag.
+ *
+ * These generally need preemption protection to work,
+ * do try to avoid using these on their own.
+ */
+static inline void fpregs_activate(struct fpu *fpu)
+{
+ __fpregs_activate_hw();
+ __fpregs_activate(fpu);
+}
+
+static inline void fpregs_deactivate(struct fpu *fpu)
+{
+ __fpregs_deactivate(fpu);
+ __fpregs_deactivate_hw();
+}
+
+/*
+ * FPU state switching for scheduling.
+ *
+ * This is a two-stage process:
+ *
+ * - switch_fpu_prepare() saves the old state and
+ * sets the new state of the CR0.TS bit. This is
+ * done within the context of the old process.
+ *
+ * - switch_fpu_finish() restores the new state as
+ * necessary.
+ */
+typedef struct { int preload; } fpu_switch_t;
+
+static inline fpu_switch_t
+switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
+{
+ fpu_switch_t fpu;
+
+ /*
+ * If the task has used the math, pre-load the FPU on xsave processors
+ * or if the past 5 consecutive context-switches used math.
+ */
+ fpu.preload = new_fpu->fpstate_active &&
+ (use_eager_fpu() || new_fpu->counter > 5);
+
+ if (old_fpu->fpregs_active) {
+ if (!copy_fpregs_to_fpstate(old_fpu))
+ old_fpu->last_cpu = -1;
+ else
+ old_fpu->last_cpu = cpu;
+
+ /* But leave fpu_fpregs_owner_ctx! */
+ old_fpu->fpregs_active = 0;
+
+ /* Don't change CR0.TS if we just switch! */
+ if (fpu.preload) {
+ new_fpu->counter++;
+ __fpregs_activate(new_fpu);
+ prefetch(&new_fpu->state);
+ } else {
+ __fpregs_deactivate_hw();
+ }
+ } else {
+ old_fpu->counter = 0;
+ old_fpu->last_cpu = -1;
+ if (fpu.preload) {
+ new_fpu->counter++;
+ if (fpu_want_lazy_restore(new_fpu, cpu))
+ fpu.preload = 0;
+ else
+ prefetch(&new_fpu->state);
+ fpregs_activate(new_fpu);
+ }
+ }
+ return fpu;
+}
+
+/*
+ * Misc helper functions:
+ */
+
+/*
+ * By the time this gets called, we've already cleared CR0.TS and
+ * given the process the FPU if we are going to preload the FPU
+ * state - all we need to do is to conditionally restore the register
+ * state itself.
+ */
+static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch)
+{
+ if (fpu_switch.preload)
+ copy_kernel_to_fpregs(&new_fpu->state);
+}
+
+/*
+ * Needs to be preemption-safe.
+ *
+ * NOTE! user_fpu_begin() must be used only immediately before restoring
+ * the save state. It does not do any saving/restoring on its own. In
+ * lazy FPU mode, it is just an optimization to avoid a #NM exception,
+ * the task can lose the FPU right after preempt_enable().
+ */
+static inline void user_fpu_begin(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ preempt_disable();
+ if (!fpregs_active())
+ fpregs_activate(fpu);
+ preempt_enable();
+}
+
+/*
+ * MXCSR and XCR definitions:
+ */
+
+extern unsigned int mxcsr_feature_mask;
+
+#define XCR_XFEATURE_ENABLED_MASK 0x00000000
+
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
+ : : "a" (eax), "d" (edx), "c" (index));
+}
+
+#endif /* _ASM_X86_FPU_INTERNAL_H */
diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h
new file mode 100644
index 0000000..39d3107
--- /dev/null
+++ b/arch/x86/include/asm/fpu/regset.h
@@ -0,0 +1,21 @@
+/*
+ * FPU regset handling methods:
+ */
+#ifndef _ASM_X86_FPU_REGSET_H
+#define _ASM_X86_FPU_REGSET_H
+
+#include <linux/regset.h>
+
+extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active;
+extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get,
+ xstateregs_get;
+extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set,
+ xstateregs_set;
+
+/*
+ * xstateregs_active == regset_fpregs_active. Please refer to the comment
+ * at the definition of regset_fpregs_active.
+ */
+#define xstateregs_active regset_fpregs_active
+
+#endif /* _ASM_X86_FPU_REGSET_H */
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
new file mode 100644
index 0000000..7358e9d
--- /dev/null
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -0,0 +1,33 @@
+/*
+ * x86 FPU signal frame handling methods:
+ */
+#ifndef _ASM_X86_FPU_SIGNAL_H
+#define _ASM_X86_FPU_SIGNAL_H
+
+#ifdef CONFIG_X86_64
+# include <asm/sigcontext32.h>
+# include <asm/user32.h>
+struct ksignal;
+int ia32_setup_rt_frame(int sig, struct ksignal *ksig,
+ compat_sigset_t *set, struct pt_regs *regs);
+int ia32_setup_frame(int sig, struct ksignal *ksig,
+ compat_sigset_t *set, struct pt_regs *regs);
+#else
+# define user_i387_ia32_struct user_i387_struct
+# define user32_fxsr_struct user_fxsr_struct
+# define ia32_setup_frame __setup_frame
+# define ia32_setup_rt_frame __setup_rt_frame
+#endif
+
+extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
+ struct task_struct *tsk);
+extern void convert_to_fxsr(struct task_struct *tsk,
+ const struct user_i387_ia32_struct *env);
+
+unsigned long
+fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
+ unsigned long *buf_fx, unsigned long *size);
+
+extern void fpu__init_prepare_fx_sw_frame(void);
+
+#endif /* _ASM_X86_FPU_SIGNAL_H */
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
new file mode 100644
index 0000000..0637826
--- /dev/null
+++ b/arch/x86/include/asm/fpu/types.h
@@ -0,0 +1,293 @@
+/*
+ * FPU data structures:
+ */
+#ifndef _ASM_X86_FPU_H
+#define _ASM_X86_FPU_H
+
+/*
+ * The legacy x87 FPU state format, as saved by FSAVE and
+ * restored by the FRSTOR instructions:
+ */
+struct fregs_state {
+ u32 cwd; /* FPU Control Word */
+ u32 swd; /* FPU Status Word */
+ u32 twd; /* FPU Tag Word */
+ u32 fip; /* FPU IP Offset */
+ u32 fcs; /* FPU IP Selector */
+ u32 foo; /* FPU Operand Pointer Offset */
+ u32 fos; /* FPU Operand Pointer Selector */
+
+ /* 8*10 bytes for each FP-reg = 80 bytes: */
+ u32 st_space[20];
+
+ /* Software status information [not touched by FSAVE]: */
+ u32 status;
+};
+
+/*
+ * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and
+ * restored by the FXRSTOR instructions. It's similar to the FSAVE
+ * format, but differs in some areas, plus has extensions at
+ * the end for the XMM registers.
+ */
+struct fxregs_state {
+ u16 cwd; /* Control Word */
+ u16 swd; /* Status Word */
+ u16 twd; /* Tag Word */
+ u16 fop; /* Last Instruction Opcode */
+ union {
+ struct {
+ u64 rip; /* Instruction Pointer */
+ u64 rdp; /* Data Pointer */
+ };
+ struct {
+ u32 fip; /* FPU IP Offset */
+ u32 fcs; /* FPU IP Selector */
+ u32 foo; /* FPU Operand Offset */
+ u32 fos; /* FPU Operand Selector */
+ };
+ };
+ u32 mxcsr; /* MXCSR Register State */
+ u32 mxcsr_mask; /* MXCSR Mask */
+
+ /* 8*16 bytes for each FP-reg = 128 bytes: */
+ u32 st_space[32];
+
+ /* 16*16 bytes for each XMM-reg = 256 bytes: */
+ u32 xmm_space[64];
+
+ u32 padding[12];
+
+ union {
+ u32 padding1[12];
+ u32 sw_reserved[12];
+ };
+
+} __attribute__((aligned(16)));
+
+/* Default value for fxregs_state.mxcsr: */
+#define MXCSR_DEFAULT 0x1f80
+
+/*
+ * Software based FPU emulation state. This is arbitrary really,
+ * it matches the x87 format to make it easier to understand:
+ */
+struct swregs_state {
+ u32 cwd;
+ u32 swd;
+ u32 twd;
+ u32 fip;
+ u32 fcs;
+ u32 foo;
+ u32 fos;
+ /* 8*10 bytes for each FP-reg = 80 bytes: */
+ u32 st_space[20];
+ u8 ftop;
+ u8 changed;
+ u8 lookahead;
+ u8 no_update;
+ u8 rm;
+ u8 alimit;
+ struct math_emu_info *info;
+ u32 entry_eip;
+};
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature_bit {
+ XSTATE_BIT_FP,
+ XSTATE_BIT_SSE,
+ XSTATE_BIT_YMM,
+ XSTATE_BIT_BNDREGS,
+ XSTATE_BIT_BNDCSR,
+ XSTATE_BIT_OPMASK,
+ XSTATE_BIT_ZMM_Hi256,
+ XSTATE_BIT_Hi16_ZMM,
+
+ XFEATURES_NR_MAX,
+};
+
+#define XSTATE_FP (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
+
+#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+
+/*
+ * There are 16x 256-bit AVX registers named YMM0-YMM15.
+ * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
+ * and are stored in 'struct fxregs_state::xmm_space[]'.
+ *
+ * The high 128 bits are stored here:
+ * 16x 128 bits == 256 bytes.
+ */
+struct ymmh_struct {
+ u8 ymmh_space[256];
+};
+
+/* We don't support LWP yet: */
+struct lwp_struct {
+ u8 reserved[128];
+};
+
+/* Intel MPX support: */
+struct bndreg {
+ u64 lower_bound;
+ u64 upper_bound;
+} __packed;
+
+struct bndcsr {
+ u64 bndcfgu;
+ u64 bndstatus;
+} __packed;
+
+struct mpx_struct {
+ struct bndreg bndreg[4];
+ struct bndcsr bndcsr;
+};
+
+struct xstate_header {
+ u64 xfeatures;
+ u64 xcomp_bv;
+ u64 reserved[6];
+} __attribute__((packed));
+
+/* New processor state extensions should be added here: */
+#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
+ sizeof(struct lwp_struct) + \
+ sizeof(struct mpx_struct) )
+/*
+ * This is our most modern FPU state format, as saved by the XSAVE
+ * and restored by the XRSTOR instructions.
+ *
+ * It consists of a legacy fxregs portion, an xstate header and
+ * subsequent fixed size areas as defined by the xstate header.
+ * Not all CPUs support all the extensions.
+ */
+struct xregs_state {
+ struct fxregs_state i387;
+ struct xstate_header header;
+ u8 __reserved[XSTATE_RESERVE];
+} __attribute__ ((packed, aligned (64)));
+
+/*
+ * This is a union of all the possible FPU state formats
+ * put together, so that we can pick the right one runtime.
+ *
+ * The size of the structure is determined by the largest
+ * member - which is the xsave area:
+ */
+union fpregs_state {
+ struct fregs_state fsave;
+ struct fxregs_state fxsave;
+ struct swregs_state soft;
+ struct xregs_state xsave;
+};
+
+/*
+ * Highest level per task FPU state data structure that
+ * contains the FPU register state plus various FPU
+ * state fields:
+ */
+struct fpu {
+ /*
+ * @state:
+ *
+ * In-memory copy of all FPU registers that we save/restore
+ * over context switches. If the task is using the FPU then
+ * the registers in the FPU are more recent than this state
+ * copy. If the task context-switches away then they get
+ * saved here and represent the FPU state.
+ *
+ * After context switches there may be a (short) time period
+ * during which the in-FPU hardware registers are unchanged
+ * and still perfectly match this state, if the tasks
+ * scheduled afterwards are not using the FPU.
+ *
+ * This is the 'lazy restore' window of optimization, which
+ * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'.
+ *
+ * We detect whether a subsequent task uses the FPU via setting
+ * CR0::TS to 1, which causes any FPU use to raise a #NM fault.
+ *
+ * During this window, if the task gets scheduled again, we
+ * might be able to skip having to do a restore from this
+ * memory buffer to the hardware registers - at the cost of
+ * incurring the overhead of #NM fault traps.
+ *
+ * Note that on modern CPUs that support the XSAVEOPT (or other
+ * optimized XSAVE instructions), we don't use #NM traps anymore,
+ * as the hardware can track whether FPU registers need saving
+ * or not. On such CPUs we activate the non-lazy ('eagerfpu')
+ * logic, which unconditionally saves/restores all FPU state
+ * across context switches. (if FPU state exists.)
+ */
+ union fpregs_state state;
+
+ /*
+ * @last_cpu:
+ *
+ * Records the last CPU on which this context was loaded into
+ * FPU registers. (In the lazy-restore case we might be
+ * able to reuse FPU registers across multiple context switches
+ * this way, if no intermediate task used the FPU.)
+ *
+ * A value of -1 is used to indicate that the FPU state in context
+ * memory is newer than the FPU state in registers, and that the
+ * FPU state should be reloaded next time the task is run.
+ */
+ unsigned int last_cpu;
+
+ /*
+ * @fpstate_active:
+ *
+ * This flag indicates whether this context is active: if the task
+ * is not running then we can restore from this context, if the task
+ * is running then we should save into this context.
+ */
+ unsigned char fpstate_active;
+
+ /*
+ * @fpregs_active:
+ *
+ * This flag determines whether a given context is actively
+ * loaded into the FPU's registers and that those registers
+ * represent the task's current FPU state.
+ *
+ * Note the interaction with fpstate_active:
+ *
+ * # task does not use the FPU:
+ * fpstate_active == 0
+ *
+ * # task uses the FPU and regs are active:
+ * fpstate_active == 1 && fpregs_active == 1
+ *
+ * # the regs are inactive but still match fpstate:
+ * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu
+ *
+ * The third state is what we use for the lazy restore optimization
+ * on lazy-switching CPUs.
+ */
+ unsigned char fpregs_active;
+
+ /*
+ * @counter:
+ *
+ * This counter contains the number of consecutive context switches
+ * during which the FPU stays used. If this is over a threshold, the
+ * lazy FPU restore logic becomes eager, to save the trap overhead.
+ * This is an unsigned char so that after 256 iterations the counter
+ * wraps and the context switch behavior turns lazy again; this is to
+ * deal with bursty apps that only use the FPU for a short time:
+ */
+ unsigned char counter;
+};
+
+#endif /* _ASM_X86_FPU_H */
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
new file mode 100644
index 0000000..4656b25
--- /dev/null
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -0,0 +1,46 @@
+#ifndef __ASM_X86_XSAVE_H
+#define __ASM_X86_XSAVE_H
+
+#include <linux/types.h>
+#include <asm/processor.h>
+#include <linux/uaccess.h>
+
+/* Bit 63 of XCR0 is reserved for future expansion */
+#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
+
+#define XSTATE_CPUID 0x0000000d
+
+#define FXSAVE_SIZE 512
+
+#define XSAVE_HDR_SIZE 64
+#define XSAVE_HDR_OFFSET FXSAVE_SIZE
+
+#define XSAVE_YMM_SIZE 256
+#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
+
+/* Supported features which support lazy state saving */
+#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
+ | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+
+/* Supported features which require eager state saving */
+#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
+
+/* All currently supported features */
+#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
+
+#ifdef CONFIG_X86_64
+#define REX_PREFIX "0x48, "
+#else
+#define REX_PREFIX
+#endif
+
+extern unsigned int xstate_size;
+extern u64 xfeatures_mask;
+extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
+
+extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+
+void *get_xsave_addr(struct xregs_state *xsave, int xstate);
+const void *get_xsave_field_ptr(int xstate_field);
+
+#endif
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h
deleted file mode 100644
index 6eb6fcb..0000000
--- a/arch/x86/include/asm/i387.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (C) 1994 Linus Torvalds
- *
- * Pentium III FXSR, SSE support
- * General FPU state handling cleanups
- * Gareth Hughes <gareth@valinux.com>, May 2000
- * x86-64 work by Andi Kleen 2002
- */
-
-#ifndef _ASM_X86_I387_H
-#define _ASM_X86_I387_H
-
-#ifndef __ASSEMBLY__
-
-#include <linux/sched.h>
-#include <linux/hardirq.h>
-
-struct pt_regs;
-struct user_i387_struct;
-
-extern int init_fpu(struct task_struct *child);
-extern void fpu_finit(struct fpu *fpu);
-extern int dump_fpu(struct pt_regs *, struct user_i387_struct *);
-extern void math_state_restore(void);
-
-extern bool irq_fpu_usable(void);
-
-/*
- * Careful: __kernel_fpu_begin/end() must be called with preempt disabled
- * and they don't touch the preempt state on their own.
- * If you enable preemption after __kernel_fpu_begin(), preempt notifier
- * should call the __kernel_fpu_end() to prevent the kernel/user FPU
- * state from getting corrupted. KVM for example uses this model.
- *
- * All other cases use kernel_fpu_begin/end() which disable preemption
- * during kernel FPU usage.
- */
-extern void __kernel_fpu_begin(void);
-extern void __kernel_fpu_end(void);
-
-static inline void kernel_fpu_begin(void)
-{
- preempt_disable();
- WARN_ON_ONCE(!irq_fpu_usable());
- __kernel_fpu_begin();
-}
-
-static inline void kernel_fpu_end(void)
-{
- __kernel_fpu_end();
- preempt_enable();
-}
-
-/* Must be called with preempt disabled */
-extern void kernel_fpu_disable(void);
-extern void kernel_fpu_enable(void);
-
-/*
- * Some instructions like VIA's padlock instructions generate a spurious
- * DNA fault but don't modify SSE registers. And these instructions
- * get used from interrupt context as well. To prevent these kernel instructions
- * in interrupt context interacting wrongly with other user/kernel fpu usage, we
- * should use them only in the context of irq_ts_save/restore()
- */
-static inline int irq_ts_save(void)
-{
- /*
- * If in process context and not atomic, we can take a spurious DNA fault.
- * Otherwise, doing clts() in process context requires disabling preemption
- * or some heavy lifting like kernel_fpu_begin()
- */
- if (!in_atomic())
- return 0;
-
- if (read_cr0() & X86_CR0_TS) {
- clts();
- return 1;
- }
-
- return 0;
-}
-
-static inline void irq_ts_restore(int TS_state)
-{
- if (TS_state)
- stts();
-}
-
-/*
- * The question "does this thread have fpu access?"
- * is slightly racy, since preemption could come in
- * and revoke it immediately after the test.
- *
- * However, even in that very unlikely scenario,
- * we can just assume we have FPU access - typically
- * to save the FP state - we'll just take a #NM
- * fault and get the FPU access back.
- */
-static inline int user_has_fpu(void)
-{
- return current->thread.fpu.has_fpu;
-}
-
-extern void unlazy_fpu(struct task_struct *tsk);
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_X86_I387_H */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f4a555b..f8c0ec3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1002,8 +1002,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
void kvm_inject_nmi(struct kvm_vcpu *vcpu);
-int fx_init(struct kvm_vcpu *vcpu);
-
void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
const u8 *new, int bytes);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 883f6b9..5e8daee 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
paravirt_arch_exit_mmap(mm);
}
+#ifdef CONFIG_X86_64
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+ return !config_enabled(CONFIG_IA32_EMULATION) ||
+ !(mm->context.ia32_compat == TIF_IA32);
+}
+#else
+static inline bool is_64bit_mm(struct mm_struct *mm)
+{
+ return false;
+}
+#endif
+
static inline void arch_bprm_mm_init(struct mm_struct *mm,
struct vm_area_struct *vma)
{
diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h
index a952a13d..7a35495 100644
--- a/arch/x86/include/asm/mpx.h
+++ b/arch/x86/include/asm/mpx.h
@@ -13,55 +13,50 @@
#define MPX_BNDCFG_ENABLE_FLAG 0x1
#define MPX_BD_ENTRY_VALID_FLAG 0x1
-#ifdef CONFIG_X86_64
-
-/* upper 28 bits [47:20] of the virtual address in 64-bit used to
- * index into bounds directory (BD).
- */
-#define MPX_BD_ENTRY_OFFSET 28
-#define MPX_BD_ENTRY_SHIFT 3
-/* bits [19:3] of the virtual address in 64-bit used to index into
- * bounds table (BT).
+/*
+ * The upper 28 bits [47:20] of the virtual address in 64-bit
+ * are used to index into bounds directory (BD).
+ *
+ * The directory is 2G (2^31) in size, and with 8-byte entries
+ * it has 2^28 entries.
*/
-#define MPX_BT_ENTRY_OFFSET 17
-#define MPX_BT_ENTRY_SHIFT 5
-#define MPX_IGN_BITS 3
-#define MPX_BD_ENTRY_TAIL 3
+#define MPX_BD_SIZE_BYTES_64 (1UL<<31)
+#define MPX_BD_ENTRY_BYTES_64 8
+#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64)
-#else
-
-#define MPX_BD_ENTRY_OFFSET 20
-#define MPX_BD_ENTRY_SHIFT 2
-#define MPX_BT_ENTRY_OFFSET 10
-#define MPX_BT_ENTRY_SHIFT 4
-#define MPX_IGN_BITS 2
-#define MPX_BD_ENTRY_TAIL 2
+/*
+ * The 32-bit directory is 4MB (2^22) in size, and with 4-byte
+ * entries it has 2^20 entries.
+ */
+#define MPX_BD_SIZE_BYTES_32 (1UL<<22)
+#define MPX_BD_ENTRY_BYTES_32 4
+#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32)
-#endif
+/*
+ * A 64-bit table is 4MB total in size, and an entry is
+ * 4 64-bit pointers in size.
+ */
+#define MPX_BT_SIZE_BYTES_64 (1UL<<22)
+#define MPX_BT_ENTRY_BYTES_64 32
+#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64)
-#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT))
-#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT))
+/*
+ * A 32-bit table is 16kB total in size, and an entry is
+ * 4 32-bit pointers in size.
+ */
+#define MPX_BT_SIZE_BYTES_32 (1UL<<14)
+#define MPX_BT_ENTRY_BYTES_32 16
+#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32)
#define MPX_BNDSTA_TAIL 2
#define MPX_BNDCFG_TAIL 12
#define MPX_BNDSTA_ADDR_MASK (~((1UL<<MPX_BNDSTA_TAIL)-1))
#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
-#define MPX_BT_ADDR_MASK (~((1UL<<MPX_BD_ENTRY_TAIL)-1))
-
-#define MPX_BNDCFG_ADDR_MASK (~((1UL<<MPX_BNDCFG_TAIL)-1))
#define MPX_BNDSTA_ERROR_CODE 0x3
-#define MPX_BD_ENTRY_MASK ((1<<MPX_BD_ENTRY_OFFSET)-1)
-#define MPX_BT_ENTRY_MASK ((1<<MPX_BT_ENTRY_OFFSET)-1)
-#define MPX_GET_BD_ENTRY_OFFSET(addr) ((((addr)>>(MPX_BT_ENTRY_OFFSET+ \
- MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT)
-#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \
- MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT)
-
#ifdef CONFIG_X86_INTEL_MPX
-siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
- struct xsave_struct *xsave_buf);
-int mpx_handle_bd_fault(struct xsave_struct *xsave_buf);
+siginfo_t *mpx_generate_siginfo(struct pt_regs *regs);
+int mpx_handle_bd_fault(void);
static inline int kernel_managing_mpx_tables(struct mm_struct *mm)
{
return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR);
@@ -77,12 +72,11 @@ static inline void mpx_mm_init(struct mm_struct *mm)
void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#else
-static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs,
- struct xsave_struct *xsave_buf)
+static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
{
return NULL;
}
-static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf)
+static inline int mpx_handle_bd_fault(void)
{
return -EINVAL;
}
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 9aa52fd..43e6519 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -21,6 +21,7 @@ struct mm_struct;
#include <asm/desc_defs.h>
#include <asm/nops.h>
#include <asm/special_insns.h>
+#include <asm/fpu/types.h>
#include <linux/personality.h>
#include <linux/cpumask.h>
@@ -52,11 +53,16 @@ static inline void *current_text_addr(void)
return pc;
}
+/*
+ * These alignment constraints are for performance in the vSMP case,
+ * but in the task_struct case we must also meet hardware imposed
+ * alignment requirements of the FPU state:
+ */
#ifdef CONFIG_X86_VSMP
# define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT)
# define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT)
#else
-# define ARCH_MIN_TASKALIGN 16
+# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state)
# define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
@@ -166,7 +172,6 @@ extern const struct seq_operations cpuinfo_op;
#define cache_line_size() (boot_cpu_data.x86_cache_alignment)
extern void cpu_detect(struct cpuinfo_x86 *c);
-extern void fpu_detect(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_boot_cpu(void);
@@ -313,128 +318,6 @@ struct orig_ist {
unsigned long ist[7];
};
-#define MXCSR_DEFAULT 0x1f80
-
-struct i387_fsave_struct {
- u32 cwd; /* FPU Control Word */
- u32 swd; /* FPU Status Word */
- u32 twd; /* FPU Tag Word */
- u32 fip; /* FPU IP Offset */
- u32 fcs; /* FPU IP Selector */
- u32 foo; /* FPU Operand Pointer Offset */
- u32 fos; /* FPU Operand Pointer Selector */
-
- /* 8*10 bytes for each FP-reg = 80 bytes: */
- u32 st_space[20];
-
- /* Software status information [not touched by FSAVE ]: */
- u32 status;
-};
-
-struct i387_fxsave_struct {
- u16 cwd; /* Control Word */
- u16 swd; /* Status Word */
- u16 twd; /* Tag Word */
- u16 fop; /* Last Instruction Opcode */
- union {
- struct {
- u64 rip; /* Instruction Pointer */
- u64 rdp; /* Data Pointer */
- };
- struct {
- u32 fip; /* FPU IP Offset */
- u32 fcs; /* FPU IP Selector */
- u32 foo; /* FPU Operand Offset */
- u32 fos; /* FPU Operand Selector */
- };
- };
- u32 mxcsr; /* MXCSR Register State */
- u32 mxcsr_mask; /* MXCSR Mask */
-
- /* 8*16 bytes for each FP-reg = 128 bytes: */
- u32 st_space[32];
-
- /* 16*16 bytes for each XMM-reg = 256 bytes: */
- u32 xmm_space[64];
-
- u32 padding[12];
-
- union {
- u32 padding1[12];
- u32 sw_reserved[12];
- };
-
-} __attribute__((aligned(16)));
-
-struct i387_soft_struct {
- u32 cwd;
- u32 swd;
- u32 twd;
- u32 fip;
- u32 fcs;
- u32 foo;
- u32 fos;
- /* 8*10 bytes for each FP-reg = 80 bytes: */
- u32 st_space[20];
- u8 ftop;
- u8 changed;
- u8 lookahead;
- u8 no_update;
- u8 rm;
- u8 alimit;
- struct math_emu_info *info;
- u32 entry_eip;
-};
-
-struct ymmh_struct {
- /* 16 * 16 bytes for each YMMH-reg = 256 bytes */
- u32 ymmh_space[64];
-};
-
-/* We don't support LWP yet: */
-struct lwp_struct {
- u8 reserved[128];
-};
-
-struct bndreg {
- u64 lower_bound;
- u64 upper_bound;
-} __packed;
-
-struct bndcsr {
- u64 bndcfgu;
- u64 bndstatus;
-} __packed;
-
-struct xsave_hdr_struct {
- u64 xstate_bv;
- u64 xcomp_bv;
- u64 reserved[6];
-} __attribute__((packed));
-
-struct xsave_struct {
- struct i387_fxsave_struct i387;
- struct xsave_hdr_struct xsave_hdr;
- struct ymmh_struct ymmh;
- struct lwp_struct lwp;
- struct bndreg bndreg[4];
- struct bndcsr bndcsr;
- /* new processor state extensions will go here */
-} __attribute__ ((packed, aligned (64)));
-
-union thread_xstate {
- struct i387_fsave_struct fsave;
- struct i387_fxsave_struct fxsave;
- struct i387_soft_struct soft;
- struct xsave_struct xsave;
-};
-
-struct fpu {
- unsigned int last_cpu;
- unsigned int has_fpu;
- union thread_xstate *state;
-};
-
#ifdef CONFIG_X86_64
DECLARE_PER_CPU(struct orig_ist, orig_ist);
@@ -483,8 +366,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
#endif /* X86_64 */
extern unsigned int xstate_size;
-extern void free_thread_xstate(struct task_struct *);
-extern struct kmem_cache *task_xstate_cachep;
struct perf_event;
@@ -508,6 +389,10 @@ struct thread_struct {
unsigned long fs;
#endif
unsigned long gs;
+
+ /* Floating point and extended processor state */
+ struct fpu fpu;
+
/* Save middle states of ptrace breakpoints */
struct perf_event *ptrace_bps[HBP_NUM];
/* Debug status used for traps, single steps, etc... */
@@ -518,8 +403,6 @@ struct thread_struct {
unsigned long cr2;
unsigned long trap_nr;
unsigned long error_code;
- /* floating point and extended processor state */
- struct fpu fpu;
#ifdef CONFIG_X86_32
/* Virtual 86 mode info */
struct vm86_struct __user *vm86_info;
@@ -535,15 +418,6 @@ struct thread_struct {
unsigned long iopl;
/* Max allowed port in the bitmap, in bytes: */
unsigned io_bitmap_max;
- /*
- * fpu_counter contains the number of consecutive context switches
- * that the FPU is used. If this is over a threshold, the lazy fpu
- * saving becomes unlazy to save the trap. This is an unsigned char
- * so that after 256 times the counter wraps and the behavior turns
- * lazy again; this to deal with bursty apps that only use FPU for
- * a short time
- */
- unsigned char fpu_counter;
};
/*
@@ -928,18 +802,18 @@ extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
/* Register/unregister a process' MPX related resource */
-#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk))
-#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk))
+#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
+#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
#ifdef CONFIG_X86_INTEL_MPX
-extern int mpx_enable_management(struct task_struct *tsk);
-extern int mpx_disable_management(struct task_struct *tsk);
+extern int mpx_enable_management(void);
+extern int mpx_disable_management(void);
#else
-static inline int mpx_enable_management(struct task_struct *tsk)
+static inline int mpx_enable_management(void)
{
return -EINVAL;
}
-static inline int mpx_disable_management(struct task_struct *tsk)
+static inline int mpx_disable_management(void)
{
return -EINVAL;
}
diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h
index ee80b92..6c8a7ed 100644
--- a/arch/x86/include/asm/simd.h
+++ b/arch/x86/include/asm/simd.h
@@ -1,5 +1,5 @@
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
/*
* may_use_simd - whether it is allowable at this time to issue SIMD
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 6a99859..c2e00bb 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -39,7 +39,9 @@
#include <asm/processor.h>
#include <asm/percpu.h>
#include <asm/desc.h>
+
#include <linux/random.h>
+#include <linux/sched.h>
/*
* 24 byte read-only segment initializer for stack canary. Linker
diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 552d6c9..d1793f0 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -7,7 +7,7 @@
#define _ASM_X86_SUSPEND_32_H
#include <asm/desc.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
/* image of the saved processor state */
struct saved_context {
diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h
index bc62328..7ebf0eb 100644
--- a/arch/x86/include/asm/suspend_64.h
+++ b/arch/x86/include/asm/suspend_64.h
@@ -7,7 +7,7 @@
#define _ASM_X86_SUSPEND_64_H
#include <asm/desc.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
/*
* Image of the saved processor state, used by the low level ACPI suspend to
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
new file mode 100644
index 0000000..173dd3b
--- /dev/null
+++ b/arch/x86/include/asm/trace/mpx.h
@@ -0,0 +1,132 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM mpx
+
+#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MPX_H
+
+#include <linux/tracepoint.h>
+
+#ifdef CONFIG_X86_INTEL_MPX
+
+TRACE_EVENT(mpx_bounds_register_exception,
+
+ TP_PROTO(void *addr_referenced,
+ const struct bndreg *bndreg),
+ TP_ARGS(addr_referenced, bndreg),
+
+ TP_STRUCT__entry(
+ __field(void *, addr_referenced)
+ __field(u64, lower_bound)
+ __field(u64, upper_bound)
+ ),
+
+ TP_fast_assign(
+ __entry->addr_referenced = addr_referenced;
+ __entry->lower_bound = bndreg->lower_bound;
+ __entry->upper_bound = bndreg->upper_bound;
+ ),
+ /*
+ * Note that we are printing out the '~' of the upper
+ * bounds register here. It is actually stored in its
+ * one's complement form so that its 'init' state
+ * corresponds to all 0's. But, that looks like
+ * gibberish when printed out, so print out the 1's
+ * complement instead of the actual value here. Note
+ * though that you still need to specify filters for the
+ * actual value, not the displayed one.
+ */
+ TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx",
+ __entry->addr_referenced,
+ __entry->lower_bound,
+ ~__entry->upper_bound
+ )
+);
+
+TRACE_EVENT(bounds_exception_mpx,
+
+ TP_PROTO(const struct bndcsr *bndcsr),
+ TP_ARGS(bndcsr),
+
+ TP_STRUCT__entry(
+ __field(u64, bndcfgu)
+ __field(u64, bndstatus)
+ ),
+
+ TP_fast_assign(
+ /* need to get rid of the 'const' on bndcsr */
+ __entry->bndcfgu = (u64)bndcsr->bndcfgu;
+ __entry->bndstatus = (u64)bndcsr->bndstatus;
+ ),
+
+ TP_printk("bndcfgu:0x%llx bndstatus:0x%llx",
+ __entry->bndcfgu,
+ __entry->bndstatus)
+);
+
+DECLARE_EVENT_CLASS(mpx_range_trace,
+
+ TP_PROTO(unsigned long start,
+ unsigned long end),
+ TP_ARGS(start, end),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, start)
+ __field(unsigned long, end)
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->end = end;
+ ),
+
+ TP_printk("[0x%p:0x%p]",
+ (void *)__entry->start,
+ (void *)__entry->end
+ )
+);
+
+DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap,
+ TP_PROTO(unsigned long start, unsigned long end),
+ TP_ARGS(start, end)
+);
+
+DEFINE_EVENT(mpx_range_trace, mpx_unmap_search,
+ TP_PROTO(unsigned long start, unsigned long end),
+ TP_ARGS(start, end)
+);
+
+TRACE_EVENT(mpx_new_bounds_table,
+
+ TP_PROTO(unsigned long table_vaddr),
+ TP_ARGS(table_vaddr),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, table_vaddr)
+ ),
+
+ TP_fast_assign(
+ __entry->table_vaddr = table_vaddr;
+ ),
+
+ TP_printk("table vaddr:%p", (void *)__entry->table_vaddr)
+);
+
+#else
+
+/*
+ * This gets used outside of MPX-specific code, so we need a stub.
+ */
+static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
+{
+}
+
+#endif /* CONFIG_X86_INTEL_MPX */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH asm/trace/
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE mpx
+#endif /* _TRACE_MPX_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index ccab4af..59a54e8 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -14,8 +14,8 @@ struct user_ymmh_regs {
__u32 ymmh_space[64];
};
-struct user_xsave_hdr {
- __u64 xstate_bv;
+struct user_xstate_header {
+ __u64 xfeatures;
__u64 reserved1[2];
__u64 reserved2[5];
};
@@ -41,11 +41,11 @@ struct user_xsave_hdr {
* particular process/thread.
*
* Also when the user modifies certain state FP/SSE/etc through the
- * ptrace interface, they must ensure that the xsave_hdr.xstate_bv
+ * ptrace interface, they must ensure that the header.xfeatures
* bytes[512..519] of the memory layout are updated correspondingly.
* i.e., for example when FP state is modified to a non-init state,
- * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to
- * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc.
+ * header.xfeatures's bit 0 must be set to '1', when SSE is modified to
+ * non-init state, header.xfeatures's bit 1 must to be set to '1', etc.
*/
#define USER_XSTATE_FX_SW_WORDS 6
#define USER_XSTATE_XCR0_WORD 0
@@ -55,7 +55,7 @@ struct user_xstateregs {
__u64 fpx_space[58];
__u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS];
} i387;
- struct user_xsave_hdr xsave_hdr;
+ struct user_xstate_header header;
struct user_ymmh_regs ymmh;
/* further processor state extensions go here */
};
diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h
deleted file mode 100644
index f2cba4e7..0000000
--- a/arch/x86/include/asm/xcr.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- * Copyright 2008 rPath, Inc. - All Rights Reserved
- *
- * This file is part of the Linux kernel, and is made available under
- * the terms of the GNU General Public License version 2 or (at your
- * option) any later version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * asm-x86/xcr.h
- *
- * Definitions for the eXtended Control Register instructions
- */
-
-#ifndef _ASM_X86_XCR_H
-#define _ASM_X86_XCR_H
-
-#define XCR_XFEATURE_ENABLED_MASK 0x00000000
-
-#ifdef __KERNEL__
-# ifndef __ASSEMBLY__
-
-#include <linux/types.h>
-
-static inline u64 xgetbv(u32 index)
-{
- u32 eax, edx;
-
- asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((u64)edx << 32);
-}
-
-static inline void xsetbv(u32 index, u64 value)
-{
- u32 eax = value;
- u32 edx = value >> 32;
-
- asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
- : : "a" (eax), "d" (edx), "c" (index));
-}
-
-# endif /* __ASSEMBLY__ */
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_XCR_H */
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index d882975..1f5c516 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -36,7 +36,7 @@
* no advantages to be gotten from x86-64 here anyways.
*/
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#ifdef CONFIG_X86_32
/* reduce register pressure */
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index ce05722..5a08bc8 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -26,7 +26,7 @@
#define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n"
#define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n"
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
static void
xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h
index 492b298..7c0a517 100644
--- a/arch/x86/include/asm/xor_avx.h
+++ b/arch/x86/include/asm/xor_avx.h
@@ -18,7 +18,7 @@
#ifdef CONFIG_AS_AVX
#include <linux/compiler.h>
-#include <asm/i387.h>
+#include <asm/fpu/api.h>
#define BLOCK4(i) \
BLOCK(32 * i, 0) \
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
deleted file mode 100644
index c9a6d68..0000000
--- a/arch/x86/include/asm/xsave.h
+++ /dev/null
@@ -1,257 +0,0 @@
-#ifndef __ASM_X86_XSAVE_H
-#define __ASM_X86_XSAVE_H
-
-#include <linux/types.h>
-#include <asm/processor.h>
-
-#define XSTATE_CPUID 0x0000000d
-
-#define XSTATE_FP 0x1
-#define XSTATE_SSE 0x2
-#define XSTATE_YMM 0x4
-#define XSTATE_BNDREGS 0x8
-#define XSTATE_BNDCSR 0x10
-#define XSTATE_OPMASK 0x20
-#define XSTATE_ZMM_Hi256 0x40
-#define XSTATE_Hi16_ZMM 0x80
-
-#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
-#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
-/* Bit 63 of XCR0 is reserved for future expansion */
-#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
-
-#define FXSAVE_SIZE 512
-
-#define XSAVE_HDR_SIZE 64
-#define XSAVE_HDR_OFFSET FXSAVE_SIZE
-
-#define XSAVE_YMM_SIZE 256
-#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
-
-/* Supported features which support lazy state saving */
-#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
- | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
-
-/* Supported features which require eager state saving */
-#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
-
-/* All currently supported features */
-#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
-
-#ifdef CONFIG_X86_64
-#define REX_PREFIX "0x48, "
-#else
-#define REX_PREFIX
-#endif
-
-extern unsigned int xstate_size;
-extern u64 pcntxt_mask;
-extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
-extern struct xsave_struct *init_xstate_buf;
-
-extern void xsave_init(void);
-extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
-extern int init_fpu(struct task_struct *child);
-
-/* These macros all use (%edi)/(%rdi) as the single memory argument. */
-#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
-#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
-#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
-#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
-#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
-
-#define xstate_fault ".section .fixup,\"ax\"\n" \
- "3: movl $-1,%[err]\n" \
- " jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b, 3b) \
- : [err] "=r" (err)
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err = 0;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XSAVES"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- else
- asm volatile("1:"XSAVE"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- return err;
-}
-
-/*
- * This function is called only during boot time when x86 caps are not set
- * up and alternative can not be used yet.
- */
-static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err = 0;
-
- WARN_ON(system_state != SYSTEM_BOOTING);
-
- if (boot_cpu_has(X86_FEATURE_XSAVES))
- asm volatile("1:"XRSTORS"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- else
- asm volatile("1:"XRSTOR"\n\t"
- "2:\n\t"
- xstate_fault
- : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
- return err;
-}
-
-/*
- * Save processor xstate to xsave area.
- */
-static inline int xsave_state(struct xsave_struct *fx, u64 mask)
-{
- u32 lmask = mask;
- u32 hmask = mask >> 32;
- int err = 0;
-
- /*
- * If xsaves is enabled, xsaves replaces xsaveopt because
- * it supports compact format and supervisor states in addition to
- * modified optimization in xsaveopt.
- *
- * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave
- * because xsaveopt supports modified optimization which is not
- * supported by xsave.
- *
- * If none of xsaves and xsaveopt is enabled, use xsave.
- */
- alternative_input_2(
- "1:"XSAVE,
- XSAVEOPT,
- X86_FEATURE_XSAVEOPT,
- XSAVES,
- X86_FEATURE_XSAVES,
- [fx] "D" (fx), "a" (lmask), "d" (hmask) :
- "memory");
- asm volatile("2:\n\t"
- xstate_fault
- : "0" (0)
- : "memory");
-
- return err;
-}
-
-/*
- * Restore processor xstate from xsave area.
- */
-static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
-{
- int err = 0;
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- /*
- * Use xrstors to restore context if it is enabled. xrstors supports
- * compacted format of xsave area which is not supported by xrstor.
- */
- alternative_input(
- "1: " XRSTOR,
- XRSTORS,
- X86_FEATURE_XSAVES,
- "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
- : "memory");
-
- asm volatile("2:\n"
- xstate_fault
- : "0" (0)
- : "memory");
-
- return err;
-}
-
-/*
- * Save xstate context for old process during context switch.
- */
-static inline void fpu_xsave(struct fpu *fpu)
-{
- xsave_state(&fpu->state->xsave, -1);
-}
-
-/*
- * Restore xstate context for new process during context switch.
- */
-static inline int fpu_xrstor_checking(struct xsave_struct *fx)
-{
- return xrstor_state(fx, -1);
-}
-
-/*
- * Save xstate to user space xsave area.
- *
- * We don't use modified optimization because xrstor/xrstors might track
- * a different application.
- *
- * We don't use compacted format xsave area for
- * backward compatibility for old applications which don't understand
- * compacted format of xsave area.
- */
-static inline int xsave_user(struct xsave_struct __user *buf)
-{
- int err;
-
- /*
- * Clear the xsave header first, so that reserved fields are
- * initialized to zero.
- */
- err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr));
- if (unlikely(err))
- return -EFAULT;
-
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XSAVE"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault
- : "D" (buf), "a" (-1), "d" (-1), "0" (0)
- : "memory");
- return err;
-}
-
-/*
- * Restore xstate from user space xsave area.
- */
-static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
-{
- int err = 0;
- struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
- u32 lmask = mask;
- u32 hmask = mask >> 32;
-
- __asm__ __volatile__(ASM_STAC "\n"
- "1:"XRSTOR"\n"
- "2: " ASM_CLAC "\n"
- xstate_fault
- : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
- : "memory"); /* memory required? */
- return err;
-}
-
-void *get_xsave_addr(struct xsave_struct *xsave, int xstate);
-void setup_xstate_comp(void);
-
-#endif
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 16dc4e8..0e8a973 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -25,7 +25,7 @@ struct _fpx_sw_bytes {
__u32 extended_size; /* total size of the layout referred by
* fpstate pointer in the sigcontext.
*/
- __u64 xstate_bv;
+ __u64 xfeatures;
/* feature bit mask (including fp/sse/extended
* state) that is present in the memory
* layout.
@@ -209,8 +209,8 @@ struct sigcontext {
#endif /* !__i386__ */
-struct _xsave_hdr {
- __u64 xstate_bv;
+struct _header {
+ __u64 xfeatures;
__u64 reserved1[2];
__u64 reserved2[5];
};
@@ -228,7 +228,7 @@ struct _ymmh_state {
*/
struct _xstate {
struct _fpstate fpstate;
- struct _xsave_hdr xstate_hdr;
+ struct _header xstate_hdr;
struct _ymmh_state ymmh;
/* new processor state extensions go here */
};