From 1f999ab5a1360afc388868cc0ef9afe8edeef3be Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 21 Jul 2010 19:03:57 +0200 Subject: x86, xsave: Disable xsave in i387 emulation mode xsave is broken for (!HAVE_HWFP). This is the case if config MATH_EMULATION is enabled, 'no387' kernel parameter is set and xsave exists. xsave will not work because x86/math-emu and xsave share the same memory. As this case can be treated as corner case we simply disable xsave then. Signed-off-by: Robert Richter LKML-Reference: <1279731838-1522-7-git-send-email-robert.richter@amd.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 1f11f5c..2605c50 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -67,6 +67,12 @@ static void __cpuinit init_thread_xstate(void) */ if (!HAVE_HWFP) { + /* + * Disable xsave as we do not support it if i387 + * emulation is enabled. + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); xstate_size = sizeof(struct i387_soft_struct); return; } -- cgit v0.10.2 From 2df7a6e9e8e67c19e5fe2eac3f2d2223b7bb4a7b Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:08 -0400 Subject: x86: Use correct type for %cr4 %cr4 is 64-bit in 64-bit mode (although the upper 32-bits are currently reserved). Use unsigned long for the temporary variable to get the right size. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-2-git-send-email-brgerst@gmail.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 325b7bd..396b80f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -602,7 +602,7 @@ extern unsigned long mmu_cr4_features; static inline void set_in_cr4(unsigned long mask) { - unsigned cr4; + unsigned long cr4; mmu_cr4_features |= mask; cr4 = read_cr4(); @@ -612,7 +612,7 @@ static inline void set_in_cr4(unsigned long mask) static inline void clear_in_cr4(unsigned long mask) { - unsigned cr4; + unsigned long cr4; mmu_cr4_features &= ~mask; cr4 = read_cr4(); -- cgit v0.10.2 From 6ac8bac2684235f4caf22a410549c582aa7327d6 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:09 -0400 Subject: x86, fpu: Merge fpu_init() Make fpu_init() handle 32-bit setup. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-3-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 490dac6..f9e23e8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1264,13 +1264,6 @@ void __cpuinit cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - /* - * Force FPU initialization: - */ - current_thread_info()->status = 0; - clear_used_math(); - mxcsr_feature_mask_init(); - fpu_init(); xsave_init(); } diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 2605c50..8216651 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -85,7 +85,6 @@ static void __cpuinit init_thread_xstate(void) #endif } -#ifdef CONFIG_X86_64 /* * Called at bootup to set up the initial FPU state that is later cloned * into all processes. 
@@ -93,12 +92,21 @@ static void __cpuinit init_thread_xstate(void) void __cpuinit fpu_init(void) { - unsigned long oldcr0 = read_cr0(); - - set_in_cr4(X86_CR4_OSFXSR); - set_in_cr4(X86_CR4_OSXMMEXCPT); + unsigned long cr0; + unsigned long cr4_mask = 0; - write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ + if (cpu_has_fxsr) + cr4_mask |= X86_CR4_OSFXSR; + if (cpu_has_xmm) + cr4_mask |= X86_CR4_OSXMMEXCPT; + if (cr4_mask) + set_in_cr4(cr4_mask); + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ + if (!HAVE_HWFP) + cr0 |= X86_CR0_EM; + write_cr0(cr0); if (!smp_processor_id()) init_thread_xstate(); @@ -109,16 +117,6 @@ void __cpuinit fpu_init(void) clear_used_math(); } -#else /* CONFIG_X86_64 */ - -void __cpuinit fpu_init(void) -{ - if (!smp_processor_id()) - init_thread_xstate(); -} - -#endif /* CONFIG_X86_32 */ - void fpu_finit(struct fpu *fpu) { #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 60788de..d0029eb 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -881,18 +881,6 @@ void __init trap_init(void) #endif #ifdef CONFIG_X86_32 - if (cpu_has_fxsr) { - printk(KERN_INFO "Enabling fast FPU save and restore... "); - set_in_cr4(X86_CR4_OSFXSR); - printk("done.\n"); - } - if (cpu_has_xmm) { - printk(KERN_INFO - "Enabling unmasked SIMD FPU exception support... "); - set_in_cr4(X86_CR4_OSXMMEXCPT); - printk("done.\n"); - } - set_system_trap_gate(SYSCALL_VECTOR, &system_call); set_bit(SYSCALL_VECTOR, used_vectors); #endif -- cgit v0.10.2 From 51115d4d45700fc7c08306f7ba6e68551f526ae5 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:10 -0400 Subject: x86, fpu: Merge tolerant_fwait() Commit e2e75c91 merged the math exception handler, allowing both 32-bit and 64-bit to handle math exceptions from kernel mode. 
Switch to using the 64-bit version of tolerant_fwait() without fnclex, which simply ignores the exception if one is still pending from userspace. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-4-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index a73a8d5..5d8f9a7 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -77,15 +77,6 @@ static inline void sanitize_i387_state(struct task_struct *tsk) } #ifdef CONFIG_X86_64 - -/* Ignore delayed exceptions from user space */ -static inline void tolerant_fwait(void) -{ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); -} - static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; @@ -220,11 +211,6 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft); static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif -static inline void tolerant_fwait(void) -{ - asm volatile("fnclex ; fwait"); -} - /* perform fxrstor iff the processor has extended states, otherwise frstor */ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { @@ -344,7 +330,10 @@ static inline void __unlazy_fpu(struct task_struct *tsk) static inline void __clear_fpu(struct task_struct *tsk) { if (task_thread_info(tsk)->status & TS_USEDFPU) { - tolerant_fwait(); + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); } -- cgit v0.10.2 From bfd946cb891800d408decaae268a3480775178a3 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:11 -0400 Subject: x86, fpu: Merge __save_init_fpu() __save_init_fpu() is identical for 32-bit and 64-bit. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-5-git-send-email-brgerst@gmail.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 5d8f9a7..88065e3 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -197,12 +197,6 @@ static inline void fpu_save_init(struct fpu *fpu) fpu_clear(fpu); } -static inline void __save_init_fpu(struct task_struct *tsk) -{ - fpu_save_init(&tsk->thread.fpu); - task_thread_info(tsk)->status &= ~TS_USEDFPU; -} - #else /* CONFIG_X86_32 */ #ifdef CONFIG_MATH_EMULATION @@ -285,15 +279,14 @@ end: ; } +#endif /* CONFIG_X86_64 */ + static inline void __save_init_fpu(struct task_struct *tsk) { fpu_save_init(&tsk->thread.fpu); task_thread_info(tsk)->status &= ~TS_USEDFPU; } - -#endif /* CONFIG_X86_64 */ - static inline int fpu_fxrstor_checking(struct fpu *fpu) { return fxrstor_checking(&fpu->state->fxsave); -- cgit v0.10.2 From a4d4fbc7735bba6654b20f859135f9d3f8fe7f76 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:12 -0400 Subject: x86-64, fpu: Disable preemption when using TS_USEDFPU Consolidates code and fixes the below race for 64-bit. commit 9fa2f37bfeb798728241cc4a19578ce6e4258f25 Author: torvalds Date: Tue Sep 2 07:37:25 2003 +0000 Be a lot more careful about TS_USEDFPU and preemption We had some races where we tested (or set) TS_USEDFPU together with sequences that depended on the setting (like clearing or setting the TS flag in %cr0) and we could be preempted in between, which screws up the FPU state, since preemption will itself change USEDFPU and the TS flag. This makes it a lot more explicit: the "internal" low-level FPU functions ("__xxxx_fpu()") all require preemption to be disabled, and the exported "real" functions will make sure that is the case. One case - in __switch_to() - was switched to the non-preempt-safe internal version, since the scheduler itself has already disabled preemption. 
BKrev: 3f5448b5WRiQuyzAlbajs3qoQjSobw Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-6-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 88065e3..8b40a83 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -387,19 +387,6 @@ static inline void irq_ts_restore(int TS_state) stts(); } -#ifdef CONFIG_X86_64 - -static inline void save_init_fpu(struct task_struct *tsk) -{ - __save_init_fpu(tsk); - stts(); -} - -#define unlazy_fpu __unlazy_fpu -#define clear_fpu __clear_fpu - -#else /* CONFIG_X86_32 */ - /* * These disable preemption on their own and are safe */ @@ -425,8 +412,6 @@ static inline void clear_fpu(struct task_struct *tsk) preempt_enable(); } -#endif /* CONFIG_X86_64 */ - /* * i387 state interaction */ diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 3d9ea53..b3d7a3a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -424,7 +424,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) load_TLS(next, cpu); /* Must be after DS reload */ - unlazy_fpu(prev_p); + __unlazy_fpu(prev_p); /* Make sure cpu is ready for new context */ if (preload_fpu) -- cgit v0.10.2 From 10c11f304986a1f84201c2261a428701f9d2dffc Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:13 -0400 Subject: x86-64, fpu: Fix %cs value in convert_from_fxsr() While %ds still contains the userspace selector, %cs is KERNEL_CS at this point. Always get %cs from pt_regs even for the current task. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-7-git-send-email-brgerst@gmail.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 8216651..f3775f5 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -389,19 +389,17 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) #ifdef CONFIG_X86_64 env->fip = fxsave->rip; env->foo = fxsave->rdp; + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. + */ + env->fcs = task_pt_regs(tsk)->cs; if (tsk == current) { - /* - * should be actually ds/cs at fpu exception time, but - * that information is not available in 64bit mode. - */ - asm("mov %%ds, %[fos]" : [fos] "=r" (env->fos)); - asm("mov %%cs, %[fcs]" : [fcs] "=r" (env->fcs)); + savesegment(ds, env->fos); } else { - struct pt_regs *regs = task_pt_regs(tsk); - - env->fos = 0xffff0000 | tsk->thread.ds; - env->fcs = regs->cs; + env->fos = tsk->thread.ds; } + env->fos |= 0xffff0000; #else env->fip = fxsave->fip; env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); -- cgit v0.10.2 From 820241356d6aa9a895fc10def15794a5a5bfcd98 Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:14 -0400 Subject: x86-64, fpu: Simplify constraints for fxsave/fxrstor Use the "R" constraint (legacy register) instead of listing all the possible registers. Clean up the comments as well. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-8-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 8b40a83..768fcb2 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -81,6 +81,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { int err; + /* See comment in fxsave() below. 
*/ asm volatile("1: rex64/fxrstor (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -89,11 +90,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err) -#if 0 /* See comment in fxsave() below. */ - : [fx] "r" (fx), "m" (*fx), "0" (0)); -#else - : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); -#endif + : [fx] "R" (fx), "m" (*fx), "0" (0)); return err; } @@ -140,6 +137,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) if (unlikely(err)) return -EFAULT; + /* See comment in fxsave() below. */ asm volatile("1: rex64/fxsave (%[fx])\n\t" "2:\n" ".section .fixup,\"ax\"\n" @@ -148,11 +146,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) ".previous\n" _ASM_EXTABLE(1b, 3b) : [err] "=r" (err), "=m" (*fx) -#if 0 /* See comment in fxsave() below. */ - : [fx] "r" (fx), "0" (0)); -#else - : [fx] "cdaSDb" (fx), "0" (0)); -#endif + : [fx] "R" (fx), "0" (0)); if (unlikely(err) && __clear_user(fx, sizeof(struct i387_fxsave_struct))) err = -EFAULT; @@ -165,26 +159,22 @@ static inline void fpu_fxsave(struct fpu *fpu) /* Using "rex64; fxsave %0" is broken because, if the memory operand uses any extended registers for addressing, a second REX prefix will be generated (to the assembler, rex64 followed by semicolon - is a separate instruction), and hence the 64-bitness is lost. */ -#if 0 - /* Using "fxsaveq %0" would be the ideal choice, but is only supported - starting with gas 2.16. */ - __asm__ __volatile__("fxsaveq %0" - : "=m" (fpu->state->fxsave)); -#elif 0 - /* Using, as a workaround, the properly prefixed form below isn't + is a separate instruction), and hence the 64-bitness is lost. + Using "fxsaveq %0" would be the ideal choice, but is only supported + starting with gas 2.16. 
+ asm volatile("fxsaveq %0" + : "=m" (fpu->state->fxsave)); + Using, as a workaround, the properly prefixed form below isn't accepted by any binutils version so far released, complaining that the same type of prefix is used twice if an extended register is - needed for addressing (fix submitted to mainline 2005-11-21). */ - __asm__ __volatile__("rex64/fxsave %0" - : "=m" (fpu->state->fxsave)); -#else - /* This, however, we can work around by forcing the compiler to select + needed for addressing (fix submitted to mainline 2005-11-21). + asm volatile("rex64/fxsave %0" + : "=m" (fpu->state->fxsave)); + This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__("rex64/fxsave (%1)" - : "=m" (fpu->state->fxsave) - : "cdaSDb" (&fpu->state->fxsave)); -#endif + asm volatile("rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); } static inline void fpu_save_init(struct fpu *fpu) -- cgit v0.10.2 From a334fe43d85f570ae907acf988a053c5eff78d6e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:15 -0400 Subject: x86-32, fpu: Remove math_emulate stub check_fpu() in bugs.c halts boot if no FPU is found and math emulation isn't enabled. Therefore this stub will never be used. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-9-git-send-email-brgerst@gmail.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d0029eb..d439685 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -776,21 +776,10 @@ asmlinkage void math_state_restore(void) } EXPORT_SYMBOL_GPL(math_state_restore); -#ifndef CONFIG_MATH_EMULATION -void math_emulate(struct math_emu_info *info) -{ - printk(KERN_EMERG - "math-emulation not enabled and no coprocessor found.\n"); - printk(KERN_EMERG "killing %s.\n", current->comm); - force_sig(SIGFPE, current); - schedule(); -} -#endif /* CONFIG_MATH_EMULATION */ - dotraplinkage void __kprobes do_device_not_available(struct pt_regs *regs, long error_code) { -#ifdef CONFIG_X86_32 +#ifdef CONFIG_MATH_EMULATION if (read_cr0() & X86_CR0_EM) { struct math_emu_info info = { }; @@ -798,12 +787,12 @@ do_device_not_available(struct pt_regs *regs, long error_code) info.regs = regs; math_emulate(&info); - } else { - math_state_restore(); /* interrupts still off */ - conditional_sti(regs); + return; } -#else - math_state_restore(); +#endif + math_state_restore(); /* interrupts still off */ +#ifdef CONFIG_X86_32 + conditional_sti(regs); #endif } -- cgit v0.10.2 From 8eb91a577d7763d21628f6761045328784b1911c Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:16 -0400 Subject: x86, fpu: Remove unnecessary ifdefs from i387 code. Remove ifdefs for code that the compiler can optimize away on 64-bit. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-10-git-send-email-brgerst@gmail.com> Signed-off-by: H. 
Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 768fcb2..42b507e 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -55,6 +55,12 @@ extern int save_i387_xstate_ia32(void __user *buf); extern int restore_i387_xstate_ia32(void __user *buf); #endif +#ifdef CONFIG_MATH_EMULATION +extern void finit_soft_fpu(struct i387_soft_struct *soft); +#else +static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} +#endif + #define X87_FSW_ES (1 << 7) /* Exception Summary */ static __always_inline __pure bool use_xsaveopt(void) @@ -189,12 +195,6 @@ static inline void fpu_save_init(struct fpu *fpu) #else /* CONFIG_X86_32 */ -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - /* perform fxrstor iff the processor has extended states, otherwise frstor */ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index f3775f5..e795e36 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -79,10 +79,8 @@ static void __cpuinit init_thread_xstate(void) if (cpu_has_fxsr) xstate_size = sizeof(struct i387_fxsave_struct); -#ifdef CONFIG_X86_32 else xstate_size = sizeof(struct i387_fsave_struct); -#endif } /* @@ -119,12 +117,10 @@ void __cpuinit fpu_init(void) void fpu_finit(struct fpu *fpu) { -#ifdef CONFIG_X86_32 if (!HAVE_HWFP) { finit_soft_fpu(&fpu->state->soft); return; } -#endif if (cpu_has_fxsr) { struct i387_fxsave_struct *fx = &fpu->state->fxsave; -- cgit v0.10.2 From eec73f813ab0954253e5e2168119c4555f83f07d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:17 -0400 Subject: x86, fpu: Remove PSHUFB_XMM5_* macros The PSHUFB_XMM5_* macros are no longer used. 
Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-11-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 42b507e..907967e 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -465,7 +465,4 @@ extern void fpu_finit(struct fpu *fpu); #endif /* __ASSEMBLY__ */ -#define PSHUFB_XMM5_XMM0 .byte 0x66, 0x0f, 0x38, 0x00, 0xc5 -#define PSHUFB_XMM5_XMM6 .byte 0x66, 0x0f, 0x38, 0x00, 0xf5 - #endif /* _ASM_X86_I387_H */ -- cgit v0.10.2 From 58a992b9cbaf449aeebd3575c3695a9eb5d95b5e Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:18 -0400 Subject: x86-32, fpu: Rewrite fpu_save_init() Rewrite fpu_save_init() to prepare for merging with 64-bit. Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-12-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 907967e..b45abef 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -73,6 +73,11 @@ static __always_inline __pure bool use_xsave(void) return static_cpu_has(X86_FEATURE_XSAVE); } +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has(X86_FEATURE_FXSR); +} + extern void __sanitize_i387_state(struct task_struct *); static inline void sanitize_i387_state(struct task_struct *tsk) @@ -211,6 +216,12 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return 0; } +static inline void fpu_fxsave(struct fpu *fpu) +{ + asm volatile("fxsave %[fx]" + : [fx] "=m" (fpu->state->fxsave)); +} + /* We need a safe address that is cheap to find and that is already in L1 during context switch. 
The best choices are unfortunately different for UP and SMP */ @@ -226,36 +237,24 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) static inline void fpu_save_init(struct fpu *fpu) { if (use_xsave()) { - struct xsave_struct *xstate = &fpu->state->xsave; - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - fpu_xsave(fpu); /* * xsave header may indicate the init state of the FP. */ - if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) - goto end; - - if (unlikely(fx->swd & X87_FSW_ES)) - asm volatile("fnclex"); - - /* - * we can do a simple return here or be paranoid :) - */ - goto clear_state; + if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) + return; + } else if (use_fxsr()) { + fpu_fxsave(fpu); + } else { + asm volatile("fsave %[fx]; fwait" + : [fx] "=m" (fpu->state->fsave)); + return; } - /* Use more nops than strictly needed in case the compiler - varies code */ - alternative_input( - "fnsave %[fx] ;fwait;" GENERIC_NOP8 GENERIC_NOP4, - "fxsave %[fx]\n" - "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", - X86_FEATURE_FXSR, - [fx] "m" (fpu->state->fxsave), - [fsw] "m" (fpu->state->fxsave.swd) : "memory"); -clear_state: + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) + asm volatile("fnclex"); + /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ @@ -265,8 +264,6 @@ clear_state: "fildl %[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); -end: - ; } #endif /* CONFIG_X86_64 */ -- cgit v0.10.2 From b2b57fe053c9cf8b8af5a0e826a465996afed0ff Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Fri, 3 Sep 2010 21:17:19 -0400 Subject: x86, fpu: Merge fpu_save_init() Make 64-bit use the 32-bit version of fpu_save_init(). Remove unused clear_fpu_state(). 
Signed-off-by: Brian Gerst Acked-by: Pekka Enberg Cc: Suresh Siddha LKML-Reference: <1283563039-3466-13-git-send-email-brgerst@gmail.com> Signed-off-by: H. Peter Anvin diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index b45abef..70626ed 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -105,36 +105,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) return err; } -/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception - is pending. Clear the x87 state here by setting it to fixed - values. The kernel data segment can be sometimes 0 and sometimes - new user value. Both should be ok. - Use the PDA as safe address because it should be already in L1. */ -static inline void fpu_clear(struct fpu *fpu) -{ - struct xsave_struct *xstate = &fpu->state->xsave; - struct i387_fxsave_struct *fx = &fpu->state->fxsave; - - /* - * xsave header may indicate the init state of the FP. - */ - if (use_xsave() && - !(xstate->xsave_hdr.xstate_bv & XSTATE_FP)) - return; - - if (unlikely(fx->swd & X87_FSW_ES)) - asm volatile("fnclex"); - alternative_input(ASM_NOP8 ASM_NOP2, - " emms\n" /* clear stack tags */ - " fildl %%gs:0", /* load to clear state */ - X86_FEATURE_FXSAVE_LEAK); -} - -static inline void clear_fpu_state(struct task_struct *tsk) -{ - fpu_clear(&tsk->thread.fpu); -} - static inline int fxsave_user(struct i387_fxsave_struct __user *fx) { int err; @@ -188,16 +158,6 @@ static inline void fpu_fxsave(struct fpu *fpu) : [fx] "R" (&fpu->state->fxsave)); } -static inline void fpu_save_init(struct fpu *fpu) -{ - if (use_xsave()) - fpu_xsave(fpu); - else - fpu_fxsave(fpu); - - fpu_clear(fpu); -} - #else /* CONFIG_X86_32 */ /* perform fxrstor iff the processor has extended states, otherwise frstor */ @@ -222,6 +182,8 @@ static inline void fpu_fxsave(struct fpu *fpu) : [fx] "=m" (fpu->state->fxsave)); } +#endif /* CONFIG_X86_64 */ + /* We need a safe address that is cheap to find and that is already in L1 
during context switch. The best choices are unfortunately different for UP and SMP */ @@ -259,15 +221,13 @@ static inline void fpu_save_init(struct fpu *fpu) is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ alternative_input( - GENERIC_NOP8 GENERIC_NOP2, + ASM_NOP8 ASM_NOP2, "emms\n\t" /* clear stack tags */ - "fildl %[addr]", /* set F?P to defined value */ + "fildl %P[addr]", /* set F?P to defined value */ X86_FEATURE_FXSAVE_LEAK, [addr] "m" (safe_address)); } -#endif /* CONFIG_X86_64 */ - static inline void __save_init_fpu(struct task_struct *tsk) { fpu_save_init(&tsk->thread.fpu); -- cgit v0.10.2