From 0a7815515471a80379bfefc9f1913e0d8c87fbfb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 10:58:52 +0200 Subject: x86/fpu: Rename unlazy_fpu() to fpu__save() This function is a misnomer on two levels: 1) it doesn't really manipulate TS on modern CPUs anymore; its primary purpose is to save FPU state, used: - when executing fork()/clone(): to copy current FPU state to the child's FPU state. - when handling math exceptions: to generate the math error si_code in the signal frame. 2) even on legacy CPUs it doesn't actually 'unlazy'; if anything, it 'lazies' the FPU state: as a side effect of the old FNSAVE instruction, which clears (destroys) the FPU state, it's necessary to set CR0::TS. So rename it to fpu__save() to better reflect its purpose. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index da5e967..49db044 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -602,7 +602,7 @@ static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) struct fpu *dfpu = &dst->thread.fpu; struct fpu *sfpu = &src->thread.fpu; - unlazy_fpu(src); + fpu__save(src); memcpy(dfpu->state, sfpu->state, xstate_size); } } diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6eb6fcb..d4419da 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -101,7 +101,7 @@ static inline int user_has_fpu(void) return current->thread.fpu.has_fpu; } -extern void unlazy_fpu(struct task_struct *tsk); +extern void fpu__save(struct task_struct *tsk); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 00918327..ec1a744 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -117,7 +117,7 @@ void __kernel_fpu_end(void) } EXPORT_SYMBOL(__kernel_fpu_end); -void unlazy_fpu(struct task_struct *tsk) +void fpu__save(struct task_struct *tsk) { preempt_disable(); if (__thread_has_fpu(tsk)) { @@ -130,7 +130,7 @@ void unlazy_fpu(struct task_struct *tsk) } preempt_enable(); } -EXPORT_SYMBOL(unlazy_fpu); +EXPORT_SYMBOL(fpu__save); unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; @@ -251,7 +251,7 @@ int init_fpu(struct task_struct *tsk) if (tsk_used_math(tsk)) { if (cpu_has_fpu && tsk == current) - unlazy_fpu(tsk); + fpu__save(tsk); task_disable_lazy_fpu_restore(tsk); return 0; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 324ab52..12f29f9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -731,7 +731,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) /* * Save the info for the exception handler and clear the error. */ - unlazy_fpu(task); + fpu__save(task); task->thread.trap_nr = trapnr; task->thread.error_code = error_code; info.si_signo = SIGFPE; -- cgit v0.10.2 From 4af08f2f47f0ed3c48e557ac47ed2ae616d6866b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 11:01:36 +0200 Subject: x86/fpu: Add comments to fpu__save() and restrict its export Add an explanation to fpu__save() and also don't export it to random modules - we don't want them to futz around with deep kernel internals. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index ec1a744..ac47278c 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -117,6 +117,9 @@ void __kernel_fpu_end(void) } EXPORT_SYMBOL(__kernel_fpu_end); +/* + * Save the FPU state (initialize it if necessary): + */ void fpu__save(struct task_struct *tsk) { preempt_disable(); @@ -130,7 +133,7 @@ void fpu__save(struct task_struct *tsk) } preempt_enable(); } -EXPORT_SYMBOL(fpu__save); +EXPORT_SYMBOL_GPL(fpu__save); unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; -- cgit v0.10.2 From 87cdb98affe54b6f390f4622b254569b90820817 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 11:06:43 +0200 Subject: x86/fpu: Add debugging check to fpu__save() Document the function a bit more and add a debugging check that we only ever run this for the current task. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index ac47278c..66f1053 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -119,9 +119,13 @@ EXPORT_SYMBOL(__kernel_fpu_end); /* * Save the FPU state (initialize it if necessary): + * + * This only ever gets called for the current task. */ void fpu__save(struct task_struct *tsk) { + WARN_ON(tsk != current); + preempt_disable(); if (__thread_has_fpu(tsk)) { if (use_eager_fpu()) { -- cgit v0.10.2 From 1a7dc0db7181ebc8b9ec17e5a15ad4c766c7d3d4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 11:33:52 +0200 Subject: x86/fpu: Rename fpu_detect() to fpu__detect() Use the fpu__*() namespace to organize FPU ops better. Also document fpu__detect() a bit. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 23ba676..2dc08c2 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -166,7 +166,7 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void fpu_detect(struct cpuinfo_x86 *c); +extern void fpu__detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a62cf04..60a29d2 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -759,7 +759,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); - fpu_detect(c); + fpu__detect(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 66f1053..29251f5 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -640,7 +640,11 @@ static int __init no_387(char *s) __setup("no387", no_387); -void fpu_detect(struct cpuinfo_x86 *c) +/* + * Set the X86_FEATURE_FPU CPU-capability bit based on + * trying to execute an actual sequence of FPU instructions: + */ +void fpu__detect(struct cpuinfo_x86 *c) { unsigned long cr0; u16 fsw, fcw; -- cgit v0.10.2 From 68ad8b9feae583cc5102ec86ad410fce542a5311 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 11:50:20 +0200 Subject: x86/fpu: Remove stale init_fpu() prototype We are going to split init_fpu() so keep only a single prototype, in i387.h. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index c9a6d68..58ed0ca 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -51,7 +51,6 @@ extern struct xsave_struct *init_xstate_buf; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); -extern int init_fpu(struct task_struct *child); /* These macros all use (%edi)/(%rdi) as the single memory argument. */ #define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -- cgit v0.10.2 From 97185c95f7ab7f752473c34672dab0925758094b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:02:02 +0200 Subject: x86/fpu: Split an fpstate_alloc_init() function out of init_fpu() Most init_fpu() users don't want the register-saving aspect of the function, they are calling it for 'current' and when FPU registers are not allocated and initialized yet. Split out a simplified API that does just that (and add debug-checks for these conditions): fpstate_alloc_init(). Use it where appropriate. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index d4419da..1a896b4 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -18,7 +18,10 @@ struct pt_regs; struct user_i387_struct; +extern int fpstate_alloc_init(struct task_struct *curr); + extern int init_fpu(struct task_struct *child); + extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 29251f5..56b6e72 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -247,6 +247,37 @@ void fpu_finit(struct fpu *fpu) EXPORT_SYMBOL_GPL(fpu_finit); /* + * Allocate the backing store for the current task's FPU registers + * and initialize the registers themselves as well. + * + * Can fail. + */ +int fpstate_alloc_init(struct task_struct *curr) +{ + int ret; + + if (WARN_ON_ONCE(curr != current)) + return -EINVAL; + if (WARN_ON_ONCE(curr->flags & PF_USED_MATH)) + return -EINVAL; + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + ret = fpu_alloc(&curr->thread.fpu); + if (ret) + return ret; + + fpu_finit(&curr->thread.fpu); + + /* Safe to do for the current task: */ + curr->flags |= PF_USED_MATH; + + return 0; +} +EXPORT_SYMBOL_GPL(fpstate_alloc_init); + +/* * The _current_ task is using the FPU for the first time * so initialize it and set the mxcsr to its default * value at reset if we support XMM instructions and then diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6e338e3..abdb81d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -159,7 +159,7 @@ void flush_thread(void) } else { if (!tsk_used_math(tsk)) { /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(init_fpu(tsk))) + if (WARN_ON(fpstate_alloc_init(tsk))) force_sig(SIGKILL, tsk); user_fpu_begin(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 12f29f9..cf9c962 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -846,7 +846,7 @@ void math_state_restore(void) /* * does a slab alloc which can sleep */ - if (init_fpu(tsk)) { + if (fpstate_alloc_init(tsk)) { /* * ran out of memory! 
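* (No way to recover here: the task gets killed with SIGKILL.)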
*/ diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 87a815b..a977cdd 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -349,7 +349,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!used_math() && init_fpu(tsk)) + if (!used_math() && fpstate_alloc_init(tsk)) return -1; if (!static_cpu_has(X86_FEATURE_FPU)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c73efcd..bfc3966 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6600,7 +6600,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!tsk_used_math(current) && init_fpu(current)) + if (!tsk_used_math(current) && fpstate_alloc_init(current)) return -ENOMEM; if (vcpu->sigset_active) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 9b86812..c9ff09a 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -149,7 +149,7 @@ void math_emulate(struct math_emu_info *info) struct desc_struct code_descriptor; if (!used_math()) { - if (init_fpu(current)) { + if (fpstate_alloc_init(current)) { do_group_exit(SIGKILL); return; } -- cgit v0.10.2 From bda283796b38baae3ec5c8c788b143b1fb9dcc77 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:17:28 +0200 Subject: x86/fpu: Make init_fpu() static Now that the allocation users have been split off into a separate function, init_fpu() has become local to i387.c: make it static. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 1a896b4..0367d17 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -20,8 +20,6 @@ struct user_i387_struct; extern int fpstate_alloc_init(struct task_struct *curr); -extern int init_fpu(struct task_struct *child); - extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 56b6e72..9507902 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -283,7 +283,7 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init); * value at reset if we support XMM instructions and then * remember the current task has used the FPU. */ -int init_fpu(struct task_struct *tsk) +static int init_fpu(struct task_struct *tsk) { int ret; @@ -306,7 +306,6 @@ int init_fpu(struct task_struct *tsk) set_stopped_child_used_math(tsk); return 0; } -EXPORT_SYMBOL_GPL(init_fpu); /* * The xstateregs_active() routine is the same as the fpregs_active() routine, -- cgit v0.10.2 From 67e97fc2ec575adaacca296834f9ea0e1e34b563 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:21:03 +0200 Subject: x86/fpu: Rename init_fpu() to fpu__unlazy_stopped() and add debugging check This function name is a misnomer now that we've split out all the other users from it. Rename it accordingly: it's used to save the FPU state of (ptrace-)stopped child tasks. Add debugging check to double check this intended usage: that this function is only called for non-current, stopped child tasks. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 9507902..909d10d 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -283,27 +283,30 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init); * value at reset if we support XMM instructions and then * remember the current task has used the FPU. */ -static int init_fpu(struct task_struct *tsk) +static int fpu__unlazy_stopped(struct task_struct *child) { int ret; - if (tsk_used_math(tsk)) { - if (cpu_has_fpu && tsk == current) - fpu__save(tsk); - task_disable_lazy_fpu_restore(tsk); + if (WARN_ON_ONCE(child == current)) + return -EINVAL; + + if (tsk_used_math(child)) { + if (cpu_has_fpu && child == current) + fpu__save(child); + task_disable_lazy_fpu_restore(child); return 0; } /* * Memory allocation at the first usage of the FPU and other state. */ - ret = fpu_alloc(&tsk->thread.fpu); + ret = fpu_alloc(&child->thread.fpu); if (ret) return ret; - fpu_finit(&tsk->thread.fpu); + fpu_finit(&child->thread.fpu); - set_stopped_child_used_math(tsk); + set_stopped_child_used_math(child); return 0; } @@ -331,7 +334,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - ret = init_fpu(target); + ret = fpu__unlazy_stopped(target); if (ret) return ret; @@ -350,7 +353,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - ret = init_fpu(target); + ret = fpu__unlazy_stopped(target); if (ret) return ret; @@ -384,7 +387,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - ret = init_fpu(target); + ret = fpu__unlazy_stopped(target); if (ret) return ret; @@ -414,7 +417,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - ret = init_fpu(target); + ret = fpu__unlazy_stopped(target); if (ret) return ret; @@ -577,7 +580,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - ret = init_fpu(target); + ret = fpu__unlazy_stopped(target); if (ret) return ret; @@ -608,7 +611,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - ret = init_fpu(target); + ret = fpu__unlazy_stopped(target); if (ret) return ret; -- cgit v0.10.2 From 8694c3e793810de3f7ff612d0fc23b70dc5a7e4d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:28:15 +0200 Subject: x86/fpu: Optimize fpu__unlazy_stopped() This function is only called for stopped child tasks, so the fpu__save() branch will never get called - remove it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 909d10d..76006a7 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -291,8 +291,6 @@ static int fpu__unlazy_stopped(struct task_struct *child) return -EINVAL; if (tsk_used_math(child)) { - if (cpu_has_fpu && child == current) - fpu__save(child); task_disable_lazy_fpu_restore(child); return 0; } -- cgit v0.10.2 From 071ae621ec51153d740c9a2252d2d46e03e80d58 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:29:47 +0200 Subject: x86/fpu: Simplify fpu__unlazy_stopped() Open code the PF_USED_MATH logic, to make the logic more obvious. (We'll slowly convert the other users of *_used_math() methods as well.) Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 76006a7..5e4dae7 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -290,7 +290,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) if (WARN_ON_ONCE(child == current)) return -EINVAL; - if (tsk_used_math(child)) { + if (child->flags & PF_USED_MATH) { task_disable_lazy_fpu_restore(child); return 0; } @@ -304,7 +304,9 @@ static int fpu__unlazy_stopped(struct task_struct *child) fpu_finit(&child->thread.fpu); - set_stopped_child_used_math(child); + /* Safe to do for stopped child tasks: */ + child->flags |= PF_USED_MATH; + return 0; } -- cgit v0.10.2 From 373244221ade7c4d8d4fa970355ca3803711fbff Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:34:26 +0200 Subject: x86/fpu: Remove fpu_allocated() It's an unnecessary obfuscation of a very simple allocation pattern. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 49db044..21000f0 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -569,14 +569,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -static bool fpu_allocated(struct fpu *fpu) -{ - return fpu->state != NULL; -} - static inline int fpu_alloc(struct fpu *fpu) { - if (fpu_allocated(fpu)) + if (fpu->state) return 0; fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); if (!fpu->state) -- cgit v0.10.2 From 6fbe67124869be10e8b0ceedc4ee7c5691d78c40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:37:30 +0200 Subject: x86/fpu: Move fpu_alloc() out of line This is not a small function, and it's used in several places, one of them a popular module (KVM). Move the function out of line. This saves a bit of text, even with the symbol export overhead: text data bss dec hex filename 12566052 1619504 1089536 15275092 e91454 vmlinux.before 12566046 1619504 1089536 15275086 e9144e vmlinux.after Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 21000f0..bdbba1a 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -569,16 +569,7 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -static inline int fpu_alloc(struct fpu *fpu) -{ - if (fpu->state) - return 0; - fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!fpu->state) - return -ENOMEM; - WARN_ON((unsigned long)fpu->state & 15); - return 0; -} +extern int fpu_alloc(struct fpu *fpu); static inline void fpu_free(struct fpu *fpu) { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 5e4dae7..05fcc90 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -246,6 +246,18 @@ void fpu_finit(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpu_finit); +int fpu_alloc(struct fpu *fpu) +{ + if (fpu->state) + return 0; + fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!fpu->state) + return -ENOMEM; + WARN_ON((unsigned long)fpu->state & 15); + return 0; +} +EXPORT_SYMBOL_GPL(fpu_alloc); + /* * Allocate the backing store for the current task's FPU registers * and initialize the registers themselves as well. -- cgit v0.10.2 From ed97b085461ca6bdc22162b68b4cba69459ce1c8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:41:14 +0200 Subject: x86/fpu: Rename fpu_alloc() to fpstate_alloc() Use the fpu__*() namespace for fpstate_alloc() as well. Also add a comment about FPU state alignment. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index bdbba1a..88f584a 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -569,7 +569,7 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -extern int fpu_alloc(struct fpu *fpu); +extern int fpstate_alloc(struct fpu *fpu); static inline void fpu_free(struct fpu *fpu) { diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 05fcc90..5f2feb6 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -246,17 +246,21 @@ void fpu_finit(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpu_finit); -int fpu_alloc(struct fpu *fpu) +int fpstate_alloc(struct fpu *fpu) { if (fpu->state) return 0; + fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); if (!fpu->state) return -ENOMEM; + + /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */ WARN_ON((unsigned long)fpu->state & 15); + return 0; } -EXPORT_SYMBOL_GPL(fpu_alloc); +EXPORT_SYMBOL_GPL(fpstate_alloc); /* * Allocate the backing store for the current task's FPU registers @@ -276,7 +280,7 @@ int fpstate_alloc_init(struct task_struct *curr) /* * Memory allocation at the first usage of the FPU and other state. */ - ret = fpu_alloc(&curr->thread.fpu); + ret = fpstate_alloc(&curr->thread.fpu); if (ret) return ret; @@ -310,7 +314,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) /* * Memory allocation at the first usage of the FPU and other state. 
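* (fpstate_alloc() does a GFP_KERNEL slab allocation and may sleep; that is fine here, as the ptrace regset callers run in process context.)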
*/ - ret = fpu_alloc(&child->thread.fpu); + ret = fpstate_alloc(&child->thread.fpu); if (ret) return ret; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index abdb81d..a0ed7c0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -92,7 +92,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) dst->thread.fpu.state = NULL; task_disable_lazy_fpu_restore(dst); if (tsk_used_math(src)) { - int err = fpu_alloc(&dst->thread.fpu); + int err = fpstate_alloc(&dst->thread.fpu); if (err) return err; fpu_copy(dst, src); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bfc3966..28fa733 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7007,7 +7007,7 @@ int fx_init(struct kvm_vcpu *vcpu) { int err; - err = fpu_alloc(&vcpu->arch.guest_fpu); + err = fpstate_alloc(&vcpu->arch.guest_fpu); if (err) return err; -- cgit v0.10.2 From a7c2a83364c0d882a2d850c4c183c6de42d5c02b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 12:55:22 +0200 Subject: x86/fpu: Rename fpu_free() to fpstate_free() Use the fpu__*() namespace. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 88f584a..b68e8f0 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -571,7 +571,7 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) extern int fpstate_alloc(struct fpu *fpu); -static inline void fpu_free(struct fpu *fpu) +static inline void fpstate_free(struct fpu *fpu) { if (fpu->state) { kmem_cache_free(task_xstate_cachep, fpu->state); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index a0ed7c0..fd4aa56 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -102,7 +102,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) void free_thread_xstate(struct task_struct *tsk) { - fpu_free(&tsk->thread.fpu); + fpstate_free(&tsk->thread.fpu); } void arch_release_task_struct(struct task_struct *tsk) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 28fa733..80a411c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7029,7 +7029,7 @@ EXPORT_SYMBOL_GPL(fx_init); static void fx_free(struct kvm_vcpu *vcpu) { - fpu_free(&vcpu->arch.guest_fpu); + fpstate_free(&vcpu->arch.guest_fpu); } void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From c0ee2cf61be0bd3db2a30d76056a2e09fa48272e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 13:01:52 +0200 Subject: x86/fpu: Rename fpu_finit() to fpstate_init() Make it clear that we are initializing the in-memory FPU context area, not the FPU registers. Also move it to the fpu__*() namespace. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 0367d17..6552a16 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -19,8 +19,8 @@ struct pt_regs; struct user_i387_struct; extern int fpstate_alloc_init(struct task_struct *curr); +extern void fpstate_init(struct fpu *fpu); -extern void fpu_finit(struct fpu *fpu); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 5f2feb6..e0c16e8 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -225,7 +225,7 @@ void fpu_init(void) eager_fpu_init(); } -void fpu_finit(struct fpu *fpu) +void fpstate_init(struct fpu *fpu) { if (!cpu_has_fpu) { finit_soft_fpu(&fpu->state->soft); @@ -244,7 +244,7 @@ void fpu_finit(struct fpu *fpu) fp->fos = 0xffff0000u; } } -EXPORT_SYMBOL_GPL(fpu_finit); +EXPORT_SYMBOL_GPL(fpstate_init); int fpstate_alloc(struct fpu *fpu) { @@ -284,7 +284,7 @@ int fpstate_alloc_init(struct task_struct *curr) if (ret) return ret; - fpu_finit(&curr->thread.fpu); + fpstate_init(&curr->thread.fpu); /* Safe to do for the current task: */ curr->flags |= PF_USED_MATH; @@ -318,7 +318,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) if (ret) return ret; - fpu_finit(&child->thread.fpu); + fpstate_init(&child->thread.fpu); /* Safe to do for stopped child tasks: */ child->flags |= PF_USED_MATH; diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index a977cdd..163b5cc 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c @@ -395,7 +395,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { - fpu_finit(fpu); + fpstate_init(fpu); err = -1; } else { sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 80a411c..26b1f89 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7011,7 +7011,7 @@ int fx_init(struct kvm_vcpu *vcpu) if (err) return err; - fpu_finit(&vcpu->arch.guest_fpu); + fpstate_init(&vcpu->arch.guest_fpu); if (cpu_has_xsaves) vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; -- cgit v0.10.2 From 3a9c4b0d7e1a693beeac24d749f6938444148e86 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 13:16:51 +0200 Subject: x86/fpu: Rename fpu_init() to fpu__cpu_init() fpu_init() is a bit of a misnomer in that it (falsely) creates the impression that it's related to the (old) fpu_finit() function, which initializes FPU ctx state. Rename it to fpu__cpu_init() to make its boot time initialization clear, and to move it to the fpu__*() namespace. Also fix and extend its comment block to point out that it's called not only on the boot CPU, but on secondary CPUs as well. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index b68e8f0..02e0e97 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -39,7 +39,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, #endif extern unsigned int mxcsr_feature_mask; -extern void fpu_init(void); +extern void fpu__cpu_init(void); extern void eager_fpu_init(void); DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 60a29d2..b8035b8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1439,7 +1439,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu_init(); + fpu__cpu_init(); if (is_uv_system()) uv_cpu_init(); @@ -1495,7 +1495,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu_init(); + fpu__cpu_init(); } #endif diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e0c16e8..5b46725 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -183,11 +183,13 @@ static void init_thread_xstate(void) } /* - * Called at bootup to set up the initial FPU state that is later cloned - * into all processes. + * Called on the boot CPU at bootup to set up the initial FPU state that + * is later cloned into all processes. + * + * Also called on secondary CPUs to set up the FPU state of their + * idle threads. */ - -void fpu_init(void) +void fpu__cpu_init(void) { unsigned long cr0; unsigned long cr4_mask = 0; diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 46957ea..43f8704 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu) return; /* * For BSP, PSE PGE are set in probe_page_size_mask(), for APs - * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__cpu_init(). */ if (cpu_has_pse) cr4_set_bits_and_update_boot(X86_CR4_PSE); -- cgit v0.10.2 From 3f6a0bce90289e0980b4250ccb03b765860247ee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 3 Apr 2015 13:22:42 +0200 Subject: x86/fpu: Rename init_thread_xstate() to fpstate_xstate_init_size() So init_thread_xstate() is a misnomer in that it's not really related to a specific thread - it determines, once during initial bootup, the size of the xstate context. Also improve the comments. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 5b46725..0110155 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -158,7 +158,7 @@ static void mxcsr_feature_mask_init(void) mxcsr_feature_mask &= mask; } -static void init_thread_xstate(void) +static void fpstate_xstate_init_size(void) { /* * Note that xstate_size might be overwriten later during @@ -216,11 +216,11 @@ void fpu__cpu_init(void) write_cr0(cr0); /* - * init_thread_xstate is only called once to avoid overriding - * xstate_size during boot time or during CPU hotplug. + * fpstate_xstate_init_size() is only called once, to avoid overriding + * 'xstate_size' during (secondary CPU) bootup or during CPU hotplug. 
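+ * (Only the boot CPU ever sees xstate_size == 0 in the check below, so secondary CPUs and hotplug skip the sizing step.)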
*/ if (xstate_size == 0) - init_thread_xstate(); + fpstate_xstate_init_size(); mxcsr_feature_mask_init(); xsave_init(); -- cgit v0.10.2 From c0c2803dee21bef08ef5aacdf96fe2f1759ccc62 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 09:52:56 +0200 Subject: x86/fpu: Move thread_info::fpu_counter into thread_info::fpu.counter This field is kept separate from the main FPU state structure for no good reason. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 02e0e97..f85d21b 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -384,7 +384,7 @@ static inline void drop_fpu(struct task_struct *tsk) * Forget coprocessor state.. */ preempt_disable(); - tsk->thread.fpu_counter = 0; + tsk->thread.fpu.counter = 0; if (__thread_has_fpu(tsk)) { /* Ignore delayed exceptions from user space */ @@ -441,7 +441,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * or if the past 5 consecutive context-switches used math. */ fpu.preload = tsk_used_math(new) && - (use_eager_fpu() || new->thread.fpu_counter > 5); + (use_eager_fpu() || new->thread.fpu.counter > 5); if (__thread_has_fpu(old)) { if (!__save_init_fpu(old)) @@ -454,16 +454,16 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { - new->thread.fpu_counter++; + new->thread.fpu.counter++; __thread_set_has_fpu(new); prefetch(new->thread.fpu.state); } else if (!use_eager_fpu()) stts(); } else { - old->thread.fpu_counter = 0; + old->thread.fpu.counter = 0; task_disable_lazy_fpu_restore(old); if (fpu.preload) { - new->thread.fpu_counter++; + new->thread.fpu.counter++; if (fpu_lazy_restore(new, cpu)) fpu.preload = 0; else diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2dc08c2..64d6b5d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -433,6 +433,15 @@ struct fpu { unsigned int last_cpu; unsigned int has_fpu; union thread_xstate *state; + /* + * This counter contains the number of consecutive context switches + * that the FPU is used. If this is over a threshold, the lazy fpu + * saving becomes unlazy to save the trap. This is an unsigned char + * so that after 256 times the counter wraps and the behavior turns + * lazy again; this to deal with bursty apps that only use FPU for + * a short time + */ + unsigned char counter; }; #ifdef CONFIG_X86_64 @@ -535,15 +544,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; - /* - * fpu_counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. 
This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time - */ - unsigned char fpu_counter; }; /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fd4aa56..c7793ad 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -87,7 +87,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - dst->thread.fpu_counter = 0; + dst->thread.fpu.counter = 0; dst->thread.fpu.has_fpu = 0; dst->thread.fpu.state = NULL; task_disable_lazy_fpu_restore(dst); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cf9c962..231aa57 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -863,7 +863,7 @@ void math_state_restore(void) fpu_reset_state(tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); } else { - tsk->thread.fpu_counter++; + tsk->thread.fpu.counter++; } kernel_fpu_enable(); } -- cgit v0.10.2 From 126009993faa7a750835e67f3ccb90cee124ffa7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 09:57:24 +0200 Subject: x86/fpu: Improve the comment for the fpu::counter field This was pretty hard to read, improve it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 64d6b5d..28df855 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -435,11 +435,11 @@ struct fpu { union thread_xstate *state; /* * This counter contains the number of consecutive context switches - * that the FPU is used. If this is over a threshold, the lazy fpu - * saving becomes unlazy to save the trap. This is an unsigned char - * so that after 256 times the counter wraps and the behavior turns - * lazy again; this to deal with bursty apps that only use FPU for - * a short time + * during which the FPU stays used. If this is over a threshold, the + * lazy fpu saving logic becomes unlazy, to save the trap overhead. + * This is an unsigned char so that after 256 iterations the counter + * wraps and the context switch behavior turns lazy again; this is to + * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; }; -- cgit v0.10.2 From 14b9675ae9c83c764c0c1fdf4b33f0e9156a4e4f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 09:57:24 +0200 Subject: x86/fpu: Move FPU data structures to asm/fpu_types.h Move the FPU details to asm/fpu_types.h, to further factor out the FPU code. ( As an added bonus, the 'struct orig_ist' definition now moves next to its other data types - the FPU definitions were slapped in the middle of them for some mysterious reason. ) No code changed. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h new file mode 100644 index 0000000..e996023 --- /dev/null +++ b/arch/x86/include/asm/fpu/types.h @@ -0,0 +1,132 @@ + +#define MXCSR_DEFAULT 0x1f80 + +struct i387_fsave_struct { + u32 cwd; /* FPU Control Word */ + u32 swd; /* FPU Status Word */ + u32 twd; /* FPU Tag Word */ + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Pointer Offset */ + u32 fos; /* FPU Operand Pointer Selector */ + + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + + /* Software status information [not touched by FSAVE ]: */ + u32 status; +}; + +struct i387_fxsave_struct { + u16 cwd; /* Control Word */ + u16 swd; /* Status Word */ + u16 twd; /* Tag Word */ + u16 fop; /* Last Instruction Opcode */ + union { + struct { + u64 rip; /* Instruction Pointer */ + u64 rdp; /* Data Pointer */ + }; + struct { + u32 fip; /* FPU IP Offset */ + u32 fcs; /* FPU IP Selector */ + u32 foo; /* FPU Operand Offset */ + u32 fos; /* FPU Operand Selector */ + }; + }; + u32 mxcsr; /* MXCSR Register State */ + u32 mxcsr_mask; /* MXCSR Mask */ + + /* 8*16 bytes for each FP-reg = 128 bytes: */ + u32 st_space[32]; + + /* 16*16 bytes for each XMM-reg = 256 bytes: */ + u32 xmm_space[64]; + + u32 padding[12]; + + union { + u32 padding1[12]; + u32 sw_reserved[12]; + }; + +} __attribute__((aligned(16))); + +struct i387_soft_struct { + u32 cwd; + u32 swd; + u32 twd; + u32 fip; + u32 fcs; + u32 foo; + u32 fos; + /* 8*10 bytes for each FP-reg = 80 bytes: */ + u32 st_space[20]; + u8 ftop; + u8 changed; + u8 lookahead; + u8 no_update; + u8 rm; + u8 alimit; + struct math_emu_info *info; + u32 entry_eip; +}; + +struct ymmh_struct { + /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ + u32 ymmh_space[64]; +}; + +/* We don't support LWP yet: */ +struct lwp_struct { + u8 reserved[128]; +}; + +struct bndreg { + u64 lower_bound; + u64 upper_bound; +} __packed; + +struct bndcsr { + u64 bndcfgu; + u64 bndstatus; +} __packed; + +struct xsave_hdr_struct { + u64 xstate_bv; + u64 xcomp_bv; + u64 reserved[6]; +} __attribute__((packed)); + +struct xsave_struct { + struct i387_fxsave_struct i387; + struct xsave_hdr_struct xsave_hdr; + struct ymmh_struct ymmh; + struct lwp_struct lwp; + struct bndreg bndreg[4]; + struct bndcsr bndcsr; + /* new processor state extensions will go here */ +} __attribute__ ((packed, aligned (64))); + +union thread_xstate { + struct i387_fsave_struct fsave; + struct i387_fxsave_struct fxsave; + struct i387_soft_struct soft; + struct xsave_struct xsave; +}; + +struct fpu { + unsigned int last_cpu; + unsigned int has_fpu; + union thread_xstate *state; + /* + * This counter contains the number of consecutive context switches + * during which the FPU stays used. If this is over a threshold, the + * lazy fpu saving logic becomes unlazy, to save the trap overhead. 
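+ * (See switch_fpu_prepare(): the next task's FPU state is preloaded once this counter exceeds 5.)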
+ * This is an unsigned char so that after 256 iterations the counter + * wraps and the context switch behavior turns lazy again; this is to + * deal with bursty apps that only use the FPU for a short time: + */ + unsigned char counter; +}; + diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 28df855..6b75c4b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -21,6 +21,7 @@ struct mm_struct; #include #include #include +#include #include #include @@ -313,137 +314,6 @@ struct orig_ist { unsigned long ist[7]; }; -#define MXCSR_DEFAULT 0x1f80 - -struct i387_fsave_struct { - u32 cwd; /* FPU Control Word */ - u32 swd; /* FPU Status Word */ - u32 twd; /* FPU Tag Word */ - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Pointer Offset */ - u32 fos; /* FPU Operand Pointer Selector */ - - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; - - /* Software status information [not touched by FSAVE ]: */ - u32 status; -}; - -struct i387_fxsave_struct { - u16 cwd; /* Control Word */ - u16 swd; /* Status Word */ - u16 twd; /* Tag Word */ - u16 fop; /* Last Instruction Opcode */ - union { - struct { - u64 rip; /* Instruction Pointer */ - u64 rdp; /* Data Pointer */ - }; - struct { - u32 fip; /* FPU IP Offset */ - u32 fcs; /* FPU IP Selector */ - u32 foo; /* FPU Operand Offset */ - u32 fos; /* FPU Operand Selector */ - }; - }; - u32 mxcsr; /* MXCSR Register State */ - u32 mxcsr_mask; /* MXCSR Mask */ - - /* 8*16 bytes for each FP-reg = 128 bytes: */ - u32 st_space[32]; - - /* 16*16 bytes for each XMM-reg = 256 bytes: */ - u32 xmm_space[64]; - - u32 padding[12]; - - union { - u32 padding1[12]; - u32 sw_reserved[12]; - }; - -} __attribute__((aligned(16))); - -struct i387_soft_struct { - u32 cwd; - u32 swd; - u32 twd; - u32 fip; - u32 fcs; - u32 foo; - u32 fos; - /* 8*10 bytes for each FP-reg = 80 bytes: */ - u32 st_space[20]; - u8 ftop; - u8 changed; - u8 lookahead; - u8 no_update; - u8 rm; - u8 alimit; - struct math_emu_info *info; - u32 entry_eip; -}; - -struct ymmh_struct { - /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ - u32 ymmh_space[64]; -}; - -/* We don't support LWP yet: */ -struct lwp_struct { - u8 reserved[128]; -}; - -struct bndreg { - u64 lower_bound; - u64 upper_bound; -} __packed; - -struct bndcsr { - u64 bndcfgu; - u64 bndstatus; -} __packed; - -struct xsave_hdr_struct { - u64 xstate_bv; - u64 xcomp_bv; - u64 reserved[6]; -} __attribute__((packed)); - -struct xsave_struct { - struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; - struct ymmh_struct ymmh; - struct lwp_struct lwp; - struct bndreg bndreg[4]; - struct bndcsr bndcsr; - /* new processor state extensions will go here */ -} __attribute__ ((packed, aligned (64))); - -union thread_xstate { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; - struct xsave_struct xsave; -}; - -struct fpu { - unsigned int last_cpu; - unsigned int has_fpu; - union thread_xstate *state; - /* - * This counter contains the number of consecutive context switches - * during which the FPU stays used. If this is over a threshold, the - * lazy fpu saving logic becomes unlazy, to save the trap overhead. 
- * This is an unsigned char so that after 256 iterations the counter - * wraps and the context switch behavior turns lazy again; this is to - * deal with bursty apps that only use the FPU for a short time: - */ - unsigned char counter; -}; - #ifdef CONFIG_X86_64 DECLARE_PER_CPU(struct orig_ist, orig_ist); -- cgit v0.10.2 From 47bc5106342b0c98ce5cc851173c201554d256d3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 10:17:06 +0200 Subject: x86/fpu: Clean up asm/fpu/types.h - add header guards - standardize vertical alignment - add comments about MPX No code changed. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index e996023..efb520d 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -1,3 +1,8 @@ +/* + * FPU data structures: + */ +#ifndef _ASM_X86_FPU_H +#define _ASM_X86_FPU_H #define MXCSR_DEFAULT 0x1f80 @@ -52,6 +57,9 @@ struct i387_fxsave_struct { } __attribute__((aligned(16))); +/* + * Software based FPU emulation state: + */ struct i387_soft_struct { u32 cwd; u32 swd; @@ -74,38 +82,39 @@ struct i387_soft_struct { struct ymmh_struct { /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ - u32 ymmh_space[64]; + u32 ymmh_space[64]; }; /* We don't support LWP yet: */ struct lwp_struct { - u8 reserved[128]; + u8 reserved[128]; }; +/* Intel MPX support: */ struct bndreg { - u64 lower_bound; - u64 upper_bound; + u64 lower_bound; + u64 upper_bound; } __packed; struct bndcsr { - u64 bndcfgu; - u64 bndstatus; + u64 bndcfgu; + u64 bndstatus; } __packed; struct xsave_hdr_struct { - u64 xstate_bv; - u64 xcomp_bv; - u64 reserved[6]; + u64 xstate_bv; + u64 xcomp_bv; + u64 reserved[6]; } __attribute__((packed)); struct xsave_struct { - struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; - struct ymmh_struct ymmh; - struct lwp_struct lwp; - struct bndreg bndreg[4]; - struct bndcsr bndcsr; - /* new processor state extensions will go here */ + struct i387_fxsave_struct i387; + struct xsave_hdr_struct xsave_hdr; + struct ymmh_struct ymmh; + struct lwp_struct lwp; + struct bndreg bndreg[4]; + struct bndcsr bndcsr; + /* New processor state extensions will go here. */ } __attribute__ ((packed, aligned (64))); union thread_xstate { @@ -116,9 +125,9 @@ union thread_xstate { }; struct fpu { - unsigned int last_cpu; - unsigned int has_fpu; - union thread_xstate *state; + unsigned int last_cpu; + unsigned int has_fpu; + union thread_xstate *state; /* * This counter contains the number of consecutive context switches * during which the FPU stays used. If this is over a threshold, the @@ -127,6 +136,7 @@ struct fpu { * wraps and the context switch behavior turns lazy again; this is to * deal with bursty apps that only use the FPU for a short time: */ - unsigned char counter; + unsigned char counter; }; +#endif /* _ASM_X86_FPU_H */ -- cgit v0.10.2 From ce4c4c26241f9ab08f14b028d40736f319ed2445 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 10:39:11 +0200 Subject: x86/fpu: Move i387.c and xsave.c to arch/x86/kernel/fpu/ Create a new subdirectory for the FPU support code in arch/x86/kernel/fpu/. Rename 'i387.c' to 'core.c' - as this really collects the core FPU support code, nothing i387 specific. We'll better organize this directory in later patches. 
Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9bcd0b5..febaf18 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -44,7 +44,7 @@ obj-y += pci-iommu_table.o obj-y += resource.o obj-y += process.o -obj-y += i387.o xsave.o +obj-y += fpu/ obj-y += ptrace.o obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile new file mode 100644 index 0000000..89fd66a --- /dev/null +++ b/arch/x86/kernel/fpu/Makefile @@ -0,0 +1,5 @@ +# +# Build rules for the FPU support code: +# + +obj-y += core.o xsave.o diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c new file mode 100644 index 0000000..0110155 --- /dev/null +++ b/arch/x86/kernel/fpu/core.c @@ -0,0 +1,718 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(bool, in_kernel_fpu); + +void kernel_fpu_disable(void) +{ + WARN_ON(this_cpu_read(in_kernel_fpu)); + this_cpu_write(in_kernel_fpu, true); +} + +void kernel_fpu_enable(void) +{ + this_cpu_write(in_kernel_fpu, false); +} + +/* + * Were we in an interrupt that interrupted kernel mode? + * + * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that + * pair does nothing at all: the thread must not have fpu (so + * that we don't try to save the FPU state), and TS must + * be set (so that the clts/stts pair does nothing that is + * visible in the interrupted kernel thread). + * + * Except for the eagerfpu case when we return true; in the likely case + * the thread has FPU but we are not going to set/clear TS. + */ +static inline bool interrupted_kernel_fpu_idle(void) +{ + if (this_cpu_read(in_kernel_fpu)) + return false; + + if (use_eager_fpu()) + return true; + + return !__thread_has_fpu(current) && + (read_cr0() & X86_CR0_TS); +} + +/* + * Were we in user mode (or vm86 mode) when we were + * interrupted? + * + * Doing kernel_fpu_begin/end() is ok if we are running + * in an interrupt context from user mode - we'll just + * save the FPU state as required. + */ +static inline bool interrupted_user_mode(void) +{ + struct pt_regs *regs = get_irq_regs(); + return regs && user_mode(regs); +} + +/* + * Can we use the FPU in kernel mode with the + * whole "kernel_fpu_begin/end()" sequence? + * + * It's always ok in process context (ie "not interrupt") + * but it is sometimes ok even from an irq. 
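+ * (From an irq it is ok if we interrupted user mode, or if the interrupted kernel code had no live FPU state of its own - see the two helpers above.)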
+ */ +bool irq_fpu_usable(void) +{ + return !in_interrupt() || + interrupted_user_mode() || + interrupted_kernel_fpu_idle(); +} +EXPORT_SYMBOL(irq_fpu_usable); + +void __kernel_fpu_begin(void) +{ + struct task_struct *me = current; + + this_cpu_write(in_kernel_fpu, true); + + if (__thread_has_fpu(me)) { + __save_init_fpu(me); + } else { + this_cpu_write(fpu_owner_task, NULL); + if (!use_eager_fpu()) + clts(); + } +} +EXPORT_SYMBOL(__kernel_fpu_begin); + +void __kernel_fpu_end(void) +{ + struct task_struct *me = current; + + if (__thread_has_fpu(me)) { + if (WARN_ON(restore_fpu_checking(me))) + fpu_reset_state(me); + } else if (!use_eager_fpu()) { + stts(); + } + + this_cpu_write(in_kernel_fpu, false); +} +EXPORT_SYMBOL(__kernel_fpu_end); + +/* + * Save the FPU state (initialize it if necessary): + * + * This only ever gets called for the current task. + */ +void fpu__save(struct task_struct *tsk) +{ + WARN_ON(tsk != current); + + preempt_disable(); + if (__thread_has_fpu(tsk)) { + if (use_eager_fpu()) { + __save_fpu(tsk); + } else { + __save_init_fpu(tsk); + __thread_fpu_end(tsk); + } + } + preempt_enable(); +} +EXPORT_SYMBOL_GPL(fpu__save); + +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int xstate_size; +EXPORT_SYMBOL_GPL(xstate_size); +static struct i387_fxsave_struct fx_scratch; + +static void mxcsr_feature_mask_init(void) +{ + unsigned long mask = 0; + + if (cpu_has_fxsr) { + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); + asm volatile("fxsave %0" : "+m" (fx_scratch)); + mask = fx_scratch.mxcsr_mask; + if (mask == 0) + mask = 0x0000ffbf; + } + mxcsr_feature_mask &= mask; +} + +static void fpstate_xstate_init_size(void) +{ + /* + * Note that xstate_size might be overwriten later during + * xsave_init(). + */ + + if (!cpu_has_fpu) { + /* + * Disable xsave as we do not support it if i387 + * emulation is enabled. + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + xstate_size = sizeof(struct i387_soft_struct); + return; + } + + if (cpu_has_fxsr) + xstate_size = sizeof(struct i387_fxsave_struct); + else + xstate_size = sizeof(struct i387_fsave_struct); +} + +/* + * Called on the boot CPU at bootup to set up the initial FPU state that + * is later cloned into all processes. + * + * Also called on secondary CPUs to set up the FPU state of their + * idle threads. + */ +void fpu__cpu_init(void) +{ + unsigned long cr0; + unsigned long cr4_mask = 0; + +#ifndef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) { + pr_emerg("No FPU found and no math emulation present\n"); + pr_emerg("Giving up\n"); + for (;;) + asm volatile("hlt"); + } +#endif + if (cpu_has_fxsr) + cr4_mask |= X86_CR4_OSFXSR; + if (cpu_has_xmm) + cr4_mask |= X86_CR4_OSXMMEXCPT; + if (cr4_mask) + cr4_set_bits(cr4_mask); + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ + if (!cpu_has_fpu) + cr0 |= X86_CR0_EM; + write_cr0(cr0); + + /* + * fpstate_xstate_init_size() is only called once, to avoid overriding + * 'xstate_size' during (secondary CPU) bootup or during CPU hotplug. 
+ */ + if (xstate_size == 0) + fpstate_xstate_init_size(); + + mxcsr_feature_mask_init(); + xsave_init(); + eager_fpu_init(); +} + +void fpstate_init(struct fpu *fpu) +{ + if (!cpu_has_fpu) { + finit_soft_fpu(&fpu->state->soft); + return; + } + + memset(fpu->state, 0, xstate_size); + + if (cpu_has_fxsr) { + fx_finit(&fpu->state->fxsave); + } else { + struct i387_fsave_struct *fp = &fpu->state->fsave; + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; + } +} +EXPORT_SYMBOL_GPL(fpstate_init); + +int fpstate_alloc(struct fpu *fpu) +{ + if (fpu->state) + return 0; + + fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); + if (!fpu->state) + return -ENOMEM; + + /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */ + WARN_ON((unsigned long)fpu->state & 15); + + return 0; +} +EXPORT_SYMBOL_GPL(fpstate_alloc); + +/* + * Allocate the backing store for the current task's FPU registers + * and initialize the registers themselves as well. + * + * Can fail. + */ +int fpstate_alloc_init(struct task_struct *curr) +{ + int ret; + + if (WARN_ON_ONCE(curr != current)) + return -EINVAL; + if (WARN_ON_ONCE(curr->flags & PF_USED_MATH)) + return -EINVAL; + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + ret = fpstate_alloc(&curr->thread.fpu); + if (ret) + return ret; + + fpstate_init(&curr->thread.fpu); + + /* Safe to do for the current task: */ + curr->flags |= PF_USED_MATH; + + return 0; +} +EXPORT_SYMBOL_GPL(fpstate_alloc_init); + +/* + * The _current_ task is using the FPU for the first time + * so initialize it and set the mxcsr to its default + * value at reset if we support XMM instructions and then + * remember the current task has used the FPU. + */ +static int fpu__unlazy_stopped(struct task_struct *child) +{ + int ret; + + if (WARN_ON_ONCE(child == current)) + return -EINVAL; + + if (child->flags & PF_USED_MATH) { + task_disable_lazy_fpu_restore(child); + return 0; + } + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + ret = fpstate_alloc(&child->thread.fpu); + if (ret) + return ret; + + fpstate_init(&child->thread.fpu); + + /* Safe to do for stopped child tasks: */ + child->flags |= PF_USED_MATH; + + return 0; +} + +/* + * The xstateregs_active() routine is the same as the fpregs_active() routine, + * as the "regset->n" for the xstate regset will be updated based on the feature + * capabilites supported by the xsave. + */ +int fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return tsk_used_math(target) ? regset->n : 0; +} + +int xfpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return (cpu_has_fxsr && tsk_used_math(target)) ? 
+}
+
+int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_fxsr)
+		return -ENODEV;
+
+	ret = fpu__unlazy_stopped(target);
+	if (ret)
+		return ret;
+
+	sanitize_i387_state(target);
+
+	return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+				   &target->thread.fpu.state->fxsave, 0, -1);
+}
+
+int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	int ret;
+
+	if (!cpu_has_fxsr)
+		return -ENODEV;
+
+	ret = fpu__unlazy_stopped(target);
+	if (ret)
+		return ret;
+
+	sanitize_i387_state(target);
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+				 &target->thread.fpu.state->fxsave, 0, -1);
+
+	/*
+	 * mxcsr reserved bits must be masked to zero for security reasons.
+	 */
+	target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;
+
+	/*
+	 * update the header bits in the xsave header, indicating the
+	 * presence of FP and SSE state.
+	 */
+	if (cpu_has_xsave)
+		target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
+
+	return ret;
+}
+
+int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		void *kbuf, void __user *ubuf)
+{
+	struct xsave_struct *xsave;
+	int ret;
+
+	if (!cpu_has_xsave)
+		return -ENODEV;
+
+	ret = fpu__unlazy_stopped(target);
+	if (ret)
+		return ret;
+
+	xsave = &target->thread.fpu.state->xsave;
+
+	/*
+	 * Copy the 48 bytes defined by the software first into the xstate
+	 * memory layout in the thread struct, so that we can copy the entire
+	 * xstateregs to the user using one user_regset_copyout().
+	 */
+	memcpy(&xsave->i387.sw_reserved,
+	       xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
+	/*
+	 * Copy the xstate memory layout.
+	 */
+	ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	return ret;
+}
+
+int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
+		unsigned int pos, unsigned int count,
+		const void *kbuf, const void __user *ubuf)
+{
+	struct xsave_struct *xsave;
+	int ret;
+
+	if (!cpu_has_xsave)
+		return -ENODEV;
+
+	ret = fpu__unlazy_stopped(target);
+	if (ret)
+		return ret;
+
+	xsave = &target->thread.fpu.state->xsave;
+
+	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+	/*
+	 * mxcsr reserved bits must be masked to zero for security reasons.
+	 */
+	xsave->i387.mxcsr &= mxcsr_feature_mask;
+	xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
+	/*
+	 * These bits must be zero.
+	 */
+	memset(&xsave->xsave_hdr.reserved, 0, 48);
+	return ret;
+}
+
+#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
+
+/*
+ * FPU tag word conversions.
+ */
+
+static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
+{
+	unsigned int tmp; /* to avoid 16 bit prefixes in the code */
+
+	/* Transform each pair of bits into 01 (valid) or 00 (empty) */
+	tmp = ~twd;
+	tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */
+	/*
+	 * ... and move the valid bits to the lower byte:
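+	 *
+	 * A worked example (illustrative): twd == 0xff3f, i.e. only ST(3)
+	 * in use, folds down step by step to 0x08 - one tag bit per
+	 * register in the low byte.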
+	 */
+	tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */
+	tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */
+	tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */
+
+	return tmp;
+}
+
+#define FPREG_ADDR(f, n)	((void *)&(f)->st_space + (n) * 16)
+#define FP_EXP_TAG_VALID	0
+#define FP_EXP_TAG_ZERO		1
+#define FP_EXP_TAG_SPECIAL	2
+#define FP_EXP_TAG_EMPTY	3
+
+static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave)
+{
+	struct _fpxreg *st;
+	u32 tos = (fxsave->swd >> 11) & 7;
+	u32 twd = (unsigned long) fxsave->twd;
+	u32 tag;
+	u32 ret = 0xffff0000u;
+	int i;
+
+	for (i = 0; i < 8; i++, twd >>= 1) {
+		if (twd & 0x1) {
+			st = FPREG_ADDR(fxsave, (i - tos) & 7);
+
+			switch (st->exponent & 0x7fff) {
+			case 0x7fff:
+				tag = FP_EXP_TAG_SPECIAL;
+				break;
+			case 0x0000:
+				if (!st->significand[0] &&
+				    !st->significand[1] &&
+				    !st->significand[2] &&
+				    !st->significand[3])
+					tag = FP_EXP_TAG_ZERO;
+				else
+					tag = FP_EXP_TAG_SPECIAL;
+				break;
+			default:
+				if (st->significand[3] & 0x8000)
+					tag = FP_EXP_TAG_VALID;
+				else
+					tag = FP_EXP_TAG_SPECIAL;
+				break;
+			}
+		} else {
+			tag = FP_EXP_TAG_EMPTY;
+		}
+		ret |= tag << (2 * i);
+	}
+	return ret;
+}
+
+/*
+ * FXSR floating point environment conversions.
+ */
+
+void
+convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
+{
+	struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave;
+	struct _fpreg *to = (struct _fpreg *) &env->st_space[0];
+	struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0];
+	int i;
+
+	env->cwd = fxsave->cwd | 0xffff0000u;
+	env->swd = fxsave->swd | 0xffff0000u;
+	env->twd = twd_fxsr_to_i387(fxsave);
+
+#ifdef CONFIG_X86_64
+	env->fip = fxsave->rip;
+	env->foo = fxsave->rdp;
+	/*
+	 * should actually be ds/cs at FPU exception time, but
+	 * that information is not available in 64-bit mode.
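+	 *
+	 * Use the current cs/ds values as an approximation instead
+	 * (see the assignments below).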
+ */ + env->fcs = task_pt_regs(tsk)->cs; + if (tsk == current) { + savesegment(ds, env->fos); + } else { + env->fos = tsk->thread.ds; + } + env->fos |= 0xffff0000; +#else + env->fip = fxsave->fip; + env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); + env->foo = fxsave->foo; + env->fos = fxsave->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(to[0])); +} + +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) + +{ + struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; + struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + fxsave->cwd = env->cwd; + fxsave->swd = env->swd; + fxsave->twd = twd_i387_to_fxsr(env->twd); + fxsave->fop = (u16) ((u32) env->fcs >> 16); +#ifdef CONFIG_X86_64 + fxsave->rip = env->fip; + fxsave->rdp = env->foo; + /* cs and ds ignored */ +#else + fxsave->fip = env->fip; + fxsave->fcs = (env->fcs & 0xffff); + fxsave->foo = env->foo; + fxsave->fos = env->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(from[0])); +} + +int fpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct user_i387_ia32_struct env; + int ret; + + ret = fpu__unlazy_stopped(target); + if (ret) + return ret; + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.state->fsave, 0, + -1); + + sanitize_i387_state(target); + + if (kbuf && pos == 0 && count == sizeof(env)) { + convert_from_fxsr(kbuf, target); + return 0; + } + + convert_from_fxsr(&env, target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); +} + +int fpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct user_i387_ia32_struct env; + int ret; + + ret = fpu__unlazy_stopped(target); + if (ret) + return ret; + + sanitize_i387_state(target); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &target->thread.fpu.state->fsave, 0, + -1); + + if (pos > 0 || count < sizeof(env)) + convert_from_fxsr(&env, target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); + if (!ret) + convert_to_fxsr(target, &env); + + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; + return ret; +} + +/* + * FPU state for core dumps. + * This is only used for a.out dumps now. + * It is declared generically using elf_fpregset_t (which is + * struct user_i387_struct) but is in fact only used for 32-bit + * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
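+ *
+ * Note that it simply reuses fpregs_get() above to fill in the
+ * (already 32-bit-sized) buffer.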
+ */
+int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu)
+{
+	struct task_struct *tsk = current;
+	int fpvalid;
+
+	fpvalid = !!used_math();
+	if (fpvalid)
+		fpvalid = !fpregs_get(tsk, NULL,
+				      0, sizeof(struct user_i387_ia32_struct),
+				      fpu, NULL);
+
+	return fpvalid;
+}
+EXPORT_SYMBOL(dump_fpu);
+
+#endif	/* CONFIG_X86_32 || CONFIG_IA32_EMULATION */
+
+static int __init no_387(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_FPU);
+	return 1;
+}
+
+__setup("no387", no_387);
+
+/*
+ * Set the X86_FEATURE_FPU CPU-capability bit based on
+ * trying to execute an actual sequence of FPU instructions:
+ */
+void fpu__detect(struct cpuinfo_x86 *c)
+{
+	unsigned long cr0;
+	u16 fsw, fcw;
+
+	fsw = fcw = 0xffff;
+
+	cr0 = read_cr0();
+	cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
+	write_cr0(cr0);
+
+	asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
+		     : "+m" (fsw), "+m" (fcw));
+
+	if (fsw == 0 && (fcw & 0x103f) == 0x003f)
+		set_cpu_cap(c, X86_FEATURE_FPU);
+	else
+		clear_cpu_cap(c, X86_FEATURE_FPU);
+
+	/* The final cr0 value is set in fpu__cpu_init() */
+}
diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c
new file mode 100644
index 0000000..163b5cc
--- /dev/null
+++ b/arch/x86/kernel/fpu/xsave.c
@@ -0,0 +1,724 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha 
+ */
+
+#define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/*
+ * Supported feature mask by the CPU and the kernel.
+ */
+u64 pcntxt_mask;
+
+/*
+ * Represents init state for the supported extended state.
+ */
+struct xsave_struct *init_xstate_buf;
+
+static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32;
+static unsigned int *xstate_offsets, *xstate_sizes;
+static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8];
+static unsigned int xstate_features;
+
+/*
+ * If a processor implementation discerns that a processor state component is
+ * in its initialized state, it may set the corresponding bit in the
+ * xsave_hdr.xstate_bv to '0', without modifying the corresponding memory
+ * layout in the case of xsaveopt. While presenting the xstate information to
+ * the user, we always ensure that the memory layout of a feature will be in
+ * the init state if the corresponding header bit is zero. This is to ensure
+ * that the user doesn't see some stale state in the memory layout during
+ * signal handling, debugging etc.
+ */
+void __sanitize_i387_state(struct task_struct *tsk)
+{
+	struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave;
+	int feature_bit = 0x2;
+	u64 xstate_bv;
+
+	if (!fx)
+		return;
+
+	xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv;
+
+	/*
+	 * None of the feature bits are in init state. So nothing else
+	 * to do for us, as the memory layout is up to date.
+	 */
+	if ((xstate_bv & pcntxt_mask) == pcntxt_mask)
+		return;
+
+	/*
+	 * FP is in init state
+	 */
+	if (!(xstate_bv & XSTATE_FP)) {
+		fx->cwd = 0x37f;
+		fx->swd = 0;
+		fx->twd = 0;
+		fx->fop = 0;
+		fx->rip = 0;
+		fx->rdp = 0;
+		memset(&fx->st_space[0], 0, 128);
+	}
+
+	/*
+	 * SSE is in init state
+	 */
+	if (!(xstate_bv & XSTATE_SSE))
+		memset(&fx->xmm_space[0], 0, 256);
+
+	xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2;
+
+	/*
+	 * Update all the other memory layouts for which the corresponding
+	 * header bit is in the init state.
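+	 *
+	 * (xstate_bv was inverted and shifted right by two above, so bit 0
+	 * now corresponds to feature bit 2 - the first non-legacy xstate.)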
+	 */
+	while (xstate_bv) {
+		if (xstate_bv & 0x1) {
+			int offset = xstate_offsets[feature_bit];
+			int size = xstate_sizes[feature_bit];
+
+			memcpy(((void *) fx) + offset,
+			       ((void *) init_xstate_buf) + offset,
+			       size);
+		}
+
+		xstate_bv >>= 1;
+		feature_bit++;
+	}
+}
+
+/*
+ * Check for the presence of extended state information in the
+ * user fpstate pointer in the sigcontext.
+ */
+static inline int check_for_xstate(struct i387_fxsave_struct __user *buf,
+				   void __user *fpstate,
+				   struct _fpx_sw_bytes *fx_sw)
+{
+	int min_xstate_size = sizeof(struct i387_fxsave_struct) +
+			      sizeof(struct xsave_hdr_struct);
+	unsigned int magic2;
+
+	if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw)))
+		return -1;
+
+	/* Check for the first magic field and other error scenarios. */
+	if (fx_sw->magic1 != FP_XSTATE_MAGIC1 ||
+	    fx_sw->xstate_size < min_xstate_size ||
+	    fx_sw->xstate_size > xstate_size ||
+	    fx_sw->xstate_size > fx_sw->extended_size)
+		return -1;
+
+	/*
+	 * Check for the presence of second magic word at the end of memory
+	 * layout. This detects the case where the user just copied the legacy
+	 * fpstate layout without copying the extended state information
+	 * in the memory layout.
+	 */
+	if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size))
+	    || magic2 != FP_XSTATE_MAGIC2)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Signal frame handlers.
+ */
+static inline int save_fsave_header(struct task_struct *tsk, void __user *buf)
+{
+	if (use_fxsr()) {
+		struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+		struct user_i387_ia32_struct env;
+		struct _fpstate_ia32 __user *fp = buf;
+
+		convert_from_fxsr(&env, tsk);
+
+		if (__copy_to_user(buf, &env, sizeof(env)) ||
+		    __put_user(xsave->i387.swd, &fp->status) ||
+		    __put_user(X86_FXSR_MAGIC, &fp->magic))
+			return -1;
+	} else {
+		struct i387_fsave_struct __user *fp = buf;
+		u32 swd;
+		if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
+			return -1;
+	}
+
+	return 0;
+}
+
+static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
+{
+	struct xsave_struct __user *x = buf;
+	struct _fpx_sw_bytes *sw_bytes;
+	u32 xstate_bv;
+	int err;
+
+	/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
+	sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved;
+	err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes));
+
+	if (!use_xsave())
+		return err;
+
+	err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size));
+
+	/*
+	 * Read the xstate_bv which we copied (directly from the cpu or
+	 * from the state in task struct) to the user buffers.
+	 */
+	err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
+
+	/*
+	 * For legacy compatibility, we always set the FP/SSE bits in the bit
+	 * vector while saving the state to the user context. This enables us
+	 * to capture any changes (during sigreturn) to the FP/SSE bits by
+	 * legacy applications which don't touch xstate_bv in the xsave header.
+	 *
+	 * xsave aware apps can change the xstate_bv in the xsave
+	 * header as well as change any contents in the memory layout.
+	 * xrstor, as part of sigreturn, will capture all the changes.
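+	 *
+	 * (So the bit vector written back below always has at least the
+	 * XSTATE_FPSSE bits set.)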
+	 */
+	xstate_bv |= XSTATE_FPSSE;
+
+	err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
+
+	return err;
+}
+
+static inline int save_user_xstate(struct xsave_struct __user *buf)
+{
+	int err;
+
+	if (use_xsave())
+		err = xsave_user(buf);
+	else if (use_fxsr())
+		err = fxsave_user((struct i387_fxsave_struct __user *) buf);
+	else
+		err = fsave_user((struct i387_fsave_struct __user *) buf);
+
+	if (unlikely(err) && __clear_user(buf, xstate_size))
+		err = -EFAULT;
+	return err;
+}
+
+/*
+ * Save the FPU and extended register state to the user signal frame.
+ *
+ * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
+ * state is copied.
+ * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
+ *
+ *	buf == buf_fx for 64-bit frames and 32-bit fsave frame.
+ *	buf != buf_fx for 32-bit frames with fxstate.
+ *
+ * If the FPU and extended register state is live, save the state directly
+ * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
+ * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ *
+ * If this is a 32-bit frame with fxstate, put an fsave header before
+ * the aligned state at 'buf_fx'.
+ *
+ * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
+ * indicating the absence/presence of the extended state to the user.
+ */
+int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
+{
+	struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
+	struct task_struct *tsk = current;
+	int ia32_fxstate = (buf != buf_fx);
+
+	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+			 config_enabled(CONFIG_IA32_EMULATION));
+
+	if (!access_ok(VERIFY_WRITE, buf, size))
+		return -EACCES;
+
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return fpregs_soft_get(current, NULL, 0,
+			sizeof(struct user_i387_ia32_struct), NULL,
+			(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
+
+	if (user_has_fpu()) {
+		/* Save the live register state to the user directly. */
+		if (save_user_xstate(buf_fx))
+			return -1;
+		/* Update the thread's fxstate to save the fsave header. */
+		if (ia32_fxstate)
+			fpu_fxsave(&tsk->thread.fpu);
+	} else {
+		sanitize_i387_state(tsk);
+		if (__copy_to_user(buf_fx, xsave, xstate_size))
+			return -1;
+	}
+
+	/* Save the fsave header for the 32-bit frames. */
+	if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
+		return -1;
+
+	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
+		return -1;
+
+	return 0;
+}
+
+static inline void
+sanitize_restored_xstate(struct task_struct *tsk,
+			 struct user_i387_ia32_struct *ia32_env,
+			 u64 xstate_bv, int fx_only)
+{
+	struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+	struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
+
+	if (use_xsave()) {
+		/* These bits must be zero. */
+		memset(xsave_hdr->reserved, 0, 48);
+
+		/*
+		 * Init the state that is not present in the memory
+		 * layout and not enabled by the OS.
+		 */
+		if (fx_only)
+			xsave_hdr->xstate_bv = XSTATE_FPSSE;
+		else
+			xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
+	}
+
+	if (use_fxsr()) {
+		/*
+		 * mxcsr reserved bits must be masked to zero for security
+		 * reasons.
+		 */
+		xsave->i387.mxcsr &= mxcsr_feature_mask;
+
+		convert_to_fxsr(tsk, ia32_env);
+	}
+}
+
+/*
+ * Restore the extended state if present. Otherwise, restore the FP/SSE state.
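+ *
+ * Note that xrstor needs a 64-byte aligned buffer, so an unaligned 'buf'
+ * (or an fx-only frame) is restored via the legacy fxrstor path below,
+ * after initializing all other extended states from init_xstate_buf.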
+ */
+static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only)
+{
+	if (use_xsave()) {
+		if ((unsigned long)buf % 64 || fx_only) {
+			u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE;
+			xrstor_state(init_xstate_buf, init_bv);
+			return fxrstor_user(buf);
+		} else {
+			u64 init_bv = pcntxt_mask & ~xbv;
+			if (unlikely(init_bv))
+				xrstor_state(init_xstate_buf, init_bv);
+			return xrestore_user(buf, xbv);
+		}
+	} else if (use_fxsr()) {
+		return fxrstor_user(buf);
+	} else
+		return frstor_user(buf);
}
+
+int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
+{
+	int ia32_fxstate = (buf != buf_fx);
+	struct task_struct *tsk = current;
+	int state_size = xstate_size;
+	u64 xstate_bv = 0;
+	int fx_only = 0;
+
+	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
+			 config_enabled(CONFIG_IA32_EMULATION));
+
+	if (!buf) {
+		fpu_reset_state(tsk);
+		return 0;
+	}
+
+	if (!access_ok(VERIFY_READ, buf, size))
+		return -EACCES;
+
+	if (!used_math() && fpstate_alloc_init(tsk))
+		return -1;
+
+	if (!static_cpu_has(X86_FEATURE_FPU))
+		return fpregs_soft_set(current, NULL,
+				       0, sizeof(struct user_i387_ia32_struct),
+				       NULL, buf) != 0;
+
+	if (use_xsave()) {
+		struct _fpx_sw_bytes fx_sw_user;
+		if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) {
+			/*
+			 * Couldn't find the extended state information in the
+			 * memory layout. Restore just the FP/SSE and init all
+			 * the other extended state.
+			 */
+			state_size = sizeof(struct i387_fxsave_struct);
+			fx_only = 1;
+		} else {
+			state_size = fx_sw_user.xstate_size;
+			xstate_bv = fx_sw_user.xstate_bv;
+		}
+	}
+
+	if (ia32_fxstate) {
+		/*
+		 * For 32-bit frames with fxstate, copy the user state to the
+		 * thread's fpu state, reconstruct fxstate from the fsave
+		 * header and sanitize the copied state.
+		 */
+		struct fpu *fpu = &tsk->thread.fpu;
+		struct user_i387_ia32_struct env;
+		int err = 0;
+
+		/*
+		 * Drop the current fpu, which clears used_math(). This ensures
+		 * that a context switch during the copy of the new state cannot
+		 * save or restore the half-copied intermediate state. We are
+		 * ready to save/restore the state again only once
+		 * set_used_math() is set below.
+		 */
+		drop_fpu(tsk);
+
+		if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
+		    __copy_from_user(&env, buf, sizeof(env))) {
+			fpstate_init(fpu);
+			err = -1;
+		} else {
+			sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
+		}
+
+		set_used_math();
+		if (use_eager_fpu()) {
+			preempt_disable();
+			math_state_restore();
+			preempt_enable();
+		}
+
+		return err;
+	} else {
+		/*
+		 * For 64-bit frames and 32-bit fsave frames, restore the user
+		 * state to the registers directly (with exceptions handled).
+		 */
+		user_fpu_begin();
+		if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) {
+			fpu_reset_state(tsk);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Prepare the SW reserved portion of the fxsave memory layout, indicating
+ * the presence of the extended state information in the memory layout
+ * pointed by the fpstate pointer in the sigcontext.
+ * This will be saved whenever the FP and extended state context is
+ * saved on the user stack during the signal handler delivery to the user.
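+ *
+ * The resulting frame size is (illustrative, taken from the code below):
+ *
+ *	extended_size = xstate_size + FP_XSTATE_MAGIC2_SIZE
+ *			(+ the fsave header size for 32-bit frames)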
+ */
+static void prepare_fx_sw_frame(void)
+{
+	int fsave_header_size = sizeof(struct i387_fsave_struct);
+	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
+
+	if (config_enabled(CONFIG_X86_32))
+		size += fsave_header_size;
+
+	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
+	fx_sw_reserved.extended_size = size;
+	fx_sw_reserved.xstate_bv = pcntxt_mask;
+	fx_sw_reserved.xstate_size = xstate_size;
+
+	if (config_enabled(CONFIG_IA32_EMULATION)) {
+		fx_sw_reserved_ia32 = fx_sw_reserved;
+		fx_sw_reserved_ia32.extended_size += fsave_header_size;
+	}
+}
+
+/*
+ * Enable the extended processor state save/restore feature
+ */
+static inline void xstate_enable(void)
+{
+	cr4_set_bits(X86_CR4_OSXSAVE);
+	xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
+}
+
+/*
+ * Record the offsets and sizes of different state managed by the xsave
+ * memory layout.
+ */
+static void __init setup_xstate_features(void)
+{
+	int eax, ebx, ecx, edx, leaf = 0x2;
+
+	xstate_features = fls64(pcntxt_mask);
+	xstate_offsets = alloc_bootmem(xstate_features * sizeof(int));
+	xstate_sizes = alloc_bootmem(xstate_features * sizeof(int));
+
+	do {
+		cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);
+
+		if (eax == 0)
+			break;
+
+		xstate_offsets[leaf] = ebx;
+		xstate_sizes[leaf] = eax;
+
+		leaf++;
+	} while (1);
+}
+
+/*
+ * This function sets up the offsets and sizes of all extended states in
+ * the xsave area, supporting both the standard format and the compacted
+ * format of the xsave area.
+ */
+void setup_xstate_comp(void)
+{
+	unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8];
+	int i;
+
+	/*
+	 * The FP xstates and SSE xstates are legacy states. They are always
+	 * in the fixed offsets in the xsave area in either compacted form
+	 * or standard form.
+	 */
+	xstate_comp_offsets[0] = 0;
+	xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space);
+
+	if (!cpu_has_xsaves) {
+		for (i = 2; i < xstate_features; i++) {
+			if (test_bit(i, (unsigned long *)&pcntxt_mask)) {
+				xstate_comp_offsets[i] = xstate_offsets[i];
+				xstate_comp_sizes[i] = xstate_sizes[i];
+			}
+		}
+		return;
+	}
+
+	xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE;
+
+	for (i = 2; i < xstate_features; i++) {
+		if (test_bit(i, (unsigned long *)&pcntxt_mask))
+			xstate_comp_sizes[i] = xstate_sizes[i];
+		else
+			xstate_comp_sizes[i] = 0;
+
+		if (i > 2)
+			xstate_comp_offsets[i] = xstate_comp_offsets[i-1]
+					+ xstate_comp_sizes[i-1];
+
+	}
+}
+
+/*
+ * Set up the xstate image representing the init state
+ */
+static void __init setup_init_fpu_buf(void)
+{
+	/*
+	 * Setup init_xstate_buf to represent the init state of
+	 * all the features managed by the xsave
+	 */
+	init_xstate_buf = alloc_bootmem_align(xstate_size,
+					      __alignof__(struct xsave_struct));
+	fx_finit(&init_xstate_buf->i387);
+
+	if (!cpu_has_xsave)
+		return;
+
+	setup_xstate_features();
+
+	if (cpu_has_xsaves) {
+		init_xstate_buf->xsave_hdr.xcomp_bv =
+						(u64)1 << 63 | pcntxt_mask;
+		init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask;
+	}
+
+	/*
+	 * Init all the features state with header_bv being 0x0
+	 */
+	xrstor_state_booting(init_xstate_buf, -1);
+	/*
+	 * Dump the init state again. This is to identify the init state
+	 * of any feature which is not represented by all zeros.
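+	 *
+	 * (The x87 control word, for example, inits to 0x37f rather than
+	 * all zeros - see fx_finit() above.)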
+ */ + xsave_state_booting(init_xstate_buf, -1); +} + +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + + +/* + * Calculate total size of enabled xstates in XCR0/pcntxt_mask. + */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + +/* + * Enable and initialize the xsave feature. + */ +static void __init xstate_enable_boot_cpu(void) +{ + unsigned int eax, ebx, ecx, edx; + + if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { + WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); + return; + } + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + pcntxt_mask = eax + ((u64)edx << 32); + + if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + pr_err("FP/SSE not shown under xsave features 0x%llx\n", + pcntxt_mask); + BUG(); + } + + /* + * Support only the state known to OS. + */ + pcntxt_mask = pcntxt_mask & XCNTXT_MASK; + + xstate_enable(); + + /* + * Recompute the context size for enabled features + */ + init_xstate_size(); + + update_regset_xstate_info(xstate_size, pcntxt_mask); + prepare_fx_sw_frame(); + setup_init_fpu_buf(); + + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; + + if (pcntxt_mask & XSTATE_EAGER) { + if (eagerfpu == DISABLE) { + pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", + pcntxt_mask & XSTATE_EAGER); + pcntxt_mask &= ~XSTATE_EAGER; + } else { + eagerfpu = ENABLE; + } + } + + pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", + pcntxt_mask, xstate_size, + cpu_has_xsaves ? "compacted form" : "standard form"); +} + +/* + * For the very first instance, this calls xstate_enable_boot_cpu(); + * for all subsequent instances, this calls xstate_enable(). + * + * This is somewhat obfuscated due to the lack of powerful enough + * overrides for the section checks. + */ +void xsave_init(void) +{ + static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; + void (*this_func)(void); + + if (!cpu_has_xsave) + return; + + this_func = next_func; + next_func = xstate_enable; + this_func(); +} + +/* + * setup_init_fpu_buf() is __init and it is OK to call it here because + * init_xstate_buf will be unset only once during boot. + */ +void __init_refok eager_fpu_init(void) +{ + WARN_ON(used_math()); + current_thread_info()->status = 0; + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + if (!cpu_has_eager_fpu) { + stts(); + return; + } + + if (!init_xstate_buf) + setup_init_fpu_buf(); +} + +/* + * Given the xsave area and a state inside, this function returns the + * address of the state. + * + * This is the API that is called to get xstate address in either + * standard format or compacted format of xsave area. + * + * Inputs: + * xsave: base address of the xsave area; + * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, + * etc.) + * Output: + * address of the state in the xsave area. 
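+ *	(NULL if the requested state is not enabled in pcntxt_mask;
+ *	an illustrative caller might do:
+ *
+ *		void *ymm = get_xsave_addr(xsave, XSTATE_YMM);
+ *
+ *	and must handle a NULL result.)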
+ */ +void *get_xsave_addr(struct xsave_struct *xsave, int xstate) +{ + int feature = fls64(xstate) - 1; + if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) + return NULL; + + return (void *)xsave + xstate_comp_offsets[feature]; +} +EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c deleted file mode 100644 index 0110155..0000000 --- a/arch/x86/kernel/i387.c +++ /dev/null @@ -1,718 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static DEFINE_PER_CPU(bool, in_kernel_fpu); - -void kernel_fpu_disable(void) -{ - WARN_ON(this_cpu_read(in_kernel_fpu)); - this_cpu_write(in_kernel_fpu, true); -} - -void kernel_fpu_enable(void) -{ - this_cpu_write(in_kernel_fpu, false); -} - -/* - * Were we in an interrupt that interrupted kernel mode? - * - * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that - * pair does nothing at all: the thread must not have fpu (so - * that we don't try to save the FPU state), and TS must - * be set (so that the clts/stts pair does nothing that is - * visible in the interrupted kernel thread). - * - * Except for the eagerfpu case when we return true; in the likely case - * the thread has FPU but we are not going to set/clear TS. - */ -static inline bool interrupted_kernel_fpu_idle(void) -{ - if (this_cpu_read(in_kernel_fpu)) - return false; - - if (use_eager_fpu()) - return true; - - return !__thread_has_fpu(current) && - (read_cr0() & X86_CR0_TS); -} - -/* - * Were we in user mode (or vm86 mode) when we were - * interrupted? - * - * Doing kernel_fpu_begin/end() is ok if we are running - * in an interrupt context from user mode - we'll just - * save the FPU state as required. - */ -static inline bool interrupted_user_mode(void) -{ - struct pt_regs *regs = get_irq_regs(); - return regs && user_mode(regs); -} - -/* - * Can we use the FPU in kernel mode with the - * whole "kernel_fpu_begin/end()" sequence? - * - * It's always ok in process context (ie "not interrupt") - * but it is sometimes ok even from an irq. - */ -bool irq_fpu_usable(void) -{ - return !in_interrupt() || - interrupted_user_mode() || - interrupted_kernel_fpu_idle(); -} -EXPORT_SYMBOL(irq_fpu_usable); - -void __kernel_fpu_begin(void) -{ - struct task_struct *me = current; - - this_cpu_write(in_kernel_fpu, true); - - if (__thread_has_fpu(me)) { - __save_init_fpu(me); - } else { - this_cpu_write(fpu_owner_task, NULL); - if (!use_eager_fpu()) - clts(); - } -} -EXPORT_SYMBOL(__kernel_fpu_begin); - -void __kernel_fpu_end(void) -{ - struct task_struct *me = current; - - if (__thread_has_fpu(me)) { - if (WARN_ON(restore_fpu_checking(me))) - fpu_reset_state(me); - } else if (!use_eager_fpu()) { - stts(); - } - - this_cpu_write(in_kernel_fpu, false); -} -EXPORT_SYMBOL(__kernel_fpu_end); - -/* - * Save the FPU state (initialize it if necessary): - * - * This only ever gets called for the current task. 
- */ -void fpu__save(struct task_struct *tsk) -{ - WARN_ON(tsk != current); - - preempt_disable(); - if (__thread_has_fpu(tsk)) { - if (use_eager_fpu()) { - __save_fpu(tsk); - } else { - __save_init_fpu(tsk); - __thread_fpu_end(tsk); - } - } - preempt_enable(); -} -EXPORT_SYMBOL_GPL(fpu__save); - -unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); -static struct i387_fxsave_struct fx_scratch; - -static void mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - - if (cpu_has_fxsr) { - memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : "+m" (fx_scratch)); - mask = fx_scratch.mxcsr_mask; - if (mask == 0) - mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; -} - -static void fpstate_xstate_init_size(void) -{ - /* - * Note that xstate_size might be overwriten later during - * xsave_init(). - */ - - if (!cpu_has_fpu) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct i387_soft_struct); - return; - } - - if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); - else - xstate_size = sizeof(struct i387_fsave_struct); -} - -/* - * Called on the boot CPU at bootup to set up the initial FPU state that - * is later cloned into all processes. - * - * Also called on secondary CPUs to set up the FPU state of their - * idle threads. - */ -void fpu__cpu_init(void) -{ - unsigned long cr0; - unsigned long cr4_mask = 0; - -#ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { - pr_emerg("No FPU found and no math emulation present\n"); - pr_emerg("Giving up\n"); - for (;;) - asm volatile("hlt"); - } -#endif - if (cpu_has_fxsr) - cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) - cr4_mask |= X86_CR4_OSXMMEXCPT; - if (cr4_mask) - cr4_set_bits(cr4_mask); - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) - cr0 |= X86_CR0_EM; - write_cr0(cr0); - - /* - * fpstate_xstate_init_size() is only called once, to avoid overriding - * 'xstate_size' during (secondary CPU) bootup or during CPU hotplug. - */ - if (xstate_size == 0) - fpstate_xstate_init_size(); - - mxcsr_feature_mask_init(); - xsave_init(); - eager_fpu_init(); -} - -void fpstate_init(struct fpu *fpu) -{ - if (!cpu_has_fpu) { - finit_soft_fpu(&fpu->state->soft); - return; - } - - memset(fpu->state, 0, xstate_size); - - if (cpu_has_fxsr) { - fx_finit(&fpu->state->fxsave); - } else { - struct i387_fsave_struct *fp = &fpu->state->fsave; - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; - } -} -EXPORT_SYMBOL_GPL(fpstate_init); - -int fpstate_alloc(struct fpu *fpu) -{ - if (fpu->state) - return 0; - - fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!fpu->state) - return -ENOMEM; - - /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */ - WARN_ON((unsigned long)fpu->state & 15); - - return 0; -} -EXPORT_SYMBOL_GPL(fpstate_alloc); - -/* - * Allocate the backing store for the current task's FPU registers - * and initialize the registers themselves as well. - * - * Can fail. - */ -int fpstate_alloc_init(struct task_struct *curr) -{ - int ret; - - if (WARN_ON_ONCE(curr != current)) - return -EINVAL; - if (WARN_ON_ONCE(curr->flags & PF_USED_MATH)) - return -EINVAL; - - /* - * Memory allocation at the first usage of the FPU and other state. 
- */ - ret = fpstate_alloc(&curr->thread.fpu); - if (ret) - return ret; - - fpstate_init(&curr->thread.fpu); - - /* Safe to do for the current task: */ - curr->flags |= PF_USED_MATH; - - return 0; -} -EXPORT_SYMBOL_GPL(fpstate_alloc_init); - -/* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remember the current task has used the FPU. - */ -static int fpu__unlazy_stopped(struct task_struct *child) -{ - int ret; - - if (WARN_ON_ONCE(child == current)) - return -EINVAL; - - if (child->flags & PF_USED_MATH) { - task_disable_lazy_fpu_restore(child); - return 0; - } - - /* - * Memory allocation at the first usage of the FPU and other state. - */ - ret = fpstate_alloc(&child->thread.fpu); - if (ret) - return ret; - - fpstate_init(&child->thread.fpu); - - /* Safe to do for stopped child tasks: */ - child->flags |= PF_USED_MATH; - - return 0; -} - -/* - * The xstateregs_active() routine is the same as the fpregs_active() routine, - * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. - */ -int fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return tsk_used_math(target) ? regset->n : 0; -} - -int xfpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; -} - -int xfpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = fpu__unlazy_stopped(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); -} - -int xfpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - ret = fpu__unlazy_stopped(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); - - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; - - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; - - return ret; -} - -int xstateregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = fpu__unlazy_stopped(target); - if (ret) - return ret; - - xsave = &target->thread.fpu.state->xsave; - - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. 
- */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - return ret; -} - -int xstateregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - ret = fpu__unlazy_stopped(target); - if (ret) - return ret; - - xsave = &target->thread.fpu.state->xsave; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->xsave_hdr.xstate_bv &= pcntxt_mask; - /* - * These bits must be zero. - */ - memset(&xsave->xsave_hdr.reserved, 0, 48); - return ret; -} - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - - return tmp; -} - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) -#define FP_EXP_TAG_VALID 0 -#define FP_EXP_TAG_ZERO 1 -#define FP_EXP_TAG_SPECIAL 2 -#define FP_EXP_TAG_EMPTY 3 - -static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) -{ - struct _fpxreg *st; - u32 tos = (fxsave->swd >> 11) & 7; - u32 twd = (unsigned long) fxsave->twd; - u32 tag; - u32 ret = 0xffff0000u; - int i; - - for (i = 0; i < 8; i++, twd >>= 1) { - if (twd & 0x1) { - st = FPREG_ADDR(fxsave, (i - tos) & 7); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = FP_EXP_TAG_SPECIAL; - break; - case 0x0000: - if (!st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3]) - tag = FP_EXP_TAG_ZERO; - else - tag = FP_EXP_TAG_SPECIAL; - break; - default: - if (st->significand[3] & 0x8000) - tag = FP_EXP_TAG_VALID; - else - tag = FP_EXP_TAG_SPECIAL; - break; - } - } else { - tag = FP_EXP_TAG_EMPTY; - } - ret |= tag << (2 * i); - } - return ret; -} - -/* - * FXSR floating point environment conversions. - */ - -void -convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - env->cwd = fxsave->cwd | 0xffff0000u; - env->swd = fxsave->swd | 0xffff0000u; - env->twd = twd_fxsr_to_i387(fxsave); - -#ifdef CONFIG_X86_64 - env->fip = fxsave->rip; - env->foo = fxsave->rdp; - /* - * should be actually ds/cs at fpu exception time, but - * that information is not available in 64bit mode. 
- */ - env->fcs = task_pt_regs(tsk)->cs; - if (tsk == current) { - savesegment(ds, env->fos); - } else { - env->fos = tsk->thread.ds; - } - env->fos |= 0xffff0000; -#else - env->fip = fxsave->fip; - env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); - env->foo = fxsave->foo; - env->fos = fxsave->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(to[0])); -} - -void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) - -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; - struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - fxsave->cwd = env->cwd; - fxsave->swd = env->swd; - fxsave->twd = twd_i387_to_fxsr(env->twd); - fxsave->fop = (u16) ((u32) env->fcs >> 16); -#ifdef CONFIG_X86_64 - fxsave->rip = env->fip; - fxsave->rdp = env->foo; - /* cs and ds ignored */ -#else - fxsave->fip = env->fip; - fxsave->fcs = (env->fcs & 0xffff); - fxsave->foo = env->foo; - fxsave->fos = env->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(from[0])); -} - -int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = fpu__unlazy_stopped(target); - if (ret) - return ret; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - - sanitize_i387_state(target); - - if (kbuf && pos == 0 && count == sizeof(env)) { - convert_from_fxsr(kbuf, target); - return 0; - } - - convert_from_fxsr(&env, target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); -} - -int fpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct user_i387_ia32_struct env; - int ret; - - ret = fpu__unlazy_stopped(target); - if (ret) - return ret; - - sanitize_i387_state(target); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, - -1); - - if (pos > 0 || count < sizeof(env)) - convert_from_fxsr(&env, target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); - if (!ret) - convert_to_fxsr(target, &env); - - /* - * update the header bit in the xsave header, indicating the - * presence of FP. - */ - if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; - return ret; -} - -/* - * FPU state for core dumps. - * This is only used for a.out dumps now. - * It is declared generically using elf_fpregset_t (which is - * struct user_i387_struct) but is in fact only used for 32-bit - * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
- */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) -{ - struct task_struct *tsk = current; - int fpvalid; - - fpvalid = !!used_math(); - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - fpu, NULL); - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ - -static int __init no_387(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FPU); - return 1; -} - -__setup("no387", no_387); - -/* - * Set the X86_FEATURE_FPU CPU-capability bit based on - * trying to execute an actual sequence of FPU instructions: - */ -void fpu__detect(struct cpuinfo_x86 *c) -{ - unsigned long cr0; - u16 fsw, fcw; - - fsw = fcw = 0xffff; - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS | X86_CR0_EM); - write_cr0(cr0); - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); - - /* The final cr0 value is set in fpu_init() */ -} diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c deleted file mode 100644 index 163b5cc..0000000 --- a/arch/x86/kernel/xsave.c +++ /dev/null @@ -1,724 +0,0 @@ -/* - * xsave/xrstor support. - * - * Author: Suresh Siddha - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * Supported feature mask by the CPU and the kernel. - */ -u64 pcntxt_mask; - -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct *init_xstate_buf; - -static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes; -static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; -static unsigned int xstate_features; - -/* - * If a processor implementation discern that a processor state component is - * in its initialized state it may modify the corresponding bit in the - * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory - * layout in the case of xsaveopt. While presenting the xstate information to - * the user, we always ensure that the memory layout of a feature will be in - * the init state if the corresponding header bit is zero. This is to ensure - * that the user doesn't see some stale state in the memory layout during - * signal handling, debugging etc. - */ -void __sanitize_i387_state(struct task_struct *tsk) -{ - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - int feature_bit = 0x2; - u64 xstate_bv; - - if (!fx) - return; - - xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; - - /* - * None of the feature bits are in init state. So nothing else - * to do for us, as the memory layout is up to date. - */ - if ((xstate_bv & pcntxt_mask) == pcntxt_mask) - return; - - /* - * FP is in init state - */ - if (!(xstate_bv & XSTATE_FP)) { - fx->cwd = 0x37f; - fx->swd = 0; - fx->twd = 0; - fx->fop = 0; - fx->rip = 0; - fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); - } - - /* - * SSE is in init state - */ - if (!(xstate_bv & XSTATE_SSE)) - memset(&fx->xmm_space[0], 0, 256); - - xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; - - /* - * Update all the other memory layouts for which the corresponding - * header bit is in the init state. 
- */ - while (xstate_bv) { - if (xstate_bv & 0x1) { - int offset = xstate_offsets[feature_bit]; - int size = xstate_sizes[feature_bit]; - - memcpy(((void *) fx) + offset, - ((void *) init_xstate_buf) + offset, - size); - } - - xstate_bv >>= 1; - feature_bit++; - } -} - -/* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. - */ -static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xsave_hdr_struct); - unsigned int magic2; - - if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) - return -1; - - /* Check for the first magic field and other error scenarios. */ - if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || - fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || - fx_sw->xstate_size > fx_sw->extended_size) - return -1; - - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) - || magic2 != FP_XSTATE_MAGIC2) - return -1; - - return 0; -} - -/* - * Signal frame handlers. - */ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) -{ - if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; - struct user_i387_ia32_struct env; - struct _fpstate_ia32 __user *fp = buf; - - convert_from_fxsr(&env, tsk); - - if (__copy_to_user(buf, &env, sizeof(env)) || - __put_user(xsave->i387.swd, &fp->status) || - __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; - } else { - struct i387_fsave_struct __user *fp = buf; - u32 swd; - if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; - } - - return 0; -} - -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) -{ - struct xsave_struct __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; - u32 xstate_bv; - int err; - - /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - - if (!use_xsave()) - return err; - - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - - /* - * Read the xstate_bv which we copied (directly from the cpu or - * from the state in task struct) to the user buffers. - */ - err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. - * - * xsave aware apps can change the xstate_bv in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. 
- */
-	xstate_bv |= XSTATE_FPSSE;
-
-	err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv);
-
-	return err;
-}
-
-static inline int save_user_xstate(struct xsave_struct __user *buf)
-{
-	int err;
-
-	if (use_xsave())
-		err = xsave_user(buf);
-	else if (use_fxsr())
-		err = fxsave_user((struct i387_fxsave_struct __user *) buf);
-	else
-		err = fsave_user((struct i387_fsave_struct __user *) buf);
-
-	if (unlikely(err) && __clear_user(buf, xstate_size))
-		err = -EFAULT;
-	return err;
-}
-
-/*
- * Save the fpu, extended register state to the user signal frame.
- *
- * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
- * state is copied.
- * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
- *
- *	buf == buf_fx for 64-bit frames and 32-bit fsave frame.
- *	buf != buf_fx for 32-bit frames with fxstate.
- *
- * If the fpu, extended register state is live, save the state directly
- * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
- * copy the thread's fpu state to the user frame starting at 'buf_fx'.
- *
- * If this is a 32-bit frame with fxstate, put a fsave header before
- * the aligned state at 'buf_fx'.
- *
- * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
- * indicating the absence/presence of the extended state to the user.
- */
-int save_xstate_sig(void __user *buf, void __user *buf_fx, int size)
-{
-	struct xsave_struct *xsave = &current->thread.fpu.state->xsave;
-	struct task_struct *tsk = current;
-	int ia32_fxstate = (buf != buf_fx);
-
-	ia32_fxstate &= (config_enabled(CONFIG_X86_32) ||
-			 config_enabled(CONFIG_IA32_EMULATION));
-
-	if (!access_ok(VERIFY_WRITE, buf, size))
-		return -EACCES;
-
-	if (!static_cpu_has(X86_FEATURE_FPU))
-		return fpregs_soft_get(current, NULL, 0,
-			sizeof(struct user_i387_ia32_struct), NULL,
-			(struct _fpstate_ia32 __user *) buf) ? -1 : 1;
-
-	if (user_has_fpu()) {
-		/* Save the live register state to the user directly. */
-		if (save_user_xstate(buf_fx))
-			return -1;
-		/* Update the thread's fxstate to save the fsave header. */
-		if (ia32_fxstate)
-			fpu_fxsave(&tsk->thread.fpu);
-	} else {
-		sanitize_i387_state(tsk);
-		if (__copy_to_user(buf_fx, xsave, xstate_size))
-			return -1;
-	}
-
-	/* Save the fsave header for the 32-bit frames. */
-	if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf))
-		return -1;
-
-	if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate))
-		return -1;
-
-	return 0;
-}
-
-static inline void
-sanitize_restored_xstate(struct task_struct *tsk,
-			 struct user_i387_ia32_struct *ia32_env,
-			 u64 xstate_bv, int fx_only)
-{
-	struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
-	struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr;
-
-	if (use_xsave()) {
-		/* These bits must be zero. */
-		memset(xsave_hdr->reserved, 0, 48);
-
-		/*
-		 * Init the state that is not present in the memory
-		 * layout and not enabled by the OS.
-		 */
-		if (fx_only)
-			xsave_hdr->xstate_bv = XSTATE_FPSSE;
-		else
-			xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv);
-	}
-
-	if (use_fxsr()) {
-		/*
-		 * mscsr reserved bits must be masked to zero for security
-		 * reasons.
-		 */
-		xsave->i387.mxcsr &= mxcsr_feature_mask;
-
-		convert_to_fxsr(tsk, ia32_env);
-	}
-}
-
-/*
- * Restore the extended state if present. Otherwise, restore the FP/SSE state.
- */ -static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) -{ - if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; - xrstor_state(init_xstate_buf, init_bv); - return fxrstor_user(buf); - } else { - u64 init_bv = pcntxt_mask & ~xbv; - if (unlikely(init_bv)) - xrstor_state(init_xstate_buf, init_bv); - return xrestore_user(buf, xbv); - } - } else if (use_fxsr()) { - return fxrstor_user(buf); - } else - return frstor_user(buf); -} - -int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - int ia32_fxstate = (buf != buf_fx); - struct task_struct *tsk = current; - int state_size = xstate_size; - u64 xstate_bv = 0; - int fx_only = 0; - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!buf) { - fpu_reset_state(tsk); - return 0; - } - - if (!access_ok(VERIFY_READ, buf, size)) - return -EACCES; - - if (!used_math() && fpstate_alloc_init(tsk)) - return -1; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; - - if (use_xsave()) { - struct _fpx_sw_bytes fx_sw_user; - if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { - /* - * Couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - state_size = sizeof(struct i387_fxsave_struct); - fx_only = 1; - } else { - state_size = fx_sw_user.xstate_size; - xstate_bv = fx_sw_user.xstate_bv; - } - } - - if (ia32_fxstate) { - /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. - */ - struct fpu *fpu = &tsk->thread.fpu; - struct user_i387_ia32_struct env; - int err = 0; - - /* - * Drop the current fpu which clears used_math(). This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * set_used_math() is again set. - */ - drop_fpu(tsk); - - if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(fpu); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); - } - - set_used_math(); - if (use_eager_fpu()) { - preempt_disable(); - math_state_restore(); - preempt_enable(); - } - - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { - fpu_reset_state(tsk); - return -1; - } - } - - return 0; -} - -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. 
- */ -static void prepare_fx_sw_frame(void) -{ - int fsave_header_size = sizeof(struct i387_fsave_struct); - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xstate_bv = pcntxt_mask; - fx_sw_reserved.xstate_size = xstate_size; - - if (config_enabled(CONFIG_IA32_EMULATION)) { - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; - } -} - -/* - * Enable the extended processor state save/restore feature - */ -static inline void xstate_enable(void) -{ - cr4_set_bits(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); -} - -/* - * Record the offsets and sizes of different state managed by the xsave - * memory layout. - */ -static void __init setup_xstate_features(void) -{ - int eax, ebx, ecx, edx, leaf = 0x2; - - xstate_features = fls64(pcntxt_mask); - xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); - xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); - - do { - cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - - if (eax == 0) - break; - - xstate_offsets[leaf] = ebx; - xstate_sizes[leaf] = eax; - - leaf++; - } while (1); -} - -/* - * This function sets up offsets and sizes of all extended states in - * xsave area. This supports both standard format and compacted format - * of the xsave aread. - * - * Input: void - * Output: void - */ -void setup_xstate_comp(void) -{ - unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; - int i; - - /* - * The FP xstates and SSE xstates are legacy states. They are always - * in the fixed offsets in the xsave area in either compacted form - * or standard form. - */ - xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); - - if (!cpu_has_xsaves) { - for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { - xstate_comp_offsets[i] = xstate_offsets[i]; - xstate_comp_sizes[i] = xstate_sizes[i]; - } - } - return; - } - - xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; - - for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) - xstate_comp_sizes[i] = xstate_sizes[i]; - else - xstate_comp_sizes[i] = 0; - - if (i > 2) - xstate_comp_offsets[i] = xstate_comp_offsets[i-1] - + xstate_comp_sizes[i-1]; - - } -} - -/* - * setup the xstate image representing the init state - */ -static void __init setup_init_fpu_buf(void) -{ - /* - * Setup init_xstate_buf to represent the init state of - * all the features managed by the xsave - */ - init_xstate_buf = alloc_bootmem_align(xstate_size, - __alignof__(struct xsave_struct)); - fx_finit(&init_xstate_buf->i387); - - if (!cpu_has_xsave) - return; - - setup_xstate_features(); - - if (cpu_has_xsaves) { - init_xstate_buf->xsave_hdr.xcomp_bv = - (u64)1 << 63 | pcntxt_mask; - init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; - } - - /* - * Init all the features state with header_bv being 0x0 - */ - xrstor_state_booting(init_xstate_buf, -1); - /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. 
- */ - xsave_state_booting(init_xstate_buf, -1); -} - -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -static int __init eager_fpu_setup(char *s) -{ - if (!strcmp(s, "on")) - eagerfpu = ENABLE; - else if (!strcmp(s, "off")) - eagerfpu = DISABLE; - else if (!strcmp(s, "auto")) - eagerfpu = AUTO; - return 1; -} -__setup("eagerfpu=", eager_fpu_setup); - - -/* - * Calculate total size of enabled xstates in XCR0/pcntxt_mask. - */ -static void __init init_xstate_size(void) -{ - unsigned int eax, ebx, ecx, edx; - int i; - - if (!cpu_has_xsaves) { - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - return; - } - - xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < 64; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { - cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_size += eax; - } - } -} - -/* - * Enable and initialize the xsave feature. - */ -static void __init xstate_enable_boot_cpu(void) -{ - unsigned int eax, ebx, ecx, edx; - - if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); - return; - } - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - pcntxt_mask = eax + ((u64)edx << 32); - - if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - pr_err("FP/SSE not shown under xsave features 0x%llx\n", - pcntxt_mask); - BUG(); - } - - /* - * Support only the state known to OS. - */ - pcntxt_mask = pcntxt_mask & XCNTXT_MASK; - - xstate_enable(); - - /* - * Recompute the context size for enabled features - */ - init_xstate_size(); - - update_regset_xstate_info(xstate_size, pcntxt_mask); - prepare_fx_sw_frame(); - setup_init_fpu_buf(); - - /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) - eagerfpu = ENABLE; - - if (pcntxt_mask & XSTATE_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", - pcntxt_mask & XSTATE_EAGER); - pcntxt_mask &= ~XSTATE_EAGER; - } else { - eagerfpu = ENABLE; - } - } - - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", - pcntxt_mask, xstate_size, - cpu_has_xsaves ? "compacted form" : "standard form"); -} - -/* - * For the very first instance, this calls xstate_enable_boot_cpu(); - * for all subsequent instances, this calls xstate_enable(). - * - * This is somewhat obfuscated due to the lack of powerful enough - * overrides for the section checks. - */ -void xsave_init(void) -{ - static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; - void (*this_func)(void); - - if (!cpu_has_xsave) - return; - - this_func = next_func; - next_func = xstate_enable; - this_func(); -} - -/* - * setup_init_fpu_buf() is __init and it is OK to call it here because - * init_xstate_buf will be unset only once during boot. - */ -void __init_refok eager_fpu_init(void) -{ - WARN_ON(used_math()); - current_thread_info()->status = 0; - - if (eagerfpu == ENABLE) - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); - - if (!cpu_has_eager_fpu) { - stts(); - return; - } - - if (!init_xstate_buf) - setup_init_fpu_buf(); -} - -/* - * Given the xsave area and a state inside, this function returns the - * address of the state. - * - * This is the API that is called to get xstate address in either - * standard format or compacted format of xsave area. - * - * Inputs: - * xsave: base address of the xsave area; - * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, - * etc.) - * Output: - * address of the state in the xsave area. 
- */ -void *get_xsave_addr(struct xsave_struct *xsave, int xstate) -{ - int feature = fls64(xstate) - 1; - if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) - return NULL; - - return (void *)xsave + xstate_comp_offsets[feature]; -} -EXPORT_SYMBOL_GPL(get_xsave_addr); -- cgit v0.10.2 From f89e32e0a3df2f29d61fdc120ac62654ef267111 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 10:58:10 +0200 Subject: x86/fpu: Fix header file dependencies of fpu-internal.h Fix a minor header file dependency bug in asm/fpu-internal.h: it relies on i387.h but does not include it. All users of fpu-internal.h included it explicitly. Also remove unnecessary includes, to reduce compilation time. This also makes it easier to use it as a standalone header file for FPU internals, such as an upcoming C module in arch/x86/kernel/fpu/. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 28640c3..470522c 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -32,7 +32,6 @@ #include #include -#include #include #define CHKSUM_BLOCK_SIZE 1 diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index e510b1c..1537378 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index c81d35e6..4bafd5b 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index f85d21b..c3b7bd1 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -10,18 +10,13 @@ #ifndef _FPU_INTERNAL_H #define _FPU_INTERNAL_H -#include #include #include #include -#include -#include -#include -#include + #include -#include +#include #include -#include #ifdef CONFIG_X86_64 # include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b8035b8..220ad95 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c7793ad..35d0f19 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 8ed2106..84d647d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #ifdef CONFIG_MATH_EMULATION diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ddfdbf7..ae6efec 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -38,7 +38,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a7bc794..69451b8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git 
a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1ea14fd..35f867a 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -26,7 +26,6 @@ #include #include -#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 50e547e..60e331c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,7 +68,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 231aa57..465b335 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,7 +54,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 26b1f89..be276e0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -59,7 +59,7 @@ #include #include #include -#include +#include #include /* Ugh! */ #include #include diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c439ec4..37ad432 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -10,7 +10,6 @@ #include #include -#include #include #include #include -- cgit v0.10.2 From 0c8675379048f36c76ad3a46519310ee2d626b2f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 10:53:34 +0200 Subject: x86/fpu: Split out the boot time FPU init code into fpu/init.c Move boot time FPU initialization code into init.c, to better isolate it into its own domain. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile index 89fd66a..50464a7 100644 --- a/arch/x86/kernel/fpu/Makefile +++ b/arch/x86/kernel/fpu/Makefile @@ -2,4 +2,4 @@ # Build rules for the FPU support code: # -obj-y += core.o xsave.o +obj-y += init.o core.o xsave.o diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0110155..9866a58 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -139,94 +139,6 @@ void fpu__save(struct task_struct *tsk) } EXPORT_SYMBOL_GPL(fpu__save); -unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); -static struct i387_fxsave_struct fx_scratch; - -static void mxcsr_feature_mask_init(void) -{ - unsigned long mask = 0; - - if (cpu_has_fxsr) { - memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : "+m" (fx_scratch)); - mask = fx_scratch.mxcsr_mask; - if (mask == 0) - mask = 0x0000ffbf; - } - mxcsr_feature_mask &= mask; -} - -static void fpstate_xstate_init_size(void) -{ - /* - * Note that xstate_size might be overwriten later during - * xsave_init(). - */ - - if (!cpu_has_fpu) { - /* - * Disable xsave as we do not support it if i387 - * emulation is enabled. - */ - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct i387_soft_struct); - return; - } - - if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); - else - xstate_size = sizeof(struct i387_fsave_struct); -} - -/* - * Called on the boot CPU at bootup to set up the initial FPU state that - * is later cloned into all processes. - * - * Also called on secondary CPUs to set up the FPU state of their - * idle threads. 
- */ -void fpu__cpu_init(void) -{ - unsigned long cr0; - unsigned long cr4_mask = 0; - -#ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { - pr_emerg("No FPU found and no math emulation present\n"); - pr_emerg("Giving up\n"); - for (;;) - asm volatile("hlt"); - } -#endif - if (cpu_has_fxsr) - cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) - cr4_mask |= X86_CR4_OSXMMEXCPT; - if (cr4_mask) - cr4_set_bits(cr4_mask); - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) - cr0 |= X86_CR0_EM; - write_cr0(cr0); - - /* - * fpstate_xstate_init_size() is only called once, to avoid overriding - * 'xstate_size' during (secondary CPU) bootup or during CPU hotplug. - */ - if (xstate_size == 0) - fpstate_xstate_init_size(); - - mxcsr_feature_mask_init(); - xsave_init(); - eager_fpu_init(); -} - void fpstate_init(struct fpu *fpu) { if (!cpu_has_fpu) { diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c new file mode 100644 index 0000000..0a66629 --- /dev/null +++ b/arch/x86/kernel/fpu/init.c @@ -0,0 +1,93 @@ +/* + * x86 FPU boot time init code + */ +#include +#include + +unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int xstate_size; +EXPORT_SYMBOL_GPL(xstate_size); +static struct i387_fxsave_struct fx_scratch; + +static void mxcsr_feature_mask_init(void) +{ + unsigned long mask = 0; + + if (cpu_has_fxsr) { + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); + asm volatile("fxsave %0" : "+m" (fx_scratch)); + mask = fx_scratch.mxcsr_mask; + if (mask == 0) + mask = 0x0000ffbf; + } + mxcsr_feature_mask &= mask; +} + +static void fpstate_xstate_init_size(void) +{ + /* + * Note that xstate_size might be overwriten later during + * xsave_init(). + */ + + if (!cpu_has_fpu) { + /* + * Disable xsave as we do not support it if i387 + * emulation is enabled. + */ + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + xstate_size = sizeof(struct i387_soft_struct); + return; + } + + if (cpu_has_fxsr) + xstate_size = sizeof(struct i387_fxsave_struct); + else + xstate_size = sizeof(struct i387_fsave_struct); +} + +/* + * Called on the boot CPU at bootup to set up the initial FPU state that + * is later cloned into all processes. + * + * Also called on secondary CPUs to set up the FPU state of their + * idle threads. + */ +void fpu__cpu_init(void) +{ + unsigned long cr0; + unsigned long cr4_mask = 0; + +#ifndef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) { + pr_emerg("No FPU found and no math emulation present\n"); + pr_emerg("Giving up\n"); + for (;;) + asm volatile("hlt"); + } +#endif + if (cpu_has_fxsr) + cr4_mask |= X86_CR4_OSFXSR; + if (cpu_has_xmm) + cr4_mask |= X86_CR4_OSXMMEXCPT; + if (cr4_mask) + cr4_set_bits(cr4_mask); + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ + if (!cpu_has_fpu) + cr0 |= X86_CR0_EM; + write_cr0(cr0); + + /* + * fpstate_xstate_init_size() is only called once, to avoid overriding + * 'xstate_size' during (secondary CPU) bootup or during CPU hotplug. + */ + if (xstate_size == 0) + fpstate_xstate_init_size(); + + mxcsr_feature_mask_init(); + xsave_init(); + eager_fpu_init(); +} -- cgit v0.10.2 From 4445e6e9a549823a2c2a188e500389532e1ed501 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 11:33:25 +0200 Subject: x86/fpu: Remove unnecessary includes from core.c fpu/core.c includes a lot of files for mostly historic reasons. It only needs fpu-internal.h, which already includes all the required headers. 
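[ Illustrative sketch, not part of the original patch: assuming fpu-internal.h really does provide, directly or transitively, every FPU type and helper that core.c references, the include block at the top of fpu/core.c reduces to a single line, roughly:

	/* fpu-internal.h includes i387.h and the rest of the FPU internals: */
	#include <asm/fpu-internal.h>

  The diff below shows the headers being dropped; their exact names are not recoverable from this copy. ]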
Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9866a58..b05199f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -5,20 +5,7 @@ * General FPU state handling cleanups * Gareth Hughes , May 2000 */ -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include #include -#include static DEFINE_PER_CPU(bool, in_kernel_fpu); -- cgit v0.10.2 From 146ed598d12ac173bf5fed05ba7046812b8a8978 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 11:36:14 +0200 Subject: x86/fpu: Move the no_387 handling and FPU detection code into init.c Both no_387() and fpu__detect() run at boot time, so they belong in init.c. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b05199f..9211582 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -581,37 +581,3 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) EXPORT_SYMBOL(dump_fpu); #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ - -static int __init no_387(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FPU); - return 1; -} - -__setup("no387", no_387); - -/* - * Set the X86_FEATURE_FPU CPU-capability bit based on - * trying to execute an actual sequence of FPU instructions: - */ -void fpu__detect(struct cpuinfo_x86 *c) -{ - unsigned long cr0; - u16 fsw, fcw; - - fsw = fcw = 0xffff; - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS | X86_CR0_EM); - write_cr0(cr0); - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); - - /* The final cr0 value is set in fpu_init() */ -} diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 0a66629..5e06aa6 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -91,3 +91,37 @@ void fpu__cpu_init(void) xsave_init(); eager_fpu_init(); } + +static int __init no_387(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FPU); + return 1; +} + +__setup("no387", no_387); + +/* + * Set the X86_FEATURE_FPU CPU-capability bit based on + * trying to execute an actual sequence of FPU instructions: + */ +void fpu__detect(struct cpuinfo_x86 *c) +{ + unsigned long cr0; + u16 fsw, fcw; + + fsw = fcw = 0xffff; + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS | X86_CR0_EM); + write_cr0(cr0); + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); + + /* The final cr0 value is set in fpu_init() */ +} -- cgit v0.10.2 From 11ad19277e025f914518bc2943a240cdd37cf844 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 11:44:46 +0200 Subject: x86/fpu: Remove the free_thread_xstate() complication Use fpstate_free() directly to manage FPU state. Only process.c was using this method, so this is a speedup as well, as it removes the extra function call and related clobbers. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6b75c4b..fef8db0 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -362,7 +362,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; -extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; struct perf_event; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 35d0f19..6ab180f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -99,14 +99,9 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return 0; } -void free_thread_xstate(struct task_struct *tsk) -{ - fpstate_free(&tsk->thread.fpu); -} - void arch_release_task_struct(struct task_struct *tsk) { - free_thread_xstate(tsk); + fpstate_free(&tsk->thread.fpu); } void arch_task_cache_init(void) @@ -154,7 +149,7 @@ void flush_thread(void) if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ drop_fpu(tsk); - free_thread_xstate(tsk); + fpstate_free(&tsk->thread.fpu); } else { if (!tsk_used_math(tsk)) { /* kthread execs. TODO: cleanup this horror. */ -- cgit v0.10.2 From 81683cc8277e79decff4d0cf82ae0e17d2fe465f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 11:52:13 +0200 Subject: x86/fpu: Factor out fpu__flush_thread() from flush_thread() flush_thread() open codes a lot of FPU internals - create a separate function for it in fpu/core.c. Turns out that this does not hurt performance:

      text     data      bss       dec     hex  filename
  11843039  1884440  1130496  14857975  e2b6f7  vmlinux.before
  11843039  1884440  1130496  14857975  e2b6f7  vmlinux.after

and since this is a slowpath, clarity comes first anyway. We can reconsider inlining decisions after the FPU code has been cleaned up. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 6552a16..d6fc844 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -20,6 +20,7 @@ struct user_i387_struct; extern int fpstate_alloc_init(struct task_struct *curr); extern void fpstate_init(struct fpu *fpu); +extern void fpu__flush_thread(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void math_state_restore(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9211582..787bf57 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,6 +227,23 @@ static int fpu__unlazy_stopped(struct task_struct *child) return 0; } +void fpu__flush_thread(struct task_struct *tsk) +{ + if (!use_eager_fpu()) { + /* FPU state will be reallocated lazily at the first use. */ + drop_fpu(tsk); + fpstate_free(&tsk->thread.fpu); + } else { + if (!tsk_used_math(tsk)) { + /* kthread execs. TODO: cleanup this horror. 
*/ + if (WARN_ON(fpstate_alloc_init(tsk))) + force_sig(SIGKILL, tsk); + user_fpu_begin(); + } + restore_init_xstate(); + } +} + /* * The xstateregs_active() routine is the same as the fpregs_active() routine, * as the "regset->n" for the xstate regset will be updated based on the feature diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6ab180f..52fd8f6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -146,19 +146,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - if (!use_eager_fpu()) { - /* FPU state will be reallocated lazily at the first use. */ - drop_fpu(tsk); - fpstate_free(&tsk->thread.fpu); - } else { - if (!tsk_used_math(tsk)) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(fpstate_alloc_init(tsk))) - force_sig(SIGKILL, tsk); - user_fpu_begin(); - } - restore_init_xstate(); - } + fpu__flush_thread(tsk); } static void hard_disable_TSC(void) -- cgit v0.10.2 From 93b90712c64ca2db4b39fcb2e7dffcf0d478468d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 12:50:13 +0200 Subject: x86/fpu: Move math_state_restore() to fpu/core.c It's another piece of FPU internals that is better off close to the other FPU internals. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 787bf57..7add2fb 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,6 +227,48 @@ static int fpu__unlazy_stopped(struct task_struct *child) return 0; } +/* + * 'math_state_restore()' saves the current math information in the + * old math state array, and gets the new ones from the current task + * + * Careful.. There are problems with IBM-designed IRQ13 behaviour. + * Don't touch unless you *really* know how it works. + * + * Must be called with kernel preemption disabled (eg with local + * local interrupts as in the case of do_device_not_available). + */ +void math_state_restore(void) +{ + struct task_struct *tsk = current; + + if (!tsk_used_math(tsk)) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (fpstate_alloc_init(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + + /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ + kernel_fpu_disable(); + __thread_fpu_begin(tsk); + if (unlikely(restore_fpu_checking(tsk))) { + fpu_reset_state(tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); + } else { + tsk->thread.fpu.counter++; + } + kernel_fpu_enable(); +} +EXPORT_SYMBOL_GPL(math_state_restore); + void fpu__flush_thread(struct task_struct *tsk) { if (!use_eager_fpu()) { diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 465b335..63c7fc3 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -826,48 +826,6 @@ asmlinkage __visible void __attribute__((weak)) smp_threshold_interrupt(void) { } -/* - * 'math_state_restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task - * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (eg with local - * local interrupts as in the case of do_device_not_available). 
- */ -void math_state_restore(void) -{ - struct task_struct *tsk = current; - - if (!tsk_used_math(tsk)) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (fpstate_alloc_init(tsk)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } - - /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ - kernel_fpu_disable(); - __thread_fpu_begin(tsk); - if (unlikely(restore_fpu_checking(tsk))) { - fpu_reset_state(tsk); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); - } else { - tsk->thread.fpu.counter++; - } - kernel_fpu_enable(); -} -EXPORT_SYMBOL_GPL(math_state_restore); - dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code) { -- cgit v0.10.2 From 3a0aee4801d475b64a408539c01ec0d17d52192b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 13:16:47 +0200 Subject: x86/fpu: Rename math_state_restore() to fpu__restore() Move to the new fpu__*() namespace. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/Documentation/preempt-locking.txt b/Documentation/preempt-locking.txt index 57883ca..e89ce66 100644 --- a/Documentation/preempt-locking.txt +++ b/Documentation/preempt-locking.txt @@ -48,7 +48,7 @@ preemption must be disabled around such regions. Note, some FPU functions are already explicitly preempt safe. For example, kernel_fpu_begin and kernel_fpu_end will disable and enable preemption. -However, math_state_restore must be called with preemption disabled. +However, fpu__restore() must be called with preemption disabled. RULE #3: Lock acquire and release must be performed by same task diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index d6fc844..c8ee395 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -23,7 +23,7 @@ extern void fpstate_init(struct fpu *fpu); extern void fpu__flush_thread(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void math_state_restore(void); +extern void fpu__restore(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7add2fb..15c3cf7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -228,7 +228,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) } /* - * 'math_state_restore()' saves the current math information in the + * 'fpu__restore()' saves the current math information in the * old math state array, and gets the new ones from the current task * * Careful.. There are problems with IBM-designed IRQ13 behaviour. @@ -237,7 +237,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) * Must be called with kernel preemption disabled (eg with local * local interrupts as in the case of do_device_not_available). 
*/ -void math_state_restore(void) +void fpu__restore(void) { struct task_struct *tsk = current; @@ -267,7 +267,7 @@ void math_state_restore(void) } kernel_fpu_enable(); } -EXPORT_SYMBOL_GPL(math_state_restore); +EXPORT_SYMBOL_GPL(fpu__restore); void fpu__flush_thread(struct task_struct *tsk) { diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 163b5cc..d913d50 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -404,7 +404,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) set_used_math(); if (use_eager_fpu()) { preempt_disable(); - math_state_restore(); + fpu__restore(); preempt_enable(); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 84d647d..1a0edce 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -295,7 +295,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. * This must be done before restoring TLS segments so * the GDT and LDT are properly updated, and must be - * done before math_state_restore, so the TS bit is up + * done before fpu__restore(), so the TS bit is up * to date. */ arch_end_context_switch(next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ae6efec..99cc4b8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -298,7 +298,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) * Leave lazy mode, flushing any hypercalls made here. This * must be done after loading TLS entries in the GDT but before * loading segments that might reference them, and and it must - * be done before math_state_restore, so the TS bit is up to + * be done before fpu__restore(), so the TS bit is up to * date. */ arch_end_context_switch(next_p); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 63c7fc3..22ad90a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -846,7 +846,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) return; } #endif - math_state_restore(); /* interrupts still off */ + fpu__restore(); /* interrupts still off */ #ifdef CONFIG_X86_32 conditional_sti(regs); #endif diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 30f2aef..bcb534a 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -297,12 +297,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) /* * Similarly, if we took a trap because the Guest used the FPU, * we have to restore the FPU it expects to see. - * math_state_restore() may sleep and we may even move off to + * fpu__restore() may sleep and we may even move off to * a different CPU. So all the critical stuff should be done * before this. */ else if (cpu->regs->trapnum == 7 && !user_has_fpu()) - math_state_restore(); + fpu__restore(); } /*H:130 -- cgit v0.10.2 From 4d1640927bd54aa118f91c2bcfe6c2de0e2ba2a3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 13:44:25 +0200 Subject: x86/fpu: Factor out the FPU bug detection code into fpu__init_check_bugs() Move the boot-time FPU bug detection code to the other FPU boot time init code in fpu/init.c. No change in code size:

      text     data      bss       dec     hex  filename
  13044568  1884440  1130496  16059504  f50c70  vmlinux.before
  13044568  1884440  1130496  16059504  f50c70  vmlinux.after

Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index c8ee395..89ae3e0 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -24,6 +24,7 @@ extern void fpu__flush_thread(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void fpu__restore(void); +extern void fpu__init_check_bugs(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 0344534..eb8be0c 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -17,52 +17,6 @@ #include #include -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - s32 fdiv_bug; - - kernel_fpu_begin(); - - /* - * trap_init() enabled FXSR and company _before_ testing for FP - * problems here. - * - * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug - */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&fdiv_bug) - : "m" (*&x), "m" (*&y)); - - kernel_fpu_end(); - - if (fdiv_bug) { - set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); - pr_warn("Hmm, FPU with FDIV bug\n"); - } -} - void __init check_bugs(void) { identify_boot_cpu(); @@ -85,10 +39,5 @@ void __init check_bugs(void) '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); alternative_instructions(); - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. - */ - if (cpu_has_fpu) - check_fpu(); + fpu__init_check_bugs(); } diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5e06aa6..4eabb42 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -4,6 +4,69 @@ #include #include +/* + * Boot time CPU/FPU FDIV bug detection code: + */ + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + s32 fdiv_bug; + + kernel_fpu_begin(); + + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug + */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&fdiv_bug) + : "m" (*&x), "m" (*&y)); + + kernel_fpu_end(); + + if (fdiv_bug) { + set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); + pr_warn("Hmm, FPU with FDIV bug\n"); + } +} + +void fpu__init_check_bugs(void) +{ + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. 
+ */ + if (cpu_has_fpu) + check_fpu(); +} + +/* + * Boot time FPU feature detection code: + */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -- cgit v0.10.2 From 3e261c14e41b3a3cc1da54190d56b8609bedd873 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 15:08:34 +0200 Subject: x86/fpu: Simplify the xsave_state*() methods These functions (xsave_state() and xsave_state_booting()) have a 'mask' argument that is always -1. Propagate this into the functions instead and eliminate the extra argument. Does not change the generated code, because these were inlined functions. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index c3b7bd1..b1f5dd6 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -527,9 +527,9 @@ static inline void __save_fpu(struct task_struct *tsk) { if (use_xsave()) { if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&tsk->thread.fpu.state->xsave, -1); + xsave_state_booting(&tsk->thread.fpu.state->xsave); else - xsave_state(&tsk->thread.fpu.state->xsave, -1); + xsave_state(&tsk->thread.fpu.state->xsave); } else fpu_fxsave(&tsk->thread.fpu); } diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 58ed0ca..61c951c 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -70,8 +70,9 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask) +static inline int xsave_state_booting(struct xsave_struct *fx) { + u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; int err = 0; @@ -123,8 +124,9 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) /* * Save processor xstate to xsave area. */ -static inline int xsave_state(struct xsave_struct *fx, u64 mask) +static inline int xsave_state(struct xsave_struct *fx) { + u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; int err = 0; @@ -189,7 +191,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask) */ static inline void fpu_xsave(struct fpu *fpu) { - xsave_state(&fpu->state->xsave, -1); + xsave_state(&fpu->state->xsave); } /* diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index d913d50..a52205b 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -558,11 +558,12 @@ static void __init setup_init_fpu_buf(void) * Init all the features state with header_bv being 0x0 */ xrstor_state_booting(init_xstate_buf, -1); + /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state_booting(init_xstate_buf, -1); + xsave_state_booting(init_xstate_buf); } static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -- cgit v0.10.2 From 0afc4a941c0e7b8f6f619fe576f7c5ddbe78d304 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 15:14:44 +0200 Subject: x86/fpu: Remove fpu_xsave() It's a pointless wrapper now - use xsave_state(). Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index b1f5dd6..95e04cb 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -265,7 +265,7 @@ static inline void fpu_fxsave(struct fpu *fpu) static inline int fpu_save_init(struct fpu *fpu) { if (use_xsave()) { - fpu_xsave(fpu); + xsave_state(&fpu->state->xsave); /* * xsave header may indicate the init state of the FP. diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 61c951c..7c90ea9 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h @@ -187,14 +187,6 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask) } /* - * Save xstate context for old process during context switch. - */ -static inline void fpu_xsave(struct fpu *fpu) -{ - xsave_state(&fpu->state->xsave); -} - -/* * Restore xstate context for new process during context switch. */ static inline int fpu_xrstor_checking(struct xsave_struct *fx) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 37ad432..412b5f8 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -389,7 +389,7 @@ int mpx_enable_management(struct task_struct *tsk) * directory into XSAVE/XRSTOR Save Area and enable MPX through * XRSTOR instruction. * - * fpu_xsave() is expected to be very expensive. Storing the bounds + * xsave_state() is expected to be very expensive. Storing the bounds * directory here means that we do not have to do xsave in the unmap * path; we can just use mm->bd_addr instead. */ -- cgit v0.10.2 From 8ffb53ab986ccb4421b1060182c6e084edd7b9d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 15:41:56 +0200 Subject: x86/fpu: Move task_xstate_cachep handling to core.c This code was historically in process.c, now we have FPU core internals in fpu/core.c instead - move it there. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 95e04cb..f41170c 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -564,6 +564,8 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } +extern void fpstate_cache_init(void); + extern int fpstate_alloc(struct fpu *fpu); static inline void fpstate_free(struct fpu *fpu) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 15c3cf7..b32a6eb 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -147,6 +147,21 @@ void fpstate_init(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpstate_init); +/* + * FPU state allocation: + */ +struct kmem_cache *task_xstate_cachep; +EXPORT_SYMBOL_GPL(task_xstate_cachep); + +void fpstate_cache_init(void) +{ + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC | SLAB_NOTRACK, NULL); + setup_xstate_comp(); +} + int fpstate_alloc(struct fpu *fpu) { if (fpu->state) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 52fd8f6..36d9f73 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -75,9 +75,6 @@ void idle_notifier_unregister(struct notifier_block *n) EXPORT_SYMBOL_GPL(idle_notifier_unregister); #endif -struct kmem_cache *task_xstate_cachep; -EXPORT_SYMBOL_GPL(task_xstate_cachep); - /* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. @@ -106,11 +103,7 @@ void arch_release_task_struct(struct task_struct *tsk) void arch_task_cache_init(void) { - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, - __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); - setup_xstate_comp(); + fpstate_cache_init(); } /* -- cgit v0.10.2 From a752b53d9dcbae28a3a22b5577f0571acf53d5aa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 15:47:05 +0200 Subject: x86/fpu: Factor out fpu__copy() Introduce fpu__copy() and use it in arch_dup_task_struct(), thus moving another chunk of FPU logic to fpu/core.c. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index f41170c..6c1ceb7 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -567,6 +567,7 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) extern void fpstate_cache_init(void); extern int fpstate_alloc(struct fpu *fpu); +extern int fpu__copy(struct task_struct *dst, struct task_struct *src); static inline void fpstate_free(struct fpu *fpu) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b32a6eb..05df212 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -178,6 +178,24 @@ int fpstate_alloc(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpstate_alloc); +int fpu__copy(struct task_struct *dst, struct task_struct *src) +{ + dst->thread.fpu.counter = 0; + dst->thread.fpu.has_fpu = 0; + dst->thread.fpu.state = NULL; + + task_disable_lazy_fpu_restore(dst); + + if (tsk_used_math(src)) { + int err = fpstate_alloc(&dst->thread.fpu); + + if (err) + return err; + fpu_copy(dst, src); + } + return 0; +} + /* * Allocate the backing store for the current task's FPU registers * and initialize the registers themselves as well. diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 36d9f73..bb7d4ab 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -83,17 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - dst->thread.fpu.counter = 0; - dst->thread.fpu.has_fpu = 0; - dst->thread.fpu.state = NULL; - task_disable_lazy_fpu_restore(dst); - if (tsk_used_math(src)) { - int err = fpstate_alloc(&dst->thread.fpu); - if (err) - return err; - fpu_copy(dst, src); - } - return 0; + return fpu__copy(dst, src); } void arch_release_task_struct(struct task_struct *tsk) -- cgit v0.10.2 From 5a12bf6332da40310eff5575ca1ba20339d74e48 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 15:58:37 +0200 Subject: x86/fpu: Uninline fpstate_free() and move it next to the allocation function Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 6c1ceb7..16a1c66 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -567,16 +567,9 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) extern void fpstate_cache_init(void); extern int fpstate_alloc(struct fpu *fpu); +extern void fpstate_free(struct fpu *fpu); extern int fpu__copy(struct task_struct *dst, struct task_struct *src); -static inline void fpstate_free(struct fpu *fpu) -{ - if (fpu->state) { - kmem_cache_free(task_xstate_cachep, fpu->state); - fpu->state = NULL; - } -} - static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) { if (use_eager_fpu()) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 05df212..b00d1b3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -178,6 +178,15 @@ int fpstate_alloc(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpstate_alloc); +void fpstate_free(struct fpu *fpu) +{ + if (fpu->state) { + kmem_cache_free(task_xstate_cachep, fpu->state); + fpu->state = NULL; + } +} +EXPORT_SYMBOL_GPL(fpstate_free); + int fpu__copy(struct task_struct *dst, struct task_struct *src) { dst->thread.fpu.counter = 0; -- cgit v0.10.2 From f55f88e25e9b5232054a82d47de7aaf67179b78b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 16:02:33 +0200 Subject: x86/fpu: Make task_xstate_cachep static It's now local to fpu/core.c, so make it static. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index fef8db0..d50cc7f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -362,7 +362,6 @@ DECLARE_PER_CPU(struct irq_stack *, softirq_stack); #endif /* X86_64 */ extern unsigned int xstate_size; -extern struct kmem_cache *task_xstate_cachep; struct perf_event; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b00d1b3..d0fcf74 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -150,8 +150,7 @@ EXPORT_SYMBOL_GPL(fpstate_init); /* * FPU state allocation: */ -struct kmem_cache *task_xstate_cachep; -EXPORT_SYMBOL_GPL(task_xstate_cachep); +static struct kmem_cache *task_xstate_cachep; void fpstate_cache_init(void) { -- cgit v0.10.2 From 416d49ac67ae3af8c98ecee2ebe0a883b95e213a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 16:33:08 +0200 Subject: x86/fpu: Make kernel_fpu_disable/enable() static This allows the compiler to inline them and to eliminate them:

  arch/x86/kernel/fpu/core.o:
  text  data  bss   dec   hex  filename
  6741     4    8  6753  1a61  core.o.before
  6716     4    8  6728  1a48  core.o.after

Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 89ae3e0..e69989f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -54,10 +54,6 @@ static inline void kernel_fpu_end(void) preempt_enable(); } -/* Must be called with preempt disabled */ -extern void kernel_fpu_disable(void); -extern void kernel_fpu_enable(void); - /* * Some instructions like VIA's padlock instructions generate a spurious * DNA fault but don't modify SSE registers. And these instructions diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d0fcf74..1618205 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,13 +9,13 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); -void kernel_fpu_disable(void) +static void kernel_fpu_disable(void) { WARN_ON(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); } -void kernel_fpu_enable(void) +static void kernel_fpu_enable(void) { this_cpu_write(in_kernel_fpu, false); } @@ -32,7 +32,7 @@ void kernel_fpu_enable(void) * Except for the eagerfpu case when we return true; in the likely case * the thread has FPU but we are not going to set/clear TS. */ -static inline bool interrupted_kernel_fpu_idle(void) +static bool interrupted_kernel_fpu_idle(void) { if (this_cpu_read(in_kernel_fpu)) return false; @@ -52,7 +52,7 @@ * in an interrupt context from user mode - we'll just * save the FPU state as required. */ -static inline bool interrupted_user_mode(void) +static bool interrupted_user_mode(void) { struct pt_regs *regs = get_irq_regs(); return regs && user_mode(regs); } -- cgit v0.10.2 From 3103ae3a6d3e66d51bb883bb17b55574e163b77d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 16:40:56 +0200 Subject: x86/fpu: Add debug check to kernel_fpu_disable() We are not supposed to call kernel_fpu_disable() if we have not previously enabled it. Also use kernel_fpu_disable()/enable() in the __kernel_fpu_begin/end() primitives, instead of writing to in_kernel_fpu directly, so that we get the debugging checks. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1618205..9bc573a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -17,6 +17,7 @@ static void kernel_fpu_disable(void) static void kernel_fpu_enable(void) { + WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, false); } @@ -77,7 +78,7 @@ void __kernel_fpu_begin(void) { struct task_struct *me = current; - this_cpu_write(in_kernel_fpu, true); + kernel_fpu_disable(); if (__thread_has_fpu(me)) { __save_init_fpu(me); @@ -100,7 +101,7 @@ void __kernel_fpu_end(void) stts(); } - this_cpu_write(in_kernel_fpu, false); + kernel_fpu_enable(); } EXPORT_SYMBOL(__kernel_fpu_end); -- cgit v0.10.2 From 085cc281a04633761bac361f26dcee2800d58077 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 16:52:03 +0200 Subject: x86/fpu: Add kernel_fpu_disabled() Instead of open-coded in_kernel_fpu access, use kernel_fpu_disabled() in interrupted_kernel_fpu_idle(), matching the other kernel_fpu_*() methods. Also add some documentation for in_kernel_fpu. 
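[ Illustrative sketch, not part of the original patch: a minimal usage example of the pattern that the per-CPU in_kernel_fpu flag makes safe. The caller below is hypothetical, for illustration only; irq_fpu_usable(), kernel_fpu_begin() and kernel_fpu_end() are the existing APIs referenced elsewhere in this series:

	/* Hypothetical IRQ-context user of the FPU: */
	if (irq_fpu_usable()) {		/* false while kernel_fpu_disabled() */
		kernel_fpu_begin();
		/* ... safe to use SSE/AVX registers here ... */
		kernel_fpu_end();
	}

  irq_fpu_usable() relies on interrupted_kernel_fpu_idle(), which is exactly where the open-coded in_kernel_fpu read is replaced by kernel_fpu_disabled() in the diff below. ]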
Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9bc573a..e898e83 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -7,6 +7,17 @@ */ #include +/* + * Track whether the kernel is using the FPU state + * currently. + * + * This flag is used: + * + * - by IRQ context code to potentially use the FPU + * if it's unused. + * + * - to debug kernel_fpu_begin()/end() correctness + */ static DEFINE_PER_CPU(bool, in_kernel_fpu); static void kernel_fpu_disable(void) @@ -21,6 +32,11 @@ static void kernel_fpu_enable(void) this_cpu_write(in_kernel_fpu, false); } +static bool kernel_fpu_disabled(void) +{ + return this_cpu_read(in_kernel_fpu); +} + /* * Were we in an interrupt that interrupted kernel mode? * @@ -35,7 +51,7 @@ static void kernel_fpu_enable(void) */ static bool interrupted_kernel_fpu_idle(void) { - if (this_cpu_read(in_kernel_fpu)) + if (kernel_fpu_disabled()) return false; if (use_eager_fpu()) -- cgit v0.10.2 From 6522d783773d0d61f9f35cae890f8c11c4510d9a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 19:54:09 +0200 Subject: x86/fpu: Remove __save_init_fpu() __save_init_fpu() is just a trivial wrapper around fpu_save_init(). Remove the extra layer of obfuscation. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 16a1c66..1e2b6c6 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -295,11 +295,6 @@ static inline int fpu_save_init(struct fpu *fpu) return 1; } -static inline int __save_init_fpu(struct task_struct *tsk) -{ - return fpu_save_init(&tsk->thread.fpu); -} - static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) @@ -439,7 +434,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta (use_eager_fpu() || new->thread.fpu.counter > 5); if (__thread_has_fpu(old)) { - if (!__save_init_fpu(old)) + if (!fpu_save_init(&old->thread.fpu)) task_disable_lazy_fpu_restore(old); else old->thread.fpu.last_cpu = cpu; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e898e83..6ce971c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -97,7 +97,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); if (__thread_has_fpu(me)) { - __save_init_fpu(me); + fpu_save_init(&me->thread.fpu); } else { this_cpu_write(fpu_owner_task, NULL); if (!use_eager_fpu()) @@ -135,7 +135,7 @@ void fpu__save(struct task_struct *tsk) if (use_eager_fpu()) { __save_fpu(tsk); } else { - __save_init_fpu(tsk); + fpu_save_init(&tsk->thread.fpu); __thread_fpu_end(tsk); } } -- cgit v0.10.2 From e102f30f4e22b7eb8f3dfbe7fec334cffb350fd8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 22 Apr 2015 20:09:29 +0200 Subject: x86/fpu: Move fpu_copy() to fpu/core.c Move fpu_copy() where its only user is. Beyond readability this also speeds up compilation, as fpu-internal.h is included in over a dozen .c files. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 1e2b6c6..e180fb9 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -565,20 +565,6 @@ extern int fpstate_alloc(struct fpu *fpu); extern void fpstate_free(struct fpu *fpu); extern int fpu__copy(struct task_struct *dst, struct task_struct *src); -static inline void fpu_copy(struct task_struct *dst, struct task_struct *src) -{ - if (use_eager_fpu()) { - memset(&dst->thread.fpu.state->xsave, 0, xstate_size); - __save_fpu(dst); - } else { - struct fpu *dfpu = &dst->thread.fpu; - struct fpu *sfpu = &src->thread.fpu; - - fpu__save(src); - memcpy(dfpu->state, sfpu->state, xstate_size); - } -} - static inline unsigned long alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 6ce971c..2cc2380 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -203,6 +203,20 @@ void fpstate_free(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpstate_free); +static void fpu_copy(struct task_struct *dst, struct task_struct *src) +{ + if (use_eager_fpu()) { + memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + __save_fpu(dst); + } else { + struct fpu *dfpu = &dst->thread.fpu; + struct fpu *sfpu = &src->thread.fpu; + + fpu__save(src); + memcpy(dfpu->state, sfpu->state, xstate_size); + } +} + int fpu__copy(struct task_struct *dst, struct task_struct *src) { dst->thread.fpu.counter = 0; -- cgit v0.10.2 From bfd6fc0581e7e2f3fa0b3e5e21cd6e54c3fbd16f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 08:55:34 +0200 Subject: x86/fpu: Add debugging check to fpu_copy() Also add a bit of documentation. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2cc2380..3aeab3f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -203,8 +203,18 @@ void fpstate_free(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpstate_free); +/* + * Copy the current task's FPU state to a new task's FPU context. + * + * In the 'eager' case we just save to the destination context. + * + * In the 'lazy' case we save to the source context, mark the FPU lazy + * via stts() and copy the source context into the destination context. + */ static void fpu_copy(struct task_struct *dst, struct task_struct *src) { + WARN_ON(src != current); + if (use_eager_fpu()) { memset(&dst->thread.fpu.state->xsave, 0, xstate_size); __save_fpu(dst); -- cgit v0.10.2 From 9a89b02918c03d91e685d06e49a652da5f35befc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 11:26:08 +0200 Subject: x86/fpu: Print out whether we are doing lazy/eager FPU context switches Ever since the kernel started defaulting to eager FPU switches on modern Intel CPUs it's not been obvious whether a given system is using the lazy or the eager FPU context switching logic. So generate a boot message about which mode the FPU code is in: x86/fpu: Using 'lazy' FPU context switches. or: x86/fpu: Using 'eager' FPU context switches. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index a52205b..61696c5 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -691,6 +691,8 @@ void __init_refok eager_fpu_init(void) if (eagerfpu == ENABLE) setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + printk_once(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); + if (!cpu_has_eager_fpu) { stts(); return; -- cgit v0.10.2 From 276983f8085db4a5f4e2cdcda6bce29a1da97eb0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 11:55:18 +0200 Subject: x86/fpu: Eliminate the __thread_has_fpu() wrapper Start migrating FPU methods towards using 'struct fpu *fpu' directly. __thread_has_fpu() is just a trivial wrapper around fpu->has_fpu, eliminate it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e180fb9..c005d1fc 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -323,16 +323,6 @@ static inline int restore_fpu_checking(struct task_struct *tsk) return fpu_restore_checking(&tsk->thread.fpu); } -/* - * Software FPU state helpers. Careful: these need to - * be preemption protection *and* they need to be - * properly paired with the CR0.TS changes! - */ -static inline int __thread_has_fpu(struct task_struct *tsk) -{ - return tsk->thread.fpu.has_fpu; -} - /* Must be paired with an 'stts' after! */ static inline void __thread_clear_has_fpu(struct task_struct *tsk) { @@ -370,13 +360,14 @@ static inline void __thread_fpu_begin(struct task_struct *tsk) static inline void drop_fpu(struct task_struct *tsk) { + struct fpu *fpu = &tsk->thread.fpu; /* * Forget coprocessor state.. 
*/ preempt_disable(); tsk->thread.fpu.counter = 0; - if (__thread_has_fpu(tsk)) { + if (fpu->has_fpu) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" @@ -424,6 +415,7 @@ typedef struct { int preload; } fpu_switch_t; static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) { + struct fpu *old_fpu = &old->thread.fpu; fpu_switch_t fpu; /* @@ -433,7 +425,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta fpu.preload = tsk_used_math(new) && (use_eager_fpu() || new->thread.fpu.counter > 5); - if (__thread_has_fpu(old)) { + if (old_fpu->has_fpu) { if (!fpu_save_init(&old->thread.fpu)) task_disable_lazy_fpu_restore(old); else diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3aeab3f..29b8377 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -57,8 +57,7 @@ static bool interrupted_kernel_fpu_idle(void) if (use_eager_fpu()) return true; - return !__thread_has_fpu(current) && - (read_cr0() & X86_CR0_TS); + return !current->thread.fpu.has_fpu && (read_cr0() & X86_CR0_TS); } /* @@ -93,11 +92,12 @@ EXPORT_SYMBOL(irq_fpu_usable); void __kernel_fpu_begin(void) { struct task_struct *me = current; + struct fpu *fpu = &me->thread.fpu; kernel_fpu_disable(); - if (__thread_has_fpu(me)) { - fpu_save_init(&me->thread.fpu); + if (fpu->has_fpu) { + fpu_save_init(fpu); } else { this_cpu_write(fpu_owner_task, NULL); if (!use_eager_fpu()) @@ -109,8 +109,9 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { struct task_struct *me = current; + struct fpu *fpu = &me->thread.fpu; - if (__thread_has_fpu(me)) { + if (fpu->has_fpu) { if (WARN_ON(restore_fpu_checking(me))) fpu_reset_state(me); } else if (!use_eager_fpu()) { @@ -128,14 +129,16 @@ EXPORT_SYMBOL(__kernel_fpu_end); */ void fpu__save(struct task_struct *tsk) { + struct fpu *fpu = &tsk->thread.fpu; + WARN_ON(tsk != current); preempt_disable(); - if (__thread_has_fpu(tsk)) { + if (fpu->has_fpu) { if (use_eager_fpu()) { __save_fpu(tsk); } else { - fpu_save_init(&tsk->thread.fpu); + fpu_save_init(fpu); __thread_fpu_end(tsk); } } -- cgit v0.10.2 From 36fe6175be15d33fec7c6aa53e6e202ad44f0b25 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:08:58 +0200 Subject: x86/fpu: Change __thread_clear_has_fpu() to 'struct fpu' parameter We do this to make the code more readable, and also to be able to eliminate task_struct usage from most of the FPU code. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index c005d1fc..94c068b 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -324,9 +324,9 @@ static inline int restore_fpu_checking(struct task_struct *tsk) } /* Must be paired with an 'stts' after! 
*/ -static inline void __thread_clear_has_fpu(struct task_struct *tsk) +static inline void __thread_clear_has_fpu(struct fpu *fpu) { - tsk->thread.fpu.has_fpu = 0; + fpu->has_fpu = 0; this_cpu_write(fpu_owner_task, NULL); } @@ -346,7 +346,7 @@ static inline void __thread_set_has_fpu(struct task_struct *tsk) */ static inline void __thread_fpu_end(struct task_struct *tsk) { - __thread_clear_has_fpu(tsk); + __thread_clear_has_fpu(&tsk->thread.fpu); if (!use_eager_fpu()) stts(); } -- cgit v0.10.2 From b0c050c5ba130c0ccb1b86b64f162a4601d160c7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:13:04 +0200 Subject: x86/fpu: Move 'PER_CPU(fpu_owner_task)' to fpu/core.c Move it closer to other per-cpu FPU data structures. This also unifies the 32-bit and 64-bit code. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 220ad95..88bb7a7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1182,8 +1182,6 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1; DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); - /* * Special IST stacks which the CPU switches to when it calls * an IST-marked descriptor entry. Up to 7 stacks (hardware @@ -1274,7 +1272,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; EXPORT_PER_CPU_SYMBOL(__preempt_count); -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); /* * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 29b8377..d29fec7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -20,6 +20,11 @@ */ static DEFINE_PER_CPU(bool, in_kernel_fpu); +/* + * Track which task is using the FPU on the CPU: + */ +DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); + static void kernel_fpu_disable(void) -- cgit v0.10.2 From 36b544dcd3f935bd33ada700d070433a57982771 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:18:28 +0200 Subject: x86/fpu: Change fpu_owner_task to fpu_fpregs_owner_ctx Track the FPU owner context instead of the owner task: together with later changes, this will allow subsequent patches to eliminate 'struct task_struct' usage from various FPU code paths - we'll be able to use 'struct fpu' alone. There's no change in code size:

    text    data     bss      dec     hex filename
13066467 2545248 1626112 17237827 1070743 vmlinux.before
13066467 2545248 1626112 17237827 1070743 vmlinux.after

Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 94c068b..d0fe7bb 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -37,7 +37,7 @@ extern unsigned int mxcsr_feature_mask; extern void fpu__cpu_init(void); extern void eager_fpu_init(void); -DECLARE_PER_CPU(struct task_struct *, fpu_owner_task); +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); extern void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk); @@ -63,7 +63,7 @@ static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} #endif /* - * Must be run with preemption disabled: this clears the fpu_owner_task, + * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, * on this CPU. * * This will disable any lazy FPU state restore of the current FPU state, @@ -71,7 +71,7 @@ static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} */ static inline void __cpu_disable_lazy_restore(unsigned int cpu) { - per_cpu(fpu_owner_task, cpu) = NULL; + per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; } /* @@ -86,7 +86,7 @@ static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk) static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) { - return new == this_cpu_read_stable(fpu_owner_task) && + return &new->thread.fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == new->thread.fpu.last_cpu; } @@ -327,14 +327,14 @@ static inline int restore_fpu_checking(struct task_struct *tsk) static inline void __thread_clear_has_fpu(struct fpu *fpu) { fpu->has_fpu = 0; - this_cpu_write(fpu_owner_task, NULL); + this_cpu_write(fpu_fpregs_owner_ctx, NULL); } /* Must be paired with a 'clts' before! */ static inline void __thread_set_has_fpu(struct task_struct *tsk) { tsk->thread.fpu.has_fpu = 1; - this_cpu_write(fpu_owner_task, tsk); + this_cpu_write(fpu_fpregs_owner_ctx, &tsk->thread.fpu); } /* @@ -431,7 +431,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta else old->thread.fpu.last_cpu = cpu; - /* But leave fpu_owner_task! */ + /* But leave fpu_fpregs_owner_ctx! */ old->thread.fpu.has_fpu = 0; /* Don't change CR0.TS if we just switch! */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d29fec7..ac390c69 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -21,9 +21,9 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu); /* - * Track which task is using the FPU on the CPU: + * Track which context is using the FPU on the CPU: */ -DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); +DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); static void kernel_fpu_disable(void) { @@ -96,15 +96,14 @@ EXPORT_SYMBOL(irq_fpu_usable); void __kernel_fpu_begin(void) { - struct task_struct *me = current; - struct fpu *fpu = &me->thread.fpu; + struct fpu *fpu = ¤t->thread.fpu; kernel_fpu_disable(); if (fpu->has_fpu) { fpu_save_init(fpu); } else { - this_cpu_write(fpu_owner_task, NULL); + this_cpu_write(fpu_fpregs_owner_ctx, NULL); if (!use_eager_fpu()) clts(); } -- cgit v0.10.2 From c0311f63e3cc8b29b6006ac8c1d4e3f6fbb2e357 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:24:59 +0200 Subject: x86/fpu: Remove 'struct task_struct' usage from __thread_set_has_fpu() Migrate this function to pure 'struct fpu' usage. 
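[ Illustration: the shape of this conversion, sketched on hypothetical stand-in types rather than the real kernel structures: ]

	struct fpu_ctx  { int has_fpu; };		/* stand-in for 'struct fpu' */
	struct task_ctx { struct fpu_ctx fpu; };	/* stand-in for 'struct task_struct' */

	/* Before: took the whole task and reached through it: */
	static void set_has_fpu_old(struct task_ctx *tsk)
	{
		tsk->fpu.has_fpu = 1;
	}

	/* After: takes exactly the state it modifies; callers pass &tsk->fpu: */
	static void set_has_fpu_new(struct fpu_ctx *fpu)
	{
		fpu->has_fpu = 1;
	}

	int main(void)
	{
		struct task_ctx tsk = { { 0 } };

		set_has_fpu_old(&tsk);		/* old calling convention */
		tsk.fpu.has_fpu = 0;
		set_has_fpu_new(&tsk.fpu);	/* new calling convention */
		return tsk.fpu.has_fpu ? 0 : 1;
	}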
Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index d0fe7bb..cf0d412 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -331,10 +331,10 @@ static inline void __thread_clear_has_fpu(struct fpu *fpu) } /* Must be paired with a 'clts' before! */ -static inline void __thread_set_has_fpu(struct task_struct *tsk) +static inline void __thread_set_has_fpu(struct fpu *fpu) { - tsk->thread.fpu.has_fpu = 1; - this_cpu_write(fpu_fpregs_owner_ctx, &tsk->thread.fpu); + fpu->has_fpu = 1; + this_cpu_write(fpu_fpregs_owner_ctx, fpu); } /* @@ -355,7 +355,7 @@ static inline void __thread_fpu_begin(struct task_struct *tsk) { if (!use_eager_fpu()) clts(); - __thread_set_has_fpu(tsk); + __thread_set_has_fpu(&tsk->thread.fpu); } static inline void drop_fpu(struct task_struct *tsk) @@ -416,6 +416,7 @@ typedef struct { int preload; } fpu_switch_t; static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) { struct fpu *old_fpu = &old->thread.fpu; + struct fpu *new_fpu = &new->thread.fpu; fpu_switch_t fpu; /* @@ -437,7 +438,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { new->thread.fpu.counter++; - __thread_set_has_fpu(new); + __thread_set_has_fpu(new_fpu); prefetch(new->thread.fpu.state); } else if (!use_eager_fpu()) stts(); -- cgit v0.10.2 From 35191e3f073c442b201f8beb5315561271d2327a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:26:55 +0200 Subject: x86/fpu: Remove 'struct task_struct' usage from __thread_fpu_end() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index cf0d412..b1803a6 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -344,9 +344,9 @@ static inline void __thread_set_has_fpu(struct fpu *fpu) * These generally need preemption protection to work, * do try to avoid using these on their own. 
*/ -static inline void __thread_fpu_end(struct task_struct *tsk) +static inline void __thread_fpu_end(struct fpu *fpu) { - __thread_clear_has_fpu(&tsk->thread.fpu); + __thread_clear_has_fpu(fpu); if (!use_eager_fpu()) stts(); } @@ -372,7 +372,7 @@ static inline void drop_fpu(struct task_struct *tsk) asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(tsk); + __thread_fpu_end(fpu); } clear_stopped_child_used_math(tsk); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ac390c69..4e1f8f1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -143,7 +143,7 @@ void fpu__save(struct task_struct *tsk) __save_fpu(tsk); } else { fpu_save_init(fpu); - __thread_fpu_end(tsk); + __thread_fpu_end(fpu); } } preempt_enable(); -- cgit v0.10.2 From 4540d3faa7c3fca6a6125448861de0e2e485658b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:31:17 +0200 Subject: x86/fpu: Remove 'struct task_struct' usage from __thread_fpu_begin() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index b1803a6..44516ad 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -351,11 +351,11 @@ static inline void __thread_fpu_end(struct fpu *fpu) stts(); } -static inline void __thread_fpu_begin(struct task_struct *tsk) +static inline void __thread_fpu_begin(struct fpu *fpu) { if (!use_eager_fpu()) clts(); - __thread_set_has_fpu(&tsk->thread.fpu); + __thread_set_has_fpu(fpu); } static inline void drop_fpu(struct task_struct *tsk) @@ -451,7 +451,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta fpu.preload = 0; else prefetch(new->thread.fpu.state); - __thread_fpu_begin(new); + __thread_fpu_begin(new_fpu); } } return fpu; @@ -505,9 +505,11 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame) */ static inline void user_fpu_begin(void) { + struct fpu *fpu = ¤t->thread.fpu; + preempt_disable(); if (!user_has_fpu()) - __thread_fpu_begin(current); + __thread_fpu_begin(fpu); preempt_enable(); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4e1f8f1..cf49cd5 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -329,6 +329,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) void fpu__restore(void) { struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; if (!tsk_used_math(tsk)) { local_irq_enable(); @@ -347,7 +348,7 @@ void fpu__restore(void) /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ kernel_fpu_disable(); - __thread_fpu_begin(tsk); + __thread_fpu_begin(fpu); if (unlikely(restore_fpu_checking(tsk))) { fpu_reset_state(tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); -- cgit v0.10.2 From 4c1384100ebf51651d02430a7f70661ef1ef06ac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:46:20 +0200 Subject: x86/fpu: Open code PF_USED_MATH usages PF_USED_MATH is used directly, but also in a handful of helper inlines. To ease the elimination of PF_USED_MATH, convert all inline helpers to open-coded PF_USED_MATH usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 4bafd5b..bffb2c4 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -321,7 +321,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (used_math()) { + if (current->flags & PF_USED_MATH) { unsigned long fx_aligned, math_size; sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 44516ad..2cac49e 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -375,7 +375,8 @@ static inline void drop_fpu(struct task_struct *tsk) __thread_fpu_end(fpu); } - clear_stopped_child_used_math(tsk); + tsk->flags &= ~PF_USED_MATH; + preempt_enable(); } @@ -423,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * If the task has used the math, pre-load the FPU on xsave processors * or if the past 5 consecutive context-switches used math. */ - fpu.preload = tsk_used_math(new) && + fpu.preload = (new->flags & PF_USED_MATH) && (use_eager_fpu() || new->thread.fpu.counter > 5); if (old_fpu->has_fpu) { diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index cf49cd5..90f624d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -242,7 +242,7 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) task_disable_lazy_fpu_restore(dst); - if (tsk_used_math(src)) { + if (src->flags & PF_USED_MATH) { int err = fpstate_alloc(&dst->thread.fpu); if (err) @@ -331,7 +331,7 @@ void fpu__restore(void) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - if (!tsk_used_math(tsk)) { + if (!(tsk->flags & PF_USED_MATH)) { local_irq_enable(); /* * does a slab alloc which can sleep @@ -361,12 +361,14 @@ EXPORT_SYMBOL_GPL(fpu__restore); void fpu__flush_thread(struct task_struct *tsk) { + WARN_ON(tsk != current); + if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ drop_fpu(tsk); fpstate_free(&tsk->thread.fpu); } else { - if (!tsk_used_math(tsk)) { + if (!(tsk->flags & PF_USED_MATH)) { /* kthread execs. TODO: cleanup this horror. */ if (WARN_ON(fpstate_alloc_init(tsk))) force_sig(SIGKILL, tsk); @@ -383,12 +385,12 @@ void fpu__flush_thread(struct task_struct *tsk) */ int fpregs_active(struct task_struct *target, const struct user_regset *regset) { - return tsk_used_math(target) ? regset->n : 0; + return (target->flags & PF_USED_MATH) ? regset->n : 0; } int xfpregs_active(struct task_struct *target, const struct user_regset *regset) { - return (cpu_has_fxsr && tsk_used_math(target)) ? regset->n : 0; + return (cpu_has_fxsr && (target->flags & PF_USED_MATH)) ? 
regset->n : 0; } int xfpregs_get(struct task_struct *target, const struct user_regset *regset, @@ -719,7 +721,7 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) struct task_struct *tsk = current; int fpvalid; - fpvalid = !!used_math(); + fpvalid = !!(tsk->flags & PF_USED_MATH); if (fpvalid) fpvalid = !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 61696c5..8cd1270 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -349,7 +349,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!used_math() && fpstate_alloc_init(tsk)) + if (!(tsk->flags & PF_USED_MATH) && fpstate_alloc_init(tsk)) return -1; if (!static_cpu_has(X86_FEATURE_FPU)) @@ -384,12 +384,12 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) int err = 0; /* - * Drop the current fpu which clears used_math(). This ensures + * Drop the current fpu which clears PF_USED_MATH. This ensures * that any context-switch during the copy of the new state, * avoids the intermediate state from getting restored/saved. * Thus avoiding the new restored state from getting corrupted. * We will be ready to restore/save the state only after - * set_used_math() is again set. + * PF_USED_MATH is again set. */ drop_fpu(tsk); @@ -401,7 +401,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); } - set_used_math(); + tsk->flags |= PF_USED_MATH; if (use_eager_fpu()) { preempt_disable(); fpu__restore(); @@ -685,7 +685,7 @@ void xsave_init(void) */ void __init_refok eager_fpu_init(void) { - WARN_ON(used_math()); + WARN_ON(current->flags & PF_USED_MATH); current_thread_info()->status = 0; if (eagerfpu == ENABLE) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 35f867a..8e2529e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -217,7 +217,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } } - if (used_math()) { + if (current->flags & PF_USED_MATH) { sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), &buf_fx, &math_size); *fpstate = (void __user *)sp; @@ -233,7 +233,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if (used_math() && + if ((current->flags & PF_USED_MATH) && save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; @@ -664,7 +664,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. 
*/ - if (used_math()) + if (current->flags & PF_USED_MATH) fpu_reset_state(current); } signal_setup_done(failed, ksig, stepping); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index be276e0..0635a1f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6600,7 +6600,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!tsk_used_math(current) && fpstate_alloc_init(current)) + if (!(current->flags & PF_USED_MATH) && fpstate_alloc_init(current)) return -ENOMEM; if (vcpu->sigset_active) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index c9ff09a..bf62880 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -148,7 +148,7 @@ void math_emulate(struct math_emu_info *info) unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; - if (!used_math()) { + if (!(current->flags & PF_USED_MATH)) { if (fpstate_alloc_init(current)) { do_group_exit(SIGKILL); return; -- cgit v0.10.2 From af7f8721f1f1252473b154c38dd7583abfe3206b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 14:06:05 +0200 Subject: x86/fpu: Document fpu__unlazy_stopped() Explain its usage and also document a TODO item. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 90f624d..7798131 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -284,10 +284,27 @@ int fpstate_alloc_init(struct task_struct *curr) EXPORT_SYMBOL_GPL(fpstate_alloc_init); /* - * The _current_ task is using the FPU for the first time - * so initialize it and set the mxcsr to its default - * value at reset if we support XMM instructions and then - * remember the current task has used the FPU. + * This function is called before we modify a stopped child's + * FPU state context. + * + * If the child has not used the FPU before then initialize its + * FPU context. + * + * If the child has used the FPU before then unlazy it. + * + * [ After this function call, after the context is modified and + * the child task is woken up, the child task will restore + * the modified FPU state from the modified context. If we + * didn't clear its lazy status here then the lazy in-registers + * state pending on its former CPU could be restored, losing + * the modifications. ] + * + * This function is also called before we read a stopped child's + * FPU state - to make sure it's modified. + * + * TODO: A future optimization would be to skip the unlazying in + * the read-only case, it's not strictly necessary for + * read-only access to the context. */ static int fpu__unlazy_stopped(struct task_struct *child) { -- cgit v0.10.2 From c5bedc6847c3be6efe0e671a6155c9a25fd468bf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:49:20 +0200 Subject: x86/fpu: Get rid of PF_USED_MATH usage, convert it to fpu->fpstate_active Introduce a simple fpu->fpstate_active flag in the fpu context data structure and use that instead of PF_USED_MATH in task->flags. Testing for this flag byte should be slightly more efficient than testing a bit in a bitmask, but the main advantage is that most FPU functions can now be performed on a 'struct fpu' alone, they don't need access to 'struct task_struct' anymore. 
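[ Illustration: the two representations being swapped, sketched with stand-in names and an arbitrary bit value - not the real kernel definitions: ]

	#define PF_USED_MATH_BIT	0x00002000UL	/* one bit in a shared flags bitmask */

	struct task_ctx { unsigned long flags; };		/* stand-in for task_struct::flags */
	struct fpu_ctx  { unsigned char fpstate_active; };	/* dedicated byte in 'struct fpu' */

	/* Before: needs the task, tests one bit of a shared bitmask: */
	static int fpstate_active_old(const struct task_ctx *tsk)
	{
		return (tsk->flags & PF_USED_MATH_BIT) != 0;
	}

	/* After: needs only the FPU context, reads a plain byte: */
	static int fpstate_active_new(const struct fpu_ctx *fpu)
	{
		return fpu->fpstate_active;
	}

	int main(void)
	{
		struct task_ctx tsk = { PF_USED_MATH_BIT };
		struct fpu_ctx fpu = { 1 };

		/* Both representations answer the same question: */
		return (fpstate_active_old(&tsk) == fpstate_active_new(&fpu)) ? 0 : 1;
	}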
There's a slight linecount increase, mostly due to the 'fpu' local variables and due to extra comments. The local variables will go away once we move most of the FPU methods to pure 'struct fpu' parameters. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index bffb2c4..e1ec6f9 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -307,6 +307,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, size_t frame_size, void __user **fpstate) { + struct fpu *fpu = ¤t->thread.fpu; unsigned long sp; /* Default to using normal stack */ @@ -321,7 +322,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, ksig->ka.sa.sa_restorer) sp = (unsigned long) ksig->ka.sa.sa_restorer; - if (current->flags & PF_USED_MATH) { + if (fpu->fpstate_active) { unsigned long fx_aligned, math_size; sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 2cac49e..9311126 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -375,7 +375,7 @@ static inline void drop_fpu(struct task_struct *tsk) __thread_fpu_end(fpu); } - tsk->flags &= ~PF_USED_MATH; + fpu->fpstate_active = 0; preempt_enable(); } @@ -424,7 +424,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * If the task has used the math, pre-load the FPU on xsave processors * or if the past 5 consecutive context-switches used math. */ - fpu.preload = (new->flags & PF_USED_MATH) && + fpu.preload = new_fpu->fpstate_active && (use_eager_fpu() || new->thread.fpu.counter > 5); if (old_fpu->has_fpu) { diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index efb520d..f6317d9a 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -137,6 +137,12 @@ struct fpu { * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; + /* + * This flag indicates whether this context is fpstate_active: if the task is + * not running then we can restore from this context, if the task + * is running then we should save into this context. + */ + unsigned char fpstate_active; }; #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d50cc7f..0f4add4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -385,6 +385,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; + + /* Floating point and extended processor state */ + struct fpu fpu; + /* Save middle states of ptrace breakpoints */ struct perf_event *ptrace_bps[HBP_NUM]; /* Debug status used for traps, single steps, etc... 
*/ @@ -395,8 +399,6 @@ struct thread_struct { unsigned long cr2; unsigned long trap_nr; unsigned long error_code; - /* floating point and extended processor state */ - struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7798131..9e7f9e7 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -236,14 +236,17 @@ static void fpu_copy(struct task_struct *dst, struct task_struct *src) int fpu__copy(struct task_struct *dst, struct task_struct *src) { + struct fpu *dst_fpu = &dst->thread.fpu; + struct fpu *src_fpu = &src->thread.fpu; + dst->thread.fpu.counter = 0; dst->thread.fpu.has_fpu = 0; dst->thread.fpu.state = NULL; task_disable_lazy_fpu_restore(dst); - if (src->flags & PF_USED_MATH) { - int err = fpstate_alloc(&dst->thread.fpu); + if (src_fpu->fpstate_active) { + int err = fpstate_alloc(dst_fpu); if (err) return err; @@ -260,11 +263,12 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) */ int fpstate_alloc_init(struct task_struct *curr) { + struct fpu *fpu = &curr->thread.fpu; int ret; if (WARN_ON_ONCE(curr != current)) return -EINVAL; - if (WARN_ON_ONCE(curr->flags & PF_USED_MATH)) + if (WARN_ON_ONCE(fpu->fpstate_active)) return -EINVAL; /* @@ -277,7 +281,7 @@ int fpstate_alloc_init(struct task_struct *curr) fpstate_init(&curr->thread.fpu); /* Safe to do for the current task: */ - curr->flags |= PF_USED_MATH; + fpu->fpstate_active = 1; return 0; } @@ -308,12 +312,13 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init); */ static int fpu__unlazy_stopped(struct task_struct *child) { + struct fpu *child_fpu = &child->thread.fpu; int ret; if (WARN_ON_ONCE(child == current)) return -EINVAL; - if (child->flags & PF_USED_MATH) { + if (child_fpu->fpstate_active) { task_disable_lazy_fpu_restore(child); return 0; } @@ -328,7 +333,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) fpstate_init(&child->thread.fpu); /* Safe to do for stopped child tasks: */ - child->flags |= PF_USED_MATH; + child_fpu->fpstate_active = 1; return 0; } @@ -348,7 +353,7 @@ void fpu__restore(void) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - if (!(tsk->flags & PF_USED_MATH)) { + if (!fpu->fpstate_active) { local_irq_enable(); /* * does a slab alloc which can sleep @@ -378,6 +383,8 @@ EXPORT_SYMBOL_GPL(fpu__restore); void fpu__flush_thread(struct task_struct *tsk) { + struct fpu *fpu = &tsk->thread.fpu; + WARN_ON(tsk != current); if (!use_eager_fpu()) { @@ -385,7 +392,7 @@ void fpu__flush_thread(struct task_struct *tsk) drop_fpu(tsk); fpstate_free(&tsk->thread.fpu); } else { - if (!(tsk->flags & PF_USED_MATH)) { + if (!fpu->fpstate_active) { /* kthread execs. TODO: cleanup this horror. */ if (WARN_ON(fpstate_alloc_init(tsk))) force_sig(SIGKILL, tsk); @@ -402,12 +409,16 @@ void fpu__flush_thread(struct task_struct *tsk) */ int fpregs_active(struct task_struct *target, const struct user_regset *regset) { - return (target->flags & PF_USED_MATH) ? regset->n : 0; + struct fpu *target_fpu = &target->thread.fpu; + + return target_fpu->fpstate_active ? regset->n : 0; } int xfpregs_active(struct task_struct *target, const struct user_regset *regset) { - return (cpu_has_fxsr && (target->flags & PF_USED_MATH)) ? regset->n : 0; + struct fpu *target_fpu = &target->thread.fpu; + + return (cpu_has_fxsr && target_fpu->fpstate_active) ? 
regset->n : 0; } int xfpregs_get(struct task_struct *target, const struct user_regset *regset, @@ -733,16 +744,17 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * struct user_i387_struct) but is in fact only used for 32-bit * dumps, so on 64-bit it is really struct user_i387_ia32_struct. */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *fpu) +int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) { struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; int fpvalid; - fpvalid = !!(tsk->flags & PF_USED_MATH); + fpvalid = fpu->fpstate_active; if (fpvalid) fpvalid = !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct), - fpu, NULL); + ufpu, NULL); return fpvalid; } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 8cd1270..dc346e1 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -334,6 +334,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) { int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; int state_size = xstate_size; u64 xstate_bv = 0; int fx_only = 0; @@ -349,7 +350,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!(tsk->flags & PF_USED_MATH) && fpstate_alloc_init(tsk)) + if (!fpu->fpstate_active && fpstate_alloc_init(tsk)) return -1; if (!static_cpu_has(X86_FEATURE_FPU)) @@ -384,12 +385,12 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) int err = 0; /* - * Drop the current fpu which clears PF_USED_MATH. This ensures + * Drop the current fpu which clears fpu->fpstate_active. This ensures * that any context-switch during the copy of the new state, * avoids the intermediate state from getting restored/saved. * Thus avoiding the new restored state from getting corrupted. * We will be ready to restore/save the state only after - * PF_USED_MATH is again set. + * fpu->fpstate_active is again set. 
*/ drop_fpu(tsk); @@ -401,7 +402,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); } - tsk->flags |= PF_USED_MATH; + fpu->fpstate_active = 1; if (use_eager_fpu()) { preempt_disable(); fpu__restore(); @@ -685,7 +686,7 @@ void xsave_init(void) */ void __init_refok eager_fpu_init(void) { - WARN_ON(current->flags & PF_USED_MATH); + WARN_ON(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; if (eagerfpu == ENABLE) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 8e2529e..20a9d35 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -198,6 +198,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, unsigned long sp = regs->sp; unsigned long buf_fx = 0; int onsigstack = on_sig_stack(sp); + struct fpu *fpu = ¤t->thread.fpu; /* redzone */ if (config_enabled(CONFIG_X86_64)) @@ -217,7 +218,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } } - if (current->flags & PF_USED_MATH) { + if (fpu->fpstate_active) { sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), &buf_fx, &math_size); *fpstate = (void __user *)sp; @@ -233,7 +234,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, return (void __user *)-1L; /* save i387 and extended state */ - if ((current->flags & PF_USED_MATH) && + if (fpu->fpstate_active && save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; @@ -616,6 +617,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { bool stepping, failed; + struct fpu *fpu = ¤t->thread.fpu; /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { @@ -664,7 +666,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - if (current->flags & PF_USED_MATH) + if (fpu->fpstate_active) fpu_reset_state(current); } signal_setup_done(failed, ksig, stepping); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0635a1f..bab8afb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6597,10 +6597,11 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) { + struct fpu *fpu = ¤t->thread.fpu; int r; sigset_t sigsaved; - if (!(current->flags & PF_USED_MATH) && fpstate_alloc_init(current)) + if (!fpu->fpstate_active && fpstate_alloc_init(current)) return -ENOMEM; if (vcpu->sigset_active) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index bf62880..f1aac55 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -147,8 +147,9 @@ void math_emulate(struct math_emu_info *info) unsigned long code_base = 0; unsigned long code_limit = 0; /* Initialized to stop compiler warnings */ struct desc_struct code_descriptor; + struct fpu *fpu = ¤t->thread.fpu; - if (!(current->flags & PF_USED_MATH)) { + if (!fpu->fpstate_active) { if (fpstate_alloc_init(current)) { do_group_exit(SIGKILL); return; -- cgit v0.10.2 From ca6787ba0fcc875cfb06dc2a538ac23210b7d251 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 12:33:50 +0200 Subject: x86/fpu: Remove 'struct task_struct' usage from drop_fpu() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 9311126..e8f7134 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -358,14 +358,13 @@ static inline void __thread_fpu_begin(struct fpu *fpu) __thread_set_has_fpu(fpu); } -static inline void drop_fpu(struct task_struct *tsk) +static inline void drop_fpu(struct fpu *fpu) { - struct fpu *fpu = &tsk->thread.fpu; /* * Forget coprocessor state.. */ preempt_disable(); - tsk->thread.fpu.counter = 0; + fpu->counter = 0; if (fpu->has_fpu) { /* Ignore delayed exceptions from user space */ @@ -394,8 +393,10 @@ static inline void restore_init_xstate(void) */ static inline void fpu_reset_state(struct task_struct *tsk) { + struct fpu *fpu = &tsk->thread.fpu; + if (!use_eager_fpu()) - drop_fpu(tsk); + drop_fpu(fpu); else restore_init_xstate(); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9e7f9e7..ba539fc 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -389,7 +389,7 @@ void fpu__flush_thread(struct task_struct *tsk) if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ - drop_fpu(tsk); + drop_fpu(fpu); fpstate_free(&tsk->thread.fpu); } else { if (!fpu->fpstate_active) { diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index dc346e1..049dc61 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -392,7 +392,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) * We will be ready to restore/save the state only after * fpu->fpstate_active is again set. */ - drop_fpu(tsk); + drop_fpu(fpu); if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index bb7d4ab..50d503a 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -104,6 +104,7 @@ void exit_thread(void) struct task_struct *me = current; struct thread_struct *t = &me->thread; unsigned long *bp = t->io_bitmap_ptr; + struct fpu *fpu = &t->fpu; if (bp) { struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu()); @@ -119,7 +120,7 @@ void exit_thread(void) kfree(bp); } - drop_fpu(me); + drop_fpu(fpu); } void flush_thread(void) -- cgit v0.10.2 From eb6a3251bfe34f327570993e9a95dbf3a592b912 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:08:41 +0200 Subject: x86/fpu: Remove task_disable_lazy_fpu_restore() Replace task_disable_lazy_fpu_restore() with easier-to-read open-coded uses: we already update the fpu->last_cpu field explicitly in other cases. (This also removes yet another FPU method that uses task_struct.) Better explain the fpu::last_cpu field in the structure definition. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index e8f7134..76a1f35 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -74,16 +74,6 @@ static inline void __cpu_disable_lazy_restore(unsigned int cpu) per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; } -/* - * Used to indicate that the FPU state in memory is newer than the FPU - * state in registers, and the FPU state should be reloaded next time the - * task is run. Only safe on the current task, or non-running tasks. - */ -static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk) -{ - tsk->thread.fpu.last_cpu = ~0; -} - static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) { return &new->thread.fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && @@ -430,7 +420,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta if (old_fpu->has_fpu) { if (!fpu_save_init(&old->thread.fpu)) - task_disable_lazy_fpu_restore(old); + old->thread.fpu.last_cpu = -1; else old->thread.fpu.last_cpu = cpu; @@ -446,7 +436,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta stts(); } else { old->thread.fpu.counter = 0; - task_disable_lazy_fpu_restore(old); + old->thread.fpu.last_cpu = -1; if (fpu.preload) { new->thread.fpu.counter++; if (fpu_lazy_restore(new, cpu)) diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index f6317d9a..cad1c37 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -125,7 +125,18 @@ union thread_xstate { }; struct fpu { + /* + * Records the last CPU on which this context was loaded into + * FPU registers. (In the lazy-switching case we might be + * able to reuse FPU registers across multiple context switches + * this way, if no intermediate task used the FPU.) + * + * A value of -1 is used to indicate that the FPU state in context + * memory is newer than the FPU state in registers, and that the + * FPU state should be reloaded next time the task is run. + */ unsigned int last_cpu; + unsigned int has_fpu; union thread_xstate *state; /* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ba539fc..230e937 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -242,8 +242,7 @@ int fpu__copy(struct task_struct *dst, struct task_struct *src) dst->thread.fpu.counter = 0; dst->thread.fpu.has_fpu = 0; dst->thread.fpu.state = NULL; - - task_disable_lazy_fpu_restore(dst); + dst->thread.fpu.last_cpu = -1; if (src_fpu->fpstate_active) { int err = fpstate_alloc(dst_fpu); @@ -319,7 +318,7 @@ static int fpu__unlazy_stopped(struct task_struct *child) return -EINVAL; if (child_fpu->fpstate_active) { - task_disable_lazy_fpu_restore(child); + child->thread.fpu.last_cpu = -1; return 0; } -- cgit v0.10.2 From 66ddc2cb0f598818b39b66867007634244322843 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:25:44 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu_lazy_restore() Also rename it to fpu_want_lazy_restore(), to better indicate that this function just tests whether we can do a lazy restore. (The old name suggested that it was doing the lazy restore, which is not the case.) Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 76a1f35..3f6d36c 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -74,10 +74,9 @@ static inline void __cpu_disable_lazy_restore(unsigned int cpu) per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; } -static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu) +static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) { - return &new->thread.fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && - cpu == new->thread.fpu.last_cpu; + return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } static inline int is_ia32_compat_frame(void) @@ -439,7 +438,7 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta old->thread.fpu.last_cpu = -1; if (fpu.preload) { new->thread.fpu.counter++; - if (fpu_lazy_restore(new, cpu)) + if (fpu_want_lazy_restore(new_fpu, cpu)) fpu.preload = 0; else prefetch(new->thread.fpu.state); -- cgit v0.10.2 From 11f2d50b10289f49676ec07bf3fef932473ef6d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:30:59 +0200 Subject: x86/fpu: Use 'struct fpu' in restore_fpu_checking() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 3f6d36c..2d7934e4 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -294,7 +294,7 @@ static inline int fpu_restore_checking(struct fpu *fpu) return frstor_checking(&fpu->state->fsave); } -static inline int restore_fpu_checking(struct task_struct *tsk) +static inline int restore_fpu_checking(struct fpu *fpu) { /* * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is @@ -306,10 +306,10 @@ static inline int restore_fpu_checking(struct task_struct *tsk) "fnclex\n\t" "emms\n\t" "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (tsk->thread.fpu.has_fpu)); + : : [addr] "m" (fpu->has_fpu)); } - return fpu_restore_checking(&tsk->thread.fpu); + return fpu_restore_checking(fpu); } /* Must be paired with an 'stts' after! 
*/ @@ -456,8 +456,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta */ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) { + struct fpu *new_fpu = &new->thread.fpu; + if (fpu.preload) { - if (unlikely(restore_fpu_checking(new))) + if (unlikely(restore_fpu_checking(new_fpu))) fpu_reset_state(new); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 230e937..1ecd250 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) struct fpu *fpu = &me->thread.fpu; if (fpu->has_fpu) { - if (WARN_ON(restore_fpu_checking(me))) + if (WARN_ON(restore_fpu_checking(fpu))) fpu_reset_state(me); } else if (!use_eager_fpu()) { stts(); @@ -370,7 +370,7 @@ void fpu__restore(void) /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ kernel_fpu_disable(); __thread_fpu_begin(fpu); - if (unlikely(restore_fpu_checking(tsk))) { + if (unlikely(restore_fpu_checking(fpu))) { fpu_reset_state(tsk); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); } else { -- cgit v0.10.2 From af2d94fddcf41e879908b35a8a5308fb94e989c5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:34:20 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu_reset_state() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 2d7934e4..579f7d0 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -380,10 +380,8 @@ static inline void restore_init_xstate(void) * Reset the FPU state in the eager case and drop it in the lazy case (later use * will reinit it). 
*/ -static inline void fpu_reset_state(struct task_struct *tsk) +static inline void fpu_reset_state(struct fpu *fpu) { - struct fpu *fpu = &tsk->thread.fpu; - if (!use_eager_fpu()) drop_fpu(fpu); else @@ -460,7 +458,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) if (fpu.preload) { if (unlikely(restore_fpu_checking(new_fpu))) - fpu_reset_state(new); + fpu_reset_state(new_fpu); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1ecd250..41c9289 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -112,12 +112,11 @@ EXPORT_SYMBOL(__kernel_fpu_begin); void __kernel_fpu_end(void) { - struct task_struct *me = current; - struct fpu *fpu = &me->thread.fpu; + struct fpu *fpu = ¤t->thread.fpu; if (fpu->has_fpu) { if (WARN_ON(restore_fpu_checking(fpu))) - fpu_reset_state(me); + fpu_reset_state(fpu); } else if (!use_eager_fpu()) { stts(); } @@ -371,7 +370,7 @@ void fpu__restore(void) kernel_fpu_disable(); __thread_fpu_begin(fpu); if (unlikely(restore_fpu_checking(fpu))) { - fpu_reset_state(tsk); + fpu_reset_state(fpu); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); } else { tsk->thread.fpu.counter++; diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 049dc61..3953cbf 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -343,7 +343,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - fpu_reset_state(tsk); + fpu_reset_state(fpu); return 0; } @@ -417,7 +417,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ user_fpu_begin(); if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { - fpu_reset_state(tsk); + fpu_reset_state(fpu); return -1; } } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 20a9d35..bcb853e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -667,7 +667,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * Ensure the signal handler starts with the new fpu state. */ if (fpu->fpstate_active) - fpu_reset_state(current); + fpu_reset_state(fpu); } signal_setup_done(failed, ksig, stepping); } -- cgit v0.10.2 From cb8818b6acb45a4e0acc2308df216f36cc5b950c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:39:04 +0200 Subject: x86/fpu: Use 'struct fpu' in switch_fpu_prepare() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 579f7d0..60d2c6f 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -402,10 +402,9 @@ static inline void fpu_reset_state(struct fpu *fpu) */ typedef struct { int preload; } fpu_switch_t; -static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu) +static inline fpu_switch_t +switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) { - struct fpu *old_fpu = &old->thread.fpu; - struct fpu *new_fpu = &new->thread.fpu; fpu_switch_t fpu; /* @@ -413,33 +412,33 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct ta * or if the past 5 consecutive context-switches used math. 
*/ fpu.preload = new_fpu->fpstate_active && - (use_eager_fpu() || new->thread.fpu.counter > 5); + (use_eager_fpu() || new_fpu->counter > 5); if (old_fpu->has_fpu) { - if (!fpu_save_init(&old->thread.fpu)) - old->thread.fpu.last_cpu = -1; + if (!fpu_save_init(old_fpu)) + old_fpu->last_cpu = -1; else - old->thread.fpu.last_cpu = cpu; + old_fpu->last_cpu = cpu; /* But leave fpu_fpregs_owner_ctx! */ - old->thread.fpu.has_fpu = 0; + old_fpu->has_fpu = 0; /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { - new->thread.fpu.counter++; + new_fpu->counter++; __thread_set_has_fpu(new_fpu); - prefetch(new->thread.fpu.state); + prefetch(new_fpu->state); } else if (!use_eager_fpu()) stts(); } else { - old->thread.fpu.counter = 0; - old->thread.fpu.last_cpu = -1; + old_fpu->counter = 0; + old_fpu->last_cpu = -1; if (fpu.preload) { - new->thread.fpu.counter++; + new_fpu->counter++; if (fpu_want_lazy_restore(new_fpu, cpu)) fpu.preload = 0; else - prefetch(new->thread.fpu.state); + prefetch(new_fpu->state); __thread_fpu_begin(new_fpu); } } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 1a0edce..5b0ed71 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -248,7 +248,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - fpu = switch_fpu_prepare(prev_p, next_p, cpu); + fpu = switch_fpu_prepare(&prev_p->thread.fpu, &next_p->thread.fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 99cc4b8..fefe65e 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -278,7 +278,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) unsigned fsindex, gsindex; fpu_switch_t fpu; - fpu = switch_fpu_prepare(prev_p, next_p, cpu); + fpu = switch_fpu_prepare(&prev_p->thread.fpu, &next_p->thread.fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). -- cgit v0.10.2 From 384a23f939912d368d2b42e1b41992be09aaf266 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:43:27 +0200 Subject: x86/fpu: Use 'struct fpu' in switch_fpu_finish() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 60d2c6f..5fd9b3f 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -451,11 +451,9 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) * state - all we need to do is to conditionally restore the register * state itself. 
*/ -static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu) +static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) { - struct fpu *new_fpu = &new->thread.fpu; - - if (fpu.preload) { + if (fpu_switch.preload) { if (unlikely(restore_fpu_checking(new_fpu))) fpu_reset_state(new_fpu); } diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5b0ed71..7adc314 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -241,14 +241,16 @@ __visible __notrace_funcgraph struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, - *next = &next_p->thread; + *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); - fpu_switch_t fpu; + fpu_switch_t fpu_switch; /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ - fpu = switch_fpu_prepare(&prev_p->thread.fpu, &next_p->thread.fpu, cpu); + fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); /* * Save away %gs. No need to save %fs, as it was saved on the @@ -318,7 +320,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) lazy_load_gs(next->gs); - switch_fpu_finish(next_p, fpu); + switch_fpu_finish(next_fpu, fpu_switch); this_cpu_write(current_task, next_p); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index fefe65e..4504569 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -273,12 +273,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread; struct thread_struct *next = &next_p->thread; + struct fpu *prev_fpu = &prev->fpu; + struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); struct tss_struct *tss = &per_cpu(cpu_tss, cpu); unsigned fsindex, gsindex; - fpu_switch_t fpu; + fpu_switch_t fpu_switch; - fpu = switch_fpu_prepare(&prev_p->thread.fpu, &next_p->thread.fpu, cpu); + fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu); /* We must save %fs and %gs before load_TLS() because * %fs and %gs may be cleared by load_TLS(). @@ -390,7 +392,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) wrmsrl(MSR_KERNEL_GS_BASE, next->gs); prev->gsindex = gsindex; - switch_fpu_finish(next_p, fpu); + switch_fpu_finish(next_fpu, fpu_switch); /* * Switch the PDA and FPU contexts. -- cgit v0.10.2 From 2d75bcf31470b15205f915aae725a284bc8f2da8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:49:29 +0200 Subject: x86/fpu: Move __save_fpu() into fpu/core.c This helper function is only used in fpu/core.c, move it there. This slightly speeds up compilation. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
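For reference: the switch_fpu_prepare()/switch_fpu_finish() pair converted above splits a context switch into a save phase and a restore phase, with the preload decision carried between them in fpu_switch_t. The following stand-alone sketch models only that decision; the struct, the variable standing in for use_eager_fpu() and the main() harness are invented for illustration, not kernel code:

#include <stdio.h>

/* Simplified stand-ins for the kernel's types and predicates: */
struct fpu_model {
	int fpstate_active;		/* task has FPU state at all */
	unsigned int counter;		/* consecutive switches that used the FPU */
};

static int use_eager_fpu;		/* models use_eager_fpu() */

/*
 * Mirrors: fpu.preload = new_fpu->fpstate_active &&
 *          (use_eager_fpu() || new_fpu->counter > 5);
 */
static int should_preload(const struct fpu_model *new_fpu)
{
	return new_fpu->fpstate_active &&
	       (use_eager_fpu || new_fpu->counter > 5);
}

int main(void)
{
	struct fpu_model rare  = { .fpstate_active = 1, .counter = 1 };
	struct fpu_model heavy = { .fpstate_active = 1, .counter = 9 };

	use_eager_fpu = 0;	/* lazy mode: preload only frequent FPU users */
	printf("lazy:  rare=%d heavy=%d\n",
	       should_preload(&rare), should_preload(&heavy));

	use_eager_fpu = 1;	/* eager mode: preload whenever state exists */
	printf("eager: rare=%d heavy=%d\n",
	       should_preload(&rare), should_preload(&heavy));
	return 0;
}

In lazy mode a task that used the FPU only rarely is not preloaded and will fault the state back in on demand; eager mode always preloads when state exists.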
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 5fd9b3f..6b84399 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -501,17 +501,6 @@ static inline void user_fpu_begin(void) preempt_enable(); } -static inline void __save_fpu(struct task_struct *tsk) -{ - if (use_xsave()) { - if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&tsk->thread.fpu.state->xsave); - else - xsave_state(&tsk->thread.fpu.state->xsave); - } else - fpu_fxsave(&tsk->thread.fpu); -} - /* * i387 state interaction */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 41c9289..9db4ef3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -125,6 +125,18 @@ void __kernel_fpu_end(void) } EXPORT_SYMBOL(__kernel_fpu_end); +static void __save_fpu(struct task_struct *tsk) +{ + if (use_xsave()) { + if (unlikely(system_state == SYSTEM_BOOTING)) + xsave_state_booting(&tsk->thread.fpu.state->xsave); + else + xsave_state(&tsk->thread.fpu.state->xsave); + } else { + fpu_fxsave(&tsk->thread.fpu); + } +} + /* * Save the FPU state (initialize it if necessary): * -- cgit v0.10.2 From a4d8fc2e0652613426920aac429541127f8b26d8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:52:36 +0200 Subject: x86/fpu: Use 'struct fpu' in __fpu_save() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9db4ef3..7c05300 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -125,15 +125,15 @@ void __kernel_fpu_end(void) } EXPORT_SYMBOL(__kernel_fpu_end); -static void __save_fpu(struct task_struct *tsk) +static void __save_fpu(struct fpu *fpu) { if (use_xsave()) { if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&tsk->thread.fpu.state->xsave); + xsave_state_booting(&fpu->state->xsave); else - xsave_state(&tsk->thread.fpu.state->xsave); + xsave_state(&fpu->state->xsave); } else { - fpu_fxsave(&tsk->thread.fpu); + fpu_fxsave(fpu); } } @@ -151,7 +151,7 @@ void fpu__save(struct task_struct *tsk) preempt_disable(); if (fpu->has_fpu) { if (use_eager_fpu()) { - __save_fpu(tsk); + __save_fpu(fpu); } else { fpu_save_init(fpu); __thread_fpu_end(fpu); @@ -231,17 +231,17 @@ EXPORT_SYMBOL_GPL(fpstate_free); */ static void fpu_copy(struct task_struct *dst, struct task_struct *src) { + struct fpu *dst_fpu = &dst->thread.fpu; + struct fpu *src_fpu = &src->thread.fpu; + WARN_ON(src != current); if (use_eager_fpu()) { memset(&dst->thread.fpu.state->xsave, 0, xstate_size); - __save_fpu(dst); + __save_fpu(dst_fpu); } else { - struct fpu *dfpu = &dst->thread.fpu; - struct fpu *sfpu = &src->thread.fpu; - fpu__save(src); - memcpy(dfpu->state, sfpu->state, xstate_size); + memcpy(dst_fpu->state, src_fpu->state, xstate_size); } } -- cgit v0.10.2 From 0c070595ceccb391100127a28ff837c50356ad67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Apr 2015 17:57:24 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu__save() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
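The conversions in this series all follow one mechanical pattern: a helper that took a task_struct and dug ->thread.fpu out itself is narrowed to take the struct fpu directly, and each call site passes &tsk->thread.fpu. A minimal compilable illustration of the pattern, with invented names (this is a sketch of the refactor, not kernel code):

struct fpu_state { int fpstate_active; };
struct task { struct fpu_state fpu; /* ...many unrelated fields... */ };

/* Before: the helper reaches through the whole task. */
static void activate_old(struct task *tsk)
{
	tsk->fpu.fpstate_active = 1;
}

/* After: the helper sees only the state it actually touches. */
static void activate_new(struct fpu_state *fpu)
{
	fpu->fpstate_active = 1;
}

int main(void)
{
	struct task tsk = { { 0 } };

	activate_new(&tsk.fpu);		/* call sites pass &tsk->thread.fpu */
	return !tsk.fpu.fpstate_active;
}

The narrower signature documents what the helper depends on and keeps it from reaching into unrelated task state.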
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index e69989f..e3b42c5 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -100,7 +100,7 @@ static inline int user_has_fpu(void) return current->thread.fpu.has_fpu; } -extern void fpu__save(struct task_struct *tsk); +extern void fpu__save(struct fpu *fpu); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 7c05300..b685e9e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -142,11 +142,9 @@ static void __save_fpu(struct fpu *fpu) * * This only ever gets called for the current task. */ -void fpu__save(struct task_struct *tsk) +void fpu__save(struct fpu *fpu) { - struct fpu *fpu = &tsk->thread.fpu; - - WARN_ON(tsk != current); + WARN_ON(fpu != &current->thread.fpu); preempt_disable(); if (fpu->has_fpu) { @@ -240,7 +238,7 @@ static void fpu_copy(struct task_struct *dst, struct task_struct *src) memset(&dst->thread.fpu.state->xsave, 0, xstate_size); __save_fpu(dst_fpu); } else { - fpu__save(src); + fpu__save(src_fpu); memcpy(dst_fpu->state, src_fpu->state, xstate_size); } } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 22ad90a..8abcd6a 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -730,7 +730,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) /* * Save the info for the exception handler and clear the error. */ - fpu__save(task); + fpu__save(&task->thread.fpu); task->thread.trap_nr = trapnr; task->thread.error_code = error_code; info.si_signo = SIGFPE; -- cgit v0.10.2 From f9bc977fe734772a7ca4a467fe4fd74e1ea3a849 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:07:33 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu_copy() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b685e9e..9aaba6a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,15 +227,12 @@ EXPORT_SYMBOL_GPL(fpstate_free); * In the 'lazy' case we save to the source context, mark the FPU lazy * via stts() and copy the source context into the destination context. */ -static void fpu_copy(struct task_struct *dst, struct task_struct *src) +static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - struct fpu *dst_fpu = &dst->thread.fpu; - struct fpu *src_fpu = &src->thread.fpu; - - WARN_ON(src != current); + WARN_ON(src_fpu != &current->thread.fpu); if (use_eager_fpu()) { - memset(&dst->thread.fpu.state->xsave, 0, xstate_size); + memset(&dst_fpu->state->xsave, 0, xstate_size); __save_fpu(dst_fpu); } else { fpu__save(src_fpu); @@ -258,7 +255,7 @@ static void fpu_copy(struct task_struct *dst, struct task_struct *src) if (err) return err; - fpu_copy(dst, src); + fpu_copy(dst_fpu, src_fpu); } return 0; } -- cgit v0.10.2 From c69e098b1f90c0d520c4d5b5bff9f2ede95b13a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:07:15 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu__copy() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
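fpu_copy(), converted above, has two paths: in eager mode the parent's live registers are saved straight into the child's buffer (__save_fpu(dst_fpu)), while in lazy mode the parent's registers are first flushed to its own buffer (fpu__save(src_fpu)) and then copied buffer to buffer. A user-space model of the two paths, with an invented helper standing in for the save instructions:

#include <string.h>

struct xstate_buf { unsigned char data[64]; };

/* Stand-in for saving the live FPU registers into a buffer. */
static void save_live_registers(struct xstate_buf *buf)
{
	memset(buf->data, 0x5a, sizeof(buf->data));
}

static void fpu_copy_model(struct xstate_buf *dst, struct xstate_buf *src,
			   int eager)
{
	if (eager) {
		/* Eager: registers are live, save them directly into dst. */
		memset(dst->data, 0, sizeof(dst->data));
		save_live_registers(dst);
	} else {
		/* Lazy: flush registers to src first, then copy the buffer. */
		save_live_registers(src);
		memcpy(dst->data, src->data, sizeof(dst->data));
	}
}

int main(void)
{
	struct xstate_buf parent, child;

	fpu_copy_model(&child, &parent, 0);	/* lazy fork()-time copy */
	return 0;
}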
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 6b84399..21ad681 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -535,7 +535,7 @@ extern void fpstate_cache_init(void); extern int fpstate_alloc(struct fpu *fpu); extern void fpstate_free(struct fpu *fpu); -extern int fpu__copy(struct task_struct *dst, struct task_struct *src); +extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); static inline unsigned long alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 9aaba6a..a843585 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -240,15 +240,12 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) } } -int fpu__copy(struct task_struct *dst, struct task_struct *src) +int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - struct fpu *dst_fpu = &dst->thread.fpu; - struct fpu *src_fpu = &src->thread.fpu; - - dst->thread.fpu.counter = 0; - dst->thread.fpu.has_fpu = 0; - dst->thread.fpu.state = NULL; - dst->thread.fpu.last_cpu = -1; + dst_fpu->counter = 0; + dst_fpu->has_fpu = 0; + dst_fpu->state = NULL; + dst_fpu->last_cpu = -1; if (src_fpu->fpstate_active) { int err = fpstate_alloc(dst_fpu); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 50d503a..e97266b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -83,7 +83,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; - return fpu__copy(dst, src); + return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } void arch_release_task_struct(struct task_struct *tsk) -- cgit v0.10.2 From db2b1d3ad1cdae9f268d6db54b6127b09933da3d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:13:09 +0200 Subject: x86/fpu: Use 'struct fpu' in fpstate_alloc_init() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index e3b42c5..38376cd 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -18,7 +18,7 @@ struct pt_regs; struct user_i387_struct; -extern int fpstate_alloc_init(struct task_struct *curr); +extern int fpstate_alloc_init(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); extern void fpu__flush_thread(struct task_struct *tsk); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a843585..183e69d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -263,12 +263,11 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) * * Can fail. */ -int fpstate_alloc_init(struct task_struct *curr) +int fpstate_alloc_init(struct fpu *fpu) { - struct fpu *fpu = &curr->thread.fpu; int ret; - if (WARN_ON_ONCE(curr != current)) + if (WARN_ON_ONCE(fpu != &current->thread.fpu)) return -EINVAL; if (WARN_ON_ONCE(fpu->fpstate_active)) return -EINVAL; @@ -276,11 +275,11 @@ int fpstate_alloc_init(struct task_struct *curr) /* * Memory allocation at the first usage of the FPU and other state.
*/ - ret = fpstate_alloc(&curr->thread.fpu); + ret = fpstate_alloc(fpu); if (ret) return ret; - fpstate_init(&curr->thread.fpu); + fpstate_init(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; @@ -360,7 +359,7 @@ void fpu__restore(void) /* * does a slab alloc which can sleep */ - if (fpstate_alloc_init(tsk)) { + if (fpstate_alloc_init(fpu)) { /* * ran out of memory! */ @@ -396,7 +395,7 @@ void fpu__flush_thread(struct task_struct *tsk) } else { if (!fpu->fpstate_active) { /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(fpstate_alloc_init(tsk))) + if (WARN_ON(fpstate_alloc_init(fpu))) force_sig(SIGKILL, tsk); user_fpu_begin(); } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 3953cbf..80b0c8f 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -350,7 +350,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!fpu->fpstate_active && fpstate_alloc_init(tsk)) + if (!fpu->fpstate_active && fpstate_alloc_init(fpu)) return -1; if (!static_cpu_has(X86_FEATURE_FPU)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bab8afb..479d4ce 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6601,7 +6601,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!fpu->fpstate_active && fpstate_alloc_init(current)) + if (!fpu->fpstate_active && fpstate_alloc_init(fpu)) return -ENOMEM; if (vcpu->sigset_active) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index f1aac55..e394bcb 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -150,7 +150,7 @@ void math_emulate(struct math_emu_info *info) struct fpu *fpu = &current->thread.fpu; if (!fpu->fpstate_active) { - if (fpstate_alloc_init(current)) { + if (fpstate_alloc_init(fpu)) { do_group_exit(SIGKILL); return; } -- cgit v0.10.2 From cc08d5459905a4155cb77e5fe25f396b4c622b7d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:18:23 +0200 Subject: x86/fpu: Use 'struct fpu' in fpu__unlazy_stopped() Migrate this function to pure 'struct fpu' usage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 183e69d..e3e8585 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -311,27 +311,26 @@ EXPORT_SYMBOL_GPL(fpstate_alloc_init); * the read-only case, it's not strictly necessary for * read-only access to the context. */ -static int fpu__unlazy_stopped(struct task_struct *child) +static int fpu__unlazy_stopped(struct fpu *child_fpu) { - struct fpu *child_fpu = &child->thread.fpu; int ret; - if (WARN_ON_ONCE(child == current)) + if (WARN_ON_ONCE(child_fpu == &current->thread.fpu)) return -EINVAL; if (child_fpu->fpstate_active) { - child->thread.fpu.last_cpu = -1; + child_fpu->last_cpu = -1; return 0; } /* * Memory allocation at the first usage of the FPU and other state.
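fpu__unlazy_stopped(), being converted here, is the ptrace-side counterpart of fpstate_alloc_init(): it must only ever run on some other, stopped task, never on current, and it allocates state on first access. A compilable model of just those checks (all names invented, error handling simplified):

#include <errno.h>

struct fpu_model { int fpstate_active; int last_cpu; };

static struct fpu_model *current_fpu;	/* models &current->thread.fpu */

static int unlazy_stopped_model(struct fpu_model *child_fpu)
{
	if (child_fpu == current_fpu)	/* mirrors the WARN_ON_ONCE() above */
		return -EINVAL;

	if (child_fpu->fpstate_active) {
		child_fpu->last_cpu = -1;	/* invalidate lazy-restore hint */
		return 0;
	}

	/* First access: the state would be allocated and initialized here. */
	child_fpu->fpstate_active = 1;
	return 0;
}

int main(void)
{
	struct fpu_model self = { 1, 0 }, child = { 0, 0 };

	current_fpu = &self;
	if (unlazy_stopped_model(&self) != -EINVAL)	/* must refuse current */
		return 1;
	return unlazy_stopped_model(&child);	/* first access succeeds */
}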
*/ - ret = fpstate_alloc(&child->thread.fpu); + ret = fpstate_alloc(child_fpu); if (ret) return ret; - fpstate_init(&child->thread.fpu); + fpstate_init(child_fpu); /* Safe to do for stopped child tasks: */ child_fpu->fpstate_active = 1; @@ -426,12 +425,13 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + struct fpu *fpu = &target->thread.fpu; int ret; if (!cpu_has_fxsr) return -ENODEV; - ret = fpu__unlazy_stopped(target); + ret = fpu__unlazy_stopped(fpu); if (ret) return ret; @@ -445,12 +445,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + struct fpu *fpu = &target->thread.fpu; int ret; if (!cpu_has_fxsr) return -ENODEV; - ret = fpu__unlazy_stopped(target); + ret = fpu__unlazy_stopped(fpu); if (ret) return ret; @@ -478,13 +479,14 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + struct fpu *fpu = &target->thread.fpu; struct xsave_struct *xsave; int ret; if (!cpu_has_xsave) return -ENODEV; - ret = fpu__unlazy_stopped(target); + ret = fpu__unlazy_stopped(fpu); if (ret) return ret; @@ -508,13 +510,14 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + struct fpu *fpu = &target->thread.fpu; struct xsave_struct *xsave; int ret; if (!cpu_has_xsave) return -ENODEV; - ret = fpu__unlazy_stopped(target); + ret = fpu__unlazy_stopped(fpu); if (ret) return ret; @@ -674,10 +677,11 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; int ret; - ret = fpu__unlazy_stopped(target); + ret = fpu__unlazy_stopped(fpu); if (ret) return ret; @@ -705,10 +709,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { + struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; int ret; - ret = fpu__unlazy_stopped(target); + ret = fpu__unlazy_stopped(fpu); if (ret) return ret; -- cgit v0.10.2 From 2e8a3102662233dfac92fe70f56429b4050f674a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:28:23 +0200 Subject: x86/fpu: Rename fpu__flush_thread() to fpu__clear() The primary purpose of this function is to clear the current task's FPU before an exec(), to not leak information from the previous task, and to allow the new task to start with freshly initialized FPU registers. Rename the function to reflect this primary purpose. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
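To make the exec-time policy behind the rename concrete, here is a stand-alone sketch of the two branches fpu__clear() chooses between (names invented; drop_state() and the calloc() stand in for drop_fpu()/fpstate_free() and fpstate_alloc_init()):

#include <stdlib.h>

struct fpu_model {
	int fpstate_active;
	unsigned char *state;	/* models the allocated xstate buffer */
};

static void drop_state(struct fpu_model *fpu)
{
	fpu->fpstate_active = 0;
	free(fpu->state);
	fpu->state = NULL;	/* reallocated lazily on first FPU use */
}

static void clear_for_exec(struct fpu_model *fpu, int eager)
{
	if (!eager) {
		/* Lazy: just drop it; first use after exec reallocates. */
		drop_state(fpu);
	} else {
		/* Eager: the new image needs a fresh context immediately. */
		if (!fpu->fpstate_active) {
			fpu->state = calloc(1, 64);	/* like fpstate_alloc_init() */
			fpu->fpstate_active = 1;
		}
		/* The registers would be reset to the init state here. */
	}
}

int main(void)
{
	struct fpu_model fpu = { 1, calloc(1, 64) };

	clear_for_exec(&fpu, 0);
	return 0;
}

Either way the new program sees no FPU contents from the previous image, which is the leak the commit message describes.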
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 38376cd..b8f7d76ac 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h @@ -20,7 +20,7 @@ struct user_i387_struct; extern int fpstate_alloc_init(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); -extern void fpu__flush_thread(struct task_struct *tsk); +extern void fpu__clear(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void fpu__restore(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e3e8585..c15d064 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -381,11 +381,11 @@ void fpu__restore(void) } EXPORT_SYMBOL_GPL(fpu__restore); -void fpu__flush_thread(struct task_struct *tsk) +void fpu__clear(struct task_struct *tsk) { struct fpu *fpu = &tsk->thread.fpu; - WARN_ON(tsk != current); + WARN_ON_ONCE(tsk != current); /* Almost certainly an anomaly */ if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e97266b..51ad342 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -130,7 +130,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - fpu__flush_thread(tsk); + fpu__clear(tsk); } static void hard_disable_TSC(void) -- cgit v0.10.2 From e11267c13fab2c8b0e5ed3f3ae8f6167f03f8953 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:34:05 +0200 Subject: x86/fpu: Clean up fpu__clear() a bit Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c15d064..176f69b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -390,11 +390,11 @@ void fpu__clear(struct task_struct *tsk) if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ drop_fpu(fpu); - fpstate_free(&tsk->thread.fpu); + fpstate_free(fpu); } else { if (!fpu->fpstate_active) { /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(fpstate_alloc_init(fpu))) + if (WARN_ON(fpstate_alloc_init(fpu))) force_sig(SIGKILL, tsk); user_fpu_begin(); } -- cgit v0.10.2 From df6b35f409af0a8ff1ef62f552b8402f3fef8665 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:46:00 +0200 Subject: x86/fpu: Rename i387.h to fpu/api.h We already have fpu/types.h, move i387.h to fpu/api.h. The file name has become a misnomer anyway: it offers generic FPU APIs, but is not limited to i387 functionality. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
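The header being renamed here is the one other subsystems are expected to consume, and kernel_fpu_begin()/kernel_fpu_end() are its central exports. A sketch of a typical consumer follows, as an illustrative out-of-tree fragment (the function and its xor loop are invented); the bracketing and the irq_fpu_usable() check follow the contract documented in the header itself:

#include <asm/fpu/api.h>

static void xor_words(unsigned long *dst, const unsigned long *src, int n)
{
	if (!irq_fpu_usable()) {
		while (n--)			/* plain integer fallback */
			*dst++ ^= *src++;
		return;
	}

	kernel_fpu_begin();	/* disables preemption, claims the FPU */
	while (n--)
		*dst++ ^= *src++;	/* imagine SSE/AVX code here instead */
	kernel_fpu_end();	/* releases the FPU, reenables preemption */
}

Only code that manages preemption itself, such as KVM, uses the underscored __kernel_fpu_begin()/__kernel_fpu_end() variants directly, as the header comment notes.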
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 112cefa..b419f43 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c index 1937fc1..07d2c6c 100644 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -35,7 +35,7 @@ #include #include -#include +#include #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c index b6c67bf..a3fcfc9 100644 --- a/arch/x86/crypto/crct10dif-pclmul_glue.c +++ b/arch/x86/crypto/crct10dif-pclmul_glue.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c index f368ba2..5a2f30f 100644 --- a/arch/x86/crypto/fpu.c +++ b/arch/x86/crypto/fpu.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include struct crypto_fpu_ctx { struct crypto_blkcipher *child; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 2079baf..64d7cf1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #define GHASH_BLOCK_SIZE 16 diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 33d1b9d..cb3bf19 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index ccc3388..9eaf7ab 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index d9fa4c1..e0d6a67 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index b5e2d56..1a66e61 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h index 1eef555..03bb106 100644 --- a/arch/x86/include/asm/crypto/glue_helper.h +++ b/arch/x86/include/asm/crypto/glue_helper.h @@ -7,7 +7,7 @@ #include #include -#include +#include #include typedef void (*common_glue_func_t)(void *ctx, u8 *dst, const u8 *src); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 3738b13..155162e 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_EFI_H #define _ASM_X86_EFI_H -#include +#include #include /* diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index 21ad681..d68b349 100644 --- a/arch/x86/include/asm/fpu-internal.h +++ 
b/arch/x86/include/asm/fpu-internal.h @@ -15,7 +15,7 @@ #include #include -#include +#include #include #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h new file mode 100644 index 0000000..9d3a6f3 --- /dev/null +++ b/arch/x86/include/asm/fpu/api.h @@ -0,0 +1,107 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_API_H +#define _ASM_X86_FPU_API_H + +#ifndef __ASSEMBLY__ + +#include +#include + +struct pt_regs; +struct user_i387_struct; + +extern int fpstate_alloc_init(struct fpu *fpu); +extern void fpstate_init(struct fpu *fpu); +extern void fpu__clear(struct task_struct *tsk); + +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +extern void fpu__restore(void); +extern void fpu__init_check_bugs(void); + +extern bool irq_fpu_usable(void); + +/* + * Careful: __kernel_fpu_begin/end() must be called with preempt disabled + * and they don't touch the preempt state on their own. + * If you enable preemption after __kernel_fpu_begin(), preempt notifier + * should call the __kernel_fpu_end() to prevent the kernel/user FPU + * state from getting corrupted. KVM for example uses this model. + * + * All other cases use kernel_fpu_begin/end() which disable preemption + * during kernel FPU usage. + */ +extern void __kernel_fpu_begin(void); +extern void __kernel_fpu_end(void); + +static inline void kernel_fpu_begin(void) +{ + preempt_disable(); + WARN_ON_ONCE(!irq_fpu_usable()); + __kernel_fpu_begin(); +} + +static inline void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} + +/* + * Some instructions like VIA's padlock instructions generate a spurious + * DNA fault but don't modify SSE registers. And these instructions + * get used from interrupt context as well. To prevent these kernel instructions + * in interrupt context interacting wrongly with other user/kernel fpu usage, we + * should use them only in the context of irq_ts_save/restore() + */ +static inline int irq_ts_save(void) +{ + /* + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() + */ + if (!in_atomic()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} + +static inline void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} + +/* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. 
+ */ +static inline int user_has_fpu(void) +{ + return current->thread.fpu.has_fpu; +} + +extern void fpu__save(struct fpu *fpu); + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h deleted file mode 100644 index b8f7d76ac..0000000 --- a/arch/x86/include/asm/i387.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _ASM_X86_I387_H -#define _ASM_X86_I387_H - -#ifndef __ASSEMBLY__ - -#include -#include - -struct pt_regs; -struct user_i387_struct; - -extern int fpstate_alloc_init(struct fpu *fpu); -extern void fpstate_init(struct fpu *fpu); -extern void fpu__clear(struct task_struct *tsk); - -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void fpu__restore(void); -extern void fpu__init_check_bugs(void); - -extern bool irq_fpu_usable(void); - -/* - * Careful: __kernel_fpu_begin/end() must be called with preempt disabled - * and they don't touch the preempt state on their own. - * If you enable preemption after __kernel_fpu_begin(), preempt notifier - * should call the __kernel_fpu_end() to prevent the kernel/user FPU - * state from getting corrupted. KVM for example uses this model. - * - * All other cases use kernel_fpu_begin/end() which disable preemption - * during kernel FPU usage. - */ -extern void __kernel_fpu_begin(void); -extern void __kernel_fpu_end(void); - -static inline void kernel_fpu_begin(void) -{ - preempt_disable(); - WARN_ON_ONCE(!irq_fpu_usable()); - __kernel_fpu_begin(); -} - -static inline void kernel_fpu_end(void) -{ - __kernel_fpu_end(); - preempt_enable(); -} - -/* - * Some instructions like VIA's padlock instructions generate a spurious - * DNA fault but don't modify SSE registers. And these instructions - * get used from interrupt context as well. To prevent these kernel instructions - * in interrupt context interacting wrongly with other user/kernel fpu usage, we - * should use them only in the context of irq_ts_save/restore() - */ -static inline int irq_ts_save(void) -{ - /* - * If in process context and not atomic, we can take a spurious DNA fault. - * Otherwise, doing clts() in process context requires disabling preemption - * or some heavy lifting like kernel_fpu_begin() - */ - if (!in_atomic()) - return 0; - - if (read_cr0() & X86_CR0_TS) { - clts(); - return 1; - } - - return 0; -} - -static inline void irq_ts_restore(int TS_state) -{ - if (TS_state) - stts(); -} - -/* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. 
- */ -static inline int user_has_fpu(void) -{ - return current->thread.fpu.has_fpu; -} - -extern void fpu__save(struct fpu *fpu); - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_X86_I387_H */ diff --git a/arch/x86/include/asm/simd.h b/arch/x86/include/asm/simd.h index ee80b92..6c8a7ed 100644 --- a/arch/x86/include/asm/simd.h +++ b/arch/x86/include/asm/simd.h @@ -1,5 +1,5 @@ -#include +#include /* * may_use_simd - whether it is allowable at this time to issue SIMD diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h index 552d6c9..d1793f0 100644 --- a/arch/x86/include/asm/suspend_32.h +++ b/arch/x86/include/asm/suspend_32.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_32_H #include -#include +#include /* image of the saved processor state */ struct saved_context { diff --git a/arch/x86/include/asm/suspend_64.h b/arch/x86/include/asm/suspend_64.h index bc62328..7ebf0eb 100644 --- a/arch/x86/include/asm/suspend_64.h +++ b/arch/x86/include/asm/suspend_64.h @@ -7,7 +7,7 @@ #define _ASM_X86_SUSPEND_64_H #include -#include +#include /* * Image of the saved processor state, used by the low level ACPI suspend to diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index d882975..1f5c516 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -36,7 +36,7 @@ * no advantages to be gotten from x86-64 here anyways. */ -#include +#include #ifdef CONFIG_X86_32 /* reduce register pressure */ diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index ce05722..5a08bc8 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -26,7 +26,7 @@ #define XO3(x, y) " pxor 8*("#x")(%4), %%mm"#y" ;\n" #define XO4(x, y) " pxor 8*("#x")(%5), %%mm"#y" ;\n" -#include +#include static void xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 492b298..7c0a517 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -18,7 +18,7 @@ #ifdef CONFIG_AS_AVX #include -#include +#include #define BLOCK4(i) \ BLOCK(32 * i, 0) \ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index eb8be0c..29dd743 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 80b0c8f..8aa3b86 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b6168..5cb738a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 8f9a133..27f8eea 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -70,7 +70,7 @@ #include #include #include -#include +#include #include #include /* for struct machine_ops */ #include diff --git a/arch/x86/lib/mmx_32.c b/arch/x86/lib/mmx_32.c index c9f2d9b..e5e3ed8 100644 --- a/arch/x86/lib/mmx_32.c +++ b/arch/x86/lib/mmx_32.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include void *_mmx_memcpy(void *to, const void *from, size_t len) diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index e394bcb..3bb4c6a 100644 --- 
a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "fpu_system.h" #include "fpu_emu.h" diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index a3bebef..0c98a9d 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index c178ed8..da2d677 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include /* * Number of data blocks actually fetched for each xcrypt insn. diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index 95f7d27..4e154c9 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include struct padlock_sha_desc { struct shash_desc fallback; diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index bcb534a..fce5989 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include "../lg.h" diff --git a/lib/raid6/x86.h b/lib/raid6/x86.h index b759548..8fe9d96 100644 --- a/lib/raid6/x86.h +++ b/lib/raid6/x86.h @@ -23,7 +23,7 @@ #ifdef __KERNEL__ /* Real code */ -#include +#include #else /* Dummy code for user space testing */ -- cgit v0.10.2 From a137fb6bbf4f10b8ef1452e9b190d8bc76c04d0f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 03:58:37 +0200 Subject: x86/fpu: Move xsave.h to fpu/xsave.h Move the xsave.h header file to the FPU directory as well. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
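One convention worth noting in the header moved below: the XSAVE family of instructions takes the requested-feature bitmap in edx:eax, so every wrapper splits its 64-bit mask into 32-bit halves before issuing the instruction. A stand-alone illustration of that split (user-space sketch; the XSTATE_* constants are copied from the header, the rest is invented):

#include <stdint.h>
#include <stdio.h>

#define XSTATE_FP	0x1
#define XSTATE_SSE	0x2
#define XSTATE_YMM	0x4

int main(void)
{
	uint64_t mask = XSTATE_FP | XSTATE_SSE | XSTATE_YMM;
	uint32_t lmask = (uint32_t)mask;		/* goes into %eax */
	uint32_t hmask = (uint32_t)(mask >> 32);	/* goes into %edx */

	printf("lmask=%#x hmask=%#x\n", lmask, hmask);
	return 0;
}

This is exactly the lmask/hmask pattern visible in xsave_state(), xrstor_state() and the *_booting() variants in the moved file.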
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index baf0ac2..004acd7 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 78818a1..2f7ead8 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 236c809..2c3360b 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #define CAST5_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index f448810..a2ec18a 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #define CAST6_PARALLEL_BLOCKS 8 diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 2f63dc8..206ec57 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index c8d478a..4feb68c 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 1537378..02b64bb 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include #include "sha_mb_ctx.h" diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index cb3bf19..71ab2b3 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 9eaf7ab..dcbd8ea 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index e0d6a67..e8836e0 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 1a66e61..3b6c8ba 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h index d68b349..20690a1 100644 --- 
a/arch/x86/include/asm/fpu-internal.h +++ b/arch/x86/include/asm/fpu-internal.h @@ -16,7 +16,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_64 # include diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h new file mode 100644 index 0000000..7c90ea9 --- /dev/null +++ b/arch/x86/include/asm/fpu/xsave.h @@ -0,0 +1,250 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include +#include + +#define XSTATE_CPUID 0x0000000d + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 +#define XSTATE_BNDREGS 0x8 +#define XSTATE_BNDCSR 0x10 +#define XSTATE_OPMASK 0x20 +#define XSTATE_ZMM_Hi256 0x40 +#define XSTATE_Hi16_ZMM 0x80 + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +/* Bit 63 of XCR0 is reserved for future expansion */ +#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) + +#define FXSAVE_SIZE 512 + +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSET FXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) + +/* Supported features which support lazy state saving */ +#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ + | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* Supported features which require eager state saving */ +#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) + +/* All currently supported features */ +#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +extern unsigned int xstate_size; +extern u64 pcntxt_mask; +extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; +extern struct xsave_struct *init_xstate_buf; + +extern void xsave_init(void); +extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); + +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +#define xstate_fault ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err) + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline int xsave_state_booting(struct xsave_struct *fx) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + return err; +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. 
+ */ +static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + return err; +} + +/* + * Save processor xstate to xsave area. + */ +static inline int xsave_state(struct xsave_struct *fx) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. + * + * If none of xsaves and xsaveopt is enabled, use xsave. + */ + alternative_input_2( + "1:"XSAVE, + XSAVEOPT, + X86_FEATURE_XSAVEOPT, + XSAVES, + X86_FEATURE_XSAVES, + [fx] "D" (fx), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Restore processor xstate from xsave area. + */ +static inline int xrstor_state(struct xsave_struct *fx, u64 mask) +{ + int err = 0; + u32 lmask = mask; + u32 hmask = mask >> 32; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + XRSTORS, + X86_FEATURE_XSAVES, + "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Restore xstate context for new process during context switch. + */ +static inline int fpu_xrstor_checking(struct xsave_struct *fx) +{ + return xrstor_state(fx, -1); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ +static inline int xsave_user(struct xsave_struct __user *buf) +{ + int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); + if (unlikely(err)) + return -EFAULT; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XSAVE"\n" + "2: " ASM_CLAC "\n" + xstate_fault + : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "memory"); + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) +{ + int err = 0; + struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XRSTOR"\n" + "2: " ASM_CLAC "\n" + xstate_fault + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "memory"); /* memory required? 
*/ + return err; +} + +void *get_xsave_addr(struct xsave_struct *xsave, int xstate); +void setup_xstate_comp(void); + +#endif diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h deleted file mode 100644 index 7c90ea9..0000000 --- a/arch/x86/include/asm/xsave.h +++ /dev/null @@ -1,250 +0,0 @@ -#ifndef __ASM_X86_XSAVE_H -#define __ASM_X86_XSAVE_H - -#include -#include - -#define XSTATE_CPUID 0x0000000d - -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 -#define XSTATE_YMM 0x4 -#define XSTATE_BNDREGS 0x8 -#define XSTATE_BNDCSR 0x10 -#define XSTATE_OPMASK 0x20 -#define XSTATE_ZMM_Hi256 0x40 -#define XSTATE_Hi16_ZMM 0x80 - -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) -/* Bit 63 of XCR0 is reserved for future expansion */ -#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) - -#define FXSAVE_SIZE 512 - -#define XSAVE_HDR_SIZE 64 -#define XSAVE_HDR_OFFSET FXSAVE_SIZE - -#define XSAVE_YMM_SIZE 256 -#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) - -/* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) - -/* Supported features which require eager state saving */ -#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) - -/* All currently supported features */ -#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) - -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -extern unsigned int xstate_size; -extern u64 pcntxt_mask; -extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern struct xsave_struct *init_xstate_buf; - -extern void xsave_init(void); -extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -#define xstate_fault ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline int xsave_state_booting(struct xsave_struct *fx) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. 
- */ -static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * Save processor xstate to xsave area. - */ -static inline int xsave_state(struct xsave_struct *fx) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [fx] "D" (fx), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore processor xstate from xsave area. - */ -static inline int xrstor_state(struct xsave_struct *fx, u64 mask) -{ - int err = 0; - u32 lmask = mask; - u32 hmask = mask >> 32; - - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. - */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore xstate context for new process during context switch. - */ -static inline int fpu_xrstor_checking(struct xsave_struct *fx) -{ - return xrstor_state(fx, -1); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int xsave_user(struct xsave_struct __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. - */ - err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); - if (unlikely(err)) - return -EFAULT; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (buf), "a" (-1), "d" (-1), "0" (0) - : "memory"); - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) -{ - int err = 0; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) - : "memory"); /* memory required? 
*/ - return err; } - -void *get_xsave_addr(struct xsave_struct *xsave, int xstate); -void setup_xstate_comp(void); - -#endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 59b69f6..0ce4c4f 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "cpuid.h" #include "lapic.h" #include "mmu.h" -- cgit v0.10.2 From 78f7f1e54bac032b98956862a5bcf8c28ed22d07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:54:44 +0200 Subject: x86/fpu: Rename fpu-internal.h to fpu/internal.h This unifies all the FPU related header files under a unified, hierarchical naming scheme: - asm/fpu/types.h: FPU related data types, needed for 'struct task_struct', widely included in almost all kernel code, and hence kept as small as possible. - asm/fpu/api.h: FPU related 'public' methods exported to other subsystems. - asm/fpu/internal.h: FPU subsystem internal methods - asm/fpu/xsave.h: XSAVE support internal methods (Also standardize the header guard in asm/fpu/internal.h.) Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c index 470522c..81a595d 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -32,7 +32,7 @@ #include #include -#include +#include #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 02b64bb..03ffaf8c 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -68,7 +68,7 @@ #include #include #include -#include +#include #include "sha_mb_ctx.h" #define FLUSH_INTERVAL 1000 /* in usec */ diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index e1ec6f9..d6d8f4c 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h deleted file mode 100644 index 20690a1..0000000 --- a/arch/x86/include/asm/fpu-internal.h +++ /dev/null @@ -1,556 +0,0 @@ -/* - * Copyright (C) 1994 Linus Torvalds - * - * Pentium III FXSR, SSE support - * General FPU state handling cleanups - * Gareth Hughes , May 2000 - * x86-64 work by Andi Kleen 2002 - */ - -#ifndef _FPU_INTERNAL_H -#define _FPU_INTERNAL_H - -#include -#include -#include - -#include -#include -#include - -#ifdef CONFIG_X86_64 -# include -# include -struct ksignal; -int ia32_setup_rt_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -#else -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -# define ia32_setup_frame __setup_frame -# define ia32_setup_rt_frame __setup_rt_frame -#endif - -extern unsigned int mxcsr_feature_mask; -extern void fpu__cpu_init(void); -extern void eager_fpu_init(void); - -DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); - -extern void convert_from_fxsr(struct user_i387_ia32_struct *env, - struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env); - -extern user_regset_active_fn
fpregs_active, xfpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; -extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, - xstateregs_set; - -/* - * xstateregs_active == fpregs_active. Please refer to the comment - * at the definition of fpregs_active. - */ -#define xstateregs_active fpregs_active - -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - -/* - * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, - * on this CPU. - * - * This will disable any lazy FPU state restore of the current FPU state, - * but if the current thread owns the FPU, it will still be saved by. - */ -static inline void __cpu_disable_lazy_restore(unsigned int cpu) -{ - per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; -} - -static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) -{ - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; -} - -static inline int is_ia32_compat_frame(void) -{ - return config_enabled(CONFIG_IA32_EMULATION) && - test_thread_flag(TIF_IA32); -} - -static inline int is_ia32_frame(void) -{ - return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); -} - -static inline int is_x32_frame(void) -{ - return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); -} - -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - -static __always_inline __pure bool use_eager_fpu(void) -{ - return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); -} - -static __always_inline __pure bool use_xsaveopt(void) -{ - return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); -} - -static __always_inline __pure bool use_xsave(void) -{ - return static_cpu_has_safe(X86_FEATURE_XSAVE); -} - -static __always_inline __pure bool use_fxsr(void) -{ - return static_cpu_has_safe(X86_FEATURE_FXSR); -} - -static inline void fx_finit(struct i387_fxsave_struct *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} - -extern void __sanitize_i387_state(struct task_struct *); - -static inline void sanitize_i387_state(struct task_struct *tsk) -{ - if (!use_xsaveopt()) - return; - __sanitize_i387_state(tsk); -} - -#define user_insn(insn, output, input...) \ -({ \ - int err; \ - asm volatile(ASM_STAC "\n" \ - "1:" #insn "\n\t" \ - "2: " ASM_CLAC "\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -#define check_insn(insn, output, input...) \ -({ \ - int err; \ - asm volatile("1:" #insn "\n\t" \ - "2:\n" \ - ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err), output \ - : "0"(0), input); \ - err; \ -}) - -static inline int fsave_user(struct i387_fsave_struct __user *fx) -{ - return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); -} - -static inline int fxsave_user(struct i387_fxsave_struct __user *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - - /* See comment in fpu_fxsave() below. 
*/ - return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); -} - -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) -{ - if (config_enabled(CONFIG_X86_32)) - return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - - /* See comment in fpu_fxsave() below. */ - return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); -} - -static inline int frstor_checking(struct i387_fsave_struct *fx) -{ - return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline int frstor_user(struct i387_fsave_struct __user *fx) -{ - return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); -} - -static inline void fpu_fxsave(struct fpu *fpu) -{ - if (config_enabled(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); - else { - /* Using "rex64; fxsave %0" is broken because, if the memory - * operand uses any extended registers for addressing, a second - * REX prefix will be generated (to the assembler, rex64 - * followed by semicolon is a separate instruction), and hence - * the 64-bitness is lost. - * - * Using "fxsaveq %0" would be the ideal choice, but is only - * supported starting with gas 2.16. - * - * Using, as a workaround, the properly prefixed form below - * isn't accepted by any binutils version so far released, - * complaining that the same type of prefix is used twice if - * an extended register is needed for addressing (fix submitted - * to mainline 2005-11-21). - * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); - * - * This, however, we can work around by forcing the compiler to - * select an addressing mode that doesn't require extended - * registers. - */ - asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); - } -} - -/* - * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact. - */ -static inline int fpu_save_init(struct fpu *fpu) -{ - if (use_xsave()) { - xsave_state(&fpu->state->xsave); - - /* - * xsave header may indicate the init state of the FP. - */ - if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) - return 1; - } else if (use_fxsr()) { - fpu_fxsave(fpu); - } else { - asm volatile("fnsave %[fx]; fwait" - : [fx] "=m" (fpu->state->fsave)); - return 0; - } - - /* - * If exceptions are pending, we need to clear them so - * that we don't randomly get exceptions later. - * - * FIXME! Is this perhaps only true for the old-style - * irq13 case? Maybe we could leave the x87 state - * intact otherwise? 
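To make the save-side contract above concrete: fpu_save_init() returns 1 when the FPU registers still hold the task's state after the save (the XSAVE/FXSAVE paths), and 0 when the save was destructive or pending exceptions had to be cleared (the FNSAVE and fnclex paths). A minimal caller sketch follows; the helper name is invented for illustration, but switch_fpu_prepare() below applies the same pattern to last_cpu:

/* Illustrative sketch, not part of the patch series: */
static void sketch_save_old_task_fpu(struct fpu *old_fpu, int cpu)
{
	if (fpu_save_init(old_fpu)) {
		/* Registers are still intact: a lazy restore on this CPU may be skipped later. */
		old_fpu->last_cpu = cpu;
	} else {
		/* FNSAVE destroyed the register contents: force a full restore later. */
		old_fpu->last_cpu = -1;
	}
}
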
- */ - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { - asm volatile("fnclex"); - return 0; - } - return 1; -} - -static inline int fpu_restore_checking(struct fpu *fpu) -{ - if (use_xsave()) - return fpu_xrstor_checking(&fpu->state->xsave); - else if (use_fxsr()) - return fxrstor_checking(&fpu->state->fxsave); - else - return frstor_checking(&fpu->state->fsave); -} - -static inline int restore_fpu_checking(struct fpu *fpu) -{ - /* - * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is - * pending. Clear the x87 state here by setting it to fixed values. - * "m" is a random variable that should be in L1. - */ - if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { - asm volatile( - "fnclex\n\t" - "emms\n\t" - "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpu->has_fpu)); - } - - return fpu_restore_checking(fpu); -} - -/* Must be paired with an 'stts' after! */ -static inline void __thread_clear_has_fpu(struct fpu *fpu) -{ - fpu->has_fpu = 0; - this_cpu_write(fpu_fpregs_owner_ctx, NULL); -} - -/* Must be paired with a 'clts' before! */ -static inline void __thread_set_has_fpu(struct fpu *fpu) -{ - fpu->has_fpu = 1; - this_cpu_write(fpu_fpregs_owner_ctx, fpu); -} - -/* - * Encapsulate the CR0.TS handling together with the - * software flag. - * - * These generally need preemption protection to work, - * do try to avoid using these on their own. - */ -static inline void __thread_fpu_end(struct fpu *fpu) -{ - __thread_clear_has_fpu(fpu); - if (!use_eager_fpu()) - stts(); -} - -static inline void __thread_fpu_begin(struct fpu *fpu) -{ - if (!use_eager_fpu()) - clts(); - __thread_set_has_fpu(fpu); -} - -static inline void drop_fpu(struct fpu *fpu) -{ - /* - * Forget coprocessor state.. - */ - preempt_disable(); - fpu->counter = 0; - - if (fpu->has_fpu) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(fpu); - } - - fpu->fpstate_active = 0; - - preempt_enable(); -} - -static inline void restore_init_xstate(void) -{ - if (use_xsave()) - xrstor_state(init_xstate_buf, -1); - else - fxrstor_checking(&init_xstate_buf->i387); -} - -/* - * Reset the FPU state in the eager case and drop it in the lazy case (later use - * will reinit it). - */ -static inline void fpu_reset_state(struct fpu *fpu) -{ - if (!use_eager_fpu()) - drop_fpu(fpu); - else - restore_init_xstate(); -} - -/* - * FPU state switching for scheduling. - * - * This is a two-stage process: - * - * - switch_fpu_prepare() saves the old state and - * sets the new state of the CR0.TS bit. This is - * done within the context of the old process. - * - * - switch_fpu_finish() restores the new state as - * necessary. - */ -typedef struct { int preload; } fpu_switch_t; - -static inline fpu_switch_t -switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) -{ - fpu_switch_t fpu; - - /* - * If the task has used the math, pre-load the FPU on xsave processors - * or if the past 5 consecutive context-switches used math. - */ - fpu.preload = new_fpu->fpstate_active && - (use_eager_fpu() || new_fpu->counter > 5); - - if (old_fpu->has_fpu) { - if (!fpu_save_init(old_fpu)) - old_fpu->last_cpu = -1; - else - old_fpu->last_cpu = cpu; - - /* But leave fpu_fpregs_owner_ctx! */ - old_fpu->has_fpu = 0; - - /* Don't change CR0.TS if we just switch! 
*/ - if (fpu.preload) { - new_fpu->counter++; - __thread_set_has_fpu(new_fpu); - prefetch(new_fpu->state); - } else if (!use_eager_fpu()) - stts(); - } else { - old_fpu->counter = 0; - old_fpu->last_cpu = -1; - if (fpu.preload) { - new_fpu->counter++; - if (fpu_want_lazy_restore(new_fpu, cpu)) - fpu.preload = 0; - else - prefetch(new_fpu->state); - __thread_fpu_begin(new_fpu); - } - } - return fpu; -} - -/* - * By the time this gets called, we've already cleared CR0.TS and - * given the process the FPU if we are going to preload the FPU - * state - all we need to do is to conditionally restore the register - * state itself. - */ -static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) -{ - if (fpu_switch.preload) { - if (unlikely(restore_fpu_checking(new_fpu))) - fpu_reset_state(new_fpu); - } -} - -/* - * Signal frame handlers... - */ -extern int save_xstate_sig(void __user *buf, void __user *fx, int size); -extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); - -static inline int xstate_sigframe_size(void) -{ - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; -} - -static inline int restore_xstate_sig(void __user *buf, int ia32_frame) -{ - void __user *buf_fx = buf; - int size = xstate_sigframe_size(); - - if (ia32_frame && use_fxsr()) { - buf_fx = buf + sizeof(struct i387_fsave_struct); - size += sizeof(struct i387_fsave_struct); - } - - return __restore_xstate_sig(buf, buf_fx, size); -} - -/* - * Needs to be preemption-safe. - * - * NOTE! user_fpu_begin() must be used only immediately before restoring - * the save state. It does not do any saving/restoring on its own. In - * lazy FPU mode, it is just an optimization to avoid a #NM exception, - * the task can lose the FPU right after preempt_enable(). 
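As a usage illustration of the NOTE above, here is a schematic sketch of a signal-restore style path, loosely modeled on the series' own __restore_xstate_sig()/restore_user_xstate() code in fpu/xsave.c (the wrapper name is invented): user_fpu_begin() runs immediately before the register-image restore, keeping the window in which the task could lose the FPU again as small as possible, and a failed restore falls back to a clean init state:

/* Illustrative sketch, not part of the patch series: */
static int sketch_restore_user_fpstate(void __user *buf_fx, u64 xstate_bv)
{
	struct fpu *fpu = &current->thread.fpu;

	user_fpu_begin();			/* own the FPU, avoid a #NM trap */
	if (restore_user_xstate(buf_fx, xstate_bv, 0)) {
		fpu_reset_state(fpu);		/* bad frame: fall back to the init state */
		return -EFAULT;
	}
	return 0;
}
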
- */ -static inline void user_fpu_begin(void) -{ - struct fpu *fpu = &current->thread.fpu; - - preempt_disable(); - if (!user_has_fpu()) - __thread_fpu_begin(fpu); - preempt_enable(); -} - -/* - * i387 state interaction - */ -static inline unsigned short get_fpu_cwd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.cwd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.swd; - } else { - return (unsigned short)tsk->thread.fpu.state->fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) -{ - if (cpu_has_xmm) { - return tsk->thread.fpu.state->fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - -extern void fpstate_cache_init(void); - -extern int fpstate_alloc(struct fpu *fpu); -extern void fpstate_free(struct fpu *fpu); -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); - -static inline unsigned long -alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, - unsigned long *size) -{ - unsigned long frame_size = xstate_sigframe_size(); - - *buf_fx = sp = round_down(sp - frame_size, 64); - if (ia32_frame && use_fxsr()) { - frame_size += sizeof(struct i387_fsave_struct); - sp -= sizeof(struct i387_fsave_struct); - } - - *size = frame_size; - return sp; -} - -#endif diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h new file mode 100644 index 0000000..386a883 --- /dev/null +++ b/arch/x86/include/asm/fpu/internal.h @@ -0,0 +1,556 @@ +/* + * Copyright (C) 1994 Linus Torvalds + * + * Pentium III FXSR, SSE support + * General FPU state handling cleanups + * Gareth Hughes , May 2000 + * x86-64 work by Andi Kleen 2002 + */ + +#ifndef _ASM_X86_FPU_INTERNAL_H +#define _ASM_X86_FPU_INTERNAL_H + +#include +#include +#include + +#include +#include +#include + +#ifdef CONFIG_X86_64 +# include +# include +struct ksignal; +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern unsigned int mxcsr_feature_mask; +extern void fpu__cpu_init(void); +extern void eager_fpu_init(void); + +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + +extern user_regset_active_fn fpregs_active, xfpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + +/* + * xstateregs_active == fpregs_active. Please refer to the comment + * at the definition of fpregs_active. + */ +#define xstateregs_active fpregs_active + +#ifdef CONFIG_MATH_EMULATION +extern void finit_soft_fpu(struct i387_soft_struct *soft); +#else +static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} +#endif + +/* + * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, + * on this CPU. 
+ * + * This will disable any lazy FPU state restore of the current FPU state, + * but if the current thread owns the FPU, it will still be saved by. + */ +static inline void __cpu_disable_lazy_restore(unsigned int cpu) +{ + per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; +} + +static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) +{ + return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +} + +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + +#define X87_FSW_ES (1 << 7) /* Exception Summary */ + +static __always_inline __pure bool use_eager_fpu(void) +{ + return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); +} + +static __always_inline __pure bool use_xsaveopt(void) +{ + return static_cpu_has_safe(X86_FEATURE_XSAVEOPT); +} + +static __always_inline __pure bool use_xsave(void) +{ + return static_cpu_has_safe(X86_FEATURE_XSAVE); +} + +static __always_inline __pure bool use_fxsr(void) +{ + return static_cpu_has_safe(X86_FEATURE_FXSR); +} + +static inline void fx_finit(struct i387_fxsave_struct *fx) +{ + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} + +extern void __sanitize_i387_state(struct task_struct *); + +static inline void sanitize_i387_state(struct task_struct *tsk) +{ + if (!use_xsaveopt()) + return; + __sanitize_i387_state(tsk); +} + +#define user_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile(ASM_STAC "\n" \ + "1:" #insn "\n\t" \ + "2: " ASM_CLAC "\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +#define check_insn(insn, output, input...) \ +({ \ + int err; \ + asm volatile("1:" #insn "\n\t" \ + "2:\n" \ + ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err), output \ + : "0"(0), input); \ + err; \ +}) + +static inline int fsave_user(struct i387_fsave_struct __user *fx) +{ + return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); +} + +static inline int fxsave_user(struct i387_fxsave_struct __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); + + /* See comment in fpu_fxsave() below. */ + return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); +} + +static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + + /* See comment in fpu_fxsave() below. */ + return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); +} + +static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) +{ + if (config_enabled(CONFIG_X86_32)) + return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + + /* See comment in fpu_fxsave() below. 
*/ + return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), + "m" (*fx)); +} + +static inline int frstor_checking(struct i387_fsave_struct *fx) +{ + return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline int frstor_user(struct i387_fsave_struct __user *fx) +{ + return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); +} + +static inline void fpu_fxsave(struct fpu *fpu) +{ + if (config_enabled(CONFIG_X86_32)) + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else if (config_enabled(CONFIG_AS_FXSAVEQ)) + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); + else { + /* Using "rex64; fxsave %0" is broken because, if the memory + * operand uses any extended registers for addressing, a second + * REX prefix will be generated (to the assembler, rex64 + * followed by semicolon is a separate instruction), and hence + * the 64-bitness is lost. + * + * Using "fxsaveq %0" would be the ideal choice, but is only + * supported starting with gas 2.16. + * + * Using, as a workaround, the properly prefixed form below + * isn't accepted by any binutils version so far released, + * complaining that the same type of prefix is used twice if + * an extended register is needed for addressing (fix submitted + * to mainline 2005-11-21). + * + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * + * This, however, we can work around by forcing the compiler to + * select an addressing mode that doesn't require extended + * registers. + */ + asm volatile( "rex64/fxsave (%[fx])" + : "=m" (fpu->state->fxsave) + : [fx] "R" (&fpu->state->fxsave)); + } +} + +/* + * These must be called with preempt disabled. Returns + * 'true' if the FPU state is still intact. + */ +static inline int fpu_save_init(struct fpu *fpu) +{ + if (use_xsave()) { + xsave_state(&fpu->state->xsave); + + /* + * xsave header may indicate the init state of the FP. + */ + if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) + return 1; + } else if (use_fxsr()) { + fpu_fxsave(fpu); + } else { + asm volatile("fnsave %[fx]; fwait" + : [fx] "=m" (fpu->state->fsave)); + return 0; + } + + /* + * If exceptions are pending, we need to clear them so + * that we don't randomly get exceptions later. + * + * FIXME! Is this perhaps only true for the old-style + * irq13 case? Maybe we could leave the x87 state + * intact otherwise? + */ + if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { + asm volatile("fnclex"); + return 0; + } + return 1; +} + +static inline int fpu_restore_checking(struct fpu *fpu) +{ + if (use_xsave()) + return fpu_xrstor_checking(&fpu->state->xsave); + else if (use_fxsr()) + return fxrstor_checking(&fpu->state->fxsave); + else + return frstor_checking(&fpu->state->fsave); +} + +static inline int restore_fpu_checking(struct fpu *fpu) +{ + /* + * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is + * pending. Clear the x87 state here by setting it to fixed values. + * "m" is a random variable that should be in L1. + */ + if (unlikely(static_cpu_has_bug_safe(X86_BUG_FXSAVE_LEAK))) { + asm volatile( + "fnclex\n\t" + "emms\n\t" + "fildl %P[addr]" /* set F?P to defined value */ + : : [addr] "m" (fpu->has_fpu)); + } + + return fpu_restore_checking(fpu); +} + +/* Must be paired with an 'stts' after! */ +static inline void __thread_clear_has_fpu(struct fpu *fpu) +{ + fpu->has_fpu = 0; + this_cpu_write(fpu_fpregs_owner_ctx, NULL); +} + +/* Must be paired with a 'clts' before! 
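For readers new to the lazy-FPU scheme that these 'clts'/'stts' pairings implement: with CR0.TS set, the first FPU instruction a task executes raises a #NM (device-not-available) trap, and the trap handler restores that task's saved state on demand. Below is a schematic sketch of such a handler, assembled from helpers visible in this header; the function name is invented, and the real handler additionally deals with first-time state allocation and error signalling:

/* Illustrative sketch, not part of the patch series: */
static void sketch_handle_device_not_available(void)
{
	struct fpu *fpu = &current->thread.fpu;

	__thread_fpu_begin(fpu);		/* clts: stop further #NM traps, mark ownership */
	if (restore_fpu_checking(fpu))		/* reload the task's saved registers */
		fpu_reset_state(fpu);		/* corrupted state: start over from init */
}
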
*/ +static inline void __thread_set_has_fpu(struct fpu *fpu) +{ + fpu->has_fpu = 1; + this_cpu_write(fpu_fpregs_owner_ctx, fpu); +} + +/* + * Encapsulate the CR0.TS handling together with the + * software flag. + * + * These generally need preemption protection to work, + * do try to avoid using these on their own. + */ +static inline void __thread_fpu_end(struct fpu *fpu) +{ + __thread_clear_has_fpu(fpu); + if (!use_eager_fpu()) + stts(); +} + +static inline void __thread_fpu_begin(struct fpu *fpu) +{ + if (!use_eager_fpu()) + clts(); + __thread_set_has_fpu(fpu); +} + +static inline void drop_fpu(struct fpu *fpu) +{ + /* + * Forget coprocessor state.. + */ + preempt_disable(); + fpu->counter = 0; + + if (fpu->has_fpu) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + __thread_fpu_end(fpu); + } + + fpu->fpstate_active = 0; + + preempt_enable(); +} + +static inline void restore_init_xstate(void) +{ + if (use_xsave()) + xrstor_state(init_xstate_buf, -1); + else + fxrstor_checking(&init_xstate_buf->i387); +} + +/* + * Reset the FPU state in the eager case and drop it in the lazy case (later use + * will reinit it). + */ +static inline void fpu_reset_state(struct fpu *fpu) +{ + if (!use_eager_fpu()) + drop_fpu(fpu); + else + restore_init_xstate(); +} + +/* + * FPU state switching for scheduling. + * + * This is a two-stage process: + * + * - switch_fpu_prepare() saves the old state and + * sets the new state of the CR0.TS bit. This is + * done within the context of the old process. + * + * - switch_fpu_finish() restores the new state as + * necessary. + */ +typedef struct { int preload; } fpu_switch_t; + +static inline fpu_switch_t +switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) +{ + fpu_switch_t fpu; + + /* + * If the task has used the math, pre-load the FPU on xsave processors + * or if the past 5 consecutive context-switches used math. + */ + fpu.preload = new_fpu->fpstate_active && + (use_eager_fpu() || new_fpu->counter > 5); + + if (old_fpu->has_fpu) { + if (!fpu_save_init(old_fpu)) + old_fpu->last_cpu = -1; + else + old_fpu->last_cpu = cpu; + + /* But leave fpu_fpregs_owner_ctx! */ + old_fpu->has_fpu = 0; + + /* Don't change CR0.TS if we just switch! */ + if (fpu.preload) { + new_fpu->counter++; + __thread_set_has_fpu(new_fpu); + prefetch(new_fpu->state); + } else if (!use_eager_fpu()) + stts(); + } else { + old_fpu->counter = 0; + old_fpu->last_cpu = -1; + if (fpu.preload) { + new_fpu->counter++; + if (fpu_want_lazy_restore(new_fpu, cpu)) + fpu.preload = 0; + else + prefetch(new_fpu->state); + __thread_fpu_begin(new_fpu); + } + } + return fpu; +} + +/* + * By the time this gets called, we've already cleared CR0.TS and + * given the process the FPU if we are going to preload the FPU + * state - all we need to do is to conditionally restore the register + * state itself. + */ +static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) +{ + if (fpu_switch.preload) { + if (unlikely(restore_fpu_checking(new_fpu))) + fpu_reset_state(new_fpu); + } +} + +/* + * Signal frame handlers... + */ +extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); + +static inline int xstate_sigframe_size(void) +{ + return use_xsave() ? 
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); + } + + return __restore_xstate_sig(buf, buf_fx, size); +} + +/* + * Needs to be preemption-safe. + * + * NOTE! user_fpu_begin() must be used only immediately before restoring + * the save state. It does not do any saving/restoring on its own. In + * lazy FPU mode, it is just an optimization to avoid a #NM exception, + * the task can lose the FPU right after preempt_enable(). + */ +static inline void user_fpu_begin(void) +{ + struct fpu *fpu = &current->thread.fpu; + + preempt_disable(); + if (!user_has_fpu()) + __thread_fpu_begin(fpu); + preempt_enable(); +} + +/* + * i387 state interaction + */ +static inline unsigned short get_fpu_cwd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.fpu.state->fxsave.cwd; + } else { + return (unsigned short)tsk->thread.fpu.state->fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct task_struct *tsk) +{ + if (cpu_has_fxsr) { + return tsk->thread.fpu.state->fxsave.swd; + } else { + return (unsigned short)tsk->thread.fpu.state->fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) +{ + if (cpu_has_xmm) { + return tsk->thread.fpu.state->fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +extern void fpstate_cache_init(void); + +extern int fpstate_alloc(struct fpu *fpu); +extern void fpstate_free(struct fpu *fpu); +extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); + +static inline unsigned long +alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, + unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + return sp; +} + +#endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 88bb7a7..8f6a4ea 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 176f69b..30016f0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -5,7 +5,7 @@ * General FPU state handling cleanups * Gareth Hughes , May 2000 */ -#include +#include /* * Track whether the kernel is using the FPU state diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 4eabb42..33df056 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -1,7 +1,7 @@ /* * x86 FPU boot time init code */ -#include +#include #include /* diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 8aa3b86..4ff726e 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 51ad342..71f4b4d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git 
a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 7adc314..deff651 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_MATH_EMULATION #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4504569..c50e013 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -38,7 +38,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 69451b8..c14a00f 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index bcb853e..c67f96c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 60e331c..29f105f 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -68,7 +68,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8abcd6a..a65586e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -54,7 +54,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 479d4ce..91d7f3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -60,7 +60,7 @@ #include #include #include -#include /* Ugh! */ +#include /* Ugh! */ #include #include #include diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 412b5f8..5563be3 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include static const char *mpx_mapping_name(struct vm_area_struct *vma) { diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 757678f..edaf934 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -21,7 +21,7 @@ #include #include #include -#include /* pcntxt_mask */ +#include /* pcntxt_mask */ #include #ifdef CONFIG_X86_32 -- cgit v0.10.2 From df6397525cbe096d0eab0c1530fd25429d26f11b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 03:06:56 +0200 Subject: x86/fpu: Move MXCSR_DEFAULT to fpu/internal.h fpu/types.h gets included everywhere, move the MXCSR_DEFAULT to fpu/internal.h, the place where it's used. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 386a883..0e9a7a3 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -33,6 +33,8 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, # define ia32_setup_rt_frame __setup_rt_frame #endif +#define MXCSR_DEFAULT 0x1f80 + extern unsigned int mxcsr_feature_mask; extern void fpu__cpu_init(void); extern void eager_fpu_init(void); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index cad1c37..917d2e5 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -4,8 +4,6 @@ #ifndef _ASM_X86_FPU_H #define _ASM_X86_FPU_H -#define MXCSR_DEFAULT 0x1f80 - struct i387_fsave_struct { u32 cwd; /* FPU Control Word */ u32 swd; /* FPU Status Word */ -- cgit v0.10.2 From c0841e34fd9bbcef58c537151de8c83d2c910099 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 03:18:28 +0200 Subject: x86/fpu: Remove xsave_init() __init obfuscation So this code surprised me - and being surprised when reading FPU code does not help maintainability of an already overly complex subsystem. Remove the obfuscation and just don't use __init annotation for now. Anyone who wants to free these ~600 bytes of xstate_enable_boot_cpu() should implement it cleanly. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 4ff726e..6c1cbb2 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -606,8 +606,11 @@ static void __init init_xstate_size(void) /* * Enable and initialize the xsave feature. + * + * ( Not marked __init because of false positive section warnings + * generated by xsave_init(). ) */ -static void __init xstate_enable_boot_cpu(void) +static void /* __init */ xstate_enable_boot_cpu(void) { unsigned int eax, ebx, ecx, edx; @@ -663,21 +666,20 @@ static void __init xstate_enable_boot_cpu(void) /* * For the very first instance, this calls xstate_enable_boot_cpu(); * for all subsequent instances, this calls xstate_enable(). - * - * This is somewhat obfuscated due to the lack of powerful enough - * overrides for the section checks. */ void xsave_init(void) { - static __refdata void (*next_func)(void) = xstate_enable_boot_cpu; - void (*this_func)(void); + static char on_boot_cpu = 1; if (!cpu_has_xsave) return; - this_func = next_func; - next_func = xstate_enable; - this_func(); + if (on_boot_cpu) { + on_boot_cpu = 0; + xstate_enable_boot_cpu(); + } else { + xstate_enable(); + } } /* -- cgit v0.10.2 From 7b302e6731ac37fc688bc3085c11f5a886c90c08 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 02:39:22 +0200 Subject: x86/fpu: Remove assembly guard from asm/fpu/api.h asm/fpu/api.h does not contain any defines useful to assembly code, and no assembly code includes asm/fpu/api.h. Remove the historic #ifndef __ASSEMBLY__ leftover guard. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 9d3a6f3..f1eddcc 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,8 +10,6 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H -#ifndef __ASSEMBLY__ - #include #include @@ -102,6 +100,4 @@ static inline int user_has_fpu(void) extern void fpu__save(struct fpu *fpu); -#endif /* __ASSEMBLY__ */ - #endif /* _ASM_X86_FPU_API_H */ -- cgit v0.10.2 From 32d4d9ccb0f3209425d6e8aefa484bf00a5dc183 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 03:25:18 +0200 Subject: x86/fpu: Improve FPU detection kernel messages Standardize the various boot time messages printed during FPU detection: - Use a common 'x86/fpu: ' prefix for consistency and to make it easy to grep boot logs for FPU related messages - Correct spelling errors - Add printout for the legacy FPU case as well - Clarify messages Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 6c1cbb2..bfe92f73 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -3,9 +3,6 @@ * * Author: Suresh Siddha */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - #include #include #include @@ -615,7 +612,7 @@ static void /* __init */ xstate_enable_boot_cpu(void) unsigned int eax, ebx, ecx, edx; if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, KERN_ERR "XSTATE_CPUID missing\n"); + WARN(1, "x86/fpu: XSTATE_CPUID missing!\n"); return; } @@ -623,8 +620,7 @@ static void /* __init */ xstate_enable_boot_cpu(void) pcntxt_mask = eax + ((u64)edx << 32); if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - pr_err("FP/SSE not shown under xsave features 0x%llx\n", - pcntxt_mask); + pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", pcntxt_mask); BUG(); } @@ -650,17 +646,18 @@ static void /* __init */ xstate_enable_boot_cpu(void) if (pcntxt_mask & XSTATE_EAGER) { if (eagerfpu == DISABLE) { - pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", - pcntxt_mask & XSTATE_EAGER); + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + pcntxt_mask & XSTATE_EAGER); pcntxt_mask &= ~XSTATE_EAGER; } else { eagerfpu = ENABLE; } } - pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x using %s\n", - pcntxt_mask, xstate_size, - cpu_has_xsaves ? "compacted form" : "standard form"); + pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", + pcntxt_mask, + xstate_size, + cpu_has_xsaves ? "compacted" : "standard"); } /* @@ -671,8 +668,13 @@ void xsave_init(void) { static char on_boot_cpu = 1; - if (!cpu_has_xsave) + if (!cpu_has_xsave) { + if (on_boot_cpu) { + on_boot_cpu = 0; + pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + } return; + } if (on_boot_cpu) { on_boot_cpu = 0; -- cgit v0.10.2 From 69496e10f87cec182a9263046c1b251235b8c38e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 08:48:01 +0200 Subject: x86/fpu: Print supported xstate features in human readable way Inform the user/admin about which xstate features the kernel supports. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index bfe92f73..f39882b 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -482,6 +482,30 @@ static void __init setup_xstate_features(void) } while (1); } +static void print_xstate_feature(u64 xstate_mask, const char *desc) +{ + if (pcntxt_mask & xstate_mask) { + int xstate_feature = fls64(xstate_mask)-1; + + pr_info("x86/fpu: Supporting XSAVE feature %2d: '%s'\n", xstate_feature, desc); + } +} + +/* + * Print out all the supported xstate features: + */ +static void print_xstate_features(void) +{ + print_xstate_feature(XSTATE_FP, "x87 floating point registers"); + print_xstate_feature(XSTATE_SSE, "SSE registers"); + print_xstate_feature(XSTATE_YMM, "AVX registers"); + print_xstate_feature(XSTATE_BNDREGS, "MPX bounds registers"); + print_xstate_feature(XSTATE_BNDCSR, "MPX CSR"); + print_xstate_feature(XSTATE_OPMASK, "AVX-512 opmask"); + print_xstate_feature(XSTATE_ZMM_Hi256, "AVX-512 Hi256"); + print_xstate_feature(XSTATE_Hi16_ZMM, "AVX-512 ZMM_Hi256"); +} + /* * This function sets up offsets and sizes of all extended states in * xsave area. This supports both standard format and compacted format @@ -545,6 +569,7 @@ static void __init setup_init_fpu_buf(void) return; setup_xstate_features(); + print_xstate_features(); if (cpu_has_xsaves) { init_xstate_buf->xsave_hdr.xcomp_bv = -- cgit v0.10.2 From 614df7fb8a661b0881f9709206350b59de3f84ab Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 09:20:33 +0200 Subject: x86/fpu: Rename 'pcntxt_mask' to 'xfeatures_mask' So the 'pcntxt_mask' is a misnomer, it's essentially meaningless to anyone who doesn't know what it does exactly. Name it more descriptively as 'xfeatures_mask'. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h index 7c90ea9..400d5b2 100644 --- a/arch/x86/include/asm/fpu/xsave.h +++ b/arch/x86/include/asm/fpu/xsave.h @@ -45,7 +45,7 @@ #endif extern unsigned int xstate_size; -extern u64 pcntxt_mask; +extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern struct xsave_struct *init_xstate_buf; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 30016f0..8ea9ce0 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -528,7 +528,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, * mxcsr reserved bits must be masked to zero for security reasons. */ xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->xsave_hdr.xstate_bv &= pcntxt_mask; + xsave->xsave_hdr.xstate_bv &= xfeatures_mask; /* * These bits must be zero. */ diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index f39882b..c0e9553 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -13,9 +13,9 @@ #include /* - * Supported feature mask by the CPU and the kernel. + * Mask of xstate features supported by the CPU and the kernel: */ -u64 pcntxt_mask; +u64 xfeatures_mask; /* * Represents init state for the supported extended state. 
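For orientation, here is how the renamed mask gets populated at boot, condensed (not new code) from the xstate_enable_boot_cpu() and xstate_enable() paths shown further down in this patch: CPUID leaf XSTATE_CPUID (0xD) reports the CPU-supported xstate feature bits in EDX:EAX, the kernel intersects them with the features it knows how to handle, and the result is programmed into XCR0:

	unsigned int eax, ebx, ecx, edx;

	/* CPUID(0xD, 0): EDX:EAX enumerates the xstate features this CPU supports: */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	xfeatures_mask = eax + ((u64)edx << 32);

	/* Support only the state known to the OS: */
	xfeatures_mask &= XCNTXT_MASK;

	/* Enable OSXSAVE in CR4 and commit the enabled features to XCR0: */
	cr4_set_bits(X86_CR4_OSXSAVE);
	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
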
@@ -24,7 +24,7 @@ struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes; -static unsigned int xstate_comp_offsets[sizeof(pcntxt_mask)*8]; +static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; static unsigned int xstate_features; /* @@ -52,7 +52,7 @@ void __sanitize_i387_state(struct task_struct *tsk) * None of the feature bits are in init state. So nothing else * to do for us, as the memory layout is up to date. */ - if ((xstate_bv & pcntxt_mask) == pcntxt_mask) + if ((xstate_bv & xfeatures_mask) == xfeatures_mask) return; /* @@ -74,7 +74,7 @@ void __sanitize_i387_state(struct task_struct *tsk) if (!(xstate_bv & XSTATE_SSE)) memset(&fx->xmm_space[0], 0, 256); - xstate_bv = (pcntxt_mask & ~xstate_bv) >> 2; + xstate_bv = (xfeatures_mask & ~xstate_bv) >> 2; /* * Update all the other memory layouts for which the corresponding @@ -291,7 +291,7 @@ sanitize_restored_xstate(struct task_struct *tsk, if (fx_only) xsave_hdr->xstate_bv = XSTATE_FPSSE; else - xsave_hdr->xstate_bv &= (pcntxt_mask & xstate_bv); + xsave_hdr->xstate_bv &= (xfeatures_mask & xstate_bv); } if (use_fxsr()) { @@ -312,11 +312,11 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) { if (use_xsave()) { if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = pcntxt_mask & ~XSTATE_FPSSE; + u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; xrstor_state(init_xstate_buf, init_bv); return fxrstor_user(buf); } else { - u64 init_bv = pcntxt_mask & ~xbv; + u64 init_bv = xfeatures_mask & ~xbv; if (unlikely(init_bv)) xrstor_state(init_xstate_buf, init_bv); return xrestore_user(buf, xbv); @@ -439,7 +439,7 @@ static void prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; - fx_sw_reserved.xstate_bv = pcntxt_mask; + fx_sw_reserved.xstate_bv = xfeatures_mask; fx_sw_reserved.xstate_size = xstate_size; if (config_enabled(CONFIG_IA32_EMULATION)) { @@ -454,7 +454,7 @@ static void prepare_fx_sw_frame(void) static inline void xstate_enable(void) { cr4_set_bits(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); } /* @@ -465,7 +465,7 @@ static void __init setup_xstate_features(void) { int eax, ebx, ecx, edx, leaf = 0x2; - xstate_features = fls64(pcntxt_mask); + xstate_features = fls64(xfeatures_mask); xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); @@ -484,7 +484,7 @@ static void __init setup_xstate_features(void) static void print_xstate_feature(u64 xstate_mask, const char *desc) { - if (pcntxt_mask & xstate_mask) { + if (xfeatures_mask & xstate_mask) { int xstate_feature = fls64(xstate_mask)-1; pr_info("x86/fpu: Supporting XSAVE feature %2d: '%s'\n", xstate_feature, desc); @@ -516,7 +516,7 @@ static void print_xstate_features(void) */ void setup_xstate_comp(void) { - unsigned int xstate_comp_sizes[sizeof(pcntxt_mask)*8]; + unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; int i; /* @@ -529,7 +529,7 @@ void setup_xstate_comp(void) if (!cpu_has_xsaves) { for (i = 2; i < xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { xstate_comp_offsets[i] = xstate_offsets[i]; xstate_comp_sizes[i] = xstate_sizes[i]; } @@ -540,7 +540,7 @@ void setup_xstate_comp(void) xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; for (i = 2; i < 
xstate_features; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) + if (test_bit(i, (unsigned long *)&xfeatures_mask)) xstate_comp_sizes[i] = xstate_sizes[i]; else xstate_comp_sizes[i] = 0; @@ -573,8 +573,8 @@ static void __init setup_init_fpu_buf(void) if (cpu_has_xsaves) { init_xstate_buf->xsave_hdr.xcomp_bv = - (u64)1 << 63 | pcntxt_mask; - init_xstate_buf->xsave_hdr.xstate_bv = pcntxt_mask; + (u64)1 << 63 | xfeatures_mask; + init_xstate_buf->xsave_hdr.xstate_bv = xfeatures_mask; } /* @@ -604,7 +604,7 @@ __setup("eagerfpu=", eager_fpu_setup); /* - * Calculate total size of enabled xstates in XCR0/pcntxt_mask. + * Calculate total size of enabled xstates in XCR0/xfeatures_mask. */ static void __init init_xstate_size(void) { @@ -619,7 +619,7 @@ static void __init init_xstate_size(void) xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; for (i = 2; i < 64; i++) { - if (test_bit(i, (unsigned long *)&pcntxt_mask)) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); xstate_size += eax; } @@ -642,17 +642,17 @@ static void /* __init */ xstate_enable_boot_cpu(void) } cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - pcntxt_mask = eax + ((u64)edx << 32); + xfeatures_mask = eax + ((u64)edx << 32); - if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", pcntxt_mask); + if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); BUG(); } /* * Support only the state known to OS. */ - pcntxt_mask = pcntxt_mask & XCNTXT_MASK; + xfeatures_mask = xfeatures_mask & XCNTXT_MASK; xstate_enable(); @@ -661,7 +661,7 @@ static void /* __init */ xstate_enable_boot_cpu(void) */ init_xstate_size(); - update_regset_xstate_info(xstate_size, pcntxt_mask); + update_regset_xstate_info(xstate_size, xfeatures_mask); prepare_fx_sw_frame(); setup_init_fpu_buf(); @@ -669,18 +669,18 @@ static void /* __init */ xstate_enable_boot_cpu(void) if (cpu_has_xsaveopt && eagerfpu != DISABLE) eagerfpu = ENABLE; - if (pcntxt_mask & XSTATE_EAGER) { + if (xfeatures_mask & XSTATE_EAGER) { if (eagerfpu == DISABLE) { pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", - pcntxt_mask & XSTATE_EAGER); - pcntxt_mask &= ~XSTATE_EAGER; + xfeatures_mask & XSTATE_EAGER); + xfeatures_mask &= ~XSTATE_EAGER; } else { eagerfpu = ENABLE; } } pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", - pcntxt_mask, + xfeatures_mask, xstate_size, cpu_has_xsaves ? "compacted" : "standard"); } @@ -749,7 +749,7 @@ void __init_refok eager_fpu_init(void) void *get_xsave_addr(struct xsave_struct *xsave, int xstate) { int feature = fls64(xstate) - 1; - if (!test_bit(feature, (unsigned long *)&pcntxt_mask)) + if (!test_bit(feature, (unsigned long *)&xfeatures_mask)) return NULL; return (void *)xsave + xstate_comp_offsets[feature]; diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index edaf934..62054ac 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -21,7 +21,7 @@ #include #include #include -#include /* pcntxt_mask */ +#include /* xfeatures_mask */ #include #ifdef CONFIG_X86_32 @@ -225,7 +225,7 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) * restore XCR0 for xsave capable cpu's. 
*/ if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); fix_processor_context(); -- cgit v0.10.2 From 84246fe4e3a0e412a9602983ba37f4e4dbebb3e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 09:23:59 +0200 Subject: x86/fpu: Rename 'xstate_features' to 'xfeatures_nr' The name 'xstate_features' does not tell us whether it's a bitmap or any other value. That it's a count of features is only obvious if you read the code that calculates it. Rename it to the more descriptive 'xfeatures_nr' name. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index c0e9553..1d0e271 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -25,7 +25,9 @@ struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int *xstate_offsets, *xstate_sizes; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; -static unsigned int xstate_features; + +/* The number of supported xfeatures in xfeatures_mask: */ +static unsigned int xfeatures_nr; /* * If a processor implementation discern that a processor state component is @@ -465,9 +467,9 @@ static void __init setup_xstate_features(void) { int eax, ebx, ecx, edx, leaf = 0x2; - xstate_features = fls64(xfeatures_mask); - xstate_offsets = alloc_bootmem(xstate_features * sizeof(int)); - xstate_sizes = alloc_bootmem(xstate_features * sizeof(int)); + xfeatures_nr = fls64(xfeatures_mask); + xstate_offsets = alloc_bootmem(xfeatures_nr * sizeof(int)); + xstate_sizes = alloc_bootmem(xfeatures_nr * sizeof(int)); do { cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); @@ -528,7 +530,7 @@ void setup_xstate_comp(void) xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); if (!cpu_has_xsaves) { - for (i = 2; i < xstate_features; i++) { + for (i = 2; i < xfeatures_nr; i++) { if (test_bit(i, (unsigned long *)&xfeatures_mask)) { xstate_comp_offsets[i] = xstate_offsets[i]; xstate_comp_sizes[i] = xstate_sizes[i]; @@ -539,7 +541,7 @@ void setup_xstate_comp(void) xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < xstate_features; i++) { + for (i = 2; i < xfeatures_nr; i++) { if (test_bit(i, (unsigned long *)&xfeatures_mask)) xstate_comp_sizes[i] = xstate_sizes[i]; else -- cgit v0.10.2 From 9254aaa0fea4e8aa4e83539c379aecb41a975ca6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 10:02:32 +0200 Subject: x86/fpu: Move XCR0 manipulation to the FPU code proper The suspend code accesses FPU state internals, add a helper for it and isolate it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index f1eddcc..5bdde8c 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -23,6 +23,7 @@ extern void fpu__clear(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern void fpu__restore(void); extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); extern bool irq_fpu_usable(void); diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 1d0e271..a485180 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -735,6 +735,18 @@ void __init_refok eager_fpu_init(void) } /* + * Restore minimal FPU state after suspend: + */ +void fpu__resume_cpu(void) +{ + /* + * Restore XCR0 on xsave capable CPUs: + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); +} + +/* * Given the xsave area and a state inside, this function returns the * address of the state. * diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index 62054ac..ad0ce6b 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -18,10 +18,8 @@ #include #include #include -#include #include #include -#include /* xfeatures_mask */ #include #ifdef CONFIG_X86_32 @@ -155,6 +153,8 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(&current->active_mm->context); /* This does lldt */ + + fpu__resume_cpu(); } /** @@ -221,12 +221,6 @@ static void notrace __restore_processor_state(struct saved_context *ctxt) wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); #endif - /* - * restore XCR0 for xsave capable cpu's. - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); - fix_processor_context(); do_fpu_end(); -- cgit v0.10.2 From 8dcea8db793150ba7d56d56f0a397260db490abe Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 10:11:24 +0200 Subject: x86/fpu: Clean up regset functions Clean up various regset handlers: use the 'fpu' pointer which is available in most cases. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8ea9ce0..07c8a44 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -438,7 +438,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, sanitize_i387_state(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); + &fpu->state->fxsave, 0, -1); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -458,19 +458,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, sanitize_i387_state(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fxsave, 0, -1); + &fpu->state->fxsave, 0, -1); /* * mxcsr reserved bits must be masked to zero for security reasons. */ - target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask; + fpu->state->fxsave.mxcsr &= mxcsr_feature_mask; /* * update the header bits in the xsave header, indicating the * presence of FP and SSE state. 
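A note on why these header updates matter: XRSTOR consults xstate_bv and initializes, rather than loads, any state component whose bit is clear. After ptrace writes fresh FP/SSE register contents into the xsave area, the corresponding bits must therefore be set, or the next restore would silently discard the values just written. Schematically, using the names from this commit:

	/* After copying user-supplied register contents into the xsave area: */
	if (cpu_has_xsave)
		fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;

	/*
	 * With those bits clear, XRSTOR would treat the FP/SSE components
	 * as being in their init state and drop the newly written values.
	 */
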
*/ if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; return ret; } @@ -490,7 +490,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - xsave = &target->thread.fpu.state->xsave; + xsave = &fpu->state->xsave; /* * Copy the 48bytes defined by the software first into the xstate @@ -521,7 +521,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - xsave = &target->thread.fpu.state->xsave; + xsave = &fpu->state->xsave; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); /* @@ -533,6 +533,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, * These bits must be zero. */ memset(&xsave->xsave_hdr.reserved, 0, 48); + return ret; } @@ -690,7 +691,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, + &fpu->state->fsave, 0, -1); sanitize_i387_state(target); @@ -724,7 +725,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.fpu.state->fsave, 0, + &fpu->state->fsave, 0, -1); if (pos > 0 || count < sizeof(env)) @@ -739,7 +740,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP. */ if (cpu_has_xsave) - target->thread.fpu.state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; + fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; return ret; } -- cgit v0.10.2 From 3a54450b5ed1671a6adecf501a0b4d4c1d27235d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 10:14:36 +0200 Subject: x86/fpu: Rename 'xsave_hdr' to 'header' Code like: fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; is an eyesore, not only because the words 'xsave' and 'state' are each repeated twice (!), but also because the 'hdr' and 'bv' abbreviations are pretty meaningless at first glance. Start cleaning this up by renaming 'xsave_hdr' to 'header'. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0e9a7a3..3007df9 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -261,7 +261,7 @@ static inline int fpu_save_init(struct fpu *fpu) /* * xsave header may indicate the init state of the FP. 
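To illustrate the init-state optimization this comment refers to: after an XSAVE-family save (XSAVEOPT in particular), a clear bit in the header means the corresponding component was in its init state at save time, and with XSAVEOPT its memory image may not even have been rewritten. A schematic check, assuming the post-rename field names from this commit:

/* Illustrative sketch, not part of the patch series: */
static inline bool sketch_fp_was_in_init_state(struct xsave_struct *xsave)
{
	/* Bit 0 (XSTATE_FP) clear: the x87 state was in its init state when saved. */
	return !(xsave->header.xstate_bv & XSTATE_FP);
}
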
*/ - if (!(fpu->state->xsave.xsave_hdr.xstate_bv & XSTATE_FP)) + if (!(fpu->state->xsave.header.xstate_bv & XSTATE_FP)) return 1; } else if (use_fxsr()) { fpu_fxsave(fpu); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 917d2e5..33c0c7b 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -99,7 +99,7 @@ struct bndcsr { u64 bndstatus; } __packed; -struct xsave_hdr_struct { +struct xstate_header { u64 xstate_bv; u64 xcomp_bv; u64 reserved[6]; @@ -107,7 +107,7 @@ struct xsave_hdr_struct { struct xsave_struct { struct i387_fxsave_struct i387; - struct xsave_hdr_struct xsave_hdr; + struct xstate_header header; struct ymmh_struct ymmh; struct lwp_struct lwp; struct bndreg bndreg[4]; diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h index 400d5b2..b27b446 100644 --- a/arch/x86/include/asm/fpu/xsave.h +++ b/arch/x86/include/asm/fpu/xsave.h @@ -212,7 +212,7 @@ static inline int xsave_user(struct xsave_struct __user *buf) * Clear the xsave header first, so that reserved fields are * initialized to zero. */ - err = __clear_user(&buf->xsave_hdr, sizeof(buf->xsave_hdr)); + err = __clear_user(&buf->header, sizeof(buf->header)); if (unlikely(err)) return -EFAULT; diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index ccab4af..fa04241 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h @@ -14,7 +14,7 @@ struct user_ymmh_regs { __u32 ymmh_space[64]; }; -struct user_xsave_hdr { +struct user_xstate_header { __u64 xstate_bv; __u64 reserved1[2]; __u64 reserved2[5]; @@ -41,11 +41,11 @@ struct user_xsave_hdr { * particular process/thread. * * Also when the user modifies certain state FP/SSE/etc through the - * ptrace interface, they must ensure that the xsave_hdr.xstate_bv + * ptrace interface, they must ensure that the header.xstate_bv * bytes[512..519] of the memory layout are updated correspondingly. * i.e., for example when FP state is modified to a non-init state, - * xsave_hdr.xstate_bv's bit 0 must be set to '1', when SSE is modified to - * non-init state, xsave_hdr.xstate_bv's bit 1 must to be set to '1', etc. + * header.xstate_bv's bit 0 must be set to '1', when SSE is modified to + * non-init state, header.xstate_bv's bit 1 must to be set to '1', etc. */ #define USER_XSTATE_FX_SW_WORDS 6 #define USER_XSTATE_XCR0_WORD 0 @@ -55,7 +55,7 @@ struct user_xstateregs { __u64 fpx_space[58]; __u64 xstate_fx_sw[USER_XSTATE_FX_SW_WORDS]; } i387; - struct user_xsave_hdr xsave_hdr; + struct user_xstate_header header; struct user_ymmh_regs ymmh; /* further processor state extensions go here */ }; diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 16dc4e8..7f850f7 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -209,7 +209,7 @@ struct sigcontext { #endif /* !__i386__ */ -struct _xsave_hdr { +struct _header { __u64 xstate_bv; __u64 reserved1[2]; __u64 reserved2[5]; @@ -228,7 +228,7 @@ struct _ymmh_state { */ struct _xstate { struct _fpstate fpstate; - struct _xsave_hdr xstate_hdr; + struct _header xstate_hdr; struct _ymmh_state ymmh; /* new processor state extensions go here */ }; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 07c8a44..74189f3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -470,7 +470,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP and SSE state. 
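The mxcsr masking above is not cosmetic: FXRSTOR and XRSTOR raise #GP if any reserved MXCSR bit is set, so without the mask a ptrace writer could hand the traced task a register image that kills it on the next restore. A sketch of the invariant, with a hypothetical helper name:

#include <stdint.h>

/* Hypothetical helper; mxcsr_feature_mask is probed at boot via FXSAVE: */
static inline uint32_t sanitize_mxcsr(uint32_t user_mxcsr, uint32_t feature_mask)
{
	/*
	 * Keep only bits the CPU advertises as implemented; any set
	 * reserved bit would make the next FXRSTOR/XRSTOR fault:
	 */
	return user_mxcsr & feature_mask;
}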
*/ if (cpu_has_xsave) - fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE; + fpu->state->xsave.header.xstate_bv |= XSTATE_FPSSE; return ret; } @@ -528,11 +528,11 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, * mxcsr reserved bits must be masked to zero for security reasons. */ xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->xsave_hdr.xstate_bv &= xfeatures_mask; + xsave->header.xstate_bv &= xfeatures_mask; /* * These bits must be zero. */ - memset(&xsave->xsave_hdr.reserved, 0, 48); + memset(&xsave->header.reserved, 0, 48); return ret; } @@ -740,7 +740,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP. */ if (cpu_has_xsave) - fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; + fpu->state->xsave.header.xstate_bv |= XSTATE_FP; return ret; } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index a485180..03639fa 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -32,7 +32,7 @@ static unsigned int xfeatures_nr; /* * If a processor implementation discern that a processor state component is * in its initialized state it may modify the corresponding bit in the - * xsave_hdr.xstate_bv as '0', with out modifying the corresponding memory + * header.xstate_bv as '0', with out modifying the corresponding memory * layout in the case of xsaveopt. While presenting the xstate information to * the user, we always ensure that the memory layout of a feature will be in * the init state if the corresponding header bit is zero. This is to ensure @@ -48,7 +48,7 @@ void __sanitize_i387_state(struct task_struct *tsk) if (!fx) return; - xstate_bv = tsk->thread.fpu.state->xsave.xsave_hdr.xstate_bv; + xstate_bv = tsk->thread.fpu.state->xsave.header.xstate_bv; /* * None of the feature bits are in init state. So nothing else @@ -106,7 +106,7 @@ static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, struct _fpx_sw_bytes *fx_sw) { int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xsave_hdr_struct); + sizeof(struct xstate_header); unsigned int magic2; if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) @@ -178,7 +178,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) * Read the xstate_bv which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ - err |= __get_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); + err |= __get_user(xstate_bv, (__u32 *)&x->header.xstate_bv); /* * For legacy compatible, we always set FP/SSE bits in the bit @@ -193,7 +193,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) */ xstate_bv |= XSTATE_FPSSE; - err |= __put_user(xstate_bv, (__u32 *)&x->xsave_hdr.xstate_bv); + err |= __put_user(xstate_bv, (__u32 *)&x->header.xstate_bv); return err; } @@ -280,20 +280,20 @@ sanitize_restored_xstate(struct task_struct *tsk, u64 xstate_bv, int fx_only) { struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; - struct xsave_hdr_struct *xsave_hdr = &xsave->xsave_hdr; + struct xstate_header *header = &xsave->header; if (use_xsave()) { /* These bits must be zero. */ - memset(xsave_hdr->reserved, 0, 48); + memset(header->reserved, 0, 48); /* * Init the state that is not present in the memory * layout and not enabled by the OS. 
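The clamping in sanitize_restored_xstate() is the security-relevant half of sigreturn: user space controls the whole signal frame, so the feature bits it claims must be intersected with what the kernel actually enabled. A hypothetical wrapper spelling out the logic, using the names from the code above:

/* Hypothetical wrapper around the clamp performed above: */
static void clamp_user_xstate_bv(struct xstate_header *header,
				 u64 kernel_mask, u64 user_bv, int fx_only)
{
	if (fx_only) {
		/* The frame only carried an FXSAVE image: FP+SSE at most. */
		header->xstate_bv = XSTATE_FPSSE;
	} else {
		/*
		 * Keep a feature bit only if the kernel enabled it
		 * (kernel_mask) and the frame claims to contain it
		 * (user_bv); everything else stays in init state.
		 */
		header->xstate_bv &= kernel_mask & user_bv;
	}
}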
*/ if (fx_only) - xsave_hdr->xstate_bv = XSTATE_FPSSE; + header->xstate_bv = XSTATE_FPSSE; else - xsave_hdr->xstate_bv &= (xfeatures_mask & xstate_bv); + header->xstate_bv &= (xfeatures_mask & xstate_bv); } if (use_fxsr()) { @@ -574,9 +574,9 @@ static void __init setup_init_fpu_buf(void) print_xstate_features(); if (cpu_has_xsaves) { - init_xstate_buf->xsave_hdr.xcomp_bv = + init_xstate_buf->header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_xstate_buf->xsave_hdr.xstate_bv = xfeatures_mask; + init_xstate_buf->header.xstate_bv = xfeatures_mask; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 91d7f3b..ac24889 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3197,7 +3197,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; - u64 xstate_bv = xsave->xsave_hdr.xstate_bv; + u64 xstate_bv = xsave->header.xstate_bv; u64 valid; /* @@ -3243,9 +3243,9 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) memcpy(xsave, src, XSAVE_HDR_OFFSET); /* Set XSTATE_BV and possibly XCOMP_BV. */ - xsave->xsave_hdr.xstate_bv = xstate_bv; + xsave->header.xstate_bv = xstate_bv; if (cpu_has_xsaves) - xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; + xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* * Copy each region from the non-compacted offset to the @@ -7014,7 +7014,7 @@ int fx_init(struct kvm_vcpu *vcpu) fpstate_init(&vcpu->arch.guest_fpu); if (cpu_has_xsaves) - vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv = + vcpu->arch.guest_fpu.state->xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* -- cgit v0.10.2 From 400e4b209166dcd3e3a155401c57bdc6413bf715 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 10:19:47 +0200 Subject: x86/fpu: Rename xsave.header::xstate_bv to 'xfeatures' 'xsave.header::xstate_bv' is a misnomer - what does 'bv' stand for? It probably comes from the 'XGETBV' instruction name, but I could not find in the Intel documentation where that abbreviation comes from. It could mean 'bit vector' - or something else? But how about - instead of guessing about a weird name - we named the field in an obvious and descriptive way that tells us exactly what it does? So rename it to 'xfeatures', which is a bitmask of the xfeatures that are fpstate_active in that context structure. Eyesore like: fpu->state->xsave.xsave_hdr.xstate_bv |= XSTATE_FP; is now much more readable: fpu->state->xsave.header.xfeatures |= XSTATE_FP; Which form is not just infinitely more readable, but is also shorter as well. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3007df9..07c6adc 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -261,7 +261,7 @@ static inline int fpu_save_init(struct fpu *fpu) /* * xsave header may indicate the init state of the FP. 
*/ - if (!(fpu->state->xsave.header.xstate_bv & XSTATE_FP)) + if (!(fpu->state->xsave.header.xfeatures & XSTATE_FP)) return 1; } else if (use_fxsr()) { fpu_fxsave(fpu); diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 33c0c7b..9bd2cd1 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -100,7 +100,7 @@ struct bndcsr { } __packed; struct xstate_header { - u64 xstate_bv; + u64 xfeatures; u64 xcomp_bv; u64 reserved[6]; } __attribute__((packed)); diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h index fa04241..59a54e8 100644 --- a/arch/x86/include/asm/user.h +++ b/arch/x86/include/asm/user.h @@ -15,7 +15,7 @@ struct user_ymmh_regs { }; struct user_xstate_header { - __u64 xstate_bv; + __u64 xfeatures; __u64 reserved1[2]; __u64 reserved2[5]; }; @@ -41,11 +41,11 @@ struct user_xstate_header { * particular process/thread. * * Also when the user modifies certain state FP/SSE/etc through the - * ptrace interface, they must ensure that the header.xstate_bv + * ptrace interface, they must ensure that the header.xfeatures * bytes[512..519] of the memory layout are updated correspondingly. * i.e., for example when FP state is modified to a non-init state, - * header.xstate_bv's bit 0 must be set to '1', when SSE is modified to - * non-init state, header.xstate_bv's bit 1 must to be set to '1', etc. + * header.xfeatures's bit 0 must be set to '1', when SSE is modified to + * non-init state, header.xfeatures's bit 1 must to be set to '1', etc. */ #define USER_XSTATE_FX_SW_WORDS 6 #define USER_XSTATE_XCR0_WORD 0 diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 7f850f7..0e8a973 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -25,7 +25,7 @@ struct _fpx_sw_bytes { __u32 extended_size; /* total size of the layout referred by * fpstate pointer in the sigcontext. */ - __u64 xstate_bv; + __u64 xfeatures; /* feature bit mask (including fp/sse/extended * state) that is present in the memory * layout. @@ -210,7 +210,7 @@ struct sigcontext { #endif /* !__i386__ */ struct _header { - __u64 xstate_bv; + __u64 xfeatures; __u64 reserved1[2]; __u64 reserved2[5]; }; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 74189f3..ae8f26b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -470,7 +470,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP and SSE state. */ if (cpu_has_xsave) - fpu->state->xsave.header.xstate_bv |= XSTATE_FPSSE; + fpu->state->xsave.header.xfeatures |= XSTATE_FPSSE; return ret; } @@ -528,7 +528,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, * mxcsr reserved bits must be masked to zero for security reasons. */ xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->header.xstate_bv &= xfeatures_mask; + xsave->header.xfeatures &= xfeatures_mask; /* * These bits must be zero. */ @@ -740,7 +740,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP. 
*/ if (cpu_has_xsave) - fpu->state->xsave.header.xstate_bv |= XSTATE_FP; + fpu->state->xsave.header.xfeatures |= XSTATE_FP; return ret; } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 03639fa..467e463 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -32,7 +32,7 @@ static unsigned int xfeatures_nr; /* * If a processor implementation discern that a processor state component is * in its initialized state it may modify the corresponding bit in the - * header.xstate_bv as '0', with out modifying the corresponding memory + * header.xfeatures as '0', with out modifying the corresponding memory * layout in the case of xsaveopt. While presenting the xstate information to * the user, we always ensure that the memory layout of a feature will be in * the init state if the corresponding header bit is zero. This is to ensure @@ -43,24 +43,24 @@ void __sanitize_i387_state(struct task_struct *tsk) { struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; int feature_bit = 0x2; - u64 xstate_bv; + u64 xfeatures; if (!fx) return; - xstate_bv = tsk->thread.fpu.state->xsave.header.xstate_bv; + xfeatures = tsk->thread.fpu.state->xsave.header.xfeatures; /* * None of the feature bits are in init state. So nothing else * to do for us, as the memory layout is up to date. */ - if ((xstate_bv & xfeatures_mask) == xfeatures_mask) + if ((xfeatures & xfeatures_mask) == xfeatures_mask) return; /* * FP is in init state */ - if (!(xstate_bv & XSTATE_FP)) { + if (!(xfeatures & XSTATE_FP)) { fx->cwd = 0x37f; fx->swd = 0; fx->twd = 0; @@ -73,17 +73,17 @@ void __sanitize_i387_state(struct task_struct *tsk) /* * SSE is in init state */ - if (!(xstate_bv & XSTATE_SSE)) + if (!(xfeatures & XSTATE_SSE)) memset(&fx->xmm_space[0], 0, 256); - xstate_bv = (xfeatures_mask & ~xstate_bv) >> 2; + xfeatures = (xfeatures_mask & ~xfeatures) >> 2; /* * Update all the other memory layouts for which the corresponding * header bit is in the init state. */ - while (xstate_bv) { - if (xstate_bv & 0x1) { + while (xfeatures) { + if (xfeatures & 0x1) { int offset = xstate_offsets[feature_bit]; int size = xstate_sizes[feature_bit]; @@ -92,7 +92,7 @@ void __sanitize_i387_state(struct task_struct *tsk) size); } - xstate_bv >>= 1; + xfeatures >>= 1; feature_bit++; } } @@ -162,7 +162,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) { struct xsave_struct __user *x = buf; struct _fpx_sw_bytes *sw_bytes; - u32 xstate_bv; + u32 xfeatures; int err; /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ @@ -175,25 +175,25 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); /* - * Read the xstate_bv which we copied (directly from the cpu or + * Read the xfeatures which we copied (directly from the cpu or * from the state in task struct) to the user buffers. */ - err |= __get_user(xstate_bv, (__u32 *)&x->header.xstate_bv); + err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); /* * For legacy compatible, we always set FP/SSE bits in the bit * vector while saving the state to the user context. This will * enable us capturing any changes(during sigreturn) to * the FP/SSE bits by the legacy applications which don't touch - * xstate_bv in the xsave header. + * xfeatures in the xsave header. 
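A condensed sketch of the read-modify-write described by this comment; kernel context (__get_user()/__put_user(), the names from save_xstate_epilog()) is assumed. Only the low 32 bits of the 64-bit xfeatures field are touched through the (__u32 *) casts, which is safe precisely because FP and SSE are bits 0 and 1:

u32 xfeatures_lo;

err |= __get_user(xfeatures_lo, (__u32 *)&x->header.xfeatures);
xfeatures_lo |= XSTATE_FPSSE;		/* FP = bit 0, SSE = bit 1 */
err |= __put_user(xfeatures_lo, (__u32 *)&x->header.xfeatures);

Legacy applications modify x87/SSE registers through the signal frame without ever touching the header, so sigreturn has to assume those two components are always live.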
* - * xsave aware apps can change the xstate_bv in the xsave + * xsave aware apps can change the xfeatures in the xsave * header as well as change any contents in the memory layout. * xrestore as part of sigreturn will capture all the changes. */ - xstate_bv |= XSTATE_FPSSE; + xfeatures |= XSTATE_FPSSE; - err |= __put_user(xstate_bv, (__u32 *)&x->header.xstate_bv); + err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); return err; } @@ -277,7 +277,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) static inline void sanitize_restored_xstate(struct task_struct *tsk, struct user_i387_ia32_struct *ia32_env, - u64 xstate_bv, int fx_only) + u64 xfeatures, int fx_only) { struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; struct xstate_header *header = &xsave->header; @@ -291,9 +291,9 @@ sanitize_restored_xstate(struct task_struct *tsk, * layout and not enabled by the OS. */ if (fx_only) - header->xstate_bv = XSTATE_FPSSE; + header->xfeatures = XSTATE_FPSSE; else - header->xstate_bv &= (xfeatures_mask & xstate_bv); + header->xfeatures &= (xfeatures_mask & xfeatures); } if (use_fxsr()) { @@ -335,7 +335,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; int state_size = xstate_size; - u64 xstate_bv = 0; + u64 xfeatures = 0; int fx_only = 0; ia32_fxstate &= (config_enabled(CONFIG_X86_32) || @@ -369,7 +369,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) fx_only = 1; } else { state_size = fx_sw_user.xstate_size; - xstate_bv = fx_sw_user.xstate_bv; + xfeatures = fx_sw_user.xfeatures; } } @@ -398,7 +398,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) fpstate_init(fpu); err = -1; } else { - sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only); + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } fpu->fpstate_active = 1; @@ -415,7 +415,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) * state to the registers directly (with exceptions handled). */ user_fpu_begin(); - if (restore_user_xstate(buf_fx, xstate_bv, fx_only)) { + if (restore_user_xstate(buf_fx, xfeatures, fx_only)) { fpu_reset_state(fpu); return -1; } @@ -441,7 +441,7 @@ static void prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; - fx_sw_reserved.xstate_bv = xfeatures_mask; + fx_sw_reserved.xfeatures = xfeatures_mask; fx_sw_reserved.xstate_size = xstate_size; if (config_enabled(CONFIG_IA32_EMULATION)) { @@ -576,7 +576,7 @@ static void __init setup_init_fpu_buf(void) if (cpu_has_xsaves) { init_xstate_buf->header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_xstate_buf->header.xstate_bv = xfeatures_mask; + init_xstate_buf->header.xfeatures = xfeatures_mask; } /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac24889..0b58b93 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3197,7 +3197,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; - u64 xstate_bv = xsave->header.xstate_bv; + u64 xstate_bv = xsave->header.xfeatures; u64 valid; /* @@ -3243,7 +3243,7 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) memcpy(xsave, src, XSAVE_HDR_OFFSET); /* Set XSTATE_BV and possibly XCOMP_BV. 
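The xcomp_bv writes here and in setup_init_fpu_buf() follow the XSAVES compacted-format rule: bit 63 of xcomp_bv switches compaction on, and the low bits name the components actually allocated, back to back, in the save area. A sketch with a hypothetical macro name (the code above open-codes the constant as (u64)1 << 63, a.k.a. XSTATE_COMPACTION_ENABLED):

#define XCOMP_BV_COMPACTED_FORMAT	(1ULL << 63)	/* hypothetical name */

/* Compacted format: set bit 63 and list the allocated components: */
xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask;

With xcomp_bv left at zero the same area uses the fixed 'standard' layout whose offsets come from CPUID leaf 0xD.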
*/ - xsave->header.xstate_bv = xstate_bv; + xsave->header.xfeatures = xstate_bv; if (cpu_has_xsaves) xsave->header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; -- cgit v0.10.2 From 91a8c2a5b43fc4be4adb4bda50cd331697e289e0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 10:49:11 +0200 Subject: x86/fpu: Clean up and fix MXCSR handling The code has the following problems: - it uses a single global 'fx_scratch' area that multiple CPUs could write into simultaneously, in theory. - it wastes 512 bytes of .data for something that is only rarely used. Fix this by moving the state buffer to the stack. Note that while this is 512 bytes, we don't ever call this function in very deep callchains, so its stack usage should not be a problem. Also add comments to explain the magic 0x0000ffbf default value. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 33df056..0b16f61 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -68,18 +68,26 @@ void fpu__init_check_bugs(void) * Boot time FPU feature detection code: */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; + unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -static struct i387_fxsave_struct fx_scratch; static void mxcsr_feature_mask_init(void) { - unsigned long mask = 0; + unsigned int mask = 0; if (cpu_has_fxsr) { - memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : "+m" (fx_scratch)); - mask = fx_scratch.mxcsr_mask; + struct i387_fxsave_struct fx_tmp __aligned(32) = { }; + + asm volatile("fxsave %0" : "+m" (fx_tmp)); + + mask = fx_tmp.mxcsr_mask; + + /* + * If zero then use the default features mask, + * which has all features set, except the + * denormals-are-zero feature bit: + */ if (mask == 0) mask = 0x0000ffbf; } -- cgit v0.10.2 From 678eaf603460180260a645de359050fd6568cf74 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 14:48:24 +0200 Subject: x86/fpu: Rename regset FPU register accessors Rename regset accessors to prefix them with 'regset_', because we want to start using the 'fpregs_active' name elsewhere. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 07c6adc..6eea81c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -46,17 +46,17 @@ extern void convert_from_fxsr(struct user_i387_ia32_struct *env, extern void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env); -extern user_regset_active_fn fpregs_active, xfpregs_active; +extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, xstateregs_set; /* - * xstateregs_active == fpregs_active. Please refer to the comment - * at the definition of fpregs_active. + * xstateregs_active == regset_fpregs_active. Please refer to the comment + * at the definition of regset_fpregs_active. 
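One detail worth making explicit in the MXCSR cleanup above: the probed mask is the MXCSR_MASK field of the FXSAVE image (bytes 28..31 of the 512-byte area), and a stored value of zero is the architected way for early FXSAVE-capable CPUs to say 'use the default'. A sketch of the probe, per the patch:

/* Sketch of the probe; FXSAVE requires at least 16-byte alignment: */
struct i387_fxsave_struct fx_tmp __aligned(32) = { };
unsigned int mask = 0;

if (cpu_has_fxsr) {
	asm volatile("fxsave %0" : "+m" (fx_tmp));

	mask = fx_tmp.mxcsr_mask;	/* bytes 28..31 of the image */
	if (mask == 0)
		mask = 0x0000ffbf;	/* default: all bits except DAZ (bit 6) */
}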
*/ -#define xstateregs_active fpregs_active +#define xstateregs_active regset_fpregs_active #ifdef CONFIG_MATH_EMULATION extern void finit_soft_fpu(struct i387_soft_struct *soft); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ae8f26b..4978a77 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -403,18 +403,18 @@ void fpu__clear(struct task_struct *tsk) } /* - * The xstateregs_active() routine is the same as the fpregs_active() routine, + * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, * as the "regset->n" for the xstate regset will be updated based on the feature * capabilites supported by the xsave. */ -int fpregs_active(struct task_struct *target, const struct user_regset *regset) +int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { struct fpu *target_fpu = &target->thread.fpu; return target_fpu->fpstate_active ? regset->n : 0; } -int xfpregs_active(struct task_struct *target, const struct user_regset *regset) +int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) { struct fpu *target_fpu = &target->thread.fpu; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index c14a00f..4c61566 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1296,7 +1296,7 @@ static struct user_regset x86_64_regsets[] __read_mostly = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_struct) / sizeof(long), .size = sizeof(long), .align = sizeof(long), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, @@ -1337,13 +1337,13 @@ static struct user_regset x86_32_regsets[] __read_mostly = { .core_note_type = NT_PRFPREG, .n = sizeof(struct user_i387_ia32_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = fpregs_active, .get = fpregs_get, .set = fpregs_set + .active = regset_fpregs_active, .get = fpregs_get, .set = fpregs_set }, [REGSET_XFP] = { .core_note_type = NT_PRXFPREG, .n = sizeof(struct user32_fxsr_struct) / sizeof(u32), .size = sizeof(u32), .align = sizeof(u32), - .active = xfpregs_active, .get = xfpregs_get, .set = xfpregs_set + .active = regset_xregset_fpregs_active, .get = xfpregs_get, .set = xfpregs_set }, [REGSET_XSTATE] = { .core_note_type = NT_X86_XSTATE, -- cgit v0.10.2 From e783e8167ddf275782ef448eb139fafff3ac3af2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 11:43:47 +0200 Subject: x86/fpu: Explain the AVX register layout in the xsave area The previous explanation was rather cryptic. Also transform "u32 [64]" to the more readable "u8[256]" form. No change in implementation. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 9bd2cd1..8a5120a 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -78,9 +78,16 @@ struct i387_soft_struct { u32 entry_eip; }; +/* + * There are 16x 256-bit AVX registers named YMM0-YMM15. + * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) + * and are stored in 'struct i387_fxsave_struct::xmm_space[]'. + * + * The high 128 bits are stored here: + * 16x 128 bits == 256 bytes. 
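With the buffer retyped as u8[256], the addressing becomes direct: the high half of YMMn occupies bytes 16*n .. 16*n + 15. A hypothetical accessor to make that concrete (kernel u8 assumed):

/* Hypothetical helper: high 128 bits of YMM register 'n': */
static inline u8 *ymmh_of(struct ymmh_struct *ymmh, int n)
{
	return &ymmh->ymmh_space[16 * n];	/* 16 bytes per register */
}

Under the old u32 ymmh_space[64] typing the same address was ymmh_space[4 * n], exactly the unit confusion the retyping removes.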
+ */ struct ymmh_struct { - /* 16 * 16 bytes for each YMMH-reg = 256 bytes */ - u32 ymmh_space[64]; + u8 ymmh_space[256]; }; /* We don't support LWP yet: */ -- cgit v0.10.2 From 73a3aeb3ac5312122d5a261c7acb0ab9be93857a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 11:32:59 +0200 Subject: x86/fpu: Improve the __sanitize_i387_state() documentation Improve the comments and add new ones, as this code isn't very obvious. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 467e463..f3d30f0 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -30,19 +30,23 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; static unsigned int xfeatures_nr; /* - * If a processor implementation discern that a processor state component is - * in its initialized state it may modify the corresponding bit in the - * header.xfeatures as '0', with out modifying the corresponding memory - * layout in the case of xsaveopt. While presenting the xstate information to - * the user, we always ensure that the memory layout of a feature will be in - * the init state if the corresponding header bit is zero. This is to ensure - * that the user doesn't see some stale state in the memory layout during - * signal handling, debugging etc. + * When executing XSAVEOPT (optimized XSAVE), if a processor implementation + * detects that an FPU state component is still (or is again) in its + * initialized state, it may clear the corresponding bit in the header.xfeatures + * field, and can skip the writeout of registers to the corresponding memory layout. + * + * This means that when the bit is zero, the state component might still contain + * some previous - non-initialized register state. + * + * Before writing xstate information to user-space we sanitize those components, + * to always ensure that the memory layout of a feature will be in the init state + * if the corresponding header bit is zero. This is to ensure that user-space doesn't + * see some stale state in the memory layout during signal handling, debugging etc. */ void __sanitize_i387_state(struct task_struct *tsk) { struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; - int feature_bit = 0x2; + int feature_bit; u64 xfeatures; if (!fx) @@ -76,19 +80,25 @@ void __sanitize_i387_state(struct task_struct *tsk) if (!(xfeatures & XSTATE_SSE)) memset(&fx->xmm_space[0], 0, 256); + /* + * First two features are FPU and SSE, which above we handled + * in a special way already: + */ + feature_bit = 0x2; xfeatures = (xfeatures_mask & ~xfeatures) >> 2; /* - * Update all the other memory layouts for which the corresponding - * header bit is in the init state. 
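The xstate_offsets[]/xstate_sizes[] tables this loop indexes are filled at boot from CPUID leaf 0xD, one subleaf per dynamically-placed component; numbering starts at 2 because FP and SSE occupy fixed positions in the legacy area. A sketch, assuming the cpuid_count() helper as used by setup_xstate_features() later in this series:

/* One CPUID 0xD subleaf per xstate component, starting at component 2: */
for (leaf = 2; leaf < xfeatures_nr; leaf++) {
	cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx);

	xstate_sizes[leaf]   = eax;	/* EAX: component size in bytes */
	xstate_offsets[leaf] = ebx;	/* EBX: offset in the standard layout */
}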
+ * Update all the remaining memory layouts according to their + * standard xstate layout, if their header bit is in the init + * state: */ while (xfeatures) { if (xfeatures & 0x1) { int offset = xstate_offsets[feature_bit]; int size = xstate_sizes[feature_bit]; - memcpy(((void *) fx) + offset, - ((void *) init_xstate_buf) + offset, + memcpy((void *)fx + offset, + (void *)init_xstate_buf + offset, size); } -- cgit v0.10.2 From d5cea9b0af1509f170337ba8f47160d0699ff374 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 14:19:26 +0200 Subject: x86/fpu: Rename fpu->has_fpu to fpu->fpregs_active So the current code uses fpu->has_fpu to determine whether a given user FPU context is actively loaded into the FPU's registers [*] and that those registers represent the task's current FPU state. But this term is not unambiguous: especially the distinction between fpu->has_fpu, PF_USED_MATH and fpu_fpregs_owner_ctx is not clear. Increase clarity by unambiguously signalling that it's about hardware registers being active right now, by renaming it to fpu->fpregs_active. ( In later patches we'll use more of the 'fpregs' naming, which will make it easier to grep for as well. ) [*] There's the kernel_fpu_begin()/end() primitive that also activates FPU hw registers and uses them, without touching the fpu->fpregs_active flag. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 5bdde8c..4ca745c 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -96,7 +96,7 @@ static inline void irq_ts_restore(int TS_state) */ static inline int user_has_fpu(void) { - return current->thread.fpu.has_fpu; + return current->thread.fpu.fpregs_active; } extern void fpu__save(struct fpu *fpu); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 6eea81c..b546ec8 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -308,7 +308,7 @@ static inline int restore_fpu_checking(struct fpu *fpu) "fnclex\n\t" "emms\n\t" "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpu->has_fpu)); + : : [addr] "m" (fpu->fpregs_active)); } return fpu_restore_checking(fpu); @@ -317,14 +317,14 @@ static inline int restore_fpu_checking(struct fpu *fpu) /* Must be paired with an 'stts' after! */ static inline void __thread_clear_has_fpu(struct fpu *fpu) { - fpu->has_fpu = 0; + fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); } /* Must be paired with a 'clts' before! */ static inline void __thread_set_has_fpu(struct fpu *fpu) { - fpu->has_fpu = 1; + fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); } @@ -357,7 +357,7 @@ static inline void drop_fpu(struct fpu *fpu) preempt_disable(); fpu->counter = 0; - if (fpu->has_fpu) { + if (fpu->fpregs_active) { /* Ignore delayed exceptions from user space */ asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); @@ -416,14 +416,14 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) fpu.preload = new_fpu->fpstate_active && (use_eager_fpu() || new_fpu->counter > 5); - if (old_fpu->has_fpu) { + if (old_fpu->fpregs_active) { if (!fpu_save_init(old_fpu)) old_fpu->last_cpu = -1; else old_fpu->last_cpu = cpu; /* But leave fpu_fpregs_owner_ctx!
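To keep the renamed flag straight against the other state the changelog mentions: fpu->fpregs_active says 'this context activated the hardware registers', the per-CPU fpu_fpregs_owner_ctx pointer records which context the registers currently belong to, and fpstate_active merely says the task owns an FPU state image at all. A hypothetical predicate tying the first two together:

/* Hypothetical sketch: do the HW registers hold this context's state? */
static inline int fpregs_state_is_live(struct fpu *fpu)
{
	/*
	 * The activate/deactivate helpers introduced in the next
	 * patches keep these two in sync: setting the flag always
	 * also claims the per-CPU owner pointer.
	 */
	return fpu->fpregs_active &&
	       this_cpu_read(fpu_fpregs_owner_ctx) == fpu;
}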
*/ - old_fpu->has_fpu = 0; + old_fpu->fpregs_active = 0; /* Don't change CR0.TS if we just switch! */ if (fpu.preload) { diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 8a5120a..231a8f5 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -142,7 +142,7 @@ struct fpu { */ unsigned int last_cpu; - unsigned int has_fpu; + unsigned int fpregs_active; union thread_xstate *state; /* * This counter contains the number of consecutive context switches diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4978a77..c8ae838 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -62,7 +62,7 @@ static bool interrupted_kernel_fpu_idle(void) if (use_eager_fpu()) return true; - return !current->thread.fpu.has_fpu && (read_cr0() & X86_CR0_TS); + return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS); } /* @@ -100,7 +100,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); - if (fpu->has_fpu) { + if (fpu->fpregs_active) { fpu_save_init(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); @@ -114,7 +114,7 @@ void __kernel_fpu_end(void) { struct fpu *fpu = ¤t->thread.fpu; - if (fpu->has_fpu) { + if (fpu->fpregs_active) { if (WARN_ON(restore_fpu_checking(fpu))) fpu_reset_state(fpu); } else if (!use_eager_fpu()) { @@ -147,7 +147,7 @@ void fpu__save(struct fpu *fpu) WARN_ON(fpu != ¤t->thread.fpu); preempt_disable(); - if (fpu->has_fpu) { + if (fpu->fpregs_active) { if (use_eager_fpu()) { __save_fpu(fpu); } else { @@ -243,7 +243,7 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { dst_fpu->counter = 0; - dst_fpu->has_fpu = 0; + dst_fpu->fpregs_active = 0; dst_fpu->state = NULL; dst_fpu->last_cpu = -1; -- cgit v0.10.2 From dfaea4e6a27afffb0cc6da7aaaae81abc127fdb3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 14:26:47 +0200 Subject: x86/fpu: Rename __thread_set_has_fpu() to __fpregs_activate() Propagate the 'fpu->fpregs_active' naming to the functions that sets it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b546ec8..3554a8c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -322,7 +322,7 @@ static inline void __thread_clear_has_fpu(struct fpu *fpu) } /* Must be paired with a 'clts' before! */ -static inline void __thread_set_has_fpu(struct fpu *fpu) +static inline void __fpregs_activate(struct fpu *fpu) { fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); @@ -346,7 +346,7 @@ static inline void __thread_fpu_begin(struct fpu *fpu) { if (!use_eager_fpu()) clts(); - __thread_set_has_fpu(fpu); + __fpregs_activate(fpu); } static inline void drop_fpu(struct fpu *fpu) @@ -428,7 +428,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) /* Don't change CR0.TS if we just switch! 
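The clts()/stts() calls in these helpers encode the whole lazy-FPU trap protocol: with CR0.TS set, the next FPU instruction raises #NM (device-not-available), which is the hook used to fault FPU state back in on demand; eager-FPU kernels never set TS and never pay for the trap. A sketch of the pairing that the next two patches in this series converge on:

static inline void fpregs_activate_sketch(struct fpu *fpu)
{
	if (!use_eager_fpu())
		clts();			/* clear CR0.TS: FPU insns run again */
	__fpregs_activate(fpu);		/* flag + owner pointer */
}

static inline void fpregs_deactivate_sketch(struct fpu *fpu)
{
	__fpregs_deactivate(fpu);	/* flag + owner pointer */
	if (!use_eager_fpu())
		stts();			/* set CR0.TS: next FPU insn traps #NM */
}

Note the symmetry: TS is cleared before the activation bookkeeping, and set only after the deactivation bookkeeping.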
*/ if (fpu.preload) { new_fpu->counter++; - __thread_set_has_fpu(new_fpu); + __fpregs_activate(new_fpu); prefetch(new_fpu->state); } else if (!use_eager_fpu()) stts(); -- cgit v0.10.2 From 723c58e428fbcbc9f16864edf740ec3bfaf3703c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 14:28:01 +0200 Subject: x86/fpu: Rename __thread_clear_has_fpu() to __fpregs_deactivate() Propagate the 'fpu->fpregs_active' naming to the functions that clears it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3554a8c..7a23517 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -315,7 +315,7 @@ static inline int restore_fpu_checking(struct fpu *fpu) } /* Must be paired with an 'stts' after! */ -static inline void __thread_clear_has_fpu(struct fpu *fpu) +static inline void __fpregs_deactivate(struct fpu *fpu) { fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); @@ -337,7 +337,7 @@ static inline void __fpregs_activate(struct fpu *fpu) */ static inline void __thread_fpu_end(struct fpu *fpu) { - __thread_clear_has_fpu(fpu); + __fpregs_deactivate(fpu); if (!use_eager_fpu()) stts(); } -- cgit v0.10.2 From 232f62cdd7c7162168a445cbc718a82e7f6e36c1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 14:30:38 +0200 Subject: x86/fpu: Rename __thread_fpu_begin() to fpregs_activate() Propagate the 'fpu->fpregs_active' naming to the high level function that sets it. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7a23517..18a6223 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -342,7 +342,7 @@ static inline void __thread_fpu_end(struct fpu *fpu) stts(); } -static inline void __thread_fpu_begin(struct fpu *fpu) +static inline void fpregs_activate(struct fpu *fpu) { if (!use_eager_fpu()) clts(); @@ -441,7 +441,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) fpu.preload = 0; else prefetch(new_fpu->state); - __thread_fpu_begin(new_fpu); + fpregs_activate(new_fpu); } } return fpu; @@ -499,7 +499,7 @@ static inline void user_fpu_begin(void) preempt_disable(); if (!user_has_fpu()) - __thread_fpu_begin(fpu); + fpregs_activate(fpu); preempt_enable(); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c8ae838..a1768ec 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -368,9 +368,9 @@ void fpu__restore(void) local_irq_disable(); } - /* Avoid __kernel_fpu_begin() right after __thread_fpu_begin() */ + /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); - __thread_fpu_begin(fpu); + fpregs_activate(fpu); if (unlikely(restore_fpu_checking(fpu))) { fpu_reset_state(fpu); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); -- cgit v0.10.2 From 66af8e276409196abd59e33919f928e4d002d4f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 14:31:27 +0200 Subject: x86/fpu: Rename __thread_fpu_end() to fpregs_deactivate() Propagate the 'fpu->fpregs_active' naming to the high level function that clears it. 
Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 18a6223..0292fcc 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -335,18 +335,18 @@ static inline void __fpregs_activate(struct fpu *fpu) * These generally need preemption protection to work, * do try to avoid using these on their own. */ -static inline void __thread_fpu_end(struct fpu *fpu) +static inline void fpregs_activate(struct fpu *fpu) { - __fpregs_deactivate(fpu); if (!use_eager_fpu()) - stts(); + clts(); + __fpregs_activate(fpu); } -static inline void fpregs_activate(struct fpu *fpu) +static inline void fpregs_deactivate(struct fpu *fpu) { + __fpregs_deactivate(fpu); if (!use_eager_fpu()) - clts(); - __fpregs_activate(fpu); + stts(); } static inline void drop_fpu(struct fpu *fpu) @@ -362,7 +362,7 @@ static inline void drop_fpu(struct fpu *fpu) asm volatile("1: fwait\n" "2:\n" _ASM_EXTABLE(1b, 2b)); - __thread_fpu_end(fpu); + fpregs_deactivate(fpu); } fpu->fpstate_active = 0; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a1768ec..3d2ec4b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -152,7 +152,7 @@ void fpu__save(struct fpu *fpu) __save_fpu(fpu); } else { fpu_save_init(fpu); - __thread_fpu_end(fpu); + fpregs_deactivate(fpu); } } preempt_enable(); -- cgit v0.10.2 From 6a133207587bce64efdd0fda9bea09ed994fa690 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 04:29:26 +0200 Subject: x86/fpu: Remove fpstate_xstate_init_size() boot quirk fpstate_xstate_init_size() is called in fpu__cpu_init(), which is run on every CPU, every time they are brought online. But we want to call fpstate_xstate_init_size() only once. Move it to fpu__detect(), which only runs once, on the boot CPU. Also clean up the flow of fpstate_xstate_init_size() a bit, by removing a 'return' from the middle of the function. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 0b16f61..3e0fee5 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -109,13 +109,12 @@ static void fpstate_xstate_init_size(void) setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); xstate_size = sizeof(struct i387_soft_struct); - return; + } else { + if (cpu_has_fxsr) + xstate_size = sizeof(struct i387_fxsave_struct); + else + xstate_size = sizeof(struct i387_fsave_struct); } - - if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); - else - xstate_size = sizeof(struct i387_fsave_struct); } /* @@ -151,12 +150,6 @@ void fpu__cpu_init(void) cr0 |= X86_CR0_EM; write_cr0(cr0); - /* - * fpstate_xstate_init_size() is only called once, to avoid overriding - * 'xstate_size' during (secondary CPU) bootup or during CPU hotplug. 
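For reference, the three sizes fpstate_xstate_init_size() chooses between; the XSAVE path recomputes xstate_size later, during fpu__init_system_xstate(), so these only matter on legacy and math-emulation configurations. A sketch of the selection after the cleanup, with the well-known image sizes noted:

if (!cpu_has_fpu) {
	/* Math emulation: no hardware image, only the soft state: */
	xstate_size = sizeof(struct i387_soft_struct);
} else if (cpu_has_fxsr) {
	xstate_size = sizeof(struct i387_fxsave_struct);	/* 512 bytes */
} else {
	xstate_size = sizeof(struct i387_fsave_struct);		/* 108 bytes */
}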
- */ - if (xstate_size == 0) - fpstate_xstate_init_size(); mxcsr_feature_mask_init(); xsave_init(); @@ -194,5 +187,7 @@ void fpu__detect(struct cpuinfo_x86 *c) else clear_cpu_cap(c, X86_FEATURE_FPU); - /* The final cr0 value is set in fpu_init() */ + /* The final cr0 value is set later, in fpu_init() */ + + fpstate_xstate_init_size(); } -- cgit v0.10.2 From 966ece619eaeae9b5cb6cede7fe6303b7031a51f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 05:04:41 +0200 Subject: x86/fpu: Remove xsave_init() bootmem allocations There's only 8 xstate bits at the moment, and it's not like we can support unknown bits - so put xstate_offsets[] and xstate_sizes[] into static allocation. This is in preparation to be able to call the FPU init code earlier, when there's no bootmem available yet. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h index b27b446..fd56434 100644 --- a/arch/x86/include/asm/fpu/xsave.h +++ b/arch/x86/include/asm/fpu/xsave.h @@ -15,6 +15,9 @@ #define XSTATE_ZMM_Hi256 0x40 #define XSTATE_Hi16_ZMM 0x80 +/* The highest xstate bit above (of XSTATE_Hi16_ZMM): */ +#define XFEATURES_NR_MAX 8 + #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) #define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index f3d30f0..adeab16 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -23,7 +23,7 @@ u64 xfeatures_mask; struct xsave_struct *init_xstate_buf; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int *xstate_offsets, *xstate_sizes; +static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX]; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; /* The number of supported xfeatures in xfeatures_mask: */ @@ -478,8 +478,6 @@ static void __init setup_xstate_features(void) int eax, ebx, ecx, edx, leaf = 0x2; xfeatures_nr = fls64(xfeatures_mask); - xstate_offsets = alloc_bootmem(xfeatures_nr * sizeof(int)); - xstate_sizes = alloc_bootmem(xfeatures_nr * sizeof(int)); do { cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); -- cgit v0.10.2 From 26b1f5d05a81a0e60eed718d2d073f050b0afc8f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 05:27:26 +0200 Subject: x86/fpu: Make setup_init_fpu_buf() run-once explicitly Remove the dependency on the init_xstate_buf == NULL check to implement once-per-bootup logic in eager_fpu_init(), by making setup_init_fpu_buf() run once per bootup explicitly. This is in preparation to make init_xstate_buf statically allocated. The various boot-once quirks in the FPU init code will be removed in a later cleanup stage. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index adeab16..d11b335 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -565,8 +565,14 @@ void setup_xstate_comp(void) /* * setup the xstate image representing the init state */ -static void __init setup_init_fpu_buf(void) +static void setup_init_fpu_buf(void) { + static int on_boot_cpu = 1; + + if (!on_boot_cpu) + return; + on_boot_cpu = 0; + /* * Setup init_xstate_buf to represent the init state of * all the features managed by the xsave @@ -738,8 +744,7 @@ void __init_refok eager_fpu_init(void) return; } - if (!init_xstate_buf) - setup_init_fpu_buf(); + setup_init_fpu_buf(); } /* -- cgit v0.10.2 From 3e5e1267740f47b1616aff5187b668cadd950047 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 05:08:17 +0200 Subject: x86/fpu: Remove 'init_xstate_buf' bootmem allocation Make init_xstate_buf allocated statically at build time. This structure's maximum size is around 1KB - and it's allocated even on most modern embedded x86 CPUs which strive for FPU instruction set parity with desktop and server CPUs, so it's not like we can save much on smaller systems. This removes the last bootmem allocation from the FPU init path, allowing it to be called earlier in the boot sequence. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0292fcc..19b7cdf 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -373,9 +373,9 @@ static inline void drop_fpu(struct fpu *fpu) static inline void restore_init_xstate(void) { if (use_xsave()) - xrstor_state(init_xstate_buf, -1); + xrstor_state(&init_xstate_ctx, -1); else - fxrstor_checking(&init_xstate_buf->i387); + fxrstor_checking(&init_xstate_ctx.i387); } /* diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h index fd56434..5c3ab4e 100644 --- a/arch/x86/include/asm/fpu/xsave.h +++ b/arch/x86/include/asm/fpu/xsave.h @@ -50,7 +50,7 @@ extern unsigned int xstate_size; extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern struct xsave_struct *init_xstate_buf; +extern struct xsave_struct init_xstate_ctx; extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index d11b335..45130ba 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -3,7 +3,6 @@ * * Author: Suresh Siddha */ -#include #include #include #include @@ -20,7 +19,7 @@ u64 xfeatures_mask; /* * Represents init state for the supported extended state. 
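Two properties make the static allocation work. First, the structure's size is fixed at build time (roughly 1KB: the 512-byte legacy area, the 64-byte header, 256 bytes of YMM high halves, plus the LWP/MPX members), so nothing needs runtime sizing. Second, .bss storage starts out zeroed, a correct 'everything in init state' baseline before setup_init_fpu_buf() runs its XRSTOR/XSAVE pass. A sketch of the compile-time invariant that keeps the layout usable directly, assuming the xsave_struct from fpu/types.h earlier in the series:

#include <stddef.h>

/* Standard layout: the 64-byte header sits right after the legacy area: */
_Static_assert(offsetof(struct xsave_struct, header) == 512,
	       "xstate header must start at byte 512");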
*/ -struct xsave_struct *init_xstate_buf; +struct xsave_struct init_xstate_ctx; static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX]; @@ -98,7 +97,7 @@ void __sanitize_i387_state(struct task_struct *tsk) int size = xstate_sizes[feature_bit]; memcpy((void *)fx + offset, - (void *)init_xstate_buf + offset, + (void *)&init_xstate_ctx + offset, size); } @@ -325,12 +324,12 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) if (use_xsave()) { if ((unsigned long)buf % 64 || fx_only) { u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; - xrstor_state(init_xstate_buf, init_bv); + xrstor_state(&init_xstate_ctx, init_bv); return fxrstor_user(buf); } else { u64 init_bv = xfeatures_mask & ~xbv; if (unlikely(init_bv)) - xrstor_state(init_xstate_buf, init_bv); + xrstor_state(&init_xstate_ctx, init_bv); return xrestore_user(buf, xbv); } } else if (use_fxsr()) { @@ -574,12 +573,10 @@ static void setup_init_fpu_buf(void) on_boot_cpu = 0; /* - * Setup init_xstate_buf to represent the init state of + * Setup init_xstate_ctx to represent the init state of * all the features managed by the xsave */ - init_xstate_buf = alloc_bootmem_align(xstate_size, - __alignof__(struct xsave_struct)); - fx_finit(&init_xstate_buf->i387); + fx_finit(&init_xstate_ctx.i387); if (!cpu_has_xsave) return; @@ -588,21 +585,20 @@ static void setup_init_fpu_buf(void) print_xstate_features(); if (cpu_has_xsaves) { - init_xstate_buf->header.xcomp_bv = - (u64)1 << 63 | xfeatures_mask; - init_xstate_buf->header.xfeatures = xfeatures_mask; + init_xstate_ctx.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_xstate_ctx.header.xfeatures = xfeatures_mask; } /* * Init all the features state with header_bv being 0x0 */ - xrstor_state_booting(init_xstate_buf, -1); + xrstor_state_booting(&init_xstate_ctx, -1); /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state_booting(init_xstate_buf); + xsave_state_booting(&init_xstate_ctx); } static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; @@ -727,7 +723,7 @@ void xsave_init(void) /* * setup_init_fpu_buf() is __init and it is OK to call it here because - * init_xstate_buf will be unset only once during boot. + * init_xstate_ctx will be unset only once during boot. */ void __init_refok eager_fpu_init(void) { -- cgit v0.10.2 From e35f6f14148c09ad534d122ed32722dd431ac184 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 04:34:48 +0200 Subject: x86/fpu: Split fpu__cpu_init() into early-boot and cpu-boot parts There are two kinds of FPU initialization sequences necessary to bring FPU functionality up: once per system bootup activities, such as detection, feature initialization, etc. of attributes that are shared by all CPUs in the system - and per cpu initialization sequences run when a CPU is brought online (either during bootup or during CPU hotplug onlining), such as CR0/CR4 register setting, etc. The FPU code is mixing these roles together, with no clear distinction. Start sorting this out by splitting the main FPU detection routine (fpu__cpu_init()) into two parts: fpu__init_system() for one per system init activities, and fpu__init_cpu() for the per CPU onlining init activities. Note that xstate_init() is called from both variants for the time being, because it has a dual nature as well. We'll fix that in upcoming patches. 
Just do the split and call it as we used to before, don't introduce any change in initialization behavior yet, beyond duplicate (and harmless) fpu__init_cpu() and xstate_init() calls - which we'll fix in later patches. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 3e0fee5..d6234ad 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -118,13 +118,9 @@ static void fpstate_xstate_init_size(void) } /* - * Called on the boot CPU at bootup to set up the initial FPU state that - * is later cloned into all processes. - * - * Also called on secondary CPUs to set up the FPU state of their - * idle threads. + * Enable all supported FPU features. Called when a CPU is brought online. */ -void fpu__cpu_init(void) +void fpu__init_cpu(void) { unsigned long cr0; unsigned long cr4_mask = 0; @@ -150,12 +146,29 @@ void fpu__cpu_init(void) cr0 |= X86_CR0_EM; write_cr0(cr0); + xsave_init(); +} + +/* + * Called on the boot CPU once per system bootup, to set up the initial FPU state that + * is later cloned into all processes. + */ +void fpu__init_system(void) +{ + /* The FPU has to be operational for some of the later FPU init activities: */ + fpu__init_cpu(); mxcsr_feature_mask_init(); xsave_init(); eager_fpu_init(); } +void fpu__cpu_init(void) +{ + fpu__init_cpu(); + fpu__init_system(); +} + static int __init no_387(char *s) { setup_clear_cpu_cap(X86_FEATURE_FPU); -- cgit v0.10.2 From 55cc4678b7ee2edd3e6a9411250530eb871bc61d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 06:26:36 +0200 Subject: x86/fpu: Make the system/cpu init distinction clear in the xstate code as well Rename existing xstate init functions along the system/cpu init principles: fpu__init_system_xstate(): called once per system bootup fpu__init_cpu_xstate(): called per CPU onlining Also make the fpu__init_cpu_xstate() early code invariant: if xfeatures_mask is not set yet then don't crash but return. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 19b7cdf..71d44be 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -39,6 +39,9 @@ extern unsigned int mxcsr_feature_mask; extern void fpu__cpu_init(void); extern void eager_fpu_init(void); +extern void fpu__init_system_xstate(void); +extern void fpu__init_cpu_xstate(void); + DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); extern void convert_from_fxsr(struct user_i387_ia32_struct *env, diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 45130ba..961c258 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -460,10 +460,14 @@ static void prepare_fx_sw_frame(void) } /* - * Enable the extended processor state save/restore feature + * Enable the extended processor state save/restore feature. + * Called once per CPU onlining. 
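The split matters because CR4 and XCR0 are per-logical-CPU registers, reset on hotplug onlining and on resume, while the xfeatures_mask probe is global and runs once. The per-CPU half is just the two-step enable below, and the ordering is architectural: XSETBV and XGETBV raise #UD until CR4.OSXSAVE is set. A sketch of the sequence (the cpu_has_xsave test is added by a follow-up patch below):

void fpu__init_cpu_xstate_sketch(void)
{
	if (!cpu_has_xsave || !xfeatures_mask)
		return;

	cr4_set_bits(X86_CR4_OSXSAVE);	/* XSETBV #UDs without this */
	xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask);
}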
*/ -static inline void xstate_enable(void) +void fpu__init_cpu_xstate(void) { + if (!xfeatures_mask) + return; + cr4_set_bits(X86_CR4_OSXSAVE); xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); } @@ -640,11 +644,12 @@ static void __init init_xstate_size(void) /* * Enable and initialize the xsave feature. + * Called once per system bootup. * * ( Not marked __init because of false positive section warnings * generated by xsave_init(). ) */ -static void /* __init */ xstate_enable_boot_cpu(void) +void fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; @@ -666,7 +671,8 @@ static void /* __init */ xstate_enable_boot_cpu(void) */ xfeatures_mask = xfeatures_mask & XCNTXT_MASK; - xstate_enable(); + /* Enable xstate instructions to be able to continue with initialization: */ + fpu__init_cpu_xstate(); /* * Recompute the context size for enabled features @@ -698,8 +704,8 @@ static void /* __init */ xstate_enable_boot_cpu(void) } /* - * For the very first instance, this calls xstate_enable_boot_cpu(); - * for all subsequent instances, this calls xstate_enable(). + * For the very first instance, this calls fpu__init_system_xstate(); + * for all subsequent instances, this calls fpu__init_cpu_xstate(). */ void xsave_init(void) { @@ -715,9 +721,9 @@ void xsave_init(void) if (on_boot_cpu) { on_boot_cpu = 0; - xstate_enable_boot_cpu(); + fpu__init_system_xstate(); } else { - xstate_enable(); + fpu__init_cpu_xstate(); } } -- cgit v0.10.2 From e84611fc96c67d3a073e327be44d0f9ee3e981ef Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 06:41:07 +0200 Subject: x86/fpu: Move CPU capability check into fpu__init_cpu_xstate() fpu__init_system_xstate() does an FPU capability check that is better done in fpu__init_cpu_xstate(). This will allow us to call fpu__init_cpu_xstate() directly on legacy CPUs as well. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 961c258..0610f43 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -465,7 +465,7 @@ static void prepare_fx_sw_frame(void) */ void fpu__init_cpu_xstate(void) { - if (!xfeatures_mask) + if (!cpu_has_xsave || !xfeatures_mask) return; cr4_set_bits(X86_CR4_OSXSAVE); -- cgit v0.10.2 From e9dbfd673a9cc9d3ef90e3ea4e136a833dab3674 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 06:47:24 +0200 Subject: x86/fpu: Move legacy check to fpu__init_system_xstate() Now that legacy code can execute fpu__init_cpu_xstate() in xsave_init(), we can move the once per boot legacy check into fpu__init_system_xstate(), where it belongs. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 0610f43..fd656cb 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -653,6 +653,11 @@ void fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; + if (!cpu_has_xsave) { + pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + return; + } + if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { WARN(1, "x86/fpu: XSTATE_CPUID missing!\n"); return; @@ -711,14 +716,6 @@ void xsave_init(void) { static char on_boot_cpu = 1; - if (!cpu_has_xsave) { - if (on_boot_cpu) { - on_boot_cpu = 0; - pr_info("x86/fpu: Legacy x87 FPU detected.\n"); - } - return; - } - if (on_boot_cpu) { on_boot_cpu = 0; fpu__init_system_xstate(); -- cgit v0.10.2 From 62db6871ae862fbd7f7abfebffe501b170698a8b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 06:50:09 +0200 Subject: x86/fpu: Propagate once per boot quirk into fpu__init_system_xstate() Linearize the call sequence in xsave_init(): fpu__init_system_xstate(); fpu__init_cpu_xstate(); We do this by propagating the boot-once quirk into fpu__init_system_xstate(). fpu__init_cpu_xstate() is safe to be called multiple times. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index fd656cb..9d5ff90 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -652,6 +652,11 @@ static void __init init_xstate_size(void) void fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; + static bool on_boot_cpu = 1; + + if (!on_boot_cpu) + return; + on_boot_cpu = 0; if (!cpu_has_xsave) { pr_info("x86/fpu: Legacy x87 FPU detected.\n"); return; @@ -714,14 +719,8 @@ void fpu__init_system_xstate(void) */ void xsave_init(void) { - static char on_boot_cpu = 1; - - if (on_boot_cpu) { - on_boot_cpu = 0; - fpu__init_system_xstate(); - } else { - fpu__init_cpu_xstate(); - } + fpu__init_system_xstate(); + fpu__init_cpu_xstate(); } -- cgit v0.10.2 From c42103b22652dae50e1aaacb5c2767145027ab3e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 06:52:53 +0200 Subject: x86/fpu: Remove xsave_init() Expand the xsave_init() calls into fpu__init_system_xstate() and fpu__init_cpu_xstate() calls. (This will allow us to call the proper versions in higher level FPU init code later on.) No change in functionality. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h index 5c3ab4e..a10e665 100644 --- a/arch/x86/include/asm/fpu/xsave.h +++ b/arch/x86/include/asm/fpu/xsave.h @@ -52,7 +52,6 @@ extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern struct xsave_struct init_xstate_ctx; -extern void xsave_init(void); extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); /* These macros all use (%edi)/(%rdi) as the single memory argument.
*/ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d6234ad..77599fe 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -98,7 +98,7 @@ static void fpstate_xstate_init_size(void) { /* * Note that xstate_size might be overwriten later during - * xsave_init(). + * fpu__init_system_xstate(). */ if (!cpu_has_fpu) { @@ -146,7 +146,8 @@ void fpu__init_cpu(void) cr0 |= X86_CR0_EM; write_cr0(cr0); - xsave_init(); + fpu__init_system_xstate(); + fpu__init_cpu_xstate(); } /* @@ -159,7 +160,8 @@ void fpu__init_system(void) fpu__init_cpu(); mxcsr_feature_mask_init(); - xsave_init(); + fpu__init_system_xstate(); + fpu__init_cpu_xstate(); eager_fpu_init(); } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 9d5ff90..fa9b954 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -646,8 +646,7 @@ static void __init init_xstate_size(void) * Enable and initialize the xsave feature. * Called once per system bootup. * - * ( Not marked __init because of false positive section warnings - * generated by xsave_init(). ) + * ( Not marked __init because of false positive section warnings. ) */ void fpu__init_system_xstate(void) { @@ -714,16 +713,6 @@ void fpu__init_system_xstate(void) } /* - * For the very first instance, this calls fpu__init_system_xstate(); - * for all subsequent instances, this calls fpu__init_cpu_xstate(). - */ -void xsave_init(void) -{ - fpu__init_system_xstate(); - fpu__init_cpu_xstate(); -} - -/* * setup_init_fpu_buf() is __init and it is OK to call it here because * init_xstate_ctx will be unset only once during boot. */ -- cgit v0.10.2 From 429ced50a0e5f863f95b100749043451e1968c4c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:21:08 +0200 Subject: x86/fpu: Do fpu__init_system_xstate() only from fpu__init_system() Only call xstate system setup routines from fpu__init_system(). Likewise, don't call fpu__init_cpu_xstate() from fpu__init_system(). Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 77599fe..c1b2d1c 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -146,7 +146,6 @@ void fpu__init_cpu(void) cr0 |= X86_CR0_EM; write_cr0(cr0); - fpu__init_system_xstate(); fpu__init_cpu_xstate(); } /* @@ -161,7 +160,6 @@ void fpu__init_system(void) mxcsr_feature_mask_init(); fpu__init_system_xstate(); - fpu__init_cpu_xstate(); eager_fpu_init(); } -- cgit v0.10.2 From 2507e1c03f577173d613d6728329eb220724c577 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:35:53 +0200 Subject: x86/fpu: Set up the legacy FPU init image from fpu__init_system() The legacy FPU init image is used on older CPUs that don't run the xstate init. But the init code is called within setup_init_fpu_buf(), an xstate method. Move this legacy init out of the xstate code and put it into fpu/init.c. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index c1b2d1c..30d2d5d 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -158,6 +158,12 @@ void fpu__init_system(void) /* The FPU has to be operational for some of the later FPU init activities: */ fpu__init_cpu(); + /* + * Set up the legacy init FPU context. (xstate init might overwrite this + * with a more modern format, if the CPU supports it.) + */ + fx_finit(&init_xstate_ctx.i387); + mxcsr_feature_mask_init(); fpu__init_system_xstate(); eager_fpu_init(); diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index fa9b954..6be0a98 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -576,12 +576,6 @@ static void setup_init_fpu_buf(void) return; on_boot_cpu = 0; - /* - * Setup init_xstate_ctx to represent the init state of - * all the features managed by the xsave - */ - fx_finit(&init_xstate_ctx.i387); - if (!cpu_has_xsave) return; -- cgit v0.10.2 From a5cb56e9a68aa0e19281ad93f1fce6e6802e3206 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:42:47 +0200 Subject: x86/fpu: Remove setup_init_fpu_buf() call from eager_fpu_init() It's a pure xstate method now, no need for this duplicate call. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 6be0a98..097f03e 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -724,8 +724,6 @@ void __init_refok eager_fpu_init(void) stts(); return; } - - setup_init_fpu_buf(); } -- cgit v0.10.2 From 89abbe01a46a9f47f8d07e2817704bfcbd8f6f9e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 09:02:51 +0200 Subject: x86/fpu: Move all eager-fpu setup code to eager_fpu_init() The FPU context switch type (lazy or eager) setup code is split into two places currently - move it all to eager_fpu_init(). Note that the code we move will now be executed on non-xstate CPUs as well, but this should be safe: both xfeatures_mask and cpu_has_xsaveopt are 0 there. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 097f03e..1b920a1 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -686,20 +686,6 @@ void fpu__init_system_xstate(void) prepare_fx_sw_frame(); setup_init_fpu_buf(); - /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) - eagerfpu = ENABLE; - - if (xfeatures_mask & XSTATE_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", - xfeatures_mask & XSTATE_EAGER); - xfeatures_mask &= ~XSTATE_EAGER; - } else { - eagerfpu = ENABLE; - } - } - pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", xfeatures_mask, xstate_size, @@ -715,6 +701,20 @@ void __init_refok eager_fpu_init(void) WARN_ON(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; + + if (xfeatures_mask & XSTATE_EAGER) { + if (eagerfpu == DISABLE) { + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + xfeatures_mask & XSTATE_EAGER); + xfeatures_mask &= ~XSTATE_EAGER; + } else { + eagerfpu = ENABLE; + } + } + if (eagerfpu == ENABLE) setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); -- cgit v0.10.2 From 6f5d265afffb5968e25a8951a085c0467558c073 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 20:11:05 +0200 Subject: x86/fpu: Move eager_fpu_init() to fpu/init.c Move eager_fpu_init() and the 'eagerfpu' boot parameter handling function to the generic FPU init file: it's generic FPU functionality. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 30d2d5d..fa9678f 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -149,6 +149,54 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } +static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; + +static int __init eager_fpu_setup(char *s) +{ + if (!strcmp(s, "on")) + eagerfpu = ENABLE; + else if (!strcmp(s, "off")) + eagerfpu = DISABLE; + else if (!strcmp(s, "auto")) + eagerfpu = AUTO; + return 1; +} +__setup("eagerfpu=", eager_fpu_setup); + +/* + * setup_init_fpu_buf() is __init and it is OK to call it here because + * init_xstate_ctx will be unset only once during boot. + */ +void __init_refok eager_fpu_init(void) +{ + WARN_ON(current->thread.fpu.fpstate_active); + current_thread_info()->status = 0; + + /* Auto enable eagerfpu for xsaveopt */ + if (cpu_has_xsaveopt && eagerfpu != DISABLE) + eagerfpu = ENABLE; + + if (xfeatures_mask & XSTATE_EAGER) { + if (eagerfpu == DISABLE) { + pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", + xfeatures_mask & XSTATE_EAGER); + xfeatures_mask &= ~XSTATE_EAGER; + } else { + eagerfpu = ENABLE; + } + } + + if (eagerfpu == ENABLE) + setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); + + printk_once(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? 
"eager" : "lazy"); + + if (!cpu_has_eager_fpu) { + stts(); + return; + } +} + /* * Called on the boot CPU once per system bootup, to set up the initial FPU state that * is later cloned into all processes. diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 1b920a1..a232363 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -599,20 +599,6 @@ static void setup_init_fpu_buf(void) xsave_state_booting(&init_xstate_ctx); } -static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -static int __init eager_fpu_setup(char *s) -{ - if (!strcmp(s, "on")) - eagerfpu = ENABLE; - else if (!strcmp(s, "off")) - eagerfpu = DISABLE; - else if (!strcmp(s, "auto")) - eagerfpu = AUTO; - return 1; -} -__setup("eagerfpu=", eager_fpu_setup); - - /* * Calculate total size of enabled xstates in XCR0/xfeatures_mask. */ @@ -693,40 +679,6 @@ void fpu__init_system_xstate(void) } /* - * setup_init_fpu_buf() is __init and it is OK to call it here because - * init_xstate_ctx will be unset only once during boot. - */ -void __init_refok eager_fpu_init(void) -{ - WARN_ON(current->thread.fpu.fpstate_active); - current_thread_info()->status = 0; - - /* Auto enable eagerfpu for xsaveopt */ - if (cpu_has_xsaveopt && eagerfpu != DISABLE) - eagerfpu = ENABLE; - - if (xfeatures_mask & XSTATE_EAGER) { - if (eagerfpu == DISABLE) { - pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", - xfeatures_mask & XSTATE_EAGER); - xfeatures_mask &= ~XSTATE_EAGER; - } else { - eagerfpu = ENABLE; - } - } - - if (eagerfpu == ENABLE) - setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); - - printk_once(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); - - if (!cpu_has_eager_fpu) { - stts(); - return; - } -} - -/* * Restore minimal FPU state after suspend: */ void fpu__resume_cpu(void) -- cgit v0.10.2 From 064e51e3c8aee6cfb03ab75e9f9551db3924eb07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:48:54 +0200 Subject: x86/fpu: Clean up eager_fpu_init() and rename it to fpu__ctx_switch_init() It's not an xsave specific function anymore, so rename it accordingly and also clean it up a bit: - remove the obsolete __init_refok, as the code paths are not mixed anymore - rename it from eager_fpu_init() to fpu__ctx_switch_init() - remove stray 'return;' - make it static to its only user Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index fa9678f..d6d5820 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -167,7 +167,7 @@ __setup("eagerfpu=", eager_fpu_setup); * setup_init_fpu_buf() is __init and it is OK to call it here because * init_xstate_ctx will be unset only once during boot. */ -void __init_refok eager_fpu_init(void) +static void fpu__ctx_switch_init(void) { WARN_ON(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; @@ -191,10 +191,8 @@ void __init_refok eager_fpu_init(void) printk_once(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? 
"eager" : "lazy"); - if (!cpu_has_eager_fpu) { + if (!cpu_has_eager_fpu) stts(); - return; - } } /* @@ -214,7 +212,7 @@ void fpu__init_system(void) mxcsr_feature_mask_init(); fpu__init_system_xstate(); - eager_fpu_init(); + fpu__ctx_switch_init(); } void fpu__cpu_init(void) -- cgit v0.10.2 From 011545b570be22191047d07299515c1d711eeb38 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 08:28:31 +0200 Subject: x86/fpu: Split fpu__ctx_switch_init() into _cpu() and _system() portions So fpu__ctx_switch_init() has two aspects: a once per bootup functionality that sets up a capability flag, and a per CPU functionality that sets CR0::TS. Split the function. Note that at this stage we still have duplicate calls into these methods, as both the _system() and the _cpu() methods are run on all CPUs, with lower level on_boot_cpu flags filtering out the duplicates where needed. So add TS flag clearing as well, to handle the aftermath of early CPU init sequences that might call in without having eager-fpu set - don't assume the TS flag is cleared. Calling each from its respective init level will happen later on. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index d6d5820..2752b4b 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -118,6 +118,18 @@ static void fpstate_xstate_init_size(void) } /* + * Initialize the TS bit in CR0 according to the style of context-switches + * we are using: + */ +static void fpu__init_cpu_ctx_switch(void) +{ + if (!cpu_has_eager_fpu) + stts(); + else + clts(); +} + +/* * Enable all supported FPU features. Called when a CPU is brought online. */ void fpu__init_cpu(void) @@ -167,7 +179,7 @@ __setup("eagerfpu=", eager_fpu_setup); * setup_init_fpu_buf() is __init and it is OK to call it here because * init_xstate_ctx will be unset only once during boot. */ -static void fpu__ctx_switch_init(void) +static void fpu__init_system_ctx_switch(void) { WARN_ON(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; @@ -190,9 +202,6 @@ static void fpu__ctx_switch_init(void) setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); printk_once(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); - - if (!cpu_has_eager_fpu) - stts(); } /* @@ -212,7 +221,8 @@ void fpu__init_system(void) mxcsr_feature_mask_init(); fpu__init_system_xstate(); - fpu__ctx_switch_init(); + fpu__init_system_ctx_switch(); + fpu__init_cpu_ctx_switch(); } void fpu__cpu_init(void) -- cgit v0.10.2 From 530b37e43ce3f0bb9969308c0a64901b442f8e0a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:27:44 +0200 Subject: x86/fpu: Do CLTS fpu__init_system() mxcsr_feature_mask_init() depends on TS being cleared, as it executes an FXSAVE instruction. After later changes we will move the TS setup into fpu__init_cpu(), which will interact with this - so clear the TS flag explicitly. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 2752b4b..567e7e6 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -214,6 +214,13 @@ void fpu__init_system(void) fpu__init_cpu(); /* + * But don't leave CR0::TS set yet, as some of the FPU setup methods depend + * on being able to execute FPU instructions that will fault on a set TS, + * such as the FXSAVE in mxcsr_feature_mask_init(). + */ + clts(); + + /* * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ -- cgit v0.10.2 From 997578b14c2730226a804d53ce681d3506f2f876 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 10:35:57 +0200 Subject: x86/fpu: Move the fpstate_xstate_init_size() call into fpu__init_system() The fpstate_xstate_init_size() function sets up a basic xstate_size, and is currently called during fpu__detect(). Its real dependency is to be called before fpu__init_system_xstate(). So move the function call site into fpu__init_system(), to right before the fpu__init_system_xstate() call. Also add a once-per-boot flag to fpstate_xstate_init_size(); we'll remove this quirk later once we've cleaned up the init dependencies. This moves the two related functions closer to each other and makes them both part of the _init_system() functionality. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 567e7e6..ca3468d 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -96,6 +96,12 @@ static void fpstate_xstate_init_size(void) { + static bool on_boot_cpu = 1; + + if (!on_boot_cpu) + return; + on_boot_cpu = 0; + /* * Note that xstate_size might be overwriten later during * fpu__init_system_xstate(). @@ -227,7 +233,10 @@ void fpu__init_system(void) fx_finit(&init_xstate_ctx.i387); mxcsr_feature_mask_init(); + + fpstate_xstate_init_size(); fpu__init_system_xstate(); + fpu__init_system_ctx_switch(); fpu__init_cpu_ctx_switch(); } @@ -270,6 +279,4 @@ void fpu__detect(struct cpuinfo_x86 *c) clear_cpu_cap(c, X86_FEATURE_FPU); /* The final cr0 value is set later, in fpu_init() */ - - fpstate_xstate_init_size(); } -- cgit v0.10.2 From 3960fccf2e22c3b3d61bd5a45b6172e66e660d8b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:27:44 +0200 Subject: x86/fpu: Call fpu__init_cpu_ctx_switch() from fpu__init_cpu() fpu__init_cpu_ctx_switch() is currently called from fpu__init_system(), which is the wrong place for it: call it from the proper high level per CPU init function, fpu__init_cpu(). Note, we still keep the old call site as well, because it depends on having proper CR0::TS setup. We'll fix this in the next patch. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index ca3468d..b3ea4f8 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -165,6 +165,7 @@ void fpu__init_cpu(void) write_cr0(cr0); fpu__init_cpu_xstate(); + fpu__init_cpu_ctx_switch(); } static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; -- cgit v0.10.2 From 067051ccd209623cb56152cf4cb06616ee2bcc5c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 25 Apr 2015 08:27:44 +0200 Subject: x86/fpu: Do system-wide setup from fpu__detect() fpu__cpu_init() is called on every CPU, so it is the wrong place to call fpu__init_system() from. Call it from fpu__detect(): this is early CPU init code, but we already have CPU features detected, so we can call the system-wide FPU init code from here. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index b3ea4f8..6e422cf 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -245,7 +245,6 @@ void fpu__init_system(void) void fpu__cpu_init(void) { fpu__init_cpu(); - fpu__init_system(); } static int __init no_387(char *s) @@ -279,5 +278,6 @@ void fpu__detect(struct cpuinfo_x86 *c) else clear_cpu_cap(c, X86_FEATURE_FPU); + fpu__init_system(); /* The final cr0 value is set later, in fpu_init() */ } -- cgit v0.10.2 From 7202ab46f7392265c1ecbf03f600393bf32a8bdf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Apr 2015 12:17:57 +0200 Subject: x86/fpu: Remove fpu__init_cpu_ctx_switch() call from fpu__init_system() We are now doing the fpu__init_cpu_ctx_switch() call from fpu__init_cpu(), so there's no need to call it from fpu__init_system() anymore. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6e422cf..0c9c106 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -239,7 +239,6 @@ void fpu__init_system(void) fpu__init_system_xstate(); fpu__init_system_ctx_switch(); - fpu__init_cpu_ctx_switch(); } void fpu__cpu_init(void) -- cgit v0.10.2 From 21c4cd108a1b144ad645355bfee1f8be937f03a2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 14:27:17 +0200 Subject: x86/fpu: Simplify fpu__cpu_init() After the latest round of cleanups, fpu__cpu_init() has become a simple call to fpu__init_cpu(). Remove the extra fpu__cpu_init() layer and call fpu__init_cpu() directly. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 71d44be..4617eeb 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -36,7 +36,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, #define MXCSR_DEFAULT 0x1f80 extern unsigned int mxcsr_feature_mask; -extern void fpu__cpu_init(void); +extern void fpu__init_cpu(void); extern void eager_fpu_init(void); extern void fpu__init_system_xstate(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8f6a4ea..d28f8eb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1435,7 +1435,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu__cpu_init(); + fpu__init_cpu(); if (is_uv_system()) uv_cpu_init(); @@ -1491,7 +1491,7 @@ void cpu_init(void) clear_all_debug_regs(); dbg_restore_debug_regs(); - fpu__cpu_init(); + fpu__init_cpu(); } #endif diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 0c9c106..cf27bbe 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -241,11 +241,6 @@ void fpu__init_system(void) fpu__init_system_ctx_switch(); } -void fpu__cpu_init(void) -{ - fpu__init_cpu(); -} - static int __init no_387(char *s) { setup_clear_cpu_cap(X86_FEATURE_FPU); diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 43f8704..98088bf 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1423,7 +1423,7 @@ static void xen_pvh_set_cr_flags(int cpu) return; /* * For BSP, PSE PGE are set in probe_page_size_mask(), for APs - * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__cpu_init(). + * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). */ if (cpu_has_pse) cr4_set_bits_and_update_boot(X86_CR4_PSE); -- cgit v0.10.2 From b11316ed9ed9e453562d9f89a39d344331a3ec1d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 14:32:34 +0200 Subject: x86/fpu: Factor out fpu__init_cpu_generic() Factor out the generic bits from fpu__init_cpu(), to create a flat sequence of per CPU initialization function calls: fpu__init_cpu_generic(); fpu__init_cpu_xstate(); fpu__init_cpu_ctx_switch(); Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index cf27bbe..37e8b13 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -136,9 +136,9 @@ static void fpu__init_cpu_ctx_switch(void) } /* - * Enable all supported FPU features. Called when a CPU is brought online. + * Initialize the registers found in all CPUs, CR0 and CR4: */ -void fpu__init_cpu(void) +static void fpu__init_cpu_generic(void) { unsigned long cr0; unsigned long cr4_mask = 0; @@ -163,7 +163,14 @@ void fpu__init_cpu(void) if (!cpu_has_fpu) cr0 |= X86_CR0_EM; write_cr0(cr0); +} +/* + * Enable all supported FPU features. Called when a CPU is brought online. 
+ */ +void fpu__init_cpu(void) +{ + fpu__init_cpu_generic(); fpu__init_cpu_xstate(); fpu__init_cpu_ctx_switch(); } -- cgit v0.10.2 From 7218e8b723dc9ceb71cf2fbd3d019e9e11b7d3cf Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 14:35:54 +0200 Subject: x86/fpu: Factor out fpu__init_system_generic() Factor out the generic bits from fpu__init_system(). Rename mxcsr_feature_mask_init() to fpu__init_system_mxcsr() to bring it in line with the rest of the nomenclature. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 37e8b13..c3f3a89c 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -72,7 +72,7 @@ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); -static void mxcsr_feature_mask_init(void) +static void fpu__init_system_mxcsr(void) { unsigned int mask = 0; @@ -94,6 +94,20 @@ static void mxcsr_feature_mask_init(void) mxcsr_feature_mask &= mask; } +/* + * Once per bootup FPU initialization sequences that will run on most x86 CPUs: + */ +static void fpu__init_system_generic(void) +{ + /* + * Set up the legacy init FPU context. (xstate init might overwrite this + * with a more modern format, if the CPU supports it.) + */ + fx_finit(&init_xstate_ctx.i387); + + fpu__init_system_mxcsr(); +} + static void fpstate_xstate_init_size(void) { static bool on_boot_cpu = 1; @@ -230,18 +244,11 @@ void fpu__init_system(void) /* * But don't leave CR0::TS set yet, as some of the FPU setup methods depend * on being able to execute FPU instructions that will fault on a set TS, - * such as the FXSAVE in mxcsr_feature_mask_init(). + * such as the FXSAVE in fpu__init_system_mxcsr(). */ clts(); - /* - * Set up the legacy init FPU context. (xstate init might overwrite this - * with a more modern format, if the CPU supports it.) - */ - fx_finit(&init_xstate_ctx.i387); - - mxcsr_feature_mask_init(); - + fpu__init_system_generic(); fpstate_xstate_init_size(); fpu__init_system_xstate(); -- cgit v0.10.2 From 2e2f3da7714f323a0db65baa19b2b8110cc23f95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 14:40:54 +0200 Subject: x86/fpu: Factor out fpu__init_system_early_generic() Move the generic bits of fpu__detect() into fpu__init_system_early_generic(). We'll move some other code here too in a followup patch. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index c3f3a89c..3637c50 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -65,6 +65,29 @@ void fpu__init_check_bugs(void) } /* + * The earliest FPU detection code: + */ +static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +{ + unsigned long cr0; + u16 fsw, fcw; + + fsw = fcw = 0xffff; + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS | X86_CR0_EM); + write_cr0(cr0); + + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" + : "+m" (fsw), "+m" (fcw)); + + if (fsw == 0 && (fcw & 0x103f) == 0x003f) + set_cpu_cap(c, X86_FEATURE_FPU); + else + clear_cpu_cap(c, X86_FEATURE_FPU); +} + +/* * Boot time FPU feature detection code: */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; @@ -269,23 +292,7 @@ __setup("no387", no_387); */ void fpu__detect(struct cpuinfo_x86 *c) { - unsigned long cr0; - u16 fsw, fcw; - - fsw = fcw = 0xffff; - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS | X86_CR0_EM); - write_cr0(cr0); - - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); - - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); - else - clear_cpu_cap(c, X86_FEATURE_FPU); - + fpu__init_system_early_generic(c); fpu__init_system(); /* The final cr0 value is set later, in fpu_init() */ } -- cgit v0.10.2 From e83ab9ad97a702917cb018e6c5a7d1176ff95305 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 14:43:44 +0200 Subject: x86/fpu: Move !FPU check into fpu__init_system_early_generic() There's a !FPU related sanity check in fpu__init_cpu_generic(), which is executed on every CPU onlining - even though we should do this only once, and during system init. Move this check to fpu__init_system_early_generic(). Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 3637c50..69cdadd 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -85,6 +85,15 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_FPU); else clear_cpu_cap(c, X86_FEATURE_FPU); + +#ifndef CONFIG_MATH_EMULATION + if (!cpu_has_fpu) { + pr_emerg("No FPU found and no math emulation present\n"); + pr_emerg("Giving up\n"); + for (;;) + asm volatile("hlt"); + } +#endif } /* @@ -180,14 +189,6 @@ static void fpu__init_cpu_generic(void) unsigned long cr0; unsigned long cr4_mask = 0; -#ifndef CONFIG_MATH_EMULATION - if (!cpu_has_fpu) { - pr_emerg("No FPU found and no math emulation present\n"); - pr_emerg("Giving up\n"); - for (;;) - asm volatile("hlt"); - } -#endif if (cpu_has_fxsr) cr4_mask |= X86_CR4_OSFXSR; if (cpu_has_xmm) -- cgit v0.10.2 From 0bf23f3d6cadb2f0961d0ea370371cf35480bcb5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 14:48:08 +0200 Subject: x86/fpu: Factor out FPU bug checks into fpu/bugs.c Create a separate fpu/bugs.c file so that when reading the generic FPU code we don't have to wade through all the bugcheck related code first. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile index 50464a7..2020a2b 100644 --- a/arch/x86/kernel/fpu/Makefile +++ b/arch/x86/kernel/fpu/Makefile @@ -2,4 +2,4 @@ # Build rules for the FPU support code: # -obj-y += init.o core.o xsave.o +obj-y += init.o bugs.o core.o xsave.o diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c new file mode 100644 index 0000000..400a3d7 --- /dev/null +++ b/arch/x86/kernel/fpu/bugs.c @@ -0,0 +1,64 @@ +/* + * x86 FPU bug checks: + */ +#include + +/* + * Boot time CPU/FPU FDIV bug detection code: + */ + +static double __initdata x = 4195835.0; +static double __initdata y = 3145727.0; + +/* + * This used to check for exceptions.. + * However, it turns out that to support that, + * the XMM trap handlers basically had to + * be buggy. So let's have a correct XMM trap + * handler, and forget about printing out + * some status at boot. + * + * We should really only care about bugs here + * anyway. Not features. + */ +static void __init check_fpu(void) +{ + s32 fdiv_bug; + + kernel_fpu_begin(); + + /* + * trap_init() enabled FXSR and company _before_ testing for FP + * problems here. + * + * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug + */ + __asm__("fninit\n\t" + "fldl %1\n\t" + "fdivl %2\n\t" + "fmull %2\n\t" + "fldl %1\n\t" + "fsubp %%st,%%st(1)\n\t" + "fistpl %0\n\t" + "fwait\n\t" + "fninit" + : "=m" (*&fdiv_bug) + : "m" (*&x), "m" (*&y)); + + kernel_fpu_end(); + + if (fdiv_bug) { + set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); + pr_warn("Hmm, FPU with FDIV bug\n"); + } +} + +void fpu__init_check_bugs(void) +{ + /* + * kernel_fpu_begin/end() in check_fpu() relies on the patched + * alternative instructions. + */ + if (cpu_has_fpu) + check_fpu(); +} diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 69cdadd..63cd170 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -5,66 +5,6 @@ #include /* - * Boot time CPU/FPU FDIV bug detection code: - */ - -static double __initdata x = 4195835.0; -static double __initdata y = 3145727.0; - -/* - * This used to check for exceptions.. - * However, it turns out that to support that, - * the XMM trap handlers basically had to - * be buggy. So let's have a correct XMM trap - * handler, and forget about printing out - * some status at boot. - * - * We should really only care about bugs here - * anyway. Not features. - */ -static void __init check_fpu(void) -{ - s32 fdiv_bug; - - kernel_fpu_begin(); - - /* - * trap_init() enabled FXSR and company _before_ testing for FP - * problems here. - * - * Test for the divl bug: http://en.wikipedia.org/wiki/Fdiv_bug - */ - __asm__("fninit\n\t" - "fldl %1\n\t" - "fdivl %2\n\t" - "fmull %2\n\t" - "fldl %1\n\t" - "fsubp %%st,%%st(1)\n\t" - "fistpl %0\n\t" - "fwait\n\t" - "fninit" - : "=m" (*&fdiv_bug) - : "m" (*&x), "m" (*&y)); - - kernel_fpu_end(); - - if (fdiv_bug) { - set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); - pr_warn("Hmm, FPU with FDIV bug\n"); - } -} - -void fpu__init_check_bugs(void) -{ - /* - * kernel_fpu_begin/end() in check_fpu() relies on the patched - * alternative instructions. 
- */ - if (cpu_has_fpu) - check_fpu(); -} - -/* * The earliest FPU detection code: */ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) -- cgit v0.10.2 From 71eb3c6d155c54fb74d2f95166162296a8f9af12 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 5 May 2015 10:54:04 +0200 Subject: x86/fpu: Make check_fpu() init ordering independent check_fpu() currently relies on being called early in the init sequence, when CR0::TS has not been set up yet. Save/restore CR0::TS across this function, to make it invariant to init ordering. This way we'll be able to move the generic FPU setup routines earlier in the init sequence. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index 400a3d7..449b5f3 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -23,8 +23,13 @@ static double __initdata y = 3145727.0; */ static void __init check_fpu(void) { + u32 cr0_saved; s32 fdiv_bug; + /* We might have CR0::TS set already, clear it: */ + cr0_saved = read_cr0(); + write_cr0(cr0_saved & ~X86_CR0_TS); + kernel_fpu_begin(); /* @@ -47,6 +52,8 @@ static void __init check_fpu(void) kernel_fpu_end(); + write_cr0(cr0_saved); + if (fdiv_bug) { set_cpu_bug(&boot_cpu_data, X86_BUG_FDIV); pr_warn("Hmm, FPU with FDIV bug\n"); -- cgit v0.10.2 From dd863880acd24a23d71576e402f999375d0b4b80 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 15:07:18 +0200 Subject: x86/fpu: Move fpu__init_system_early_generic() out of fpu__detect() Move the fpu__init_system_early_generic() call into fpu__init_system(), which hosts all the system init calls. Expose fpu__init_system() to other modules - this will be our main and only system init function. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4617eeb..5a1fa5b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -41,6 +41,7 @@ extern void eager_fpu_init(void); extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); +extern void fpu__init_system(struct cpuinfo_x86 *c); DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 63cd170..1155a98 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -5,7 +5,10 @@ #include /* - * The earliest FPU detection code: + * The earliest FPU detection code. + * + * Set the X86_FEATURE_FPU CPU-capability bit based on + * trying to execute an actual sequence of FPU instructions: */ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) { @@ -200,8 +203,10 @@ static void fpu__init_system_ctx_switch(void) * Called on the boot CPU once per system bootup, to set up the initial FPU state that * is later cloned into all processes. 
*/ -void fpu__init_system(void) +void fpu__init_system(struct cpuinfo_x86 *c) { + fpu__init_system_early_generic(c); + /* The FPU has to be operational for some of the later FPU init activities: */ fpu__init_cpu(); @@ -227,13 +232,8 @@ static int __init no_387(char *s) __setup("no387", no_387); -/* - * Set the X86_FEATURE_FPU CPU-capability bit based on - * trying to execute an actual sequence of FPU instructions: - */ void fpu__detect(struct cpuinfo_x86 *c) { - fpu__init_system_early_generic(c); - fpu__init_system(); + fpu__init_system(c); /* The final cr0 value is set later, in fpu_init() */ } -- cgit v0.10.2 From c66e3f28237199629358e9e5a76973c400a54041 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 15:12:44 +0200 Subject: x86/fpu: Remove the extra fpu__detect() layer Now that fpu__detect() has become an empty layer around fpu__init_system(), eliminate it and make fpu__init_system() the main system initialization routine. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0f4add4..b9e4874 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -167,7 +167,6 @@ extern const struct seq_operations cpuinfo_op; #define cache_line_size() (boot_cpu_data.x86_cache_alignment) extern void cpu_detect(struct cpuinfo_x86 *c); -extern void fpu__detect(struct cpuinfo_x86 *c); extern void early_cpu_init(void); extern void identify_boot_cpu(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d28f8eb..d15610b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -758,7 +758,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); - fpu__detect(c); + fpu__init_system(c); if (this_cpu->c_early_init) this_cpu->c_early_init(c); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 1155a98..77b5d40 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -231,9 +231,3 @@ static int __init no_387(char *s) } __setup("no387", no_387); - -void fpu__detect(struct cpuinfo_x86 *c) -{ - fpu__init_system(c); - /* The final cr0 value is set later, in fpu_init() */ -} -- cgit v0.10.2 From 7638b74b56640d9c266b5b3622109128e30bdc1a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 15:23:37 +0200 Subject: x86/fpu: Rename fpstate_xstate_init_size() to fpu__init_system_xstate_size_legacy() To bring it in line with the other init_system*() methods. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 77b5d40..a7ce5bc 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -83,7 +83,7 @@ static void fpu__init_system_generic(void) fpu__init_system_mxcsr(); } -static void fpstate_xstate_init_size(void) +static void fpu__init_system_xstate_size_legacy(void) { static bool on_boot_cpu = 1; @@ -218,7 +218,7 @@ void fpu__init_system(struct cpuinfo_x86 *c) clts(); fpu__init_system_generic(); - fpstate_xstate_init_size(); + fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); fpu__init_system_ctx_switch(); -- cgit v0.10.2 From 41e78410d89cc0e60d90e6a62a060d5d4084accb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 15:32:40 +0200 Subject: x86/fpu: Reorder init methods Reorder init methods in order of their relationship and usage, to form coherent blocks throughout the whole file. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index a7ce5bc..dbff133 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -4,6 +4,46 @@ #include #include +static void fpu__init_cpu_ctx_switch(void) +{ + if (!cpu_has_eager_fpu) + stts(); + else + clts(); +} + +/* + * Initialize the registers found in all CPUs, CR0 and CR4: + */ +static void fpu__init_cpu_generic(void) +{ + unsigned long cr0; + unsigned long cr4_mask = 0; + + if (cpu_has_fxsr) + cr4_mask |= X86_CR4_OSFXSR; + if (cpu_has_xmm) + cr4_mask |= X86_CR4_OSXMMEXCPT; + if (cr4_mask) + cr4_set_bits(cr4_mask); + + cr0 = read_cr0(); + cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ + if (!cpu_has_fpu) + cr0 |= X86_CR0_EM; + write_cr0(cr0); +} + +/* + * Enable all supported FPU features. Called when a CPU is brought online. + */ +void fpu__init_cpu(void) +{ + fpu__init_cpu_generic(); + fpu__init_cpu_xstate(); + fpu__init_cpu_ctx_switch(); +} + /* * The earliest FPU detection code. * @@ -44,9 +84,6 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -unsigned int xstate_size; -EXPORT_SYMBOL_GPL(xstate_size); - static void fpu__init_system_mxcsr(void) { unsigned int mask = 0; @@ -83,6 +120,15 @@ static void fpu__init_system_generic(void) fpu__init_system_mxcsr(); } +unsigned int xstate_size; +EXPORT_SYMBOL_GPL(xstate_size); + +/* + * Set up the xstate_size based on the legacy FPU context size. + * + * We set this up first, and later it will be overwritten by + * fpu__init_system_xstate() if the CPU knows about xstates. 
+ */ static void fpu__init_system_xstate_size_legacy(void) { static bool on_boot_cpu = 1; @@ -112,50 +158,6 @@ static void fpu__init_system_xstate_size_legacy(void) } } -/* - * Initialize the TS bit in CR0 according to the style of context-switches - * we are using: - */ -static void fpu__init_cpu_ctx_switch(void) -{ - if (!cpu_has_eager_fpu) - stts(); - else - clts(); -} - -/* - * Initialize the registers found in all CPUs, CR0 and CR4: - */ -static void fpu__init_cpu_generic(void) -{ - unsigned long cr0; - unsigned long cr4_mask = 0; - - if (cpu_has_fxsr) - cr4_mask |= X86_CR4_OSFXSR; - if (cpu_has_xmm) - cr4_mask |= X86_CR4_OSXMMEXCPT; - if (cr4_mask) - cr4_set_bits(cr4_mask); - - cr0 = read_cr0(); - cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ - if (!cpu_has_fpu) - cr0 |= X86_CR0_EM; - write_cr0(cr0); -} - -/* - * Enable all supported FPU features. Called when a CPU is brought online. - */ -void fpu__init_cpu(void) -{ - fpu__init_cpu_generic(); - fpu__init_cpu_xstate(); - fpu__init_cpu_ctx_switch(); -} - static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; static int __init eager_fpu_setup(char *s) -- cgit v0.10.2 From ae02679c566fb1c2f76d3c6dffef977a9d69474a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 15:36:46 +0200 Subject: x86/fpu: Add more comments to the FPU init code Extend the comments of the FPU init code, and fix old ones. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index dbff133..7ae5a62 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -1,9 +1,13 @@ /* - * x86 FPU boot time init code + * x86 FPU boot time init code: */ #include #include +/* + * Initialize the TS bit in CR0 according to the style of context-switches + * we are using: + */ static void fpu__init_cpu_ctx_switch(void) { if (!cpu_has_eager_fpu) @@ -35,7 +39,7 @@ static void fpu__init_cpu_generic(void) } /* - * Enable all supported FPU features. Called when a CPU is brought online. + * Enable all supported FPU features. Called when a CPU is brought online: */ void fpu__init_cpu(void) { @@ -71,8 +75,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) #ifndef CONFIG_MATH_EMULATION if (!cpu_has_fpu) { - pr_emerg("No FPU found and no math emulation present\n"); - pr_emerg("Giving up\n"); + pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); for (;;) asm volatile("hlt"); } @@ -120,6 +123,12 @@ static void fpu__init_system_generic(void) fpu__init_system_mxcsr(); } +/* + * Size of the FPU context state. All tasks in the system use the + * same context size, regardless of what portion they use. + * This is inherent to the XSAVE architecture which puts all state + * components into a single, continuous memory block: + */ unsigned int xstate_size; EXPORT_SYMBOL_GPL(xstate_size); @@ -158,6 +167,37 @@ static void fpu__init_system_xstate_size_legacy(void) } } +/* + * FPU context switching strategies: + * + * Against popular belief, we don't do lazy FPU saves, due to the + * task migration complications it brings on SMP - we only do + * lazy FPU restores. 
+ * + * 'lazy' is the traditional strategy, which is based on setting + * CR0::TS to 1 during context-switch (instead of doing a full + * restore of the FPU state), which causes the first FPU instruction + * after the context switch (whenever it is executed) to fault - at + * which point we lazily restore the FPU state into FPU registers. + * + * Tasks are of course under no obligation to execute FPU instructions, + * so it can easily happen that another context-switch occurs without + * a single FPU instruction being executed. If we eventually switch + * back to the original task (that still owns the FPU) then we have + * not only saved the restores along the way, but we also have the + * FPU ready to be used for the original task. + * + * 'eager' switching is used on modern CPUs, there we switch the FPU + * state during every context switch, regardless of whether the task + * has used FPU instructions in that time slice or not. This is done + * because modern FPU context saving instructions are able to optimize + * state saving and restoration in hardware: they can detect both + * unused and untouched FPU state and optimize accordingly. + * + * [ Note that even in 'lazy' mode we might optimize context switches + * to use 'eager' restores, if we detect that a task is using the FPU + * frequently. See the fpu->counter logic in fpu/internal.h for that. ] + */ static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; static int __init eager_fpu_setup(char *s) @@ -173,8 +213,7 @@ __setup("eagerfpu=", eager_fpu_setup); /* - * setup_init_fpu_buf() is __init and it is OK to call it here because - * init_xstate_ctx will be unset only once during boot. + * Pick the FPU context switching strategy: */ static void fpu__init_system_ctx_switch(void) { @@ -202,20 +241,24 @@ static void fpu__init_system_ctx_switch(void) } /* - * Called on the boot CPU once per system bootup, to set up the initial FPU state that - * is later cloned into all processes. + * Called on the boot CPU once per system bootup, to set up the initial + * FPU state that is later cloned into all processes: */ void fpu__init_system(struct cpuinfo_x86 *c) { fpu__init_system_early_generic(c); - /* The FPU has to be operational for some of the later FPU init activities: */ + /* + * The FPU has to be operational for some of the + * later FPU init activities: + */ fpu__init_cpu(); /* - * But don't leave CR0::TS set yet, as some of the FPU setup methods depend - * on being able to execute FPU instructions that will fault on a set TS, - * such as the FXSAVE in fpu__init_system_mxcsr(). + * But don't leave CR0::TS set yet, as some of the FPU setup + * methods depend on being able to execute FPU instructions + * that will fault on a set TS, such as the FXSAVE in + * fpu__init_system_mxcsr(). */ clts(); @@ -226,6 +269,9 @@ void fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_ctx_switch(); } +/* + * Boot parameter to turn off FPU support and fall back to math-emu: + */ static int __init no_387(char *s) { setup_clear_cpu_cap(X86_FEATURE_FPU); -- cgit v0.10.2 From e229537543fba3de4026cb6e12b75946d9f3430f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 16:43:43 +0200 Subject: x86/fpu: Move fpu__save() to fpu/internal.h It's an internal method, not a driver API, so move it from fpu/api.h to fpu/internal.h. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 4ca745c..eeac376 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -99,6 +99,4 @@ static inline int user_has_fpu(void) return current->thread.fpu.fpregs_active; } -extern void fpu__save(struct fpu *fpu); - #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5a1fa5b..0c8c812 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -290,6 +290,8 @@ static inline int fpu_save_init(struct fpu *fpu) return 1; } +extern void fpu__save(struct fpu *fpu); + static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) -- cgit v0.10.2 From d63e79b114c0208bc2b7712c879568e180909d60 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 12:07:18 +0200 Subject: x86/fpu: Uninline kernel_fpu_begin()/end() Both inline functions call an inline function unconditionally, so we already pay the function call based clobbering cost. Uninline them. This saves quite a bit of code in various performance sensitive code paths: text data bss dec hex filename 13321334 2569888 1634304 17525526 10b6b16 vmlinux.before 13320246 2569888 1634304 17524438 10b66d6 vmlinux.after Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index eeac376..d4ab9e3 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -39,19 +39,8 @@ extern bool irq_fpu_usable(void); */ extern void __kernel_fpu_begin(void); extern void __kernel_fpu_end(void); - -static inline void kernel_fpu_begin(void) -{ - preempt_disable(); - WARN_ON_ONCE(!irq_fpu_usable()); - __kernel_fpu_begin(); -} - -static inline void kernel_fpu_end(void) -{ - __kernel_fpu_end(); - preempt_enable(); -} +extern void kernel_fpu_begin(void); +extern void kernel_fpu_end(void); /* * Some instructions like VIA's padlock instructions generate a spurious diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 3d2ec4b..8323a2a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -125,6 +125,21 @@ void __kernel_fpu_end(void) } EXPORT_SYMBOL(__kernel_fpu_end); +void kernel_fpu_begin(void) +{ + preempt_disable(); + WARN_ON_ONCE(!irq_fpu_usable()); + __kernel_fpu_begin(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_begin); + +void kernel_fpu_end(void) +{ + __kernel_fpu_end(); + preempt_enable(); +} +EXPORT_SYMBOL_GPL(kernel_fpu_end); + static void __save_fpu(struct fpu *fpu) { if (use_xsave()) { -- cgit v0.10.2 From 952f07ecbd4d9bac77c003ba136f8ee8ce631591 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 16:56:05 +0200 Subject: x86/fpu: Move various internal function prototypes to fpu/internal.h There are a number of FPU internal function prototypes and an inline function in fpu/api.h, mostly placed there historically as the code grew over the years. Move them over into fpu/internal.h where they belong. (Add sched.h include to stackprotector.h which incorrectly relied on getting it from fpu/api.h.) fpu/api.h is now a pure file that only contains FPU APIs intended for driver use.
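( For illustration - a minimal sketch, not part of the patch: after this change a module should only need the fpu/api.h surface. The kernel_fpu_begin()/kernel_fpu_end() calls below are the real APIs from the patches above; the example helper itself is hypothetical:

	#include <asm/fpu/api.h>

	/* Hypothetical driver helper that uses FPU/SIMD registers in kernel mode: */
	static void example_simd_work(void)
	{
		kernel_fpu_begin();	/* disables preemption and grants in-kernel FPU use */

		/* ... SSE/AVX computation on the FPU registers would go here ... */

		kernel_fpu_end();	/* hands the FPU back and re-enables preemption */
	}

Note that kernel_fpu_begin()/end() are exported via EXPORT_SYMBOL_GPL(), so this usage pattern is only available to GPL-compatible modules. )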
Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index d4ab9e3..0c71345 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,23 +10,8 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H -#include #include -struct pt_regs; -struct user_i387_struct; - -extern int fpstate_alloc_init(struct fpu *fpu); -extern void fpstate_init(struct fpu *fpu); -extern void fpu__clear(struct task_struct *tsk); - -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern void fpu__restore(void); -extern void fpu__init_check_bugs(void); -extern void fpu__resume_cpu(void); - -extern bool irq_fpu_usable(void); - /* * Careful: __kernel_fpu_begin/end() must be called with preempt disabled * and they don't touch the preempt state on their own. @@ -41,6 +26,7 @@ extern void __kernel_fpu_begin(void); extern void __kernel_fpu_end(void); extern void kernel_fpu_begin(void); extern void kernel_fpu_end(void); +extern bool irq_fpu_usable(void); /* * Some instructions like VIA's padlock instructions generate a spurious @@ -73,19 +59,4 @@ static inline void irq_ts_restore(int TS_state) stts(); } -/* - * The question "does this thread have fpu access?" - * is slightly racy, since preemption could come in - * and revoke it immediately after the test. - * - * However, even in that very unlikely scenario, - * we can just assume we have FPU access - typically - * to save the FP state - we'll just take a #NM - * fault and get the FPU access back. - */ -static inline int user_has_fpu(void) -{ - return current->thread.fpu.fpregs_active; -} - #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0c8c812..89c6ec8 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -43,6 +44,15 @@ extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); +extern int fpstate_alloc_init(struct fpu *fpu); +extern void fpstate_init(struct fpu *fpu); +extern void fpu__clear(struct task_struct *tsk); + +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +extern void fpu__restore(void); +extern void fpu__init_check_bugs(void); +extern void fpu__resume_cpu(void); + DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); extern void convert_from_fxsr(struct user_i387_ia32_struct *env, @@ -335,6 +345,21 @@ static inline void __fpregs_activate(struct fpu *fpu) } /* + * The question "does this thread have fpu access?" + * is slightly racy, since preemption could come in + * and revoke it immediately after the test. + * + * However, even in that very unlikely scenario, + * we can just assume we have FPU access - typically + * to save the FP state - we'll just take a #NM + * fault and get the FPU access back. + */ +static inline int user_has_fpu(void) +{ + return current->thread.fpu.fpregs_active; +} + +/* * Encapsulate the CR0.TS handling together with the * software flag. 
* diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h index 6a99859..c2e00bb 100644 --- a/arch/x86/include/asm/stackprotector.h +++ b/arch/x86/include/asm/stackprotector.h @@ -39,7 +39,9 @@ #include #include #include + #include +#include /* * 24 byte read-only segment initializer for stack canary. Linker diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29dd743..bd17db1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5cb738a..f93ae71 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 3bb4c6a..cf84385 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include "fpu_system.h" #include "fpu_emu.h" diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index ad0ce6b..0d7dd1f 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index fce5989..b80e4b8 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -46,7 +46,7 @@ #include #include #include -#include +#include #include #include "../lg.h" -- cgit v0.10.2 From 910665882fc00dc5bab0f846fe707174ff289615 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Apr 2015 16:57:55 +0200 Subject: x86/fpu: Uninline the irq_ts_save()/restore() functions Especially the irq_ts_save() function is pretty bloaty, generating over a dozen instructions, so uninline them. Even though the API is used rarely, the space savings are measurable: text data bss dec hex filename 13331995 2572920 1634304 17539219 10ba093 vmlinux.before 13331739 2572920 1634304 17538963 10b9f93 vmlinux.after ( This also allows the removal of an include file inclusion from fpu/api.h, speeding up the kernel build slightly. ) Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 0c71345..62035cc 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -10,8 +10,6 @@ #ifndef _ASM_X86_FPU_API_H #define _ASM_X86_FPU_API_H -#include - /* * Careful: __kernel_fpu_begin/end() must be called with preempt disabled * and they don't touch the preempt state on their own. @@ -35,28 +33,7 @@ extern bool irq_fpu_usable(void); * in interrupt context interacting wrongly with other user/kernel fpu usage, we * should use them only in the context of irq_ts_save/restore() */ -static inline int irq_ts_save(void) -{ - /* - * If in process context and not atomic, we can take a spurious DNA fault. 
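The calling convention is unchanged by the move; a typical caller (a hypothetical padlock-style driver, sketched only for illustration) brackets the TS-sensitive instructions like this:

    int ts;

    ts = irq_ts_save();     /* clears CR0.TS (and reports it) only in atomic context */
    /* ... issue instructions that would otherwise trap on CR0.TS ... */
    irq_ts_restore(ts);     /* sets CR0.TS again only if irq_ts_save() cleared it */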
- Otherwise, doing clts() in process context requires disabling preemption - * or some heavy lifting like kernel_fpu_begin() - */ - if (!in_atomic()) - return 0; - - if (read_cr0() & X86_CR0_TS) { - clts(); - return 1; - } - - return 0; -} - -static inline void irq_ts_restore(int TS_state) -{ - if (TS_state) - stts(); -} +extern int irq_ts_save(void); +extern void irq_ts_restore(int TS_state); #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8323a2a..b20d4ea 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -6,6 +6,7 @@ * Gareth Hughes , May 2000 */ #include +#include /* * Track whether the kernel is using the FPU state @@ -140,6 +141,35 @@ void kernel_fpu_end(void) } EXPORT_SYMBOL_GPL(kernel_fpu_end); +/* + * CR0::TS save/restore functions: + */ +int irq_ts_save(void) +{ + /* + * If in process context and not atomic, we can take a spurious DNA fault. + * Otherwise, doing clts() in process context requires disabling preemption + * or some heavy lifting like kernel_fpu_begin() + */ + if (!in_atomic()) + return 0; + + if (read_cr0() & X86_CR0_TS) { + clts(); + return 1; + } + + return 0; +} +EXPORT_SYMBOL_GPL(irq_ts_save); + +void irq_ts_restore(int TS_state) +{ + if (TS_state) + stts(); +} +EXPORT_SYMBOL_GPL(irq_ts_restore); + static void __save_fpu(struct fpu *fpu) { if (use_xsave()) { -- cgit v0.10.2 From 4f83634710a1a7024b8acaa3b589dc5d8ca03ab0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 02:53:16 +0200 Subject: x86/fpu: Rename fpu_save_init() to copy_fpregs_to_fpstate() So fpu_save_init() is a historic name that got its name when the only way to save the FPU state was FNSAVE, which cleared (well, destroyed) the FPU state after saving it. Nowadays the name is misleading, because ever since the introduction of FXSAVE (and more modern FPU saving instructions) the 'we need to reload the FPU state' part is only true if there's a pending FPU exception [*], which is almost never the case. So rename it to copy_fpregs_to_fpstate() to make it clear what's happening. Also add a few comments about why we cannot keep registers in certain cases. Also clean up the control flow a bit, to make it more apparent when we are dropping/keeping FP registers, and to optimize the common case (of keeping fpregs) some more. [*] Probably not true anymore: modern instructions always leave the FPU state intact, even if exceptions are pending, because pending FP exceptions are posted on the next FP instruction, not asynchronously. They were truly asynchronous back in the IRQ13 case, and we had to synchronize with them, but that code is not working anymore: we don't have IRQ13 mapped in the IDT anymore. But a cleanup patch is obviously not the place to change subtle behavior. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 89c6ec8..11055f5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -265,9 +265,15 @@ static inline void fpu_fxsave(struct fpu *fpu) /* * These must be called with preempt disabled. Returns - * 'true' if the FPU state is still intact. + * 'true' if the FPU state is still intact and we can + * keep registers active.
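The return value is what the context-switch path keys off; the switch_fpu_prepare() hunk further below applies it as in this sketch (identifiers are the patch's own):

    if (old_fpu->fpregs_active) {
            if (!copy_fpregs_to_fpstate(old_fpu))
                    old_fpu->last_cpu = -1;   /* registers clobbered (FNSAVE), no lazy match */
            else
                    old_fpu->last_cpu = cpu;  /* registers still mirror the fpstate */
    }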
+ * + * The legacy FNSAVE instruction cleared all FPU state + * unconditionally, so registers are essentially destroyed. + * Modern FPU state can be kept in registers, if there are + * no pending FP exceptions. (Note the FIXME below.) */ -static inline int fpu_save_init(struct fpu *fpu) +static inline int copy_fpregs_to_fpstate(struct fpu *fpu) { if (use_xsave()) { xsave_state(&fpu->state->xsave); @@ -276,13 +282,16 @@ static inline int fpu_save_init(struct fpu *fpu) * xsave header may indicate the init state of the FP. */ if (!(fpu->state->xsave.header.xfeatures & XSTATE_FP)) - return 1; - } else if (use_fxsr()) { - fpu_fxsave(fpu); + goto keep_fpregs; } else { - asm volatile("fnsave %[fx]; fwait" - : [fx] "=m" (fpu->state->fsave)); - return 0; + if (use_fxsr()) { + fpu_fxsave(fpu); + } else { + /* FNSAVE always clears FPU registers: */ + asm volatile("fnsave %[fx]; fwait" + : [fx] "=m" (fpu->state->fsave)); + goto drop_fpregs; + } } /* @@ -295,9 +304,14 @@ static inline int fpu_save_init(struct fpu *fpu) */ if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { asm volatile("fnclex"); - return 0; + goto drop_fpregs; } + +keep_fpregs: return 1; + +drop_fpregs: + return 0; } extern void fpu__save(struct fpu *fpu); @@ -448,7 +462,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) (use_eager_fpu() || new_fpu->counter > 5); if (old_fpu->fpregs_active) { - if (!fpu_save_init(old_fpu)) + if (!copy_fpregs_to_fpstate(old_fpu)) old_fpu->last_cpu = -1; else old_fpu->last_cpu = cpu; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b20d4ea..ca88608 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -102,7 +102,7 @@ void __kernel_fpu_begin(void) kernel_fpu_disable(); if (fpu->fpregs_active) { - fpu_save_init(fpu); + copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); if (!use_eager_fpu()) @@ -196,7 +196,7 @@ void fpu__save(struct fpu *fpu) if (use_eager_fpu()) { __save_fpu(fpu); } else { - fpu_save_init(fpu); + copy_fpregs_to_fpstate(fpu); fpregs_deactivate(fpu); } } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a65586e..f028f1d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -395,7 +395,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * It is not directly accessible, though, so we need to * do an xsave and then pull it out of the xsave buffer. */ - fpu_save_init(&tsk->thread.fpu); + copy_fpregs_to_fpstate(&tsk->thread.fpu); xsave_buf = &(tsk->thread.fpu.state->xsave); bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); if (!bndcsr) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b58b93..d90bf4a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7058,7 +7058,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) return; vcpu->guest_fpu_loaded = 0; - fpu_save_init(&vcpu->arch.guest_fpu); + copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu); __kernel_fpu_end(); ++vcpu->stat.fpu_reload; kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu); diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5563be3..3287215 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -357,7 +357,7 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. 
*/ - fpu_save_init(&tsk->thread.fpu); + copy_fpregs_to_fpstate(&tsk->thread.fpu); bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; -- cgit v0.10.2 From 1bc6b056d8f90f0a710ff1201964c1fffc9ddd3c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 03:32:18 +0200 Subject: x86/fpu: Optimize copy_fpregs_to_fpstate() by removing the FNCLEX synchronization with FP exceptions So we have the following ancient code in copy_fpregs_to_fpstate(): if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { asm volatile("fnclex"); goto drop_fpregs; } which clears pending FPU exceptions and then drops registers, which causes the next FP instruction of the saved context to re-load the saved FPU state, with all pending exceptions marked properly, and will re-start the exception handling mechanism in the hardware. Since FPU exceptions are always issued on instruction boundaries, in particular on the next FP instruction following the exception generating instruction, there's no fear of getting an FP exception asynchronously. They were truly asynchronous back in the IRQ13 days, when the FPU was a weird and expensive co-processor that did its own processing, and we had to synchronize with them, but that code is not working anymore: we don't have IRQ13 mapped in the IDT anymore. With the introduction of optimized XSAVE support there's a new complication: if the xstate features bit indicates that a particular state component is unused (in 'init state'), then the hardware does not guarantee that the XSAVE (et al) instruction keeps the underlying FPU state image in memory valid and current. In practice this means that the hardware won't write it, and the exceptions flag in the state might be an older version, with it still being set. This meant that we had to check the xfeatures flag as well, adding another memory load and branch to a critical hot path of the scheduler. So optimize all this by removing both the old quirk and the new check, and straight-line optimizing the most common cases with likely() hints. Quite a bit of code gets removed this way: arch/x86/kernel/process_64.o: text data bss dec filename 5484 8 0 5492 process_64.o.before 5416 8 0 5424 process_64.o.after Now there's also a chance that some weird behavior or erratum was masked by our IRQ13 handling quirk (or that I misunderstood the nature of the quirk), and that this change triggers some badness. There's no real good way to protect against that possibility other than keeping this change well isolated, well commented and well bisectable. If you bisect a weird (or not so weird) breakage to this commit then please let us know! Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 11055f5..10663b0 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -271,46 +271,26 @@ static inline void fpu_fxsave(struct fpu *fpu) * The legacy FNSAVE instruction cleared all FPU state * unconditionally, so registers are essentially destroyed. * Modern FPU state can be kept in registers, if there are - * no pending FP exceptions. (Note the FIXME below.) + * no pending FP exceptions. 
*/ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) { - if (use_xsave()) { + if (likely(use_xsave())) { xsave_state(&fpu->state->xsave); + return 1; + } - /* - * xsave header may indicate the init state of the FP. - */ - if (!(fpu->state->xsave.header.xfeatures & XSTATE_FP)) - goto keep_fpregs; - } else { - if (use_fxsr()) { - fpu_fxsave(fpu); - } else { - /* FNSAVE always clears FPU registers: */ - asm volatile("fnsave %[fx]; fwait" - : [fx] "=m" (fpu->state->fsave)); - goto drop_fpregs; - } + if (likely(use_fxsr())) { + fpu_fxsave(fpu); + return 1; } /* - * If exceptions are pending, we need to clear them so - * that we don't randomly get exceptions later. - * - * FIXME! Is this perhaps only true for the old-style - * irq13 case? Maybe we could leave the x87 state - * intact otherwise? + * Legacy FPU register saving, FNSAVE always clears FPU registers, + * so we have to mark them inactive: */ - if (unlikely(fpu->state->fxsave.swd & X87_FSW_ES)) { - asm volatile("fnclex"); - goto drop_fpregs; - } - -keep_fpregs: - return 1; + asm volatile("fnsave %[fx]; fwait" : [fx] "=m" (fpu->state->fsave)); -drop_fpregs: return 0; } -- cgit v0.10.2 From 7366ed771f6ed95e4c4525c335722888a83b4b6c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 04:19:39 +0200 Subject: x86/fpu: Simplify FPU handling by embedding the fpstate in task_struct (again) So 6 years ago we made the FPU fpstate dynamically allocated: aa283f49276e ("x86, fpu: lazy allocation of FPU area - v5") 61c4628b5386 ("x86, fpu: split FPU state from task struct - v5") In hindsight this was a mistake: - it complicated context allocation failure handling, such as: /* kthread execs. TODO: cleanup this horror. */ if (WARN_ON(fpstate_alloc_init(fpu))) force_sig(SIGKILL, tsk); - it caused us to enable irqs in fpu__restore(): local_irq_enable(); /* * does a slab alloc which can sleep */ if (fpstate_alloc_init(fpu)) { /* * ran out of memory! */ do_group_exit(SIGKILL); return; } local_irq_disable(); - it (slightly) slowed down task creation/destruction by adding slab allocation/free patterns. - it made access to context contents (slightly) slower by adding one more pointer dereference. The motivation for the dynamic allocation was two-fold: - reduce memory consumption by non-FPU tasks - allocate and handle only the necessary amount of context for various XSAVE processors that have varying hardware frame sizes. These days, with glibc using SSE memcpy by default and GCC optimizing for SSE/AVX by default, the scope of FPU-using apps on an x86 system is much larger than it was 6 years ago. For example on a freshly installed Fedora 21 desktop system, with a recent kernel, all non-kthread tasks have used the FPU shortly after bootup. Also, even modern embedded x86 CPUs try to support the latest vector instruction set - so they too will often use the larger xstate frame sizes. So remove the dynamic allocation complication by embedding the FPU fpstate in task_struct again. This should make the FPU a lot more accessible to all sorts of atomic contexts. We could still optimize for the xstate frame size in the future, by moving the state structure to the last element of task_struct, and allocating only a part of that. This change is kept minimal by still keeping the ctx_alloc()/free() routines (that now do nothing substantial) - we'll remove them in the following patches. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 10663b0..4ce830f 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -232,9 +232,9 @@ static inline int frstor_user(struct i387_fsave_struct __user *fx) static inline void fpu_fxsave(struct fpu *fpu) { if (config_enabled(CONFIG_X86_32)) - asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state->fxsave)); + asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) - asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state->fxsave)); + asm volatile("fxsaveq %[fx]" : [fx] "=m" (fpu->state.fxsave)); else { /* Using "rex64; fxsave %0" is broken because, if the memory * operand uses any extended registers for addressing, a second @@ -251,15 +251,15 @@ static inline void fpu_fxsave(struct fpu *fpu) * an extended register is needed for addressing (fix submitted * to mainline 2005-11-21). * - * asm volatile("rex64/fxsave %0" : "=m" (fpu->state->fxsave)); + * asm volatile("rex64/fxsave %0" : "=m" (fpu->state.fxsave)); * * This, however, we can work around by forcing the compiler to * select an addressing mode that doesn't require extended * registers. */ asm volatile( "rex64/fxsave (%[fx])" - : "=m" (fpu->state->fxsave) - : [fx] "R" (&fpu->state->fxsave)); + : "=m" (fpu->state.fxsave) + : [fx] "R" (&fpu->state.fxsave)); } } @@ -276,7 +276,7 @@ static inline void fpu_fxsave(struct fpu *fpu) static inline int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { - xsave_state(&fpu->state->xsave); + xsave_state(&fpu->state.xsave); return 1; } @@ -289,7 +289,7 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to mark them inactive: */ - asm volatile("fnsave %[fx]; fwait" : [fx] "=m" (fpu->state->fsave)); + asm volatile("fnsave %[fx]; fwait" : [fx] "=m" (fpu->state.fsave)); return 0; } @@ -299,11 +299,11 @@ extern void fpu__save(struct fpu *fpu); static inline int fpu_restore_checking(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(&fpu->state->xsave); + return fpu_xrstor_checking(&fpu->state.xsave); else if (use_fxsr()) - return fxrstor_checking(&fpu->state->fxsave); + return fxrstor_checking(&fpu->state.fxsave); else - return frstor_checking(&fpu->state->fsave); + return frstor_checking(&fpu->state.fsave); } static inline int restore_fpu_checking(struct fpu *fpu) @@ -454,7 +454,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) if (fpu.preload) { new_fpu->counter++; __fpregs_activate(new_fpu); - prefetch(new_fpu->state); + prefetch(&new_fpu->state); } else if (!use_eager_fpu()) stts(); } else { @@ -465,7 +465,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) if (fpu_want_lazy_restore(new_fpu, cpu)) fpu.preload = 0; else - prefetch(new_fpu->state); + prefetch(&new_fpu->state); fpregs_activate(new_fpu); } } @@ -534,25 +534,25 @@ static inline void user_fpu_begin(void) static inline unsigned short get_fpu_cwd(struct task_struct *tsk) { if (cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.cwd; + return tsk->thread.fpu.state.fxsave.cwd; } else { - return (unsigned short)tsk->thread.fpu.state->fsave.cwd; + return (unsigned short)tsk->thread.fpu.state.fsave.cwd; } } static inline unsigned short get_fpu_swd(struct task_struct *tsk) { if 
(cpu_has_fxsr) { - return tsk->thread.fpu.state->fxsave.swd; + return tsk->thread.fpu.state.fxsave.swd; } else { - return (unsigned short)tsk->thread.fpu.state->fsave.swd; + return (unsigned short)tsk->thread.fpu.state.fsave.swd; } } static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) { if (cpu_has_xmm) { - return tsk->thread.fpu.state->fxsave.mxcsr; + return tsk->thread.fpu.state.fxsave.mxcsr; } else { return MXCSR_DEFAULT; } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 231a8f5..3a15ac6 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -143,7 +143,7 @@ struct fpu { unsigned int last_cpu; unsigned int fpregs_active; - union thread_xstate *state; + union thread_xstate state; /* * This counter contains the number of consecutive context switches * during which the FPU stays used. If this is over a threshold, the diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ca88608..1697a9a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -174,9 +174,9 @@ static void __save_fpu(struct fpu *fpu) { if (use_xsave()) { if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&fpu->state->xsave); + xsave_state_booting(&fpu->state.xsave); else - xsave_state(&fpu->state->xsave); + xsave_state(&fpu->state.xsave); } else { fpu_fxsave(fpu); } @@ -207,16 +207,16 @@ EXPORT_SYMBOL_GPL(fpu__save); void fpstate_init(struct fpu *fpu) { if (!cpu_has_fpu) { - finit_soft_fpu(&fpu->state->soft); + finit_soft_fpu(&fpu->state.soft); return; } - memset(fpu->state, 0, xstate_size); + memset(&fpu->state, 0, xstate_size); if (cpu_has_fxsr) { - fx_finit(&fpu->state->fxsave); + fx_finit(&fpu->state.fxsave); } else { - struct i387_fsave_struct *fp = &fpu->state->fsave; + struct i387_fsave_struct *fp = &fpu->state.fsave; fp->cwd = 0xffff037fu; fp->swd = 0xffff0000u; fp->twd = 0xffffffffu; @@ -241,15 +241,8 @@ void fpstate_cache_init(void) int fpstate_alloc(struct fpu *fpu) { - if (fpu->state) - return 0; - - fpu->state = kmem_cache_alloc(task_xstate_cachep, GFP_KERNEL); - if (!fpu->state) - return -ENOMEM; - /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */ - WARN_ON((unsigned long)fpu->state & 15); + WARN_ON((unsigned long)&fpu->state & 15); return 0; } @@ -257,10 +250,6 @@ EXPORT_SYMBOL_GPL(fpstate_alloc); void fpstate_free(struct fpu *fpu) { - if (fpu->state) { - kmem_cache_free(task_xstate_cachep, fpu->state); - fpu->state = NULL; - } } EXPORT_SYMBOL_GPL(fpstate_free); @@ -277,11 +266,11 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) WARN_ON(src_fpu != &current->thread.fpu); if (use_eager_fpu()) { - memset(&dst_fpu->state->xsave, 0, xstate_size); + memset(&dst_fpu->state.xsave, 0, xstate_size); __save_fpu(dst_fpu); } else { fpu__save(src_fpu); - memcpy(dst_fpu->state, src_fpu->state, xstate_size); + memcpy(&dst_fpu->state, &src_fpu->state, xstate_size); } } @@ -289,7 +278,6 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) { dst_fpu->counter = 0; dst_fpu->fpregs_active = 0; - dst_fpu->state = NULL; dst_fpu->last_cpu = -1; if (src_fpu->fpstate_active) { @@ -483,7 +471,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, sanitize_i387_state(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpu->state->fxsave, 0, -1); + &fpu->state.fxsave, 0, -1); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -503,19 +491,19 @@ int xfpregs_set(struct task_struct
*target, const struct user_regset *regset, sanitize_i387_state(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpu->state->fxsave, 0, -1); + &fpu->state.fxsave, 0, -1); /* * mxcsr reserved bits must be masked to zero for security reasons. */ - fpu->state->fxsave.mxcsr &= mxcsr_feature_mask; + fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; /* * update the header bits in the xsave header, indicating the * presence of FP and SSE state. */ if (cpu_has_xsave) - fpu->state->xsave.header.xfeatures |= XSTATE_FPSSE; + fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; return ret; } @@ -535,7 +523,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - xsave = &fpu->state->xsave; + xsave = &fpu->state.xsave; /* * Copy the 48bytes defined by the software first into the xstate @@ -566,7 +554,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (ret) return ret; - xsave = &fpu->state->xsave; + xsave = &fpu->state.xsave; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); /* @@ -657,7 +645,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -695,7 +683,7 @@ void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env) { - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state->fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -736,7 +724,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpu->state->fsave, 0, + &fpu->state.fsave, 0, -1); sanitize_i387_state(target); @@ -770,7 +758,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpu->state->fsave, 0, + &fpu->state.fsave, 0, -1); if (pos > 0 || count < sizeof(env)) @@ -785,7 +773,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, * presence of FP. */ if (cpu_has_xsave) - fpu->state->xsave.header.xfeatures |= XSTATE_FP; + fpu->state.xsave.header.xfeatures |= XSTATE_FP; return ret; } diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index a232363..c7d48eb 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -44,14 +44,14 @@ static unsigned int xfeatures_nr; */ void __sanitize_i387_state(struct task_struct *tsk) { - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state->fxsave; + struct i387_fxsave_struct *fx = &tsk->thread.fpu.state.fxsave; int feature_bit; u64 xfeatures; if (!fx) return; - xfeatures = tsk->thread.fpu.state->xsave.header.xfeatures; + xfeatures = tsk->thread.fpu.state.xsave.header.xfeatures; /* * None of the feature bits are in init state. 
So nothing else @@ -147,7 +147,7 @@ static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; struct user_i387_ia32_struct env; struct _fpstate_ia32 __user *fp = buf; @@ -245,7 +245,7 @@ static inline int save_user_xstate(struct xsave_struct __user *buf) */ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) { - struct xsave_struct *xsave = &current->thread.fpu.state->xsave; + struct xsave_struct *xsave = &current->thread.fpu.state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -288,7 +288,7 @@ sanitize_restored_xstate(struct task_struct *tsk, struct user_i387_ia32_struct *ia32_env, u64 xfeatures, int fx_only) { - struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave; + struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; struct xstate_header *header = &xsave->header; if (use_xsave()) { @@ -402,7 +402,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ drop_fpu(fpu); - if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) || + if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { fpstate_init(fpu); err = -1; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index f028f1d..48dfcd9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -396,7 +396,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) * do an xsave and then pull it out of the xsave buffer. */ copy_fpregs_to_fpstate(&tsk->thread.fpu); - xsave_buf = &(tsk->thread.fpu.state->xsave); + xsave_buf = &(tsk->thread.fpu.state.xsave); bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); if (!bndcsr) goto exit_trap; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d90bf4a..8bb0de5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3196,7 +3196,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state.xsave; u64 xstate_bv = xsave->header.xfeatures; u64 valid; @@ -3232,7 +3232,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave; + struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state.xsave; u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); u64 valid; @@ -3277,7 +3277,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, fill_xsave((u8 *) guest_xsave->region, vcpu); } else { memcpy(guest_xsave->region, - &vcpu->arch.guest_fpu.state->fxsave, + &vcpu->arch.guest_fpu.state.fxsave, sizeof(struct i387_fxsave_struct)); *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = XSTATE_FPSSE; @@ -3302,7 +3302,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, } else { if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; - memcpy(&vcpu->arch.guest_fpu.state->fxsave, + memcpy(&vcpu->arch.guest_fpu.state.fxsave, guest_xsave->region, sizeof(struct i387_fxsave_struct)); } return 0; @@ -6973,7 +6973,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct i387_fxsave_struct
*fxsave = - &vcpu->arch.guest_fpu.state->fxsave; + &vcpu->arch.guest_fpu.state.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); fpu->fcw = fxsave->cwd; @@ -6990,7 +6990,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { struct i387_fxsave_struct *fxsave = - &vcpu->arch.guest_fpu.state->fxsave; + &vcpu->arch.guest_fpu.state.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); fxsave->cwd = fpu->fcw; @@ -7014,7 +7014,7 @@ int fx_init(struct kvm_vcpu *vcpu) fpstate_init(&vcpu->arch.guest_fpu); if (cpu_has_xsaves) - vcpu->arch.guest_fpu.state->xsave.header.xcomp_bv = + vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; /* diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index dc8adad..7562341 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft) void finit(void) { - finit_soft_fpu(&current->thread.fpu.state->soft); + finit_soft_fpu(&current->thread.fpu.state.soft); } /* diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index cf84385..5e00370 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -683,7 +683,7 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; + struct i387_soft_struct *s387 = &target->thread.fpu.state.soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; @@ -735,7 +735,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state->soft; + struct i387_soft_struct *s387 = &target->thread.fpu.state.soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index 2c61441..9ccecb6 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -31,7 +31,7 @@ #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ == (1 << 10)) -#define I387 (current->thread.fpu.state) +#define I387 (&current->thread.fpu.state) #define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->regs->cs)) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 3287215..ea5b367 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -358,7 +358,7 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) * only accessible if we first do an xsave. */ copy_fpregs_to_fpstate(&tsk->thread.fpu); - bndcsr = get_xsave_addr(&tsk->thread.fpu.state->xsave, XSTATE_BNDCSR); + bndcsr = get_xsave_addr(&tsk->thread.fpu.state.xsave, XSTATE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; -- cgit v0.10.2 From c4d6ee6e2e52ec604cc1d76877791f8e8f5c79b5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 05:52:40 +0200 Subject: x86/fpu: Remove failure paths from fpstate-alloc low level functions Now that we always allocate the FPU context as part of task_struct there's no need for separate allocations - remove them and their primary failure handling code. ( Note that there are still secondary error codes that have become superfluous; those will be removed in separate patches.
) Move the somewhat misplaced setup_xstate_comp() call to the core. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 4ce830f..9454f21 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -558,10 +558,6 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -extern void fpstate_cache_init(void); - -extern int fpstate_alloc(struct fpu *fpu); -extern void fpstate_free(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); static inline unsigned long diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1697a9a..6b8d3e1 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -226,34 +226,6 @@ void fpstate_init(struct fpu *fpu) EXPORT_SYMBOL_GPL(fpstate_init); /* - * FPU state allocation: - */ -static struct kmem_cache *task_xstate_cachep; - -void fpstate_cache_init(void) -{ - task_xstate_cachep = - kmem_cache_create("task_xstate", xstate_size, - __alignof__(union thread_xstate), - SLAB_PANIC | SLAB_NOTRACK, NULL); - setup_xstate_comp(); -} - -int fpstate_alloc(struct fpu *fpu) -{ - /* The CPU requires the FPU state to be aligned to 16 byte boundaries: */ - WARN_ON((unsigned long)&fpu->state & 15); - - return 0; -} -EXPORT_SYMBOL_GPL(fpstate_alloc); - -void fpstate_free(struct fpu *fpu) -{ -} -EXPORT_SYMBOL_GPL(fpstate_free); - -/* * Copy the current task's FPU state to a new task's FPU context. * * In the 'eager' case we just save to the destination context. @@ -280,13 +252,9 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) dst_fpu->fpregs_active = 0; dst_fpu->last_cpu = -1; - if (src_fpu->fpstate_active) { - int err = fpstate_alloc(dst_fpu); - - if (err) - return err; + if (src_fpu->fpstate_active) fpu_copy(dst_fpu, src_fpu); - } + return 0; } @@ -305,13 +273,6 @@ int fpstate_alloc_init(struct fpu *fpu) if (WARN_ON_ONCE(fpu->fpstate_active)) return -EINVAL; - /* - * Memory allocation at the first usage of the FPU and other state. - */ - ret = fpstate_alloc(fpu); - if (ret) - return ret; - fpstate_init(fpu); /* Safe to do for the current task: */ @@ -356,13 +317,6 @@ static int fpu__unlazy_stopped(struct fpu *child_fpu) return 0; } - /* - * Memory allocation at the first usage of the FPU and other state. - */ - ret = fpstate_alloc(child_fpu); - if (ret) - return ret; - fpstate_init(child_fpu); /* Safe to do for stopped child tasks: */ @@ -423,7 +377,6 @@ void fpu__clear(struct task_struct *tsk) if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ drop_fpu(fpu); - fpstate_free(fpu); } else { if (!fpu->fpstate_active) { /* kthread execs. TODO: cleanup this horror. 
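With the allocation failure paths gone, the fork-time hook reduces to an unconditional copy; a sketch of the resulting flow (mirroring the process.c hunk below, with the generic duplication elided):

    /* fork()/clone(): duplicate the parent's FPU context; this can no longer fail */
    int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
    {
            /* ... generic task_struct duplication elided ... */
            return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
    }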
*/ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7ae5a62..460e7e2 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -265,6 +265,7 @@ void fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); + setup_xstate_comp(); fpu__init_system_ctx_switch(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 71f4b4d..5d37c26 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -86,16 +86,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) return fpu__copy(&dst->thread.fpu, &src->thread.fpu); } -void arch_release_task_struct(struct task_struct *tsk) -{ - fpstate_free(&tsk->thread.fpu); -} - -void arch_task_cache_init(void) -{ - fpstate_cache_init(); -} - /* * Free current thread data structures etc.. */ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8bb0de5..6852925 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7008,10 +7008,6 @@ int fx_init(struct kvm_vcpu *vcpu) { int err; - err = fpstate_alloc(&vcpu->arch.guest_fpu); - if (err) - return err; - fpstate_init(&vcpu->arch.guest_fpu); if (cpu_has_xsaves) vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = @@ -7028,11 +7024,6 @@ int fx_init(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(fx_init); -static void fx_free(struct kvm_vcpu *vcpu) -{ - fpstate_free(&vcpu->arch.guest_fpu); -} - void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { if (vcpu->guest_fpu_loaded) @@ -7070,7 +7061,6 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvmclock_reset(vcpu); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); - fx_free(vcpu); kvm_x86_ops->vcpu_free(vcpu); } @@ -7126,7 +7116,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_mmu_unload(vcpu); vcpu_put(vcpu); - fx_free(vcpu); kvm_x86_ops->vcpu_free(vcpu); } -- cgit v0.10.2 From 91d93d0e206432b9fe4c88e64577b93aef018f98 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 06:46:52 +0200 Subject: x86/fpu: Remove failure return from fpstate_alloc_init() Remove the failure code and propagate this down to callers. Note that this function still has an 'init' aspect, which must be called. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 9454f21..1d0c5ce 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -44,7 +44,7 @@ extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); -extern int fpstate_alloc_init(struct fpu *fpu); +extern void fpstate_alloc_init(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); extern void fpu__clear(struct task_struct *tsk); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 6b8d3e1..b44ac50 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -259,26 +259,17 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } /* - * Allocate the backing store for the current task's FPU registers - * and initialize the registers themselves as well. - * - * Can fail. 
+ * Initialize the current task's in-memory FPU context: */ -int fpstate_alloc_init(struct fpu *fpu) +void fpstate_alloc_init(struct fpu *fpu) { - int ret; - - if (WARN_ON_ONCE(fpu != &current->thread.fpu)) - return -EINVAL; - if (WARN_ON_ONCE(fpu->fpstate_active)) - return -EINVAL; + WARN_ON_ONCE(fpu != &current->thread.fpu); + WARN_ON_ONCE(fpu->fpstate_active); fpstate_init(fpu); /* Safe to do for the current task: */ fpu->fpstate_active = 1; - - return 0; } EXPORT_SYMBOL_GPL(fpstate_alloc_init); @@ -340,20 +331,8 @@ void fpu__restore(void) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - if (!fpu->fpstate_active) { - local_irq_enable(); - /* - * does a slab alloc which can sleep - */ - if (fpstate_alloc_init(fpu)) { - /* - * ran out of memory! - */ - do_group_exit(SIGKILL); - return; - } - local_irq_disable(); - } + if (!fpu->fpstate_active) + fpstate_alloc_init(fpu); /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); @@ -379,9 +358,7 @@ void fpu__clear(struct task_struct *tsk) drop_fpu(fpu); } else { if (!fpu->fpstate_active) { - /* kthread execs. TODO: cleanup this horror. */ - if (WARN_ON(fpstate_alloc_init(fpu))) - force_sig(SIGKILL, tsk); + fpstate_alloc_init(fpu); user_fpu_begin(); } restore_init_xstate(); diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index c7d48eb..dd2cef0 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -358,8 +358,8 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!fpu->fpstate_active && fpstate_alloc_init(fpu)) - return -1; + if (!fpu->fpstate_active) + fpstate_alloc_init(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6852925..707f4e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6601,8 +6601,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!fpu->fpstate_active && fpstate_alloc_init(fpu)) - return -ENOMEM; + if (!fpu->fpstate_active) + fpstate_alloc_init(fpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 5e00370..99ddfc2 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -149,12 +149,8 @@ void math_emulate(struct math_emu_info *info) struct desc_struct code_descriptor; struct fpu *fpu = &current->thread.fpu; - if (!fpu->fpstate_active) { - if (fpstate_alloc_init(fpu)) { - do_group_exit(SIGKILL); - return; - } - } + if (!fpu->fpstate_active) + fpstate_alloc_init(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { -- cgit v0.10.2 From e62bb3d894d312a37009fb07dc83fd1cc7bed37c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 06:50:29 +0200 Subject: x86/fpu: Rename fpstate_alloc_init() to fpstate_init_curr() Now that there are no FPU context allocations, rename fpstate_alloc_init() to fpstate_init_curr(), to signal that it initializes the fpstate and marks it active, for the current task. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 1d0c5ce..1345ab3 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -44,7 +44,7 @@ extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpstate_alloc_init(struct fpu *fpu); +extern void fpstate_init_curr(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); extern void fpu__clear(struct task_struct *tsk); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b44ac50..45f014e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -261,7 +261,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) /* * Initialize the current task's in-memory FPU context: */ -void fpstate_alloc_init(struct fpu *fpu) +void fpstate_init_curr(struct fpu *fpu) { WARN_ON_ONCE(fpu != &current->thread.fpu); WARN_ON_ONCE(fpu->fpstate_active); @@ -271,7 +271,7 @@ void fpstate_alloc_init(struct fpu *fpu) /* Safe to do for the current task: */ fpu->fpstate_active = 1; } -EXPORT_SYMBOL_GPL(fpstate_alloc_init); +EXPORT_SYMBOL_GPL(fpstate_init_curr); /* * This function is called before we modify a stopped child's @@ -332,7 +332,7 @@ void fpu__restore(void) struct fpu *fpu = &tsk->thread.fpu; if (!fpu->fpstate_active) - fpstate_alloc_init(fpu); + fpstate_init_curr(fpu); /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); @@ -358,7 +358,7 @@ void fpu__clear(struct task_struct *tsk) drop_fpu(fpu); } else { if (!fpu->fpstate_active) { - fpstate_alloc_init(fpu); + fpstate_init_curr(fpu); user_fpu_begin(); } restore_init_xstate(); diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index dd2cef0..49d9f3d 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -359,7 +359,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) return -EACCES; if (!fpu->fpstate_active) - fpstate_alloc_init(fpu); + fpstate_init_curr(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 707f4e2..74b53c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6602,7 +6602,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) sigset_t sigsaved; if (!fpu->fpstate_active) - fpstate_alloc_init(fpu); + fpstate_init_curr(fpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 99ddfc2..4c6ab79 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -150,7 +150,7 @@ void math_emulate(struct math_emu_info *info) struct fpu *fpu = &current->thread.fpu; if (!fpu->fpstate_active) - fpstate_alloc_init(fpu); + fpstate_init_curr(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { -- cgit v0.10.2 From 2fb29fc7c690d3d318471c42a62a05153d433cc1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 06:55:54 +0200 Subject: x86/fpu: Simplify fpu__unlazy_stopped() error handling Now that FPU contexts are always allocated, fpu__unlazy_stopped() cannot fail. Remove its error return and propagate the changes to the callers. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 45f014e..97b4f9e 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -296,24 +296,18 @@ EXPORT_SYMBOL_GPL(fpstate_init_curr); * the read-only case, it's not strictly necessary for * read-only access to the context. */ -static int fpu__unlazy_stopped(struct fpu *child_fpu) +static void fpu__unlazy_stopped(struct fpu *child_fpu) { - int ret; - - if (WARN_ON_ONCE(child_fpu == &current->thread.fpu)) - return -EINVAL; + WARN_ON_ONCE(child_fpu == &current->thread.fpu); if (child_fpu->fpstate_active) { child_fpu->last_cpu = -1; - return 0; - } - - fpstate_init(child_fpu); - - /* Safe to do for stopped child tasks: */ - child_fpu->fpstate_active = 1; + } else { + fpstate_init(child_fpu); - return 0; + /* Safe to do for stopped child tasks: */ + child_fpu->fpstate_active = 1; + } } /* @@ -389,15 +383,11 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - int ret; if (!cpu_has_fxsr) return -ENODEV; - ret = fpu__unlazy_stopped(fpu); - if (ret) - return ret; - + fpu__unlazy_stopped(fpu); sanitize_i387_state(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -414,10 +404,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - ret = fpu__unlazy_stopped(fpu); - if (ret) - return ret; - + fpu__unlazy_stopped(fpu); sanitize_i387_state(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -449,9 +436,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - ret = fpu__unlazy_stopped(fpu); - if (ret) - return ret; + fpu__unlazy_stopped(fpu); xsave = &fpu->state.xsave; @@ -480,9 +465,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - ret = fpu__unlazy_stopped(fpu); - if (ret) - return ret; + fpu__unlazy_stopped(fpu); xsave = &fpu->state.xsave; @@ -643,11 +626,8 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, { struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; - int ret; - ret = fpu__unlazy_stopped(fpu); - if (ret) - return ret; + fpu__unlazy_stopped(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); @@ -677,9 +657,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - ret = fpu__unlazy_stopped(fpu); - if (ret) - return ret; + fpu__unlazy_stopped(fpu); sanitize_i387_state(target); -- cgit v0.10.2 From 0ee6a5172573aea06ef41f4e48737dcfab0099bb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 06:58:22 +0200 Subject: x86/fpu, kvm: Simplify fx_init() Now that fpstate_init() cannot fail, the error return of fx_init() has lost its purpose. Eliminate the error return and propagate this change to all callers. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index dea2e7e..c29e61a 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -999,8 +999,6 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); -int fx_init(struct kvm_vcpu *vcpu); - void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new, int bytes); int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 74b53c3..92a8490 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7004,10 +7004,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) return 0; } -int fx_init(struct kvm_vcpu *vcpu) +static void fx_init(struct kvm_vcpu *vcpu) { - int err; - fpstate_init(&vcpu->arch.guest_fpu); if (cpu_has_xsaves) vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = @@ -7019,10 +7017,7 @@ int fx_init(struct kvm_vcpu *vcpu) vcpu->arch.xcr0 = XSTATE_FP; vcpu->arch.cr0 |= X86_CR0_ET; - - return 0; } -EXPORT_SYMBOL_GPL(fx_init); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) { @@ -7341,9 +7336,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) goto fail_free_mce_banks; } - r = fx_init(vcpu); - if (r) - goto fail_free_wbinvd_dirty_mask; + fx_init(vcpu); vcpu->arch.ia32_tsc_adjust_msr = 0x0; vcpu->arch.pv_time_enabled = false; @@ -7357,8 +7350,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) kvm_pmu_init(vcpu); return 0; -fail_free_wbinvd_dirty_mask: - free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); + fail_free_mce_banks: kfree(vcpu->arch.mce_banks); fail_free_lapic: -- cgit v0.10.2 From c4d72e2db3a36bf560b506df8a3490f140aeae26 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 07:18:17 +0200 Subject: x86/fpu: Simplify fpstate_init_curr() usage Now that fpstate_init_curr() is not doing implicit allocations anymore, almost all uses of it involve a very simple pattern: if (!fpu->fpstate_active) fpstate_init_curr(fpu); which is basically activating the FPU fpstate if it was not active before. So propagate the check into the function itself, and rename the function according to its new purpose: fpu__activate_curr(fpu); Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 1345ab3..de19fc5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -44,7 +44,7 @@ extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpstate_init_curr(struct fpu *fpu); +extern void fpu__activate_curr(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); extern void fpu__clear(struct task_struct *tsk); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97b4f9e..74cc322 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -259,19 +259,21 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) } /* - * Initialize the current task's in-memory FPU context: + * Activate the current task's in-memory FPU context, + * if it has not been used before: */ -void fpstate_init_curr(struct fpu *fpu) +void fpu__activate_curr(struct fpu *fpu) { WARN_ON_ONCE(fpu != &current->thread.fpu); - WARN_ON_ONCE(fpu->fpstate_active); - fpstate_init(fpu); + if (!fpu->fpstate_active) { + fpstate_init(fpu); - /* Safe to do for the current task: */ - fpu->fpstate_active = 1; + /* Safe to do for the current task: */ + fpu->fpstate_active = 1; + } } -EXPORT_SYMBOL_GPL(fpstate_init_curr); +EXPORT_SYMBOL_GPL(fpu__activate_curr); /* * This function is called before we modify a stopped child's @@ -325,8 +327,7 @@ void fpu__restore(void) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; - if (!fpu->fpstate_active) - fpstate_init_curr(fpu); + fpu__activate_curr(fpu); /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); @@ -352,7 +353,7 @@ void fpu__clear(struct task_struct *tsk) drop_fpu(fpu); } else { if (!fpu->fpstate_active) { - fpstate_init_curr(fpu); + fpu__activate_curr(fpu); user_fpu_begin(); } restore_init_xstate(); diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c index 49d9f3d..f549e2a 100644 --- a/arch/x86/kernel/fpu/xsave.c +++ b/arch/x86/kernel/fpu/xsave.c @@ -358,8 +358,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (!access_ok(VERIFY_READ, buf, size)) return -EACCES; - if (!fpu->fpstate_active) - fpstate_init_curr(fpu); + fpu__activate_curr(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(current, NULL, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 92a8490..9ff4df7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6601,8 +6601,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int r; sigset_t sigsaved; - if (!fpu->fpstate_active) - fpstate_init_curr(fpu); + fpu__activate_curr(fpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 4c6ab79..5b85051 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -149,8 +149,7 @@ void math_emulate(struct math_emu_info *info) struct desc_struct code_descriptor; struct fpu *fpu = &current->thread.fpu; - if (!fpu->fpstate_active) - fpstate_init_curr(fpu); + fpu__activate_curr(fpu); #ifdef RE_ENTRANT_CHECKING if (emulating) { -- cgit v0.10.2 From 67ee658e6fb8611a64dd8406b0081b1fba9dec1b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 07:22:58 +0200 Subject:
x86/fpu: Rename fpu__unlazy_stopped() to fpu__activate_stopped() In line with the fpstate_activate() change, name fpu__unlazy_stopped() in a similar fashion as well: its purpose is to make the fpstate of a stopped task the current and active FPU context, which may require unlazying and initialization. The unlazying is just part of the job; the main concept is to make the fpstate active. Also update the function's description to clarify its exact usage and the background behind it all. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 74cc322..a407bf5 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -276,29 +276,30 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr); /* - * This function is called before we modify a stopped child's - * FPU state context. + * This function must be called before we modify a stopped child's + * fpstate. * * If the child has not used the FPU before then initialize its - * FPU context. + * fpstate. * * If the child has used the FPU before then unlazy it. * - * [ After this function call, after the context is modified and - * the child task is woken up, the child task will restore - * the modified FPU state from the modified context. If we + * [ After this function call, after registers in the fpstate are + * modified and the child task has woken up, the child task will + * restore the modified FPU state from the modified context. If we * didn't clear its lazy status here then the lazy in-registers - * state pending on its former CPU could be restored, losing + * state pending on its former CPU could be restored, corrupting * the modifications. ] * * This function is also called before we read a stopped child's - * FPU state - to make sure it's modified. + * FPU state - to make sure it's initialized if the child has + * no active FPU state. * * TODO: A future optimization would be to skip the unlazying in * the read-only case, it's not strictly necessary for * read-only access to the context.
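Concretely, the ptrace regset handlers touched below all follow the same shape; mirroring the xfpregs_get() hunk, the post-series flow is:

    int xfpregs_get(struct task_struct *target, const struct user_regset *regset,
                    unsigned int pos, unsigned int count,
                    void *kbuf, void __user *ubuf)
    {
            struct fpu *fpu = &target->thread.fpu;

            if (!cpu_has_fxsr)
                    return -ENODEV;

            fpu__activate_stopped(fpu);     /* make sure the child's fpstate exists and is current */
            sanitize_i387_state(target);

            return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
                                       &fpu->state.fxsave, 0, -1);
    }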
*/ -static void fpu__unlazy_stopped(struct fpu *child_fpu) +static void fpu__activate_stopped(struct fpu *child_fpu) { WARN_ON_ONCE(child_fpu == &current->thread.fpu); @@ -388,7 +389,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - fpu__unlazy_stopped(fpu); + fpu__activate_stopped(fpu); sanitize_i387_state(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -405,7 +406,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - fpu__unlazy_stopped(fpu); + fpu__activate_stopped(fpu); sanitize_i387_state(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -437,7 +438,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - fpu__unlazy_stopped(fpu); + fpu__activate_stopped(fpu); xsave = &fpu->state.xsave; @@ -466,7 +467,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - fpu__unlazy_stopped(fpu); + fpu__activate_stopped(fpu); xsave = &fpu->state.xsave; @@ -628,7 +629,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; - fpu__unlazy_stopped(fpu); + fpu__activate_stopped(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); @@ -658,7 +659,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - fpu__unlazy_stopped(fpu); + fpu__activate_stopped(fpu); sanitize_i387_state(target); -- cgit v0.10.2 From 32b49b3c83cad1ba60494a00dad2f511a647fb5a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 08:58:45 +0200 Subject: x86/fpu: Factor out FPU hw activation/deactivation We have repeat patterns of: if (!use_eager_fpu()) clts(); ... to activate FPU registers, and: if (!use_eager_fpu()) stts(); ... to deactivate them. Encapsulate these in: __fpregs_activate_hw(); __fpregs_deactivate_hw(); and use them accordingly. Doing this synchronizes the idiom with the fpu->fpregs_active software-flag's handling functions, creating clear patterns of: __fpregs_activate_hw(); __fpregs_activate(fpu); etc., which improves readability. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index de19fc5..28556c6 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -324,14 +324,31 @@ static inline int restore_fpu_checking(struct fpu *fpu) return fpu_restore_checking(fpu); } -/* Must be paired with an 'stts' after! */ +/* + * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' + * idiom, which is then paired with the sw-flag (fpregs_active) later on: + */ + +static inline void __fpregs_activate_hw(void) +{ + if (!use_eager_fpu()) + clts(); +} + +static inline void __fpregs_deactivate_hw(void) +{ + if (!use_eager_fpu()) + stts(); +} + +/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ static inline void __fpregs_deactivate(struct fpu *fpu) { fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); } -/* Must be paired with a 'clts' before! 
*/ +/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */ static inline void __fpregs_activate(struct fpu *fpu) { fpu->fpregs_active = 1; @@ -362,16 +379,14 @@ static inline int user_has_fpu(void) */ static inline void fpregs_activate(struct fpu *fpu) { - if (!use_eager_fpu()) - clts(); + __fpregs_activate_hw(); __fpregs_activate(fpu); } static inline void fpregs_deactivate(struct fpu *fpu) { __fpregs_deactivate(fpu); - if (!use_eager_fpu()) - stts(); + __fpregs_deactivate_hw(); } static inline void drop_fpu(struct fpu *fpu) @@ -455,8 +470,9 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) new_fpu->counter++; __fpregs_activate(new_fpu); prefetch(&new_fpu->state); - } else if (!use_eager_fpu()) - stts(); + } else { + __fpregs_deactivate_hw(); + } } else { old_fpu->counter = 0; old_fpu->last_cpu = -1; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a407bf5..a617aac 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -105,8 +105,7 @@ void __kernel_fpu_begin(void) copy_fpregs_to_fpstate(fpu); } else { this_cpu_write(fpu_fpregs_owner_ctx, NULL); - if (!use_eager_fpu()) - clts(); + __fpregs_activate_hw(); } } EXPORT_SYMBOL(__kernel_fpu_begin); @@ -118,8 +117,8 @@ void __kernel_fpu_end(void) if (fpu->fpregs_active) { if (WARN_ON(restore_fpu_checking(fpu))) fpu_reset_state(fpu); - } else if (!use_eager_fpu()) { - stts(); + } else { + __fpregs_deactivate_hw(); } kernel_fpu_enable(); -- cgit v0.10.2 From 72ee6f87adcb7b7bdb71cc81c22858811ee1a069 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 09:23:43 +0200 Subject: x86/fpu: Simplify __save_fpu() __save_fpu() has this pattern: if (unlikely(system_state == SYSTEM_BOOTING)) xsave_state_booting(&fpu->state.xsave); else xsave_state(&fpu->state.xsave); ... but it does not actually get called during system bootup. So remove the complication and always call xsave_state(). To make sure this assumption is correct, add a WARN_ON() debug check to xsave_state(). Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h index a10e665..2f2ed32 100644 --- a/arch/x86/include/asm/fpu/xsave.h +++ b/arch/x86/include/asm/fpu/xsave.h @@ -133,6 +133,8 @@ static inline int xsave_state(struct xsave_struct *fx) u32 hmask = mask >> 32; int err = 0; + WARN_ON(system_state == SYSTEM_BOOTING); + /* * If xsaves is enabled, xsaves replaces xsaveopt because * it supports compact format and supervisor states in addition to diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a617aac..79a0b99 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -172,10 +172,7 @@ EXPORT_SYMBOL_GPL(irq_ts_restore); static void __save_fpu(struct fpu *fpu) { if (use_xsave()) { - if (unlikely(system_state == SYSTEM_BOOTING)) - xsave_state_booting(&fpu->state.xsave); - else - xsave_state(&fpu->state.xsave); + xsave_state(&fpu->state.xsave); } else { fpu_fxsave(fpu); } -- cgit v0.10.2 From 9f876d67663c8412a386321d94b17b68105b13ac Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 09:26:41 +0200 Subject: x86/fpu: Eliminate __save_fpu() The current implementation of __save_fpu(): if (use_xsave()) { xsave_state(&fpu->state.xsave); } else { fpu_fxsave(fpu); } Is actually a simplified version of copy_fpregs_to_fpstate(), if use_eager_fpu() is true. But all call sites of __save_fpu() call it only when use_eager_fpu() is true. So we can eliminate __save_fpu() altogether and use the standard copy_fpregs_to_fpstate() function. This cleans up the code by making it use fewer variants of FPU register saving. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 79a0b99..0040814 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -169,15 +169,6 @@ void irq_ts_restore(int TS_state) } EXPORT_SYMBOL_GPL(irq_ts_restore); -static void __save_fpu(struct fpu *fpu) -{ - if (use_xsave()) { - xsave_state(&fpu->state.xsave); - } else { - fpu_fxsave(fpu); - } -} - /* * Save the FPU state (initialize it if necessary): * @@ -190,7 +181,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { if (use_eager_fpu()) { - __save_fpu(fpu); + copy_fpregs_to_fpstate(fpu); } else { copy_fpregs_to_fpstate(fpu); fpregs_deactivate(fpu); @@ -235,7 +226,7 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) if (use_eager_fpu()) { memset(&dst_fpu->state.xsave, 0, xstate_size); - __save_fpu(dst_fpu); + copy_fpregs_to_fpstate(dst_fpu); } else { fpu__save(src_fpu); memcpy(&dst_fpu->state, &src_fpu->state, xstate_size); -- cgit v0.10.2 From fea435a2027a88407181c387f62daabf30bb5ea7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 09:38:21 +0200 Subject: x86/fpu: Simplify fpu__save() Factor out a common call. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0040814..a0b2221 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -180,12 +180,9 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { - if (use_eager_fpu()) { - copy_fpregs_to_fpstate(fpu); - } else { - copy_fpregs_to_fpstate(fpu); + copy_fpregs_to_fpstate(fpu); + if (!use_eager_fpu()) fpregs_deactivate(fpu); - } } preempt_enable(); } -- cgit v0.10.2 From 48c4717f30cc1f83d02f045c3b47a2885863bff2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 09:45:12 +0200 Subject: x86/fpu: Optimize fpu__save() So fpu__save() does this currently: copy_fpregs_to_fpstate(fpu); if (!use_eager_fpu()) fpregs_deactivate(fpu); ... which deactivates the FPU on lazy switching systems unconditionally. Both usecases of fpu__save() use this function to save the FPU state into a fpstate: fork()/clone() and math error signal handling. The unconditional disabling of FPU registers in the lazy switching case is probably a mistaken conversion of old FNSAVE code (that had to disable FPU registers). So speed up this code by only disabling FPU registers when absolutely necessary: when indicated by the copy_fpregs_to_fpstate() return code: if (!copy_fpregs_to_fpstate(fpu)) fpregs_deactivate(fpu); Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a0b2221..b1fbbb8 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -170,7 +170,7 @@ void irq_ts_restore(int TS_state) EXPORT_SYMBOL_GPL(irq_ts_restore); /* - * Save the FPU state (initialize it if necessary): + * Save the FPU state (mark it for reload if necessary): * * This only ever gets called for the current task. */ @@ -180,8 +180,7 @@ void fpu__save(struct fpu *fpu) preempt_disable(); if (fpu->fpregs_active) { - copy_fpregs_to_fpstate(fpu); - if (!use_eager_fpu()) + if (!copy_fpregs_to_fpstate(fpu)) fpregs_deactivate(fpu); } preempt_enable(); -- cgit v0.10.2 From 68271c6ae726d7ab51e39b7342c838761bf0a25c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 09:59:11 +0200 Subject: x86/fpu: Optimize fpu_copy() Optimize fpu_copy() a bit by expanding the ->fpstate_active == 1 portion of fpu__save() into it. ( The main purpose of this change is to enable another, larger optimization that will be done in the next patch. ) Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b1fbbb8..41ea25a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -224,7 +224,10 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) memset(&dst_fpu->state.xsave, 0, xstate_size); copy_fpregs_to_fpstate(dst_fpu); } else { - fpu__save(src_fpu); + preempt_disable(); + if (!copy_fpregs_to_fpstate(src_fpu)) + fpregs_deactivate(src_fpu); + preempt_enable(); memcpy(&dst_fpu->state, &src_fpu->state, xstate_size); } } -- cgit v0.10.2 From b16529004f5cc0debf8073d21b560a4677a03a2a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Apr 2015 10:08:39 +0200 Subject: x86/fpu: Optimize fpu_copy() some more on lazy switching systems The current fpu_copy() code on lazy switching CPUs always saves into the current fpstate and then copies it over into the child context: preempt_disable(); if (!copy_fpregs_to_fpstate(src_fpu)) fpregs_deactivate(src_fpu); preempt_enable(); memcpy(&dst_fpu->state, &src_fpu->state, xstate_size); That memcpy() can be avoided on all lazy switching setups except really old FNSAVE-only systems: change fpu_copy() to directly save into the child context, for both the lazy and the eager context switching case. Note that we still have to do a memcpy() back into the parent context in the FNSAVE case, but this won't be executed on the majority of x86 systems that got built in the last 10 years or so. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 41ea25a..edbb5d04 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -220,16 +220,35 @@ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) { WARN_ON(src_fpu != &current->thread.fpu); - if (use_eager_fpu()) { + /* + * Don't let 'init optimized' areas of the XSAVE area + * leak into the child task: + */ + if (use_eager_fpu()) memset(&dst_fpu->state.xsave, 0, xstate_size); - copy_fpregs_to_fpstate(dst_fpu); - } else { - preempt_disable(); - if (!copy_fpregs_to_fpstate(src_fpu)) - fpregs_deactivate(src_fpu); - preempt_enable(); - memcpy(&dst_fpu->state, &src_fpu->state, xstate_size); + + /* + * Save current FPU registers directly into the child + * FPU context, without any memory-to-memory copying. + * + * If the FPU context got destroyed in the process (FNSAVE + * done on old CPUs) then copy it back into the source + * context and mark the current task for lazy restore. + * + * We have to do all this with preemption disabled, + * mostly because of the FNSAVE case, because in that + * case we must not allow preemption in the window + * between the FNSAVE and us marking the context lazy. + * + * It shouldn't be an issue as even FNSAVE is plenty + * fast in terms of critical section length. 
+ */ + preempt_disable(); + if (!copy_fpregs_to_fpstate(dst_fpu)) { + memcpy(&src_fpu->state, &dst_fpu->state, xstate_size); + fpregs_deactivate(src_fpu); } + preempt_enable(); } int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) -- cgit v0.10.2 From 669ebabb79906302ba6e6922a683893788a134e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 08:41:33 +0200 Subject: x86/fpu: Rename fpu/xsave.h to fpu/xstate.h 'xsave' is an x86 instruction name to most people - but xsave.h is about a lot more than just the XSAVE instruction: it includes definitions and support, both internal and external, related to xstate and xfeatures support. As a first step in cleaning up the various xstate uses rename this header to 'fpu/xstate.h' to better reflect what this header file is about. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 004acd7..4648448 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 2f7ead8..0122cd9 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index 2c3360b..ca4f32e 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #define CAST5_PARALLEL_BLOCKS 16 diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index a2ec18a..21d0b84 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #define CAST6_PARALLEL_BLOCKS 8 diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index 206ec57..aa325fa 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index 4feb68c..f66ae85 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 03ffaf8c..6f3f765 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -66,7 +66,7 @@ #include #include #include -#include +#include #include #include #include "sha_mb_ctx.h" diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 71ab2b3..84db12f 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index dcbd8ea..eb65522 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ 
b/arch/x86/crypto/sha256_ssse3_glue.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index e8836e0..7891464 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 3b6c8ba..95434f5 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 28556c6..8ec785e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -17,7 +17,7 @@ #include #include -#include +#include #ifdef CONFIG_X86_64 # include diff --git a/arch/x86/include/asm/fpu/xsave.h b/arch/x86/include/asm/fpu/xsave.h deleted file mode 100644 index 2f2ed32..0000000 --- a/arch/x86/include/asm/fpu/xsave.h +++ /dev/null @@ -1,254 +0,0 @@ -#ifndef __ASM_X86_XSAVE_H -#define __ASM_X86_XSAVE_H - -#include -#include - -#define XSTATE_CPUID 0x0000000d - -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 -#define XSTATE_YMM 0x4 -#define XSTATE_BNDREGS 0x8 -#define XSTATE_BNDCSR 0x10 -#define XSTATE_OPMASK 0x20 -#define XSTATE_ZMM_Hi256 0x40 -#define XSTATE_Hi16_ZMM 0x80 - -/* The highest xstate bit above (of XSTATE_Hi16_ZMM): */ -#define XFEATURES_NR_MAX 8 - -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) -/* Bit 63 of XCR0 is reserved for future expansion */ -#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) - -#define FXSAVE_SIZE 512 - -#define XSAVE_HDR_SIZE 64 -#define XSAVE_HDR_OFFSET FXSAVE_SIZE - -#define XSAVE_YMM_SIZE 256 -#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) - -/* Supported features which support lazy state saving */ -#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ - | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) - -/* Supported features which require eager state saving */ -#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) - -/* All currently supported features */ -#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) - -#ifdef CONFIG_X86_64 -#define REX_PREFIX "0x48, " -#else -#define REX_PREFIX -#endif - -extern unsigned int xstate_size; -extern u64 xfeatures_mask; -extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern struct xsave_struct init_xstate_ctx; - -extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); - -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -#define xstate_fault ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. 
- */ -static inline int xsave_state_booting(struct xsave_struct *fx) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * Save processor xstate to xsave area. - */ -static inline int xsave_state(struct xsave_struct *fx) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state == SYSTEM_BOOTING); - - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [fx] "D" (fx), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore processor xstate from xsave area. - */ -static inline int xrstor_state(struct xsave_struct *fx, u64 mask) -{ - int err = 0; - u32 lmask = mask; - u32 hmask = mask >> 32; - - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. - */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore xstate context for new process during context switch. - */ -static inline int fpu_xrstor_checking(struct xsave_struct *fx) -{ - return xrstor_state(fx, -1); -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int xsave_user(struct xsave_struct __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. 
- */ - err = __clear_user(&buf->header, sizeof(buf->header)); - if (unlikely(err)) - return -EFAULT; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (buf), "a" (-1), "d" (-1), "0" (0) - : "memory"); - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) -{ - int err = 0; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) - : "memory"); /* memory required? */ - return err; -} - -void *get_xsave_addr(struct xsave_struct *xsave, int xstate); -void setup_xstate_comp(void); - -#endif diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h new file mode 100644 index 0000000..2f2ed32 --- /dev/null +++ b/arch/x86/include/asm/fpu/xstate.h @@ -0,0 +1,254 @@ +#ifndef __ASM_X86_XSAVE_H +#define __ASM_X86_XSAVE_H + +#include +#include + +#define XSTATE_CPUID 0x0000000d + +#define XSTATE_FP 0x1 +#define XSTATE_SSE 0x2 +#define XSTATE_YMM 0x4 +#define XSTATE_BNDREGS 0x8 +#define XSTATE_BNDCSR 0x10 +#define XSTATE_OPMASK 0x20 +#define XSTATE_ZMM_Hi256 0x40 +#define XSTATE_Hi16_ZMM 0x80 + +/* The highest xstate bit above (of XSTATE_Hi16_ZMM): */ +#define XFEATURES_NR_MAX 8 + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) +/* Bit 63 of XCR0 is reserved for future expansion */ +#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) + +#define FXSAVE_SIZE 512 + +#define XSAVE_HDR_SIZE 64 +#define XSAVE_HDR_OFFSET FXSAVE_SIZE + +#define XSAVE_YMM_SIZE 256 +#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) + +/* Supported features which support lazy state saving */ +#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ + | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* Supported features which require eager state saving */ +#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) + +/* All currently supported features */ +#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) + +#ifdef CONFIG_X86_64 +#define REX_PREFIX "0x48, " +#else +#define REX_PREFIX +#endif + +extern unsigned int xstate_size; +extern u64 xfeatures_mask; +extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; +extern struct xsave_struct init_xstate_ctx; + +extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); + +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +#define xstate_fault ".section .fixup,\"ax\"\n" \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + ".previous\n" \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (err) + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. 
+ */ +static inline int xsave_state_booting(struct xsave_struct *fx) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + return err; +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + xstate_fault + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + return err; +} + +/* + * Save processor xstate to xsave area. + */ +static inline int xsave_state(struct xsave_struct *fx) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state == SYSTEM_BOOTING); + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. + * + * If none of xsaves and xsaveopt is enabled, use xsave. + */ + alternative_input_2( + "1:"XSAVE, + XSAVEOPT, + X86_FEATURE_XSAVEOPT, + XSAVES, + X86_FEATURE_XSAVES, + [fx] "D" (fx), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Restore processor xstate from xsave area. + */ +static inline int xrstor_state(struct xsave_struct *fx, u64 mask) +{ + int err = 0; + u32 lmask = mask; + u32 hmask = mask >> 32; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + XRSTORS, + X86_FEATURE_XSAVES, + "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault + : "0" (0) + : "memory"); + + return err; +} + +/* + * Restore xstate context for new process during context switch. + */ +static inline int fpu_xrstor_checking(struct xsave_struct *fx) +{ + return xrstor_state(fx, -1); +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ +static inline int xsave_user(struct xsave_struct __user *buf) +{ + int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. 
+ */ + err = __clear_user(&buf->header, sizeof(buf->header)); + if (unlikely(err)) + return -EFAULT; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XSAVE"\n" + "2: " ASM_CLAC "\n" + xstate_fault + : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "memory"); + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) +{ + int err = 0; + struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XRSTOR"\n" + "2: " ASM_CLAC "\n" + xstate_fault + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "memory"); /* memory required? */ + return err; +} + +void *get_xsave_addr(struct xsave_struct *xsave, int xstate); +void setup_xstate_comp(void); + +#endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 0ce4c4f..2426e65 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "cpuid.h" #include "lapic.h" #include "mmu.h" -- cgit v0.10.2 From 62784854502895321d100b1004b486131b9dd286 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 08:46:23 +0200 Subject: x86/fpu: Rename fpu/xsave.c to fpu/xstate.c Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile index 2020a2b..6ae59bc 100644 --- a/arch/x86/kernel/fpu/Makefile +++ b/arch/x86/kernel/fpu/Makefile @@ -2,4 +2,4 @@ # Build rules for the FPU support code: # -obj-y += init.o bugs.o core.o xsave.o +obj-y += init.o bugs.o core.o xstate.o diff --git a/arch/x86/kernel/fpu/xsave.c b/arch/x86/kernel/fpu/xsave.c deleted file mode 100644 index f549e2a..0000000 --- a/arch/x86/kernel/fpu/xsave.c +++ /dev/null @@ -1,714 +0,0 @@ -/* - * xsave/xrstor support. - * - * Author: Suresh Siddha - */ -#include -#include -#include -#include -#include -#include -#include - -/* - * Mask of xstate features supported by the CPU and the kernel: - */ -u64 xfeatures_mask; - -/* - * Represents init state for the supported extended state. - */ -struct xsave_struct init_xstate_ctx; - -static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; -static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX]; -static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; - -/* The number of supported xfeatures in xfeatures_mask: */ -static unsigned int xfeatures_nr; - -/* - * When executing XSAVEOPT (optimized XSAVE), if a processor implementation - * detects that an FPU state component is still (or is again) in its - * initialized state, it may clear the corresponding bit in the header.xfeatures - * field, and can skip the writeout of registers to the corresponding memory layout. - * - * This means that when the bit is zero, the state component might still contain - * some previous - non-initialized register state. - * - * Before writing xstate information to user-space we sanitize those components, - * to always ensure that the memory layout of a feature will be in the init state - * if the corresponding header bit is zero. This is to ensure that user-space doesn't - * see some stale state in the memory layout during signal handling, debugging etc. 
- */ -void __sanitize_i387_state(struct task_struct *tsk) -{ - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state.fxsave; - int feature_bit; - u64 xfeatures; - - if (!fx) - return; - - xfeatures = tsk->thread.fpu.state.xsave.header.xfeatures; - - /* - * None of the feature bits are in init state. So nothing else - * to do for us, as the memory layout is up to date. - */ - if ((xfeatures & xfeatures_mask) == xfeatures_mask) - return; - - /* - * FP is in init state - */ - if (!(xfeatures & XSTATE_FP)) { - fx->cwd = 0x37f; - fx->swd = 0; - fx->twd = 0; - fx->fop = 0; - fx->rip = 0; - fx->rdp = 0; - memset(&fx->st_space[0], 0, 128); - } - - /* - * SSE is in init state - */ - if (!(xfeatures & XSTATE_SSE)) - memset(&fx->xmm_space[0], 0, 256); - - /* - * First two features are FPU and SSE, which above we handled - * in a special way already: - */ - feature_bit = 0x2; - xfeatures = (xfeatures_mask & ~xfeatures) >> 2; - - /* - * Update all the remaining memory layouts according to their - * standard xstate layout, if their header bit is in the init - * state: - */ - while (xfeatures) { - if (xfeatures & 0x1) { - int offset = xstate_offsets[feature_bit]; - int size = xstate_sizes[feature_bit]; - - memcpy((void *)fx + offset, - (void *)&init_xstate_ctx + offset, - size); - } - - xfeatures >>= 1; - feature_bit++; - } -} - -/* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. - */ -static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xstate_header); - unsigned int magic2; - - if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) - return -1; - - /* Check for the first magic field and other error scenarios. */ - if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || - fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || - fx_sw->xstate_size > fx_sw->extended_size) - return -1; - - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) - || magic2 != FP_XSTATE_MAGIC2) - return -1; - - return 0; -} - -/* - * Signal frame handlers. - */ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) -{ - if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; - struct user_i387_ia32_struct env; - struct _fpstate_ia32 __user *fp = buf; - - convert_from_fxsr(&env, tsk); - - if (__copy_to_user(buf, &env, sizeof(env)) || - __put_user(xsave->i387.swd, &fp->status) || - __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; - } else { - struct i387_fsave_struct __user *fp = buf; - u32 swd; - if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; - } - - return 0; -} - -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) -{ - struct xsave_struct __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; - u32 xfeatures; - int err; - - /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? 
&fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - - if (!use_xsave()) - return err; - - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - - /* - * Read the xfeatures which we copied (directly from the cpu or - * from the state in task struct) to the user buffers. - */ - err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xfeatures in the xsave header. - * - * xsave aware apps can change the xfeatures in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xfeatures |= XSTATE_FPSSE; - - err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); - - return err; -} - -static inline int save_user_xstate(struct xsave_struct __user *buf) -{ - int err; - - if (use_xsave()) - err = xsave_user(buf); - else if (use_fxsr()) - err = fxsave_user((struct i387_fxsave_struct __user *) buf); - else - err = fsave_user((struct i387_fsave_struct __user *) buf); - - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - return err; -} - -/* - * Save the fpu, extended register state to the user signal frame. - * - * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save - * state is copied. - * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. - * - * buf == buf_fx for 64-bit frames and 32-bit fsave frame. - * buf != buf_fx for 32-bit frames with fxstate. - * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. - * - * If this is a 32-bit frame with fxstate, put a fsave header before - * the aligned state at 'buf_fx'. - * - * For [f]xsave state, update the SW reserved fields in the [f]xsave frame - * indicating the absence/presence of the extended state to the user. - */ -int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - struct xsave_struct *xsave = &current->thread.fpu.state.xsave; - struct task_struct *tsk = current; - int ia32_fxstate = (buf != buf_fx); - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!access_ok(VERIFY_WRITE, buf, size)) - return -EACCES; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), NULL, - (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - - if (user_has_fpu()) { - /* Save the live register state to the user directly. */ - if (save_user_xstate(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - fpu_fxsave(&tsk->thread.fpu); - } else { - sanitize_i387_state(tsk); - if (__copy_to_user(buf_fx, xsave, xstate_size)) - return -1; - } - - /* Save the fsave header for the 32-bit frames. 
*/ - if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) - return -1; - - if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) - return -1; - - return 0; -} - -static inline void -sanitize_restored_xstate(struct task_struct *tsk, - struct user_i387_ia32_struct *ia32_env, - u64 xfeatures, int fx_only) -{ - struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; - struct xstate_header *header = &xsave->header; - - if (use_xsave()) { - /* These bits must be zero. */ - memset(header->reserved, 0, 48); - - /* - * Init the state that is not present in the memory - * layout and not enabled by the OS. - */ - if (fx_only) - header->xfeatures = XSTATE_FPSSE; - else - header->xfeatures &= (xfeatures_mask & xfeatures); - } - - if (use_fxsr()) { - /* - * mscsr reserved bits must be masked to zero for security - * reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - - convert_to_fxsr(tsk, ia32_env); - } -} - -/* - * Restore the extended state if present. Otherwise, restore the FP/SSE state. - */ -static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) -{ - if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; - xrstor_state(&init_xstate_ctx, init_bv); - return fxrstor_user(buf); - } else { - u64 init_bv = xfeatures_mask & ~xbv; - if (unlikely(init_bv)) - xrstor_state(&init_xstate_ctx, init_bv); - return xrestore_user(buf, xbv); - } - } else if (use_fxsr()) { - return fxrstor_user(buf); - } else - return frstor_user(buf); -} - -int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) -{ - int ia32_fxstate = (buf != buf_fx); - struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; - u64 xfeatures = 0; - int fx_only = 0; - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!buf) { - fpu_reset_state(fpu); - return 0; - } - - if (!access_ok(VERIFY_READ, buf, size)) - return -EACCES; - - fpu__activate_curr(fpu); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; - - if (use_xsave()) { - struct _fpx_sw_bytes fx_sw_user; - if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { - /* - * Couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - state_size = sizeof(struct i387_fxsave_struct); - fx_only = 1; - } else { - state_size = fx_sw_user.xstate_size; - xfeatures = fx_sw_user.xfeatures; - } - } - - if (ia32_fxstate) { - /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. - */ - struct fpu *fpu = &tsk->thread.fpu; - struct user_i387_ia32_struct env; - int err = 0; - - /* - * Drop the current fpu which clears fpu->fpstate_active. This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * fpu->fpstate_active is again set. 
- */ - drop_fpu(fpu); - - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(fpu); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); - } - - fpu->fpstate_active = 1; - if (use_eager_fpu()) { - preempt_disable(); - fpu__restore(); - preempt_enable(); - } - - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (restore_user_xstate(buf_fx, xfeatures, fx_only)) { - fpu_reset_state(fpu); - return -1; - } - } - - return 0; -} - -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. - */ -static void prepare_fx_sw_frame(void) -{ - int fsave_header_size = sizeof(struct i387_fsave_struct); - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = xfeatures_mask; - fx_sw_reserved.xstate_size = xstate_size; - - if (config_enabled(CONFIG_IA32_EMULATION)) { - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; - } -} - -/* - * Enable the extended processor state save/restore feature. - * Called once per CPU onlining. - */ -void fpu__init_cpu_xstate(void) -{ - if (!cpu_has_xsave || !xfeatures_mask) - return; - - cr4_set_bits(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); -} - -/* - * Record the offsets and sizes of different state managed by the xsave - * memory layout. - */ -static void __init setup_xstate_features(void) -{ - int eax, ebx, ecx, edx, leaf = 0x2; - - xfeatures_nr = fls64(xfeatures_mask); - - do { - cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - - if (eax == 0) - break; - - xstate_offsets[leaf] = ebx; - xstate_sizes[leaf] = eax; - - leaf++; - } while (1); -} - -static void print_xstate_feature(u64 xstate_mask, const char *desc) -{ - if (xfeatures_mask & xstate_mask) { - int xstate_feature = fls64(xstate_mask)-1; - - pr_info("x86/fpu: Supporting XSAVE feature %2d: '%s'\n", xstate_feature, desc); - } -} - -/* - * Print out all the supported xstate features: - */ -static void print_xstate_features(void) -{ - print_xstate_feature(XSTATE_FP, "x87 floating point registers"); - print_xstate_feature(XSTATE_SSE, "SSE registers"); - print_xstate_feature(XSTATE_YMM, "AVX registers"); - print_xstate_feature(XSTATE_BNDREGS, "MPX bounds registers"); - print_xstate_feature(XSTATE_BNDCSR, "MPX CSR"); - print_xstate_feature(XSTATE_OPMASK, "AVX-512 opmask"); - print_xstate_feature(XSTATE_ZMM_Hi256, "AVX-512 Hi256"); - print_xstate_feature(XSTATE_Hi16_ZMM, "AVX-512 ZMM_Hi256"); -} - -/* - * This function sets up offsets and sizes of all extended states in - * xsave area. This supports both standard format and compacted format - * of the xsave aread. - * - * Input: void - * Output: void - */ -void setup_xstate_comp(void) -{ - unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; - int i; - - /* - * The FP xstates and SSE xstates are legacy states. 
They are always - * in the fixed offsets in the xsave area in either compacted form - * or standard form. - */ - xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); - - if (!cpu_has_xsaves) { - for (i = 2; i < xfeatures_nr; i++) { - if (test_bit(i, (unsigned long *)&xfeatures_mask)) { - xstate_comp_offsets[i] = xstate_offsets[i]; - xstate_comp_sizes[i] = xstate_sizes[i]; - } - } - return; - } - - xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; - - for (i = 2; i < xfeatures_nr; i++) { - if (test_bit(i, (unsigned long *)&xfeatures_mask)) - xstate_comp_sizes[i] = xstate_sizes[i]; - else - xstate_comp_sizes[i] = 0; - - if (i > 2) - xstate_comp_offsets[i] = xstate_comp_offsets[i-1] - + xstate_comp_sizes[i-1]; - - } -} - -/* - * setup the xstate image representing the init state - */ -static void setup_init_fpu_buf(void) -{ - static int on_boot_cpu = 1; - - if (!on_boot_cpu) - return; - on_boot_cpu = 0; - - if (!cpu_has_xsave) - return; - - setup_xstate_features(); - print_xstate_features(); - - if (cpu_has_xsaves) { - init_xstate_ctx.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_xstate_ctx.header.xfeatures = xfeatures_mask; - } - - /* - * Init all the features state with header_bv being 0x0 - */ - xrstor_state_booting(&init_xstate_ctx, -1); - - /* - * Dump the init state again. This is to identify the init state - * of any feature which is not represented by all zero's. - */ - xsave_state_booting(&init_xstate_ctx); -} - -/* - * Calculate total size of enabled xstates in XCR0/xfeatures_mask. - */ -static void __init init_xstate_size(void) -{ - unsigned int eax, ebx, ecx, edx; - int i; - - if (!cpu_has_xsaves) { - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xstate_size = ebx; - return; - } - - xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; - for (i = 2; i < 64; i++) { - if (test_bit(i, (unsigned long *)&xfeatures_mask)) { - cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); - xstate_size += eax; - } - } -} - -/* - * Enable and initialize the xsave feature. - * Called once per system bootup. - * - * ( Not marked __init because of false positive section warnings. ) - */ -void fpu__init_system_xstate(void) -{ - unsigned int eax, ebx, ecx, edx; - static bool on_boot_cpu = 1; - - if (!on_boot_cpu) - return; - on_boot_cpu = 0; - - if (!cpu_has_xsave) { - pr_info("x86/fpu: Legacy x87 FPU detected.\n"); - return; - } - - if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, "x86/fpu: XSTATE_CPUID missing!\n"); - return; - } - - cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xfeatures_mask = eax + ((u64)edx << 32); - - if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { - pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); - BUG(); - } - - /* - * Support only the state known to OS. - */ - xfeatures_mask = xfeatures_mask & XCNTXT_MASK; - - /* Enable xstate instructions to be able to continue with initialization: */ - fpu__init_cpu_xstate(); - - /* - * Recompute the context size for enabled features - */ - init_xstate_size(); - - update_regset_xstate_info(xstate_size, xfeatures_mask); - prepare_fx_sw_frame(); - setup_init_fpu_buf(); - - pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", - xfeatures_mask, - xstate_size, - cpu_has_xsaves ? 
"compacted" : "standard"); -} - -/* - * Restore minimal FPU state after suspend: - */ -void fpu__resume_cpu(void) -{ - /* - * Restore XCR0 on xsave capable CPUs: - */ - if (cpu_has_xsave) - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); -} - -/* - * Given the xsave area and a state inside, this function returns the - * address of the state. - * - * This is the API that is called to get xstate address in either - * standard format or compacted format of xsave area. - * - * Inputs: - * xsave: base address of the xsave area; - * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, - * etc.) - * Output: - * address of the state in the xsave area. - */ -void *get_xsave_addr(struct xsave_struct *xsave, int xstate) -{ - int feature = fls64(xstate) - 1; - if (!test_bit(feature, (unsigned long *)&xfeatures_mask)) - return NULL; - - return (void *)xsave + xstate_comp_offsets[feature]; -} -EXPORT_SYMBOL_GPL(get_xsave_addr); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c new file mode 100644 index 0000000..f549e2a --- /dev/null +++ b/arch/x86/kernel/fpu/xstate.c @@ -0,0 +1,714 @@ +/* + * xsave/xrstor support. + * + * Author: Suresh Siddha + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * Mask of xstate features supported by the CPU and the kernel: + */ +u64 xfeatures_mask; + +/* + * Represents init state for the supported extended state. + */ +struct xsave_struct init_xstate_ctx; + +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; +static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX]; +static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; + +/* The number of supported xfeatures in xfeatures_mask: */ +static unsigned int xfeatures_nr; + +/* + * When executing XSAVEOPT (optimized XSAVE), if a processor implementation + * detects that an FPU state component is still (or is again) in its + * initialized state, it may clear the corresponding bit in the header.xfeatures + * field, and can skip the writeout of registers to the corresponding memory layout. + * + * This means that when the bit is zero, the state component might still contain + * some previous - non-initialized register state. + * + * Before writing xstate information to user-space we sanitize those components, + * to always ensure that the memory layout of a feature will be in the init state + * if the corresponding header bit is zero. This is to ensure that user-space doesn't + * see some stale state in the memory layout during signal handling, debugging etc. + */ +void __sanitize_i387_state(struct task_struct *tsk) +{ + struct i387_fxsave_struct *fx = &tsk->thread.fpu.state.fxsave; + int feature_bit; + u64 xfeatures; + + if (!fx) + return; + + xfeatures = tsk->thread.fpu.state.xsave.header.xfeatures; + + /* + * None of the feature bits are in init state. So nothing else + * to do for us, as the memory layout is up to date. 
+ */ + if ((xfeatures & xfeatures_mask) == xfeatures_mask) + return; + + /* + * FP is in init state + */ + if (!(xfeatures & XSTATE_FP)) { + fx->cwd = 0x37f; + fx->swd = 0; + fx->twd = 0; + fx->fop = 0; + fx->rip = 0; + fx->rdp = 0; + memset(&fx->st_space[0], 0, 128); + } + + /* + * SSE is in init state + */ + if (!(xfeatures & XSTATE_SSE)) + memset(&fx->xmm_space[0], 0, 256); + + /* + * First two features are FPU and SSE, which above we handled + * in a special way already: + */ + feature_bit = 0x2; + xfeatures = (xfeatures_mask & ~xfeatures) >> 2; + + /* + * Update all the remaining memory layouts according to their + * standard xstate layout, if their header bit is in the init + * state: + */ + while (xfeatures) { + if (xfeatures & 0x1) { + int offset = xstate_offsets[feature_bit]; + int size = xstate_sizes[feature_bit]; + + memcpy((void *)fx + offset, + (void *)&init_xstate_ctx + offset, + size); + } + + xfeatures >>= 1; + feature_bit++; + } +} + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xstate_header); + unsigned int magic2; + + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; + + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; + + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + +/* + * Signal frame handlers. + */ +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +{ + if (use_fxsr()) { + struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; + + convert_from_fxsr(&env, tsk); + + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; + } else { + struct i387_fsave_struct __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) + return -1; + } + + return 0; +} + +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xsave_struct __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xfeatures; + int err; + + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); + + if (!use_xsave()) + return err; + + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + + /* + * Read the xfeatures which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. 
This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xfeatures in the xsave header. + * + * xsave aware apps can change the xfeatures in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xfeatures |= XSTATE_FPSSE; + + err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); + + return err; +} + +static inline int save_user_xstate(struct xsave_struct __user *buf) +{ + int err; + + if (use_xsave()) + err = xsave_user(buf); + else if (use_fxsr()) + err = fxsave_user((struct i387_fxsave_struct __user *) buf); + else + err = fsave_user((struct i387_fsave_struct __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; +} + +/* + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. + */ +int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) +{ + struct xsave_struct *xsave = ¤t->thread.fpu.state.xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; + + if (user_has_fpu()) { + /* Save the live register state to the user directly. */ + if (save_user_xstate(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + fpu_fxsave(&tsk->thread.fpu); + } else { + sanitize_i387_state(tsk); + if (__copy_to_user(buf_fx, xsave, xstate_size)) + return -1; + } + + /* Save the fsave header for the 32-bit frames. */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; + + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + return 0; +} + +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xfeatures, int fx_only) +{ + struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; + struct xstate_header *header = &xsave->header; + + if (use_xsave()) { + /* These bits must be zero. */ + memset(header->reserved, 0, 48); + + /* + * Init the state that is not present in the memory + * layout and not enabled by the OS. + */ + if (fx_only) + header->xfeatures = XSTATE_FPSSE; + else + header->xfeatures &= (xfeatures_mask & xfeatures); + } + + if (use_fxsr()) { + /* + * mscsr reserved bits must be masked to zero for security + * reasons. 
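+		 * (Concretely: if reserved MXCSR bits were left set, the
+		 *  next FXRSTOR/XRSTOR of this buffer would raise #GP.)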
+ */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } +} + +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE state. + */ +static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) +{ + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; + xrstor_state(&init_xstate_ctx, init_bv); + return fxrstor_user(buf); + } else { + u64 init_bv = xfeatures_mask & ~xbv; + if (unlikely(init_bv)) + xrstor_state(&init_xstate_ctx, init_bv); + return xrestore_user(buf, xbv); + } + } else if (use_fxsr()) { + return fxrstor_user(buf); + } else + return frstor_user(buf); +} + +int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int state_size = xstate_size; + u64 xfeatures = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!buf) { + fpu_reset_state(fpu); + return 0; + } + + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + fpu__activate_curr(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; + + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct i387_fxsave_struct); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xfeatures = fx_sw_user.xfeatures; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct fpu *fpu = &tsk->thread.fpu; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears fpu->fpstate_active. This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * fpu->fpstate_active is again set. + */ + drop_fpu(fpu); + + if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + fpstate_init(fpu); + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + } + + fpu->fpstate_active = 1; + if (use_eager_fpu()) { + preempt_disable(); + fpu__restore(); + preempt_enable(); + } + + return err; + } else { + /* + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). + */ + user_fpu_begin(); + if (restore_user_xstate(buf_fx, xfeatures, fx_only)) { + fpu_reset_state(fpu); + return -1; + } + } + + return 0; +} + +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. 
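+ *
+ * (A rough sketch of the resulting 'extended_size' layout - exact
+ *  sizes depend on the enabled xfeatures:
+ *
+ *	[ fsave header - 32-bit frames only  ]
+ *	[ [f]xsave image - xstate_size bytes ]
+ *	[ FP_XSTATE_MAGIC2                   ]
+ *  )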
+ */ +static void prepare_fx_sw_frame(void) +{ + int fsave_header_size = sizeof(struct i387_fsave_struct); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = size; + fx_sw_reserved.xfeatures = xfeatures_mask; + fx_sw_reserved.xstate_size = xstate_size; + + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} + +/* + * Enable the extended processor state save/restore feature. + * Called once per CPU onlining. + */ +void fpu__init_cpu_xstate(void) +{ + if (!cpu_has_xsave || !xfeatures_mask) + return; + + cr4_set_bits(X86_CR4_OSXSAVE); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); +} + +/* + * Record the offsets and sizes of different state managed by the xsave + * memory layout. + */ +static void __init setup_xstate_features(void) +{ + int eax, ebx, ecx, edx, leaf = 0x2; + + xfeatures_nr = fls64(xfeatures_mask); + + do { + cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); + + if (eax == 0) + break; + + xstate_offsets[leaf] = ebx; + xstate_sizes[leaf] = eax; + + leaf++; + } while (1); +} + +static void print_xstate_feature(u64 xstate_mask, const char *desc) +{ + if (xfeatures_mask & xstate_mask) { + int xstate_feature = fls64(xstate_mask)-1; + + pr_info("x86/fpu: Supporting XSAVE feature %2d: '%s'\n", xstate_feature, desc); + } +} + +/* + * Print out all the supported xstate features: + */ +static void print_xstate_features(void) +{ + print_xstate_feature(XSTATE_FP, "x87 floating point registers"); + print_xstate_feature(XSTATE_SSE, "SSE registers"); + print_xstate_feature(XSTATE_YMM, "AVX registers"); + print_xstate_feature(XSTATE_BNDREGS, "MPX bounds registers"); + print_xstate_feature(XSTATE_BNDCSR, "MPX CSR"); + print_xstate_feature(XSTATE_OPMASK, "AVX-512 opmask"); + print_xstate_feature(XSTATE_ZMM_Hi256, "AVX-512 Hi256"); + print_xstate_feature(XSTATE_Hi16_ZMM, "AVX-512 ZMM_Hi256"); +} + +/* + * This function sets up offsets and sizes of all extended states in + * xsave area. This supports both standard format and compacted format + * of the xsave aread. + * + * Input: void + * Output: void + */ +void setup_xstate_comp(void) +{ + unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; + int i; + + /* + * The FP xstates and SSE xstates are legacy states. They are always + * in the fixed offsets in the xsave area in either compacted form + * or standard form. 
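+	 *
+	 * (Both live inside the legacy 512-byte FXSAVE image: the x87
+	 *  state at offset 0, the SSE state at the xmm_space offset -
+	 *  which is exactly what the two assignments below encode.)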
+ */ + xstate_comp_offsets[0] = 0; + xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); + + if (!cpu_has_xsaves) { + for (i = 2; i < xfeatures_nr; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + xstate_comp_offsets[i] = xstate_offsets[i]; + xstate_comp_sizes[i] = xstate_sizes[i]; + } + } + return; + } + + xstate_comp_offsets[2] = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = 2; i < xfeatures_nr; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) + xstate_comp_sizes[i] = xstate_sizes[i]; + else + xstate_comp_sizes[i] = 0; + + if (i > 2) + xstate_comp_offsets[i] = xstate_comp_offsets[i-1] + + xstate_comp_sizes[i-1]; + + } +} + +/* + * setup the xstate image representing the init state + */ +static void setup_init_fpu_buf(void) +{ + static int on_boot_cpu = 1; + + if (!on_boot_cpu) + return; + on_boot_cpu = 0; + + if (!cpu_has_xsave) + return; + + setup_xstate_features(); + print_xstate_features(); + + if (cpu_has_xsaves) { + init_xstate_ctx.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_xstate_ctx.header.xfeatures = xfeatures_mask; + } + + /* + * Init all the features state with header_bv being 0x0 + */ + xrstor_state_booting(&init_xstate_ctx, -1); + + /* + * Dump the init state again. This is to identify the init state + * of any feature which is not represented by all zero's. + */ + xsave_state_booting(&init_xstate_ctx); +} + +/* + * Calculate total size of enabled xstates in XCR0/xfeatures_mask. + */ +static void __init init_xstate_size(void) +{ + unsigned int eax, ebx, ecx, edx; + int i; + + if (!cpu_has_xsaves) { + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xstate_size = ebx; + return; + } + + xstate_size = FXSAVE_SIZE + XSAVE_HDR_SIZE; + for (i = 2; i < 64; i++) { + if (test_bit(i, (unsigned long *)&xfeatures_mask)) { + cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx); + xstate_size += eax; + } + } +} + +/* + * Enable and initialize the xsave feature. + * Called once per system bootup. + * + * ( Not marked __init because of false positive section warnings. ) + */ +void fpu__init_system_xstate(void) +{ + unsigned int eax, ebx, ecx, edx; + static bool on_boot_cpu = 1; + + if (!on_boot_cpu) + return; + on_boot_cpu = 0; + + if (!cpu_has_xsave) { + pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + return; + } + + if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { + WARN(1, "x86/fpu: XSTATE_CPUID missing!\n"); + return; + } + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + xfeatures_mask = eax + ((u64)edx << 32); + + if ((xfeatures_mask & XSTATE_FPSSE) != XSTATE_FPSSE) { + pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); + BUG(); + } + + /* + * Support only the state known to OS. + */ + xfeatures_mask = xfeatures_mask & XCNTXT_MASK; + + /* Enable xstate instructions to be able to continue with initialization: */ + fpu__init_cpu_xstate(); + + /* + * Recompute the context size for enabled features + */ + init_xstate_size(); + + update_regset_xstate_info(xstate_size, xfeatures_mask); + prepare_fx_sw_frame(); + setup_init_fpu_buf(); + + pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", + xfeatures_mask, + xstate_size, + cpu_has_xsaves ? 
"compacted" : "standard"); +} + +/* + * Restore minimal FPU state after suspend: + */ +void fpu__resume_cpu(void) +{ + /* + * Restore XCR0 on xsave capable CPUs: + */ + if (cpu_has_xsave) + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); +} + +/* + * Given the xsave area and a state inside, this function returns the + * address of the state. + * + * This is the API that is called to get xstate address in either + * standard format or compacted format of xsave area. + * + * Inputs: + * xsave: base address of the xsave area; + * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, + * etc.) + * Output: + * address of the state in the xsave area. + */ +void *get_xsave_addr(struct xsave_struct *xsave, int xstate) +{ + int feature = fls64(xstate) - 1; + if (!test_bit(feature, (unsigned long *)&xfeatures_mask)) + return NULL; + + return (void *)xsave + xstate_comp_offsets[feature]; +} +EXPORT_SYMBOL_GPL(get_xsave_addr); -- cgit v0.10.2 From 5b07343034ec9ebb0d443743c7381b25875d0067 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 08:51:17 +0200 Subject: x86/fpu: Introduce cpu_has_xfeatures(xfeatures_mask, feature_name) A lot of FPU using driver code is querying complex CPU features to be able to figure out whether a given set of xstate features is supported by the CPU or not. Introduce a simplified API function that can be used on any CPU type to get this information. Also add an error string return pointer, so that the driver can print a meaningful error message with a standardized feature name. Also mark xfeatures_mask as __read_only. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 62035cc..1429a7c 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -36,4 +36,13 @@ extern bool irq_fpu_usable(void); extern int irq_ts_save(void); extern void irq_ts_restore(int TS_state); +/* + * Query the presence of one or more xfeatures. Works on any legacy CPU as well. + * + * If 'feature_name' is set then put a human-readable description of + * the feature there as well - this can be used to print error (or success) + * messages. + */ +extern int cpu_has_xfeatures(u64 xfeatures_mask, const char **feature_name); + #endif /* _ASM_X86_FPU_API_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index f549e2a..2e52f01 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -11,10 +11,23 @@ #include #include +static const char *xfeature_names[] = +{ + "x87 floating point registers" , + "SSE registers" , + "AVX registers" , + "MPX bounds registers" , + "MPX CSR" , + "AVX-512 opmask" , + "AVX-512 Hi256" , + "AVX-512 ZMM_Hi256" , + "unknown xstate feature" , +}; + /* * Mask of xstate features supported by the CPU and the kernel: */ -u64 xfeatures_mask; +u64 xfeatures_mask __read_mostly; /* * Represents init state for the supported extended state. @@ -29,6 +42,44 @@ static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; static unsigned int xfeatures_nr; /* + * Return whether the system supports a given xfeature. 
+ * + * Also return the name of the (most advanced) feature that the caller requested: + */ +int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) +{ + u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask; + + if (unlikely(feature_name)) { + long xfeature_idx, max_idx; + u64 xfeatures_print; + /* + * So we use FLS here to be able to print the most advanced + * feature that was requested but is missing. So if a driver + * asks about "XSTATE_SSE | XSTATE_YMM" we'll print the + * missing AVX feature - this is the most informative message + * to users: + */ + if (xfeatures_missing) + xfeatures_print = xfeatures_missing; + else + xfeatures_print = xfeatures_needed; + + xfeature_idx = fls64(xfeatures_print)-1; + max_idx = ARRAY_SIZE(xfeature_names)-1; + xfeature_idx = min(xfeature_idx, max_idx); + + *feature_name = xfeature_names[xfeature_idx]; + } + + if (xfeatures_missing) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(cpu_has_xfeatures); + +/* * When executing XSAVEOPT (optimized XSAVE), if a processor implementation * detects that an FPU state component is still (or is again) in its * initialized state, it may clear the corresponding bit in the header.xfeatures -- cgit v0.10.2 From 33588b52228f6e94e970a05ead10f524f363ee44 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 09:17:26 +0200 Subject: x86/fpu: Simplify print_xstate_features() We do a boot time printout of xfeatures in print_xstate_features(), simplify this code to make use of the recently introduced cpu_has_xfeature() method. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 2e52f01..0f84922 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -545,13 +545,12 @@ static void __init setup_xstate_features(void) } while (1); } -static void print_xstate_feature(u64 xstate_mask, const char *desc) +static void print_xstate_feature(u64 xstate_mask) { - if (xfeatures_mask & xstate_mask) { - int xstate_feature = fls64(xstate_mask)-1; + const char *feature_name; - pr_info("x86/fpu: Supporting XSAVE feature %2d: '%s'\n", xstate_feature, desc); - } + if (cpu_has_xfeatures(xstate_mask, &feature_name)) + pr_info("x86/fpu: Supporting XSAVE feature 0x%02Lx: '%s'\n", xstate_mask, feature_name); } /* @@ -559,14 +558,14 @@ static void print_xstate_feature(u64 xstate_mask, const char *desc) */ static void print_xstate_features(void) { - print_xstate_feature(XSTATE_FP, "x87 floating point registers"); - print_xstate_feature(XSTATE_SSE, "SSE registers"); - print_xstate_feature(XSTATE_YMM, "AVX registers"); - print_xstate_feature(XSTATE_BNDREGS, "MPX bounds registers"); - print_xstate_feature(XSTATE_BNDCSR, "MPX CSR"); - print_xstate_feature(XSTATE_OPMASK, "AVX-512 opmask"); - print_xstate_feature(XSTATE_ZMM_Hi256, "AVX-512 Hi256"); - print_xstate_feature(XSTATE_Hi16_ZMM, "AVX-512 ZMM_Hi256"); + print_xstate_feature(XSTATE_FP); + print_xstate_feature(XSTATE_SSE); + print_xstate_feature(XSTATE_YMM); + print_xstate_feature(XSTATE_BNDREGS); + print_xstate_feature(XSTATE_BNDCSR); + print_xstate_feature(XSTATE_OPMASK); + print_xstate_feature(XSTATE_ZMM_Hi256); + print_xstate_feature(XSTATE_Hi16_ZMM); } /* -- cgit v0.10.2 From 677b98bdd5b93d89ad58b8a4679db086dfbaa854 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 09:40:26 +0200 Subject: x86/fpu: Enumerate 
xfeature bits Transform the xstate masks into an enumerated list of xfeature bits. This removes the hard coding of XFEATURES_NR_MAX. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 2f2ed32..9b2869d 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -4,25 +4,39 @@ #include #include -#define XSTATE_CPUID 0x0000000d - -#define XSTATE_FP 0x1 -#define XSTATE_SSE 0x2 -#define XSTATE_YMM 0x4 -#define XSTATE_BNDREGS 0x8 -#define XSTATE_BNDCSR 0x10 -#define XSTATE_OPMASK 0x20 -#define XSTATE_ZMM_Hi256 0x40 -#define XSTATE_Hi16_ZMM 0x80 +/* + * List of XSAVE features Linux knows about: + */ +enum xfeature_bit { + XSTATE_BIT_FP, + XSTATE_BIT_SSE, + XSTATE_BIT_YMM, + XSTATE_BIT_BNDREGS, + XSTATE_BIT_BNDCSR, + XSTATE_BIT_OPMASK, + XSTATE_BIT_ZMM_Hi256, + XSTATE_BIT_Hi16_ZMM, + + XFEATURES_NR_MAX, +}; + +#define XSTATE_FP (1 << XSTATE_BIT_FP) +#define XSTATE_SSE (1 << XSTATE_BIT_SSE) +#define XSTATE_YMM (1 << XSTATE_BIT_YMM) +#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) +#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) +#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) +#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) +#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) -/* The highest xstate bit above (of XSTATE_Hi16_ZMM): */ -#define XFEATURES_NR_MAX 8 #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) #define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) +#define XSTATE_CPUID 0x0000000d + #define FXSAVE_SIZE 512 #define XSAVE_HDR_SIZE 64 -- cgit v0.10.2 From 91969d690f594f22c15055daf89cecd33584d65e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 09:46:04 +0200 Subject: x86/fpu: Move xfeature type enumeration to fpu/types.h So xsave.h is an internal header that FPU using drivers commonly include, to get access to the xstate feature names, amongst other things. Move these type definitions to fpu/fpu.h to allow simplification of FPU using driver code. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 3a15ac6..006ec29 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -79,6 +79,34 @@ struct i387_soft_struct { }; /* + * List of XSAVE features Linux knows about: + */ +enum xfeature_bit { + XSTATE_BIT_FP, + XSTATE_BIT_SSE, + XSTATE_BIT_YMM, + XSTATE_BIT_BNDREGS, + XSTATE_BIT_BNDCSR, + XSTATE_BIT_OPMASK, + XSTATE_BIT_ZMM_Hi256, + XSTATE_BIT_Hi16_ZMM, + + XFEATURES_NR_MAX, +}; + +#define XSTATE_FP (1 << XSTATE_BIT_FP) +#define XSTATE_SSE (1 << XSTATE_BIT_SSE) +#define XSTATE_YMM (1 << XSTATE_BIT_YMM) +#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) +#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) +#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) +#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) +#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) + +#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) +#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) + +/* * There are 16x 256-bit AVX registers named YMM0-YMM15. 
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) * and are stored in 'struct i387_fxsave_struct::xmm_space[]'. diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 9b2869d..31a002a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -4,34 +4,6 @@ #include #include -/* - * List of XSAVE features Linux knows about: - */ -enum xfeature_bit { - XSTATE_BIT_FP, - XSTATE_BIT_SSE, - XSTATE_BIT_YMM, - XSTATE_BIT_BNDREGS, - XSTATE_BIT_BNDCSR, - XSTATE_BIT_OPMASK, - XSTATE_BIT_ZMM_Hi256, - XSTATE_BIT_Hi16_ZMM, - - XFEATURES_NR_MAX, -}; - -#define XSTATE_FP (1 << XSTATE_BIT_FP) -#define XSTATE_SSE (1 << XSTATE_BIT_SSE) -#define XSTATE_YMM (1 << XSTATE_BIT_YMM) -#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS) -#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR) -#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK) -#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256) -#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM) - - -#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) -#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) -- cgit v0.10.2 From ce4f5f9b65ae3d630e375b5e406feca31f682886 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 09:59:44 +0200 Subject: x86/fpu, crypto x86/camellia_aesni_avx: Simplify the camellia_aesni_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit: text data bss dec hex filename 2128 2896 0 5024 13a0 camellia_aesni_avx_glue.o.before 2067 2896 0 4963 1363 camellia_aesni_avx_glue.o.after - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c index 0122cd9..80a0e43 100644 --- a/arch/x86/crypto/camellia_aesni_avx_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx_glue.c @@ -19,8 +19,7 @@ #include #include #include -#include -#include +#include #include #include @@ -553,16 +552,10 @@ static struct crypto_alg cmll_algs[10] = { { static int __init camellia_aesni_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { - pr_info("AVX or AES-NI instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From 70d51eb65ddf53a00e6801c2ee541a7219e5685a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/sha256_ssse3: Simplify the sha256_ssse3_mod_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index eb65522..f8097fc 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -38,8 +38,6 @@ #include #include #include -#include -#include #include asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, @@ -132,15 +130,9 @@ static struct shash_alg algs[] = { { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } -- cgit v0.10.2 From 7bc371faa9cd661589d13575cd48aa168e04e208 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/camellia_aesni_avx2: Simplify the camellia_aesni_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. 
- Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 4648448..76ea7df 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -19,8 +19,7 @@ #include #include #include -#include -#include +#include #include #include @@ -561,16 +560,10 @@ static struct crypto_alg cmll_algs[10] = { { static int __init camellia_aesni_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { - pr_info("AVX2 or AES-NI instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX2 detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From 4eecd2616d6a6e91b57659281ca6bf243264f8db Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/twofish_avx: Simplify the twofish_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c index 95434f5..c2bd0ce 100644 --- a/arch/x86/crypto/twofish_avx_glue.c +++ b/arch/x86/crypto/twofish_avx_glue.c @@ -37,8 +37,6 @@ #include #include #include -#include -#include #include #include #include @@ -558,16 +556,10 @@ static struct crypto_alg twofish_algs[10] = { { static int __init twofish_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - printk(KERN_INFO "AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - printk(KERN_INFO "AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From c1c23f7e5e1ef5e72c21f0615fbed560be5b8fa9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/serpent_avx: Simplify the serpent_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. 
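Because the first argument is a bitmask, compound requirements can be
expressed in a single call. A minimal sketch with a hypothetical caller,
using the XSTATE_AVX512 composite mask (error handling mirrors the
converted drivers):

	const char *feature_name;

	/*
	 * All three AVX-512 component states must be enabled in XCR0;
	 * on failure, feature_name points at the most advanced missing
	 * component:
	 */
	if (!cpu_has_xfeatures(XSTATE_AVX512, &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}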
This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c index f66ae85..da7dafc 100644 --- a/arch/x86/crypto/serpent_avx_glue.c +++ b/arch/x86/crypto/serpent_avx_glue.c @@ -36,8 +36,7 @@ #include #include #include -#include -#include +#include #include #include @@ -596,16 +595,10 @@ static struct crypto_alg serpent_algs[10] = { { static int __init serpent_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - printk(KERN_INFO "AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - printk(KERN_INFO "AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From d5d34d98d2df01766854c7eb7044943b596d824f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/cast5_avx: Simplify the cast5_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index ca4f32e..be00aa4 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -31,8 +31,7 @@ #include #include #include -#include -#include +#include #include #define CAST5_PARALLEL_BLOCKS 16 @@ -468,16 +467,10 @@ static struct crypto_alg cast5_algs[6] = { { static int __init cast5_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - pr_info("AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From c93b8a3963242e034a4d1ec87090c62bb25a575c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/sha512_ssse3: Simplify the sha512_ssse3_mod_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 7891464..2edad7b 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -36,8 +36,6 @@ #include #include #include -#include -#include #include @@ -131,15 +129,9 @@ static struct shash_alg algs[] = { { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } -- cgit v0.10.2 From 1debf7db2b9e156ecd69830244285b854e85a71c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/cast6_avx: Simplify the cast6_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c index 21d0b84..5dbba72 100644 --- a/arch/x86/crypto/cast6_avx_glue.c +++ b/arch/x86/crypto/cast6_avx_glue.c @@ -36,8 +36,7 @@ #include #include #include -#include -#include +#include #include #define CAST6_PARALLEL_BLOCKS 8 @@ -590,16 +589,10 @@ static struct crypto_alg cast6_algs[10] = { { static int __init cast6_init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx || !cpu_has_osxsave) { - pr_info("AVX instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From d1e509660c690597edb7e15efd22a5e78b8e2fd5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/sha1_ssse3: Simplify the sha1_ssse3_mod_init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 84db12f..7c48e8b 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -30,8 +30,6 @@ #include #include #include -#include -#include asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, @@ -123,15 +121,9 @@ static struct shash_alg alg = { #ifdef CONFIG_AS_AVX static bool __init avx_usable(void) { - u64 xcr0; - - if (!cpu_has_avx || !cpu_has_osxsave) - return false; - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); - + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { + if (cpu_has_avx) + pr_info("AVX detected but unusable.\n"); return false; } -- cgit v0.10.2 From 534ff06e39292b66b46d6318191b552b5bb2d1e7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:11:24 +0200 Subject: x86/fpu, crypto x86/serpent_avx2: Simplify the init() xfeature checks Use the new 'cpu_has_xfeatures()' function to query AVX CPU support. This has the following advantages to the driver: - Decouples the driver from FPU internals: it's now only using . - Removes detection complexity from the driver, no more raw XGETBV instruction - Shrinks the code a bit. - Standardizes feature name error message printouts across drivers There are also advantages to the x86 FPU code: once all drivers are decoupled from internals we can move them out of common headers and we'll also be able to remove xcr.h. 
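For background: the open-coded test being removed had to read XCR0,
because the AVX CPUID bit alone only proves the CPU has the registers -
it is XCR0, programmed by the kernel, that says whether the OS actually
context-switches the SSE/YMM state. A condensed sketch of that removed
pattern, as each driver used to carry it:

	u64 xcr0;

	if (!cpu_has_avx2 || !cpu_has_osxsave)
		return -ENODEV;

	/* Is the YMM state enabled in XCR0, i.e. OS-managed? */
	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM))
		return -ENODEV;

All of that is now centralized behind cpu_has_xfeatures().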
Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index aa325fa..f226ad4 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -20,8 +20,7 @@ #include #include #include -#include -#include +#include #include #include @@ -537,16 +536,10 @@ static struct crypto_alg srp_algs[10] = { { static int __init init(void) { - u64 xcr0; + const char *feature_name; - if (!cpu_has_avx2 || !cpu_has_osxsave) { - pr_info("AVX2 instructions are not detected.\n"); - return -ENODEV; - } - - xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); - if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) { - pr_info("AVX detected but unusable.\n"); + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; } -- cgit v0.10.2 From 57dd083e0cd0bb1ce447941b6438bf0a8372fc19 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:59:00 +0200 Subject: x86/fpu, crypto x86/sha1_mb: Remove FPU internal headers from sha1_mb.c This file only uses the public FPU APIs, so remove the xcr.h, fpu/xstate.h and fpu/internal.h headers and add the fpu/api.h include. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/sha-mb/sha1_mb.c b/arch/x86/crypto/sha-mb/sha1_mb.c index 6f3f765..f53ed1d 100644 --- a/arch/x86/crypto/sha-mb/sha1_mb.c +++ b/arch/x86/crypto/sha-mb/sha1_mb.c @@ -65,10 +65,8 @@ #include #include #include -#include -#include #include -#include +#include #include "sha_mb_ctx.h" #define FLUSH_INTERVAL 1000 /* in usec */ -- cgit v0.10.2 From befc61ad3c097bb6ace3da0c73ad56272ccee02d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 10:56:54 +0200 Subject: x86/fpu: Move asm/xcr.h to asm/fpu/internal.h Now that all FPU internals using drivers are converted to public APIs, move xcr.h's definitions into fpu/internal.h and remove xcr.h. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 8ec785e..161b51b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -431,6 +431,31 @@ static inline void fpu_reset_state(struct fpu *fpu) } /* + * Definitions for the eXtended Control Register instructions + */ + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + +/* * FPU state switching for scheduling. 
* * This is a two-stage process: diff --git a/arch/x86/include/asm/xcr.h b/arch/x86/include/asm/xcr.h deleted file mode 100644 index f2cba4e7..0000000 --- a/arch/x86/include/asm/xcr.h +++ /dev/null @@ -1,49 +0,0 @@ -/* -*- linux-c -*- ------------------------------------------------------- * - * - * Copyright 2008 rPath, Inc. - All Rights Reserved - * - * This file is part of the Linux kernel, and is made available under - * the terms of the GNU General Public License version 2 or (at your - * option) any later version; incorporated herein by reference. - * - * ----------------------------------------------------------------------- */ - -/* - * asm-x86/xcr.h - * - * Definitions for the eXtended Control Register instructions - */ - -#ifndef _ASM_X86_XCR_H -#define _ASM_X86_XCR_H - -#define XCR_XFEATURE_ENABLED_MASK 0x00000000 - -#ifdef __KERNEL__ -# ifndef __ASSEMBLY__ - -#include - -static inline u64 xgetbv(u32 index) -{ - u32 eax, edx; - - asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((u64)edx << 32); -} - -static inline void xsetbv(u32 index, u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - - asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ - : : "a" (eax), "d" (edx), "c" (index)); -} - -# endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ - -#endif /* _ASM_X86_XCR_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0f84922..c087f2d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -9,7 +9,6 @@ #include #include #include -#include static const char *xfeature_names[] = { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f93ae71..2de55e9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9ff4df7..5c61aae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -61,7 +61,6 @@ #include #include #include /* Ugh! */ -#include #include #include -- cgit v0.10.2 From d0903193124132c6bb59a895eeb0656f86013da1 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 11:11:10 +0200 Subject: x86/fpu: Rename sanitize_i387_state() to fpstate_sanitize_xstate() So the sanitize_i387_state() function has the following purpose: on CPUs that support optimized xstate saving instructions, an FPU fpstate might end up having partially uninitialized data. This function initializes that data. Note that the function name is a misnomer and confusing on two levels, not only is it not i387 specific at all, but it is the exact opposite: it only matters on xstate CPUs. So rename sanitize_i387_state() and __sanitize_i387_state() to fpstate_sanitize_xstate() and __fpstate_sanitize_xstate(), to clearly express the purpose and usage of the function. We'll further clean up this function in the next patch. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 161b51b..6b6fa46 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -139,13 +139,13 @@ static inline void fx_finit(struct i387_fxsave_struct *fx) fx->mxcsr = MXCSR_DEFAULT; } -extern void __sanitize_i387_state(struct task_struct *); +extern void __fpstate_sanitize_xstate(struct task_struct *); -static inline void sanitize_i387_state(struct task_struct *tsk) +static inline void fpstate_sanitize_xstate(struct task_struct *tsk) { if (!use_xsaveopt()) return; - __sanitize_i387_state(tsk); + __fpstate_sanitize_xstate(tsk); } #define user_insn(insn, output, input...) \ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index edbb5d04..561a353 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -395,7 +395,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, return -ENODEV; fpu__activate_stopped(fpu); - sanitize_i387_state(target); + fpstate_sanitize_xstate(target); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpu->state.fxsave, 0, -1); @@ -412,7 +412,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, return -ENODEV; fpu__activate_stopped(fpu); - sanitize_i387_state(target); + fpstate_sanitize_xstate(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpu->state.fxsave, 0, -1); @@ -644,7 +644,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, &fpu->state.fsave, 0, -1); - sanitize_i387_state(target); + fpstate_sanitize_xstate(target); if (kbuf && pos == 0 && count == sizeof(env)) { convert_from_fxsr(kbuf, target); @@ -666,7 +666,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, fpu__activate_stopped(fpu); - sanitize_i387_state(target); + fpstate_sanitize_xstate(target); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index c087f2d..fc2ff12 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(cpu_has_xfeatures); * if the corresponding header bit is zero. This is to ensure that user-space doesn't * see some stale state in the memory layout during signal handling, debugging etc. */ -void __sanitize_i387_state(struct task_struct *tsk) +void __fpstate_sanitize_xstate(struct task_struct *tsk) { struct i387_fxsave_struct *fx = &tsk->thread.fpu.state.fxsave; int feature_bit; @@ -318,7 +318,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) fpu_fxsave(&tsk->thread.fpu); } else { - sanitize_i387_state(tsk); + fpstate_sanitize_xstate(tsk); if (__copy_to_user(buf_fx, xsave, xstate_size)) return -1; } -- cgit v0.10.2 From 1ac91a767f1d2ac049dc11e5b7e4342c63c21538 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 11:17:55 +0200 Subject: x86/fpu: Simplify fpstate_sanitize_xstate() calls Remove the extra layer of __fpstate_sanitize_xstate(): if (!use_xsaveopt()) return; __fpstate_sanitize_xstate(tsk); and move the check for use_xsaveopt() into fpstate_sanitize_xstate(). In general we optimize for the presence of CPU features, not for the absence of them. 
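The check is now done once, inside the function itself - a sketch of
the resulting shape:

	void fpstate_sanitize_xstate(struct task_struct *tsk)
	{
		/* Only XSAVEOPT can leave state components init-optimized: */
		if (!use_xsaveopt())
			return;

		/* ... walk header.xfeatures and reset init-state areas ... */
	}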
Furthermore there's little point in this inlining, as the call sites are not super hot code paths. Doing this uninlining shrinks the code a bit: text data bss dec hex filename 14108751 2573624 1634304 18316679 1177d87 vmlinux.before 14108627 2573624 1634304 18316555 1177d0b vmlinux.after Also remove a pointless '!fx' check from fpstate_sanitize_xstate(). Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 6b6fa46..88fec3f 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -139,14 +139,7 @@ static inline void fx_finit(struct i387_fxsave_struct *fx) fx->mxcsr = MXCSR_DEFAULT; } -extern void __fpstate_sanitize_xstate(struct task_struct *); - -static inline void fpstate_sanitize_xstate(struct task_struct *tsk) -{ - if (!use_xsaveopt()) - return; - __fpstate_sanitize_xstate(tsk); -} +extern void fpstate_sanitize_xstate(struct task_struct *); #define user_insn(insn, output, input...) \ ({ \ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index fc2ff12..47b9591 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -92,13 +92,13 @@ EXPORT_SYMBOL_GPL(cpu_has_xfeatures); * if the corresponding header bit is zero. This is to ensure that user-space doesn't * see some stale state in the memory layout during signal handling, debugging etc. */ -void __fpstate_sanitize_xstate(struct task_struct *tsk) +void fpstate_sanitize_xstate(struct task_struct *tsk) { struct i387_fxsave_struct *fx = &tsk->thread.fpu.state.fxsave; int feature_bit; u64 xfeatures; - if (!fx) + if (!use_xsaveopt()) return; xfeatures = tsk->thread.fpu.state.xsave.header.xfeatures; -- cgit v0.10.2 From 36e49e7f2ec8fa2cdf1ec0439374583ea0a82c47 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 11:25:02 +0200 Subject: x86/fpu: Pass 'struct fpu' to fpstate_sanitize_xstate() Currently fpstate_sanitize_xstate() has a task_struct input parameter, but it only uses the fpu structure from it - so pass in a 'struct fpu' pointer only and update all call sites. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 88fec3f..da96b0c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -139,7 +139,7 @@ static inline void fx_finit(struct i387_fxsave_struct *fx) fx->mxcsr = MXCSR_DEFAULT; } -extern void fpstate_sanitize_xstate(struct task_struct *); +extern void fpstate_sanitize_xstate(struct fpu *fpu); #define user_insn(insn, output, input...) 
\ ({ \ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 561a353..1d6e97c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -395,7 +395,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, return -ENODEV; fpu__activate_stopped(fpu); - fpstate_sanitize_xstate(target); + fpstate_sanitize_xstate(fpu); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &fpu->state.fxsave, 0, -1); @@ -412,7 +412,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, return -ENODEV; fpu__activate_stopped(fpu); - fpstate_sanitize_xstate(target); + fpstate_sanitize_xstate(fpu); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpu->state.fxsave, 0, -1); @@ -644,7 +644,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, &fpu->state.fsave, 0, -1); - fpstate_sanitize_xstate(target); + fpstate_sanitize_xstate(fpu); if (kbuf && pos == 0 && count == sizeof(env)) { convert_from_fxsr(kbuf, target); @@ -665,8 +665,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, int ret; fpu__activate_stopped(fpu); - - fpstate_sanitize_xstate(target); + fpstate_sanitize_xstate(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 47b9591..a8ce38a 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -92,16 +92,16 @@ EXPORT_SYMBOL_GPL(cpu_has_xfeatures); * if the corresponding header bit is zero. This is to ensure that user-space doesn't * see some stale state in the memory layout during signal handling, debugging etc. */ -void fpstate_sanitize_xstate(struct task_struct *tsk) +void fpstate_sanitize_xstate(struct fpu *fpu) { - struct i387_fxsave_struct *fx = &tsk->thread.fpu.state.fxsave; + struct i387_fxsave_struct *fx = &fpu->state.fxsave; int feature_bit; u64 xfeatures; if (!use_xsaveopt()) return; - xfeatures = tsk->thread.fpu.state.xsave.header.xfeatures; + xfeatures = fpu->state.xsave.header.xfeatures; /* * None of the feature bits are in init state. So nothing else @@ -318,7 +318,7 @@ int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) if (ia32_fxstate) fpu_fxsave(&tsk->thread.fpu); } else { - fpstate_sanitize_xstate(tsk); + fpstate_sanitize_xstate(&tsk->thread.fpu); if (__copy_to_user(buf_fx, xsave, xstate_size)) return -1; } -- cgit v0.10.2 From c8e1404120d55876d2dedd3a541bc484ef692c58 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 11:35:20 +0200 Subject: x86/fpu: Rename save_xstate_sig() to copy_fpstate_to_sigframe() Standardize the naming of save_xstate_sig() by renaming it to copy_fpstate_to_sigframe(): this tells us at a glance that the function copies an FPU fpstate to a signal frame. This naming also follows the naming of copy_fpregs_to_fpstate(). Don't put 'xstate' into the name: since this is a generic name, it's expected that the function is able to handle xstate frames as well, beyond legacy frames. xstate used to be the odd case in the x86 FPU code - now it's the common case. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index d6d8f4c..2e0b1b7 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -327,7 +327,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); *fpstate = (struct _fpstate_ia32 __user *) sp; - if (save_xstate_sig(*fpstate, (void __user *)fx_aligned, + if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, math_size) < 0) return (void __user *) -1L; } diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index da96b0c..58c274d 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -523,7 +523,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc /* * Signal frame handlers... */ -extern int save_xstate_sig(void __user *buf, void __user *fx, int size); +extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size); extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); static inline int xstate_sigframe_size(void) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a8ce38a..5178158 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -293,7 +293,7 @@ static inline int save_user_xstate(struct xsave_struct __user *buf) * For [f]xsave state, update the SW reserved fields in the [f]xsave frame * indicating the absence/presence of the extended state to the user. */ -int save_xstate_sig(void __user *buf, void __user *buf_fx, int size) +int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { struct xsave_struct *xsave = &current->thread.fpu.state.xsave; struct task_struct *tsk = current; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index c67f96c..59cfc9c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -235,7 +235,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, /* save i387 and extended state */ if (fpu->fpstate_active && - save_xstate_sig(*fpstate, (void __user *)buf_fx, math_size) < 0) + copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0) return (void __user *)-1L; return (void __user *)sp; -- cgit v0.10.2 From 2a52af8b8adc498b3cccbc219ec997cad6cafb9c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 12:09:27 +0200 Subject: x86/fpu: Rename save_user_xstate() to copy_fpregs_to_sigframe() Move the naming in line with existing names, so that we now have: copy_fpregs_to_fpstate() copy_fpstate_to_sigframe() copy_fpregs_to_sigframe() ... where each function does what its name suggests. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 5178158..2863882 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -257,7 +257,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) return err; } -static inline int save_user_xstate(struct xsave_struct __user *buf) +static inline int copy_fpregs_to_sigframe(struct xsave_struct __user *buf) { int err; @@ -312,7 +312,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) if (user_has_fpu()) { /* Save the live register state to the user directly. */ - if (save_user_xstate(buf_fx)) + if (copy_fpregs_to_sigframe(buf_fx)) return -1; /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) -- cgit v0.10.2 From be7436d519970365775d3d2d7b73e75c233e1994 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 12:53:45 +0200 Subject: x86/fpu: Clarify ancient comments in fpu__restore() So this function still had ancient language about 'saving current math information' - but we haven't been doing lazy FPU saves for quite some time, we are doing lazy FPU restores. Also remove IRQ13 related comment, which we don't support anymore either. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1d6e97c..91b9935 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -319,14 +319,14 @@ static void fpu__activate_stopped(struct fpu *child_fpu) } /* - * 'fpu__restore()' saves the current math information in the - * old math state array, and gets the new ones from the current task + * 'fpu__restore()' is called to copy FPU registers from + * the FPU fpstate to the live hw registers and to activate + * access to the hardware registers, so that FPU instructions + * can be used afterwards. * - * Careful.. There are problems with IBM-designed IRQ13 behaviour. - * Don't touch unless you *really* know how it works. - * - * Must be called with kernel preemption disabled (eg with local - * local interrupts as in the case of do_device_not_available). + * Must be called with kernel preemption disabled (for example + * with local interrupts disabled, as it is in the case of + * do_device_not_available()). */ void fpu__restore(void) { -- cgit v0.10.2 From 3c6dffa93be9f82f2566dcc948285d6f79fb9ce2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 28 Apr 2015 12:28:08 +0200 Subject: x86/fpu: Rename user_has_fpu() to fpregs_active() Rename this function in line with the new FPU nomenclature. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 58c274d..0f17cd4 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -358,7 +358,7 @@ static inline void __fpregs_activate(struct fpu *fpu) * to save the FP state - we'll just take a #NM * fault and get the FPU access back. 
*/ -static inline int user_has_fpu(void) +static inline int fpregs_active(void) { return current->thread.fpu.fpregs_active; } @@ -557,7 +557,7 @@ static inline void user_fpu_begin(void) struct fpu *fpu = &current->thread.fpu; preempt_disable(); - if (!user_has_fpu()) + if (!fpregs_active()) fpregs_activate(fpu); preempt_enable(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 2863882..b8e5fee 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -310,7 +310,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) sizeof(struct user_i387_ia32_struct), NULL, (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - if (user_has_fpu()) { + if (fpregs_active()) { /* Save the live register state to the user directly. */ if (copy_fpregs_to_sigframe(buf_fx)) return -1; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2de55e9..1c384bf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1882,7 +1882,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) * If the FPU is not active (through the host task or * the guest vcpu), then restore the cr0.TS bit. */ - if (!user_has_fpu() && !vmx->vcpu.guest_fpu_loaded) + if (!fpregs_active() && !vmx->vcpu.guest_fpu_loaded) stts(); load_gdt(this_cpu_ptr(&host_gdt)); } diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index b80e4b8..99bb300 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -251,7 +251,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * we set it now, so we can trap and pass that trap to the Guest if it * uses the FPU. */ - if (cpu->ts && user_has_fpu()) + if (cpu->ts && fpregs_active()) stts(); /* @@ -283,7 +283,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); /* Clear the host TS bit if it was set above. */ - if (cpu->ts && user_has_fpu()) + if (cpu->ts && fpregs_active()) clts(); /* @@ -301,7 +301,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * a different CPU. So all the critical stuff should be done * before this. */ - else if (cpu->regs->trapnum == 7 && !user_has_fpu()) + else if (cpu->regs->trapnum == 7 && !fpregs_active()) fpu__restore(); } -- cgit v0.10.2 From b1276c48e91bee869454301d3678cc49d8f57ab4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 10:58:03 +0200 Subject: x86/fpu: Initialize fpregs in fpu__init_cpu_generic() FPU fpregs do not get initialized during bootup on secondary CPUs, on non-xsave capable CPUs. For example on one of my systems, the secondary CPU has this FPU state on bootup: x86: Booting SMP configuration: .... node #0, CPUs: #1 x86/fpu ###################### x86/fpu # FPU register dump on CPU#1: x86/fpu # ... CWD: ffff0040 x86/fpu # ... SWD: ffff0000 x86/fpu # ... TWD: ffff555a x86/fpu # ... FIP: 00000000 x86/fpu # ... FCS: 00000000 x86/fpu # ... FOO: 00000000 x86/fpu # ... FOS: ffff0000 x86/fpu # ... FP0: 02 57 00 00 00 00 00 00 ff ff x86/fpu # ... FP1: 1b e2 00 00 00 00 00 00 ff ff x86/fpu # ... FP2: 00 00 00 00 00 00 00 00 00 00 x86/fpu # ... FP3: 00 00 00 00 00 00 00 00 00 00 x86/fpu # ... FP4: 00 00 00 00 00 00 00 00 00 00 x86/fpu # ... FP5: 00 00 00 00 00 00 00 00 00 00 x86/fpu # ... FP6: 00 00 00 00 00 00 00 00 00 00 x86/fpu # ... FP7: 00 00 00 00 00 00 00 00 00 00 x86/fpu # ... SW: dadadada x86/fpu ###################### Note how CWD and TWD are off their usual init state (0x037f and 0xffff), and how FP0 and FP1 have non-zero content.
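( As an illustrative aside, not part of the patch: the init values can be checked from user space with a minimal sketch. The program below is hypothetical, but the CWD/SWD values it demonstrates are the architectural x87 init state that FNINIT establishes:

	#include <stdio.h>

	int main(void)
	{
		unsigned short cwd, swd;

		/* FNINIT puts the x87 unit into its architectural init state */
		asm volatile("fninit");
		asm volatile("fnstcw %0" : "=m" (cwd));
		asm volatile("fnstsw %0" : "=m" (swd));

		/* expect CWD == 0x037f and SWD == 0x0000 after FNINIT */
		printf("CWD: %#06x SWD: %#06x\n", cwd, swd);
		return 0;
	}

A CPU that comes up with different values, as in the dump above, is carrying stale x87 state. )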
This is normally not a problem, because any user-space FPU state is initialized properly - but it can complicate the use of FPU instructions in kernel code via kernel_fpu_begin()/end(): if the FPU using code does not initialize registers itself, it might generate spurious exceptions depending on which CPU it executes on. Fix this by initializing the x87 state via the FNINIT instruction. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 460e7e2..72219ce2 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -36,6 +36,9 @@ static void fpu__init_cpu_generic(void) if (!cpu_has_fpu) cr0 |= X86_CR0_EM; write_cr0(cr0); + + /* Flush out any pending x87 state: */ + asm volatile ("fninit"); } /* -- cgit v0.10.2 From 2e85591a6ca2ad84741d2859753be5497d74bd42 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 08:46:26 +0200 Subject: x86/fpu: Better document fpu__clear() state handling So prior to this fix: c88d47480d30 ("x86/fpu: Always restore_xinit_state() when use_eager_cpu()") we leaked FPU state across execve() boundaries on eagerfpu systems: $ /host/home/mingo/dump-xmm-regs-exec # XMM state before execve(): XMM0 : 000000000000dede XMM1 : 000000000000dedf XMM2 : 000000000000dee0 XMM3 : 000000000000dee1 XMM4 : 000000000000dee2 XMM5 : 000000000000dee3 XMM6 : 000000000000dee4 XMM7 : 000000000000dee5 XMM8 : 000000000000dee6 XMM9 : 000000000000dee7 XMM10: 000000000000dee8 XMM11: 000000000000dee9 XMM12: 000000000000deea XMM13: 000000000000deeb XMM14: 000000000000deec XMM15: 000000000000deed # XMM state after execve(), in the new task context: XMM0 : 0000000000000000 XMM1 : 2f2f2f2f2f2f2f2f XMM2 : 0000000000000000 XMM3 : 0000000000000000 XMM4 : 00000000000000ff XMM5 : 00000000ff000000 XMM6 : 000000000000dee4 XMM7 : 000000000000dee5 XMM8 : 0000000000000000 XMM9 : 0000000000000000 XMM10: 0000000000000000 XMM11: 0000000000000000 XMM12: 0000000000000000 XMM13: 000000000000deeb XMM14: 000000000000deec XMM15: 000000000000deed Better explain what this function is supposed to do and why. Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 91b9935..a2e2da2 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -348,6 +348,10 @@ void fpu__restore(void) } EXPORT_SYMBOL_GPL(fpu__restore); +/* + * Called by sys_execve() to clear the FPU fpregs, so that FPU state + * of the previous binary does not leak over into the exec()ed binary: + */ void fpu__clear(struct task_struct *tsk) { struct fpu *fpu = &tsk->thread.fpu; -- cgit v0.10.2 From 5e907bb0459399b0d1cc8d4c7e9f363a995b748a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 09:09:26 +0200 Subject: x86/alternatives, x86/fpu: Add 'alternatives_patched' debug flag and use it in xsave_state() We'd like to use xsave_state() earlier, but its SYSTEM_BOOTING check is too imprecise. The real condition that xsave_state() would like to check is whether alternative XSAVE instructions were patched into the kernel image already. Add such a (read-mostly) debug flag and use it in xsave_state(). Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index ba32af0..7bfc85b 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -52,6 +52,12 @@ struct alt_instr { u8 padlen; /* length of build-time padding */ } __packed; +/* + * Debug flag that can be tested to see whether alternative + * instructions were patched in already: + */ +extern int alternatives_patched; + extern void alternative_instructions(void); extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 31a002a..ab2c507 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -119,7 +119,7 @@ static inline int xsave_state(struct xsave_struct *fx) u32 hmask = mask >> 32; int err = 0; - WARN_ON(system_state == SYSTEM_BOOTING); + WARN_ON(!alternatives_patched); /* * If xsaves is enabled, xsaves replaces xsaveopt because diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index aef6531..7fe0972 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -21,6 +21,10 @@ #include #include +int __read_mostly alternatives_patched; + +EXPORT_SYMBOL_GPL(alternatives_patched); + #define MAX_PATCH_LEN (255-1) static int __initdata_or_module debug_alternative; @@ -627,6 +631,7 @@ void __init alternative_instructions(void) apply_paravirt(__parainstructions, __parainstructions_end); restart_nmi(); + alternatives_patched = 1; } /** -- cgit v0.10.2 From 5033861575df08a04090cc7b785b2b7aadcbde82 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 19:04:31 +0200 Subject: x86/fpu: Synchronize the naming of drop_fpu() and fpu_reset_state() drop_fpu() and fpu_reset_state() are similar in functionality and in scope, yet this is not apparent from their names. drop_fpu() deactivates FPU contents (both the fpregs and the fpstate), but leaves register contents intact in the eager-FPU case, mostly as an optimization. It disables fpregs in the lazy FPU case. The drop_fpu() method can be used to destroy FPU state in an optimized way, when we know that a new state will be loaded before user-space might see any remains of the old FPU state: - such as in sys_exit()'s exit_thread() where we know this task won't execute any user-space instructions anymore and the next context switch cleans up the FPU. The old FPU state might still be around in the eagerfpu case but won't be saved. - in __restore_xstate_sig(), where we use drop_fpu() before copying a new state into the fpstate and activating that one. No user-space instructions can execute between those steps. - in sys_execve()'s fpu__clear(): there we use drop_fpu() in the !eagerfpu case, where it's equivalent to a full reinit. fpu_reset_state() is a stronger version of drop_fpu(): both in the eagerfpu and the lazy-FPU case it guarantees that fpregs are reinitialized to init state. This method is used in cases where we need a full reset: - handle_signal() uses fpu_reset_state() to reset the FPU state to init before executing a user-space signal handler.
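( Sketched in simplified form below, assuming the wrapper name fpu__prepare_signal_handler() purely for illustration; this is not the literal signal code:

	/*
	 * Hypothetical wrapper showing the pattern: after the user's FPU
	 * state has been copied to the signal frame, reset the FPU so the
	 * handler starts from clean init state.
	 */
	static void fpu__prepare_signal_handler(struct fpu *fpu)
	{
		if (fpu->fpstate_active)
			fpu_reset_state(fpu);
	}

)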
While we have already saved the original FPU state at this point, and always restore the original state, the signal handling code still has to do this reinit, because signals may interrupt any user-space instruction, and the FPU might be in various intermediate states (such as an unbalanced x87 stack) that are not immediately usable for general C signal handler code. - __restore_xstate_sig() uses fpu_reset_state() when the signal frame has no FP context. Since the signal handler may have modified the FPU state, it gets reset back to init state. - in another branch __restore_xstate_sig() uses fpu_reset_state() to handle a restoration error: when restore_user_xstate() fails to restore FPU state and we might have inconsistent FPU data, fpu_reset_state() is used to reset it back to a known good state. - __kernel_fpu_end() uses fpu_reset_state() in an error branch. This is in a 'must not trigger' error branch, so on bug-free kernels this never triggers. - fpu__restore() uses fpu_reset_state() in an error path as well: if the fpstate was set up with invalid FPU state (via ptrace or via a signal handler), then it's reset back to init state. - likewise, the scheduler's switch_fpu_finish() uses it in a restoration error path too. Move both drop_fpu() and fpu_reset_state() to the fpu__*() namespace and harmonize their naming with their function: fpu__drop() fpu__reset() This clearly shows that both methods operate on the full state of the FPU, just like fpu__restore(). Also add comments to explain what each function does. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0f17cd4..31bfda8 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -382,11 +382,17 @@ static inline void fpregs_deactivate(struct fpu *fpu) __fpregs_deactivate_hw(); } -static inline void drop_fpu(struct fpu *fpu) +/* + * Drops current FPU state: deactivates the fpregs and + * the fpstate. NOTE: it still leaves previous contents + * in the fpregs in the eager-FPU case. + * + * This function can be used in cases where we know that + * a state-restore is coming: either an explicit one, + * or a reschedule. + */ +static inline void fpu__drop(struct fpu *fpu) { - /* - * Forget coprocessor state.. - */ preempt_disable(); fpu->counter = 0; @@ -412,13 +418,12 @@ static inline void restore_init_xstate(void) } /* - * Reset the FPU state in the eager case and drop it in the lazy case (later use - * will reinit it). + * Reset the FPU state back to init state.
*/ -static inline void fpu_reset_state(struct fpu *fpu) +static inline void fpu__reset(struct fpu *fpu) { if (!use_eager_fpu()) - drop_fpu(fpu); + fpu__drop(fpu); else restore_init_xstate(); } @@ -516,7 +521,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc { if (fpu_switch.preload) { if (unlikely(restore_fpu_checking(new_fpu))) - fpu_reset_state(new_fpu); + fpu__reset(new_fpu); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a2e2da2..bf217cd 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) if (fpu->fpregs_active) { if (WARN_ON(restore_fpu_checking(fpu))) - fpu_reset_state(fpu); + fpu__reset(fpu); } else { __fpregs_deactivate_hw(); } @@ -339,7 +339,7 @@ void fpu__restore(void) kernel_fpu_disable(); fpregs_activate(fpu); if (unlikely(restore_fpu_checking(fpu))) { - fpu_reset_state(fpu); + fpu__reset(fpu); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); } else { tsk->thread.fpu.counter++; @@ -360,7 +360,7 @@ void fpu__clear(struct task_struct *tsk) if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ - drop_fpu(fpu); + fpu__drop(fpu); } else { if (!fpu->fpstate_active) { fpu__activate_curr(fpu); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index b8e5fee..5e3d924 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -401,7 +401,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - fpu_reset_state(fpu); + fpu__reset(fpu); return 0; } @@ -449,7 +449,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) * We will be ready to restore/save the state only after * fpu->fpstate_active is again set. */ - drop_fpu(fpu); + fpu__drop(fpu); if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { @@ -474,7 +474,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) */ user_fpu_begin(); if (restore_user_xstate(buf_fx, xfeatures, fx_only)) { - fpu_reset_state(fpu); + fpu__reset(fpu); return -1; } } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 5d37c26..dde263f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -110,7 +110,7 @@ void exit_thread(void) kfree(bp); } - drop_fpu(fpu); + fpu__drop(fpu); } void flush_thread(void) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 59cfc9c..6bf51239 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -667,7 +667,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * Ensure the signal handler starts with the new fpu state. */ if (fpu->fpstate_active) - fpu_reset_state(fpu); + fpu__reset(fpu); } signal_setup_done(failed, ksig, stepping); } -- cgit v0.10.2 From 0e75c54f1703e83e6cdf239491bf7294f6c34777 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 20:10:43 +0200 Subject: x86/fpu: Rename restore_fpu_checking() to copy_fpstate_to_fpregs() fpu_restore_checking() is a helper function of restore_fpu_checking(), but this is not apparent from the naming. Both copy fpstate contents to fpregs, while the fuller variant does a full copy without leaking information. So rename them to: copy_fpstate_to_fpregs() __copy_fpstate_to_fpregs() Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 31bfda8..c09aea1 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -289,7 +289,7 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) extern void fpu__save(struct fpu *fpu); -static inline int fpu_restore_checking(struct fpu *fpu) +static inline int __copy_fpstate_to_fpregs(struct fpu *fpu) { if (use_xsave()) return fpu_xrstor_checking(&fpu->state.xsave); @@ -299,7 +299,7 @@ static inline int fpu_restore_checking(struct fpu *fpu) return frstor_checking(&fpu->state.fsave); } -static inline int restore_fpu_checking(struct fpu *fpu) +static inline int copy_fpstate_to_fpregs(struct fpu *fpu) { /* * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is @@ -314,7 +314,7 @@ static inline int restore_fpu_checking(struct fpu *fpu) : : [addr] "m" (fpu->fpregs_active)); } - return fpu_restore_checking(fpu); + return __copy_fpstate_to_fpregs(fpu); } /* @@ -520,7 +520,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) { if (fpu_switch.preload) { - if (unlikely(restore_fpu_checking(new_fpu))) + if (unlikely(copy_fpstate_to_fpregs(new_fpu))) fpu__reset(new_fpu); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index bf217cd..14d8e33 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -115,7 +115,7 @@ void __kernel_fpu_end(void) struct fpu *fpu = &current->thread.fpu; if (fpu->fpregs_active) { - if (WARN_ON(restore_fpu_checking(fpu))) + if (WARN_ON(copy_fpstate_to_fpregs(fpu))) fpu__reset(fpu); @@ -338,7 +338,7 @@ void fpu__restore(void) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); fpregs_activate(fpu); - if (unlikely(restore_fpu_checking(fpu))) { + if (unlikely(copy_fpstate_to_fpregs(fpu))) { fpu__reset(fpu); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); } else { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5c61aae..f443817 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7030,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - fpu_restore_checking(&vcpu->arch.guest_fpu); + __copy_fpstate_to_fpregs(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } -- cgit v0.10.2 From 6ffc152e4606c7f9149940d36a83e786f7f0a4f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 20:24:14 +0200 Subject: x86/fpu: Move all the fpu__*() high level methods closer to each other The fpu__*() methods are closely related, but they are defined in scattered places within the FPU code. Concentrate them, and also uninline fpu__save(), fpu__drop() and fpu__reset() to save about 5K of kernel text on 64-bit kernels: text data bss dec filename 14113063 2575280 1634304 18322647 vmlinux.before 14108070 2575280 1634304 18317654 vmlinux.after Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index c09aea1..f20a003 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -46,10 +46,19 @@ extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__activate_curr(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); -extern void fpu__clear(struct task_struct *tsk); extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); + +/* + * High level FPU state handling functions: + */ +extern void fpu__save(struct fpu *fpu); extern void fpu__restore(void); +extern void fpu__drop(struct fpu *fpu); +extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); +extern void fpu__reset(struct fpu *fpu); +extern void fpu__clear(struct task_struct *tsk); + extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); @@ -287,8 +296,6 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -extern void fpu__save(struct fpu *fpu); - static inline int __copy_fpstate_to_fpregs(struct fpu *fpu) { if (use_xsave()) @@ -382,33 +389,6 @@ static inline void fpregs_deactivate(struct fpu *fpu) __fpregs_deactivate_hw(); } -/* - * Drops current FPU state: deactivates the fpregs and - * the fpstate. NOTE: it still leaves previous contents - * in the fpregs in the eager-FPU case. - * - * This function can be used in cases where we know that - * a state-restore is coming: either an explicit one, - * or a reschedule. - */ -static inline void fpu__drop(struct fpu *fpu) -{ - preempt_disable(); - fpu->counter = 0; - - if (fpu->fpregs_active) { - /* Ignore delayed exceptions from user space */ - asm volatile("1: fwait\n" - "2:\n" - _ASM_EXTABLE(1b, 2b)); - fpregs_deactivate(fpu); - } - - fpu->fpstate_active = 0; - - preempt_enable(); -} - static inline void restore_init_xstate(void) { if (use_xsave()) @@ -418,17 +398,6 @@ static inline void restore_init_xstate(void) } /* - * Reset the FPU state back to init state. - */ -static inline void fpu__reset(struct fpu *fpu) -{ - if (!use_eager_fpu()) - fpu__drop(fpu); - else - restore_init_xstate(); -} - -/* * Definitions for the eXtended Control Register instructions */ @@ -597,8 +566,6 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); - static inline unsigned long alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 14d8e33..acca83b 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -349,6 +349,44 @@ void fpu__restore(void) EXPORT_SYMBOL_GPL(fpu__restore); /* + * Drops current FPU state: deactivates the fpregs and + * the fpstate. NOTE: it still leaves previous contents + * in the fpregs in the eager-FPU case. + * + * This function can be used in cases where we know that + * a state-restore is coming: either an explicit one, + * or a reschedule. 
+ */ +void fpu__drop(struct fpu *fpu) +{ + preempt_disable(); + fpu->counter = 0; + + if (fpu->fpregs_active) { + /* Ignore delayed exceptions from user space */ + asm volatile("1: fwait\n" + "2:\n" + _ASM_EXTABLE(1b, 2b)); + fpregs_deactivate(fpu); + } + + fpu->fpstate_active = 0; + + preempt_enable(); +} + +/* + * Reset the FPU state back to init state: + */ +void fpu__reset(struct fpu *fpu) +{ + if (!use_eager_fpu()) + fpu__drop(fpu); + else + restore_init_xstate(); +} + +/* * Called by sys_execve() to clear the FPU fpregs, so that FPU state * of the previous binary does not leak over into the exec()ed binary: */ -- cgit v0.10.2 From 04c8e01d50ae1f2b33a46459e8b1e776b747e97d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 20:35:33 +0200 Subject: x86/fpu: Move fpu__clear() to 'struct fpu *' parameter passing Do it like all other high level FPU state handling functions: they only know about struct fpu, not about the task. (Also remove a dead prototype while at it.) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index f20a003..d6ac461 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -37,9 +37,8 @@ int ia32_setup_frame(int sig, struct ksignal *ksig, #define MXCSR_DEFAULT 0x1f80 extern unsigned int mxcsr_feature_mask; -extern void fpu__init_cpu(void); -extern void eager_fpu_init(void); +extern void fpu__init_cpu(void); extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); @@ -57,7 +56,7 @@ extern void fpu__restore(void); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); extern void fpu__reset(struct fpu *fpu); -extern void fpu__clear(struct task_struct *tsk); +extern void fpu__clear(struct fpu *fpu); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index acca83b..0ccdd83 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -390,11 +390,9 @@ void fpu__reset(struct fpu *fpu) * Called by sys_execve() to clear the FPU fpregs, so that FPU state * of the previous binary does not leak over into the exec()ed binary: */ -void fpu__clear(struct task_struct *tsk) +void fpu__clear(struct fpu *fpu) { - struct fpu *fpu = &tsk->thread.fpu; - - WARN_ON_ONCE(tsk != current); /* Almost certainly an anomaly */ + WARN_ON_ONCE(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use.
*/ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index dde263f..b53f6c9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -120,7 +120,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - fpu__clear(tsk); + fpu__clear(&tsk->thread.fpu); } static void hard_disable_TSC(void) -- cgit v0.10.2 From 9dfe99b755a263f9f6ac1dfdb5512bd6e22c28e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 20:55:19 +0200 Subject: x86/fpu: Rename restore_xstate_sig() to fpu__restore_sig() restore_xstate_sig() is a misnomer: it's not limited to 'xstate' at all, it is the high level 'restore FPU state from a signal frame' function that works with all legacy FPU formats as well. Rename it (and its helper) accordingly, and also move it to the fpu__*() namespace. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 2e0b1b7..de81ddf 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -197,7 +197,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, buf = compat_ptr(tmp); } get_user_catch(err); - err |= restore_xstate_sig(buf, 1); + err |= fpu__restore_sig(buf, 1); force_iret(); diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d6ac461..d6cfbda 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -497,14 +497,14 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc * Signal frame handlers... */ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size); -extern int __restore_xstate_sig(void __user *buf, void __user *fx, int size); +extern int __fpu__restore_sig(void __user *buf, void __user *fx, int size); static inline int xstate_sigframe_size(void) { return use_xsave() ? 
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; } -static inline int restore_xstate_sig(void __user *buf, int ia32_frame) +static inline int fpu__restore_sig(void __user *buf, int ia32_frame) { void __user *buf_fx = buf; int size = xstate_sigframe_size(); @@ -514,7 +514,7 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame) size += sizeof(struct i387_fsave_struct); } - return __restore_xstate_sig(buf, buf_fx, size); + return __fpu__restore_sig(buf, buf_fx, size); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 5e3d924..ea514d6a 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -388,7 +388,7 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) return frstor_user(buf); } -int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size) +int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) { int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 6bf51239..7416fa8 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -102,7 +102,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) get_user_ex(buf, &sc->fpstate); } get_user_catch(err); - err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32)); + err |= fpu__restore_sig(buf, config_enabled(CONFIG_X86_32)); force_iret(); -- cgit v0.10.2 From 82c0e45eb5c839a8cee3d8d8a82d0592c4f42773 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 29 Apr 2015 21:09:18 +0200 Subject: x86/fpu: Move the signal frame handling code closer to each other Consolidate more signal frame related functions: text data bss dec filename 14108070 2575280 1634304 18317654 vmlinux.before 14107944 2575344 1634304 18317592 vmlinux.after Also, while moving it, rename alloc_mathframe() to fpu__alloc_mathframe(). Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index de81ddf..54605eb 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -325,7 +325,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs, if (fpu->fpstate_active) { unsigned long fx_aligned, math_size; - sp = alloc_mathframe(sp, 1, &fx_aligned, &math_size); + sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size); *fpstate = (struct _fpstate_ia32 __user *) sp; if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned, math_size) < 0) diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d6cfbda..34fbf95 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -53,6 +53,7 @@ extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); */ extern void fpu__save(struct fpu *fpu); extern void fpu__restore(void); +extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); extern void fpu__reset(struct fpu *fpu); @@ -497,25 +498,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc * Signal frame handlers... 
*/ extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size); -extern int __fpu__restore_sig(void __user *buf, void __user *fx, int size); - -static inline int xstate_sigframe_size(void) -{ - return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; -} - -static inline int fpu__restore_sig(void __user *buf, int ia32_frame) -{ - void __user *buf_fx = buf; - int size = xstate_sigframe_size(); - - if (ia32_frame && use_fxsr()) { - buf_fx = buf + sizeof(struct i387_fsave_struct); - size += sizeof(struct i387_fsave_struct); - } - - return __fpu__restore_sig(buf, buf_fx, size); -} /* * Needs to be preemption-safe. @@ -565,20 +547,8 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -static inline unsigned long -alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, - unsigned long *size) -{ - unsigned long frame_size = xstate_sigframe_size(); - - *buf_fx = sp = round_down(sp - frame_size, 64); - if (ia32_frame && use_fxsr()) { - frame_size += sizeof(struct i387_fsave_struct); - sp -= sizeof(struct i387_fsave_struct); - } - - *size = frame_size; - return sp; -} +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size); #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ea514d6a..810f080 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -388,7 +388,7 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) return frstor_user(buf); } -int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) +static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) { int ia32_fxstate = (buf != buf_fx); struct task_struct *tsk = current; @@ -482,6 +482,43 @@ int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) return 0; } +static inline int xstate_sigframe_size(void) +{ + return use_xsave() ? 
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; } + +/* + * Restore FPU state from a sigframe: + */ +int fpu__restore_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); + } + + return __fpu__restore_sig(buf, buf_fx, size); +} + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + + return sp; +} /* * Prepare the SW reserved portion of the fxsave memory layout, indicating * the presence of the extended state information in the memory layout diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7416fa8..9554ca6 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -219,8 +219,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, } if (fpu->fpstate_active) { - sp = alloc_mathframe(sp, config_enabled(CONFIG_X86_32), - &buf_fx, &math_size); + sp = fpu__alloc_mathframe(sp, config_enabled(CONFIG_X86_32), - &buf_fx, &math_size); + &buf_fx, &math_size); *fpstate = (void __user *)sp; } -- cgit v0.10.2 From fbce7782467553d09cfde39473d23bde4ad78270 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 07:12:46 +0200 Subject: x86/fpu: Merge fpu__reset() and fpu__clear() With recent cleanups and fixes the fpu__reset() and fpu__clear() functions have become almost identical in functionality: the only difference is that fpu__reset() assumed that the fpstate was already active in the eagerfpu case, while fpu__clear() activated it if it was inactive. This distinction almost never matters, the only case where such fpstate activation happens is if the init thread (PID 1) gets exec()-ed for the first time. So keep fpu__clear() and change all fpu__reset() uses to fpu__clear() to simplify the logic. ( In a later patch we'll further simplify fpu__clear() by making sure that all contexts it is called on are already active. ) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 34fbf95..a55d63e 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -56,7 +56,6 @@ extern void fpu__restore(void); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); -extern void fpu__reset(struct fpu *fpu); extern void fpu__clear(struct fpu *fpu); extern void fpu__init_check_bugs(void); @@ -490,7 +489,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc { if (fpu_switch.preload) { if (unlikely(copy_fpstate_to_fpregs(new_fpu))) - fpu__reset(new_fpu); + fpu__clear(new_fpu); } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 0ccdd83..1ba866c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -116,7 +116,7 @@ void __kernel_fpu_end(void) if (fpu->fpregs_active) { if (WARN_ON(copy_fpstate_to_fpregs(fpu))) - fpu__reset(fpu); + fpu__clear(fpu); } else { __fpregs_deactivate_hw(); } @@ -339,7 +339,7 @@ void fpu__restore(void) kernel_fpu_disable(); fpregs_activate(fpu); if (unlikely(copy_fpstate_to_fpregs(fpu))) { - fpu__reset(fpu); + fpu__clear(fpu); force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); } else { tsk->thread.fpu.counter++; @@ -376,19 +376,10 @@ void fpu__drop(struct fpu *fpu) } /* - * Reset the FPU state back to init state: - */ -void fpu__reset(struct fpu *fpu) -{ - if (!use_eager_fpu()) - fpu__drop(fpu); - else - restore_init_xstate(); -} - -/* - * Called by sys_execve() to clear the FPU fpregs, so that FPU state - * of the previous binary does not leak over into the exec()ed binary: + * Clear the FPU state back to init state. + * + * Called by sys_execve(), by the signal handler code and by various + * error paths. */ void fpu__clear(struct fpu *fpu) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 810f080..9bc3734 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -401,7 +401,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) config_enabled(CONFIG_IA32_EMULATION)); if (!buf) { - fpu__reset(fpu); + fpu__clear(fpu); return 0; } @@ -474,7 +474,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ user_fpu_begin(); if (restore_user_xstate(buf_fx, xfeatures, fx_only)) { - fpu__reset(fpu); + fpu__clear(fpu); return -1; } } diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 9554ca6..7c08795 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -667,7 +667,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) * Ensure the signal handler starts with the new fpu state. */ if (fpu->fpstate_active) - fpu__reset(fpu); + fpu__clear(fpu); } signal_setup_done(failed, ksig, stepping); } -- cgit v0.10.2 From 05012c13f69d67be8a6a7e65726eeb70899ad6ad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 07:26:04 +0200 Subject: x86/fpu: Move is_ia32*frame() helpers out of fpu/internal.h Move them to their only user. This makes the code easier to read, the header is less cluttered, and it also speeds up the build a bit. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a55d63e..dc4842b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -103,22 +103,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; } -static inline int is_ia32_compat_frame(void) -{ - return config_enabled(CONFIG_IA32_EMULATION) && - test_thread_flag(TIF_IA32); -} - -static inline int is_ia32_frame(void) -{ - return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); -} - -static inline int is_x32_frame(void) -{ - return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); -} - #define X87_FSW_ES (1 << 7) /* Exception Summary */ static __always_inline __pure bool use_eager_fpu(void) diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7c08795..f4b2056 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -593,6 +593,22 @@ badframe: return 0; } +static inline int is_ia32_compat_frame(void) +{ + return config_enabled(CONFIG_IA32_EMULATION) && + test_thread_flag(TIF_IA32); +} + +static inline int is_ia32_frame(void) +{ + return config_enabled(CONFIG_X86_32) || is_ia32_compat_frame(); +} + +static inline int is_x32_frame(void) +{ + return config_enabled(CONFIG_X86_X32_ABI) && test_thread_flag(TIF_X32); +} + static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) { -- cgit v0.10.2 From fcbc99c403c4a1a24ac4744e08c04da3ec18a68c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 08:45:02 +0200 Subject: x86/fpu: Split out fpu/signal.h from fpu/internal.h for signal frame handling functions Most of the FPU code does not use them, so split them out and include them in signal.c and ia32_signal.c. Also fix header file dependency assumption in fpu/core.c. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 54605eb..ae3a29a 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index dc4842b..e2ceb49 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -19,21 +19,6 @@ #include #include -#ifdef CONFIG_X86_64 -# include -# include -struct ksignal; -int ia32_setup_rt_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -int ia32_setup_frame(int sig, struct ksignal *ksig, - compat_sigset_t *set, struct pt_regs *regs); -#else -# define user_i387_ia32_struct user_i387_struct -# define user32_fxsr_struct user_fxsr_struct -# define ia32_setup_frame __setup_frame -# define ia32_setup_rt_frame __setup_rt_frame -#endif - #define MXCSR_DEFAULT 0x1f80 extern unsigned int mxcsr_feature_mask; @@ -63,11 +48,6 @@ extern void fpu__resume_cpu(void); DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -extern void convert_from_fxsr(struct user_i387_ia32_struct *env, - struct task_struct *tsk); -extern void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env); - extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, xstateregs_get; @@ -530,8 +510,4 @@ static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) } } -unsigned long -fpu__alloc_mathframe(unsigned long sp, int ia32_frame, - unsigned long *buf_fx, unsigned long *size); - #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h new file mode 100644 index 0000000..0803dc2 --- /dev/null +++ b/arch/x86/include/asm/fpu/signal.h @@ -0,0 +1,31 @@ +/* + * x86 FPU signal frame handling methods: + */ +#ifndef _ASM_X86_FPU_SIGNAL_H +#define _ASM_X86_FPU_SIGNAL_H + +#ifdef CONFIG_X86_64 +# include +# include +struct ksignal; +int ia32_setup_rt_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +int ia32_setup_frame(int sig, struct ksignal *ksig, + compat_sigset_t *set, struct pt_regs *regs); +#else +# define user_i387_ia32_struct user_i387_struct +# define user32_fxsr_struct user_fxsr_struct +# define ia32_setup_frame __setup_frame +# define ia32_setup_rt_frame __setup_rt_frame +#endif + +extern void convert_from_fxsr(struct user_i387_ia32_struct *env, + struct task_struct *tsk); +extern void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env); + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size); + +#endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1ba866c..b61a513 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -6,6 +6,8 @@ * Gareth Hughes , May 2000 */ #include +#include + #include /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9bc3734..7871074 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 
4c61566..51e73a6 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index f4b2056..206996c 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From 59a36d16be8f9f68410f1bd396577fb7f31ae877 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 08:53:18 +0200 Subject: x86/fpu: Factor out fpu/regset.h from fpu/internal.h Only a few places use the regset definitions, so factor them out. Also fix related header dependency assumptions. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index e2ceb49..7b62d90 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -10,7 +10,6 @@ #ifndef _ASM_X86_FPU_INTERNAL_H #define _ASM_X86_FPU_INTERNAL_H -#include #include #include #include @@ -48,18 +47,6 @@ extern void fpu__resume_cpu(void); DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; -extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, - xstateregs_get; -extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, - xstateregs_set; - -/* - * xstateregs_active == regset_fpregs_active. Please refer to the comment - * at the definition of regset_fpregs_active. - */ -#define xstateregs_active regset_fpregs_active - #ifdef CONFIG_MATH_EMULATION extern void finit_soft_fpu(struct i387_soft_struct *soft); #else diff --git a/arch/x86/include/asm/fpu/regset.h b/arch/x86/include/asm/fpu/regset.h new file mode 100644 index 0000000..39d3107 --- /dev/null +++ b/arch/x86/include/asm/fpu/regset.h @@ -0,0 +1,21 @@ +/* + * FPU regset handling methods: + */ +#ifndef _ASM_X86_FPU_REGSET_H +#define _ASM_X86_FPU_REGSET_H + +#include + +extern user_regset_active_fn regset_fpregs_active, regset_xregset_fpregs_active; +extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get, + xstateregs_get; +extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set, + xstateregs_set; + +/* + * xstateregs_active == regset_fpregs_active. Please refer to the comment + * at the definition of regset_fpregs_active. 
+ */ +#define xstateregs_active regset_fpregs_active + +#endif /* _ASM_X86_FPU_REGSET_H */ diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index ab2c507..afd2132 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -3,6 +3,7 @@ #include #include +#include /* Bit 63 of XCR0 is reserved for future expansion */ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b61a513..07f1cc9 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -6,6 +6,7 @@ * Gareth Hughes , May 2000 */ #include +#include #include #include diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 7871074..59bd35a 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -5,9 +5,11 @@ */ #include #include + #include #include #include +#include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 51e73a6..9be72bc 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -30,6 +29,7 @@ #include #include #include +#include #include #include #include -- cgit v0.10.2 From acd58a3ad0ed5875fb88bbb078309a3d7ee147a0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 09:17:50 +0200 Subject: x86/fpu: Remove run-once init quirks Remove various boot quirks that came from the old code. The new code is cleanly split up into per-system and per-cpu init sequences, and system init functions are only called once. Remove the run-once quirks. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 72219ce2..7b6265d 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -143,12 +143,6 @@ EXPORT_SYMBOL_GPL(xstate_size); */ static void fpu__init_system_xstate_size_legacy(void) { - static bool on_boot_cpu = 1; - - if (!on_boot_cpu) - return; - on_boot_cpu = 0; - /* * Note that xstate_size might be overwriten later during * fpu__init_system_xstate(). diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 59bd35a..8285d4b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -658,12 +658,6 @@ void setup_xstate_comp(void) */ static void setup_init_fpu_buf(void) { - static int on_boot_cpu = 1; - - if (!on_boot_cpu) - return; - on_boot_cpu = 0; - if (!cpu_has_xsave) return; @@ -719,11 +713,6 @@ static void __init init_xstate_size(void) void fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; - static bool on_boot_cpu = 1; - - if (!on_boot_cpu) - return; - on_boot_cpu = 0; if (!cpu_has_xsave) { pr_info("x86/fpu: Legacy x87 FPU detected.\n"); -- cgit v0.10.2 From e1cebad49c54e0241e101ebf63d5238fe1137749 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 09:29:38 +0200 Subject: x86/fpu: Factor out the exception error code handling code Factor out the FPU error code handling code from traps.c and fpu/internal.h and move them close to each other. Also convert the helper functions to 'struct fpu *', which further simplifies them. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7b62d90..dfdafea 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -30,7 +30,8 @@ extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__activate_curr(struct fpu *fpu); extern void fpstate_init(struct fpu *fpu); -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); /* * High level FPU state handling functions: @@ -467,34 +468,4 @@ static inline void user_fpu_begin(void) preempt_enable(); } -/* - * i387 state interaction - */ -static inline unsigned short get_fpu_cwd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state.fxsave.cwd; - } else { - return (unsigned short)tsk->thread.fpu.state.fsave.cwd; - } -} - -static inline unsigned short get_fpu_swd(struct task_struct *tsk) -{ - if (cpu_has_fxsr) { - return tsk->thread.fpu.state.fxsave.swd; - } else { - return (unsigned short)tsk->thread.fpu.state.fsave.swd; - } -} - -static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) -{ - if (cpu_has_xmm) { - return tsk->thread.fpu.state.fxsave.mxcsr; - } else { - return MXCSR_DEFAULT; - } -} - #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 07f1cc9..4809c32 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include @@ -749,3 +750,90 @@ int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) EXPORT_SYMBOL(dump_fpu); #endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ + +/* + * x87 math exception handling: + */ + +static inline unsigned short get_fpu_cwd(struct fpu *fpu) +{ + if (cpu_has_fxsr) { + return fpu->state.fxsave.cwd; + } else { + return (unsigned short)fpu->state.fsave.cwd; + } +} + +static inline unsigned short get_fpu_swd(struct fpu *fpu) +{ + if (cpu_has_fxsr) { + return fpu->state.fxsave.swd; + } else { + return (unsigned short)fpu->state.fsave.swd; + } +} + +static inline unsigned short get_fpu_mxcsr(struct fpu *fpu) +{ + if (cpu_has_xmm) { + return fpu->state.fxsave.mxcsr; + } else { + return MXCSR_DEFAULT; + } +} + +int fpu__exception_code(struct fpu *fpu, int trap_nr) +{ + int err; + + if (trap_nr == X86_TRAP_MF) { + unsigned short cwd, swd; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(fpu); + swd = get_fpu_swd(fpu); + + err = swd & ~cwd; + } else { + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. 
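A worked example of the masking below, with a hypothetical MXCSR value (not from the patch): with only the divide-by-zero exception unmasked (ZM, bit 9, cleared) and the ZE flag (bit 2) set, MXCSR reads 0x1d84:

	unsigned short mxcsr = 0x1d84;
	unsigned short err = ~(mxcsr >> 7) & mxcsr;	/* exception-flag bits: 0x004 */

and 0x004 is mapped to FPE_FLTDIV by the tests that follow.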
+ */ + unsigned short mxcsr = get_fpu_mxcsr(fpu); + err = ~(mxcsr >> 7) & mxcsr; + } + + if (err & 0x001) { /* Invalid op */ + /* + * swd & 0x240 == 0x040: Stack Underflow + * swd & 0x240 == 0x240: Stack Overflow + * User must clear the SF bit (0x40) if set + */ + return FPE_FLTINV; + } else if (err & 0x004) { /* Divide by Zero */ + return FPE_FLTDIV; + } else if (err & 0x008) { /* Overflow */ + return FPE_FLTOVF; + } else if (err & 0x012) { /* Denormal, Underflow */ + return FPE_FLTUND; + } else if (err & 0x020) { /* Precision */ + return FPE_FLTRES; + } + + /* + * If we're using IRQ 13, or supposedly even some trap + * X86_TRAP_MF implementations, it's possible + * we get a spurious trap, which is not an error. + */ + return 0; +} diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 48dfcd9..cab397d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -708,8 +708,8 @@ NOKPROBE_SYMBOL(do_debug); static void math_error(struct pt_regs *regs, int error_code, int trapnr) { struct task_struct *task = current; + struct fpu *fpu = &task->thread.fpu; siginfo_t info; - unsigned short err; char *str = (trapnr == X86_TRAP_MF) ? "fpu exception" : "simd exception"; @@ -717,8 +717,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) return; conditional_sti(regs); - if (!user_mode(regs)) - { + if (!user_mode(regs)) { if (!fixup_exception(regs)) { task->thread.error_code = error_code; task->thread.trap_nr = trapnr; @@ -730,62 +729,20 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) /* * Save the info for the exception handler and clear the error. */ - fpu__save(&task->thread.fpu); - task->thread.trap_nr = trapnr; + fpu__save(fpu); + + task->thread.trap_nr = trapnr; task->thread.error_code = error_code; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - if (trapnr == X86_TRAP_MF) { - unsigned short cwd, swd; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); + info.si_signo = SIGFPE; + info.si_errno = 0; + info.si_addr = (void __user *)uprobe_get_trap_addr(regs); - err = swd & ~cwd; - } else { - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. 
- */ - unsigned short mxcsr = get_fpu_mxcsr(task); - err = ~(mxcsr >> 7) & mxcsr; - } + info.si_code = fpu__exception_code(fpu, trapnr); - if (err & 0x001) { /* Invalid op */ - /* - * swd & 0x240 == 0x040: Stack Underflow - * swd & 0x240 == 0x240: Stack Overflow - * User must clear the SF bit (0x40) if set - */ - info.si_code = FPE_FLTINV; - } else if (err & 0x004) { /* Divide by Zero */ - info.si_code = FPE_FLTDIV; - } else if (err & 0x008) { /* Overflow */ - info.si_code = FPE_FLTOVF; - } else if (err & 0x012) { /* Denormal, Underflow */ - info.si_code = FPE_FLTUND; - } else if (err & 0x020) { /* Precision */ - info.si_code = FPE_FLTRES; - } else { - /* - * If we're using IRQ 13, or supposedly even some trap - * X86_TRAP_MF implementations, it's possible - * we get a spurious trap, which is not an error. - */ + /* Retry when we get spurious exceptions: */ + if (!info.si_code) return; - } + force_sig_info(SIGFPE, &info, task); } -- cgit v0.10.2 From 0aba69789452faab6f6bd7cd293489bab66352bc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 10:08:36 +0200 Subject: x86/fpu: Harmonize the names of the fpstate_init() helper functions Harmonize the inconsistent naming of these related functions: fpstate_init() finit_soft_fpu() => fpstate_init_fsoft() fx_finit() => fpstate_init_fxstate() fx_finit() => fpstate_init_fstate() # split out Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index dfdafea..0236ae6 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -28,7 +28,18 @@ extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__activate_curr(struct fpu *fpu); + extern void fpstate_init(struct fpu *fpu); +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct i387_soft_struct *soft); +#else +static inline void fpstate_init_soft(struct i387_soft_struct *soft) {} +#endif +static inline void fpstate_init_fxstate(struct i387_fxsave_struct *fx) +{ + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); @@ -48,12 +59,6 @@ extern void fpu__resume_cpu(void); DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); -#ifdef CONFIG_MATH_EMULATION -extern void finit_soft_fpu(struct i387_soft_struct *soft); -#else -static inline void finit_soft_fpu(struct i387_soft_struct *soft) {} -#endif - /* * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, * on this CPU. @@ -93,12 +98,6 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has_safe(X86_FEATURE_FXSR); } -static inline void fx_finit(struct i387_fxsave_struct *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} - extern void fpstate_sanitize_xstate(struct fpu *fpu); #define user_insn(insn, output, input...) 
\ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 4809c32..494ab4c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -191,24 +191,30 @@ void fpu__save(struct fpu *fpu) } EXPORT_SYMBOL_GPL(fpu__save); +/* + * Legacy x87 fpstate state init: + */ +static inline void fpstate_init_fstate(struct i387_fsave_struct *fp) +{ + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; +} + void fpstate_init(struct fpu *fpu) { if (!cpu_has_fpu) { - finit_soft_fpu(&fpu->state.soft); + fpstate_init_soft(&fpu->state.soft); return; } memset(&fpu->state, 0, xstate_size); - if (cpu_has_fxsr) { - fx_finit(&fpu->state.fxsave); - } else { - struct i387_fsave_struct *fp = &fpu->state.fsave; - fp->cwd = 0xffff037fu; - fp->swd = 0xffff0000u; - fp->twd = 0xffffffffu; - fp->fos = 0xffff0000u; - } + if (cpu_has_fxsr) + fpstate_init_fxstate(&fpu->state.fxsave); + else + fpstate_init_fstate(&fpu->state.fsave); } EXPORT_SYMBOL_GPL(fpstate_init); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 7b6265d..5a7e570 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -121,7 +121,7 @@ static void fpu__init_system_generic(void) * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ - fx_finit(&init_xstate_ctx.i387); + fpstate_init_fxstate(&init_xstate_ctx.i387); fpu__init_system_mxcsr(); } diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index 7562341..768b2b8 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -30,7 +30,7 @@ static void fclex(void) } /* Needs to be externally visible */ -void finit_soft_fpu(struct i387_soft_struct *soft) +void fpstate_init_soft(struct i387_soft_struct *soft) { struct address *oaddr, *iaddr; memset(soft, 0, sizeof(*soft)); @@ -52,7 +52,7 @@ void finit_soft_fpu(struct i387_soft_struct *soft) void finit(void) { - finit_soft_fpu(&current->thread.fpu.state.soft); + fpstate_init_soft(&current->thread.fpu.state.soft); } /* -- cgit v0.10.2 From bf935b0b526ffa0607476dfc6198593553957dd9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 10:23:42 +0200 Subject: x86/fpu: Create 'union thread_xstate' helper for fpstate_init() fpstate_init() only uses fpu->state, so pass that in to it. This enables the cleanup we will do in the next patch. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 0236ae6..b74aa43 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -29,7 +29,7 @@ extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__activate_curr(struct fpu *fpu); -extern void fpstate_init(struct fpu *fpu); +extern void fpstate_init(union thread_xstate *state); #ifdef CONFIG_MATH_EMULATION extern void fpstate_init_soft(struct i387_soft_struct *soft); #else diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 494ab4c..8e4cad5 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -202,19 +202,19 @@ static inline void fpstate_init_fstate(struct i387_fsave_struct *fp) fp->fos = 0xffff0000u; } -void fpstate_init(struct fpu *fpu) +void fpstate_init(union thread_xstate *state) { if (!cpu_has_fpu) { - fpstate_init_soft(&fpu->state.soft); + fpstate_init_soft(&state->soft); return; } - memset(&fpu->state, 0, xstate_size); + memset(state, 0, xstate_size); if (cpu_has_fxsr) - fpstate_init_fxstate(&fpu->state.fxsave); + fpstate_init_fxstate(&state->fxsave); else - fpstate_init_fstate(&fpu->state.fsave); + fpstate_init_fstate(&state->fsave); } EXPORT_SYMBOL_GPL(fpstate_init); @@ -282,7 +282,7 @@ void fpu__activate_curr(struct fpu *fpu) WARN_ON_ONCE(fpu != &current->thread.fpu); if (!fpu->fpstate_active) { - fpstate_init(fpu); + fpstate_init(&fpu->state); /* Safe to do for the current task: */ fpu->fpstate_active = 1; @@ -321,7 +321,7 @@ static void fpu__activate_stopped(struct fpu *child_fpu) if (child_fpu->fpstate_active) { child_fpu->last_cpu = -1; } else { - fpstate_init(child_fpu); + fpstate_init(&child_fpu->state); /* Safe to do for stopped child tasks: */ child_fpu->fpstate_active = 1; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 8285d4b..afbd582 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -456,7 +456,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(fpu); + fpstate_init(&fpu->state); err = -1; } else { sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f443817..3d811bb 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7004,7 +7004,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) static void fx_init(struct kvm_vcpu *vcpu) { - fpstate_init(&vcpu->arch.guest_fpu); + fpstate_init(&vcpu->arch.guest_fpu.state); if (cpu_has_xsaves) vcpu->arch.guest_fpu.state.xsave.header.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED; -- cgit v0.10.2 From 6f57502310c85b60bdea78228e9b5bb3e82dc3b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 11:07:06 +0200 Subject: x86/fpu: Generalize 'init_xstate_ctx' So the handling of init_xstate_ctx has a layering violation: both 'struct xsave_struct' and 'union thread_xstate' have a 'struct i387_fxsave_struct' member: xsave_struct::i387 and thread_xstate::fxsave. The handling of init_xstate_ctx is generic: it is used on all CPUs, with or without the XSAVE instruction. So it's confusing that the generic code passes around and handles an XSAVE-specific format. 
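For reference, the aliasing in question comes from the layout of the union (a simplified sketch assembled from the surrounding patches; the comments are illustrative):

	union thread_xstate {
		struct i387_fsave_struct	fsave;	/* legacy FNSAVE format */
		struct i387_fxsave_struct	fxsave;	/* FXSAVE format */
		struct i387_soft_struct		soft;	/* math emulation */
		struct xsave_struct		xsave;	/* XSAVE format, begins with an i387 fxsave area */
	};

Every member starts at offset zero, so init_fpstate.fxsave and init_fpstate.xsave.i387 name the same bytes.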
What we really want is for init_xstate_ctx to be a proper fpstate, so that we can use its ::fxsave and ::xsave members as appropriate. Since xsave_struct::i387 and thread_xstate::fxsave alias each other, this is not a functional problem. So implement this, and move init_xstate_ctx to the generic FPU code in the process. Also, since init_xstate_ctx is not XSAVE-specific anymore, rename it to init_fpstate, and mark it __read_mostly, because it's only modified once during bootup, and used as a reference fpstate later on. There's no change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index b74aa43..792fdbe 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -22,6 +22,8 @@ extern unsigned int mxcsr_feature_mask; +extern union thread_xstate init_fpstate; + extern void fpu__init_cpu(void); extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); @@ -342,9 +344,9 @@ static inline void fpregs_deactivate(struct fpu *fpu) static inline void restore_init_xstate(void) { if (use_xsave()) - xrstor_state(&init_xstate_ctx, -1); + xrstor_state(&init_fpstate.xsave, -1); else - fxrstor_checking(&init_xstate_ctx.i387); + fxrstor_checking(&init_fpstate.fxsave); } /* diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index afd2132..3051280 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -37,7 +37,6 @@ extern unsigned int xstate_size; extern u64 xfeatures_mask; extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; -extern struct xsave_struct init_xstate_ctx; extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8e4cad5..a396f80 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -13,6 +13,12 @@ #include /* + * Represents the initial FPU state. It's mostly (but not completely) zeroes, + * depending on the FPU hardware format: + */ +union thread_xstate init_fpstate __read_mostly; + +/* * Track whether the kernel is using the FPU state * currently. * diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5a7e570..93bc11a 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -121,7 +121,7 @@ static void fpu__init_system_generic(void) * Set up the legacy init FPU context. (xstate init might overwrite this * with a more modern format, if the CPU supports it.) */ - fpstate_init_fxstate(&init_xstate_ctx.i387); + fpstate_init_fxstate(&init_fpstate.fxsave); fpu__init_system_mxcsr(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index afbd582..527d4bf 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -31,11 +31,6 @@ static const char *xfeature_names[] = */ u64 xfeatures_mask __read_mostly; -/* - * Represents init state for the supported extended state. 
- */ -struct xsave_struct init_xstate_ctx; - static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX]; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; @@ -150,7 +145,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) int size = xstate_sizes[feature_bit]; memcpy((void *)fx + offset, - (void *)&init_xstate_ctx + offset, + (void *)&init_fpstate.xsave + offset, size); } @@ -377,12 +372,12 @@ static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) if (use_xsave()) { if ((unsigned long)buf % 64 || fx_only) { u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; - xrstor_state(&init_xstate_ctx, init_bv); + xrstor_state(&init_fpstate.xsave, init_bv); return fxrstor_user(buf); } else { u64 init_bv = xfeatures_mask & ~xbv; if (unlikely(init_bv)) - xrstor_state(&init_xstate_ctx, init_bv); + xrstor_state(&init_fpstate.xsave, init_bv); return xrestore_user(buf, xbv); } } else if (use_fxsr()) { @@ -665,20 +660,20 @@ static void setup_init_fpu_buf(void) print_xstate_features(); if (cpu_has_xsaves) { - init_xstate_ctx.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; - init_xstate_ctx.header.xfeatures = xfeatures_mask; + init_fpstate.xsave.header.xcomp_bv = (u64)1 << 63 | xfeatures_mask; + init_fpstate.xsave.header.xfeatures = xfeatures_mask; } /* * Init all the features state with header_bv being 0x0 */ - xrstor_state_booting(&init_xstate_ctx, -1); + xrstor_state_booting(&init_fpstate.xsave, -1); /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state_booting(&init_xstate_ctx); + xsave_state_booting(&init_fpstate.xsave); } /* -- cgit v0.10.2 From 815418890e2a3984d8b04c433072df1a42573f96 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 11:21:59 +0200 Subject: x86/fpu: Move restore_init_xstate() out of fpu/internal.h Move restore_init_xstate() next to its sole caller. Also rename it to copy_init_fpstate_to_fpregs() and add some comments about what it does. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 792fdbe..a1810eb 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -341,14 +341,6 @@ static inline void fpregs_deactivate(struct fpu *fpu) __fpregs_deactivate_hw(); } -static inline void restore_init_xstate(void) -{ - if (use_xsave()) - xrstor_state(&init_fpstate.xsave, -1); - else - fxrstor_checking(&init_fpstate.fxsave); -} - /* * Definitions for the eXtended Control Register instructions */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index a396f80..2ea9e2f 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -392,6 +392,18 @@ void fpu__drop(struct fpu *fpu) } /* + * Clear FPU registers by setting them up from + * the init fpstate: + */ +static inline void copy_init_fpstate_to_fpregs(void) +{ + if (use_xsave()) + xrstor_state(&init_fpstate.xsave, -1); + else + fxrstor_checking(&init_fpstate.fxsave); +} + +/* * Clear the FPU state back to init state. 
* * Called by sys_execve(), by the signal handler code and by various @@ -409,7 +421,7 @@ void fpu__clear(struct fpu *fpu) fpu__activate_curr(fpu); user_fpu_begin(); } - restore_init_xstate(); + copy_init_fpstate_to_fpregs(); } } -- cgit v0.10.2 From c681314421c7c70c418190f3b4ffb4d3257ea5be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 11:34:09 +0200 Subject: x86/fpu: Rename all the fpregs, xregs, fxregs and fregs handling functions Standardize the naming of the various functions that copy register content in specific FPU context formats: copy_fxregs_to_kernel() # was: fpu_fxsave() copy_xregs_to_kernel() # was: xsave_state() copy_kernel_to_fregs() # was: frstor_checking() copy_kernel_to_fxregs() # was: fxrstor_checking() copy_kernel_to_xregs() # was: fpu_xrstor_checking() copy_kernel_to_xregs_booting() # was: xrstor_state_booting() copy_fregs_to_user() # was: fsave_user() copy_fxregs_to_user() # was: fxsave_user() copy_xregs_to_user() # was: xsave_user() copy_user_to_fregs() # was: frstor_user() copy_user_to_fxregs() # was: fxrstor_user() copy_user_to_xregs() # was: xrestore_user() copy_user_to_fpregs_zeroing() # was: restore_user_xstate() Eliminate fpu_xrstor_checking(), because it was just a wrapper. No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a1810eb..f23ea10 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -133,57 +133,57 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) -static inline int fsave_user(struct i387_fsave_struct __user *fx) +static inline int copy_fregs_to_user(struct i387_fsave_struct __user *fx) { return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } -static inline int fxsave_user(struct i387_fxsave_struct __user *fx) +static inline int copy_fxregs_to_user(struct i387_fxsave_struct __user *fx) { if (config_enabled(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) return user_insn(fxsaveq %[fx], [fx] "=m" (*fx), "m" (*fx)); - /* See comment in fpu_fxsave() below. */ + /* See comment in copy_fxregs_to_kernel() below. */ return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline int fxrstor_checking(struct i387_fxsave_struct *fx) +static inline int copy_kernel_to_fxregs(struct i387_fxsave_struct *fx) { if (config_enabled(CONFIG_X86_32)) return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - /* See comment in fpu_fxsave() below. */ + /* See comment in copy_fxregs_to_kernel() below. */ return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); } -static inline int fxrstor_user(struct i387_fxsave_struct __user *fx) +static inline int copy_user_to_fxregs(struct i387_fxsave_struct __user *fx) { if (config_enabled(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); else if (config_enabled(CONFIG_AS_FXSAVEQ)) return user_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); - /* See comment in fpu_fxsave() below. */ + /* See comment in copy_fxregs_to_kernel() below. 
*/ return user_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); } -static inline int frstor_checking(struct i387_fsave_struct *fx) +static inline int copy_kernel_to_fregs(struct i387_fsave_struct *fx) { return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int frstor_user(struct i387_fsave_struct __user *fx) +static inline int copy_user_to_fregs(struct i387_fsave_struct __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline void fpu_fxsave(struct fpu *fpu) +static inline void copy_fxregs_to_kernel(struct fpu *fpu) { if (config_enabled(CONFIG_X86_32)) asm volatile( "fxsave %[fx]" : [fx] "=m" (fpu->state.fxsave)); @@ -230,12 +230,12 @@ static inline void fpu_fxsave(struct fpu *fpu) static inline int copy_fpregs_to_fpstate(struct fpu *fpu) { if (likely(use_xsave())) { - xsave_state(&fpu->state.xsave); + copy_xregs_to_kernel(&fpu->state.xsave); return 1; } if (likely(use_fxsr())) { - fpu_fxsave(fpu); + copy_fxregs_to_kernel(fpu); return 1; } @@ -251,11 +251,11 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) static inline int __copy_fpstate_to_fpregs(struct fpu *fpu) { if (use_xsave()) - return fpu_xrstor_checking(&fpu->state.xsave); + return copy_kernel_to_xregs(&fpu->state.xsave, -1); else if (use_fxsr()) - return fxrstor_checking(&fpu->state.fxsave); + return copy_kernel_to_fxregs(&fpu->state.fxsave); else - return frstor_checking(&fpu->state.fsave); + return copy_kernel_to_fregs(&fpu->state.fsave); } static inline int copy_fpstate_to_fpregs(struct fpu *fpu) diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 3051280..7f59480 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -58,7 +58,7 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int xsave_state_booting(struct xsave_struct *fx) +static inline int copy_xregs_to_kernel_booting(struct xsave_struct *fx) { u64 mask = -1; u32 lmask = mask; @@ -86,7 +86,7 @@ static inline int xsave_state_booting(struct xsave_struct *fx) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) +static inline int copy_kernel_to_xregs_booting(struct xsave_struct *fx, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -112,7 +112,7 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask) /* * Save processor xstate to xsave area. */ -static inline int xsave_state(struct xsave_struct *fx) +static inline int copy_xregs_to_kernel(struct xsave_struct *fx) { u64 mask = -1; u32 lmask = mask; @@ -151,7 +151,7 @@ static inline int xsave_state(struct xsave_struct *fx) /* * Restore processor xstate from xsave area. */ -static inline int xrstor_state(struct xsave_struct *fx, u64 mask) +static inline int copy_kernel_to_xregs(struct xsave_struct *fx, u64 mask) { int err = 0; u32 lmask = mask; @@ -177,14 +177,6 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask) } /* - * Restore xstate context for new process during context switch. - */ -static inline int fpu_xrstor_checking(struct xsave_struct *fx) -{ - return xrstor_state(fx, -1); -} - -/* * Save xstate to user space xsave area. 
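A note on the lmask/hmask pattern these helpers share (a simplified sketch, not code from the patches; the real wrappers use alternatives and explicit opcode bytes): the XSAVE instruction family takes its 64-bit requested-feature bitmap in EDX:EAX, so every wrapper splits the mask before issuing the instruction:

	u32 lmask = mask;		/* low 32 bits  -> EAX */
	u32 hmask = mask >> 32;		/* high 32 bits -> EDX */
	asm volatile("xrstor %0" : : "m" (*fx), "a" (lmask), "d" (hmask));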
* * We don't use modified optimization because xrstor/xrstors might track @@ -194,7 +186,7 @@ static inline int fpu_xrstor_checking(struct xsave_struct *fx) * backward compatibility for old applications which don't understand * compacted format of xsave area. */ -static inline int xsave_user(struct xsave_struct __user *buf) +static inline int copy_xregs_to_user(struct xsave_struct __user *buf) { int err; @@ -218,7 +210,7 @@ static inline int xsave_user(struct xsave_struct __user *buf) /* * Restore xstate from user space xsave area. */ -static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask) +static inline int copy_user_to_xregs(struct xsave_struct __user *buf, u64 mask) { int err = 0; struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2ea9e2f..2cb75bd 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -398,9 +398,9 @@ void fpu__drop(struct fpu *fpu) static inline void copy_init_fpstate_to_fpregs(void) { if (use_xsave()) - xrstor_state(&init_fpstate.xsave, -1); + copy_kernel_to_xregs(&init_fpstate.xsave, -1); else - fxrstor_checking(&init_fpstate.fxsave); + copy_kernel_to_fxregs(&init_fpstate.fxsave); } /* diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 527d4bf..336b3da 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -260,11 +260,11 @@ static inline int copy_fpregs_to_sigframe(struct xsave_struct __user *buf) int err; if (use_xsave()) - err = xsave_user(buf); + err = copy_xregs_to_user(buf); else if (use_fxsr()) - err = fxsave_user((struct i387_fxsave_struct __user *) buf); + err = copy_fxregs_to_user((struct i387_fxsave_struct __user *) buf); else - err = fsave_user((struct i387_fsave_struct __user *) buf); + err = copy_fregs_to_user((struct i387_fsave_struct __user *) buf); if (unlikely(err) && __clear_user(buf, xstate_size)) err = -EFAULT; @@ -314,7 +314,7 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) return -1; /* Update the thread's fxstate to save the fsave header. */ if (ia32_fxstate) - fpu_fxsave(&tsk->thread.fpu); + copy_fxregs_to_kernel(&tsk->thread.fpu); } else { fpstate_sanitize_xstate(&tsk->thread.fpu); if (__copy_to_user(buf_fx, xsave, xstate_size)) @@ -367,23 +367,23 @@ sanitize_restored_xstate(struct task_struct *tsk, /* * Restore the extended state if present. Otherwise, restore the FP/SSE state. 
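To make the init_bv computation below concrete (hypothetical masks, for illustration only): if the kernel's xfeatures_mask is XSTATE_FPSSE | XSTATE_YMM while the frame's xbv carries only XSTATE_FPSSE, then

	u64 init_bv = xfeatures_mask & ~xbv;	/* == XSTATE_YMM */

so YMM is first reset from init_fpstate and only FP/SSE are then restored from the user buffer.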
*/ -static inline int restore_user_xstate(void __user *buf, u64 xbv, int fx_only) +static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) { if (use_xsave()) { if ((unsigned long)buf % 64 || fx_only) { u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; - xrstor_state(&init_fpstate.xsave, init_bv); - return fxrstor_user(buf); + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_fxregs(buf); } else { u64 init_bv = xfeatures_mask & ~xbv; if (unlikely(init_bv)) - xrstor_state(&init_fpstate.xsave, init_bv); - return xrestore_user(buf, xbv); + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_xregs(buf, xbv); } } else if (use_fxsr()) { - return fxrstor_user(buf); + return copy_user_to_fxregs(buf); } else - return frstor_user(buf); + return copy_user_to_fregs(buf); } static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) @@ -471,7 +471,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * state to the registers directly (with exceptions handled). */ user_fpu_begin(); - if (restore_user_xstate(buf_fx, xfeatures, fx_only)) { + if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { fpu__clear(fpu); return -1; } @@ -667,13 +667,13 @@ static void setup_init_fpu_buf(void) /* * Init all the features state with header_bv being 0x0 */ - xrstor_state_booting(&init_fpstate.xsave, -1); + copy_kernel_to_xregs_booting(&init_fpstate.xsave, -1); /* * Dump the init state again. This is to identify the init state * of any feature which is not represented by all zero's. */ - xsave_state_booting(&init_fpstate.xsave); + copy_xregs_to_kernel_booting(&init_fpstate.xsave); } /* diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index ea5b367..5e20bac 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -389,7 +389,7 @@ int mpx_enable_management(struct task_struct *tsk) * directory into XSAVE/XRSTOR Save Area and enable MPX through * XRSTOR instruction. * - * xsave_state() is expected to be very expensive. Storing the bounds + * copy_xregs_to_kernel() is expected to be very expensive. Storing the bounds * directory here means that we do not have to do xsave in the unmap * path; we can just use mm->bd_addr instead. */ -- cgit v0.10.2 From b992c660d3b3161ddb992e1b0cac727de574709c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 12:45:38 +0200 Subject: x86/fpu: Factor out fpu/signal.c fpu/xstate.c has a lot of generic FPU signal frame handling routines, move them into a separate file: fpu/signal.c. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h index 0803dc2..7358e9d 100644 --- a/arch/x86/include/asm/fpu/signal.h +++ b/arch/x86/include/asm/fpu/signal.h @@ -28,4 +28,6 @@ unsigned long fpu__alloc_mathframe(unsigned long sp, int ia32_frame, unsigned long *buf_fx, unsigned long *size); +extern void fpu__init_prepare_fx_sw_frame(void); + #endif /* _ASM_X86_FPU_SIGNAL_H */ diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile index 6ae59bc..5c697de 100644 --- a/arch/x86/kernel/fpu/Makefile +++ b/arch/x86/kernel/fpu/Makefile @@ -2,4 +2,4 @@ # Build rules for the FPU support code: # -obj-y += init.o bugs.o core.o xstate.o +obj-y += init.o bugs.o core.o signal.o xstate.o diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c new file mode 100644 index 0000000..8d0c26a --- /dev/null +++ b/arch/x86/kernel/fpu/signal.c @@ -0,0 +1,404 @@ +/* + * FPU signal frame handling routines. + */ + +#include +#include + +#include +#include +#include + +#include + +static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; + +/* + * Check for the presence of extended state information in the + * user fpstate pointer in the sigcontext. + */ +static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, + void __user *fpstate, + struct _fpx_sw_bytes *fx_sw) +{ + int min_xstate_size = sizeof(struct i387_fxsave_struct) + + sizeof(struct xstate_header); + unsigned int magic2; + + if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) + return -1; + + /* Check for the first magic field and other error scenarios. */ + if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || + fx_sw->xstate_size < min_xstate_size || + fx_sw->xstate_size > xstate_size || + fx_sw->xstate_size > fx_sw->extended_size) + return -1; + + /* + * Check for the presence of second magic word at the end of memory + * layout. This detects the case where the user just copied the legacy + * fpstate layout with out copying the extended state information + * in the memory layout. + */ + if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) + || magic2 != FP_XSTATE_MAGIC2) + return -1; + + return 0; +} + +/* + * Signal frame handlers. + */ +static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) +{ + if (use_fxsr()) { + struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; + struct user_i387_ia32_struct env; + struct _fpstate_ia32 __user *fp = buf; + + convert_from_fxsr(&env, tsk); + + if (__copy_to_user(buf, &env, sizeof(env)) || + __put_user(xsave->i387.swd, &fp->status) || + __put_user(X86_FXSR_MAGIC, &fp->magic)) + return -1; + } else { + struct i387_fsave_struct __user *fp = buf; + u32 swd; + if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) + return -1; + } + + return 0; +} + +static inline int save_xstate_epilog(void __user *buf, int ia32_frame) +{ + struct xsave_struct __user *x = buf; + struct _fpx_sw_bytes *sw_bytes; + u32 xfeatures; + int err; + + /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ + sw_bytes = ia32_frame ? 
&fx_sw_reserved_ia32 : &fx_sw_reserved; + err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); + + if (!use_xsave()) + return err; + + err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); + + /* + * Read the xfeatures which we copied (directly from the cpu or + * from the state in task struct) to the user buffers. + */ + err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); + + /* + * For legacy compatible, we always set FP/SSE bits in the bit + * vector while saving the state to the user context. This will + * enable us capturing any changes(during sigreturn) to + * the FP/SSE bits by the legacy applications which don't touch + * xfeatures in the xsave header. + * + * xsave aware apps can change the xfeatures in the xsave + * header as well as change any contents in the memory layout. + * xrestore as part of sigreturn will capture all the changes. + */ + xfeatures |= XSTATE_FPSSE; + + err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); + + return err; +} + +static inline int copy_fpregs_to_sigframe(struct xsave_struct __user *buf) +{ + int err; + + if (use_xsave()) + err = copy_xregs_to_user(buf); + else if (use_fxsr()) + err = copy_fxregs_to_user((struct i387_fxsave_struct __user *) buf); + else + err = copy_fregs_to_user((struct i387_fsave_struct __user *) buf); + + if (unlikely(err) && __clear_user(buf, xstate_size)) + err = -EFAULT; + return err; +} + +/* + * Save the fpu, extended register state to the user signal frame. + * + * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save + * state is copied. + * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. + * + * buf == buf_fx for 64-bit frames and 32-bit fsave frame. + * buf != buf_fx for 32-bit frames with fxstate. + * + * If the fpu, extended register state is live, save the state directly + * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, + * copy the thread's fpu state to the user frame starting at 'buf_fx'. + * + * If this is a 32-bit frame with fxstate, put a fsave header before + * the aligned state at 'buf_fx'. + * + * For [f]xsave state, update the SW reserved fields in the [f]xsave frame + * indicating the absence/presence of the extended state to the user. + */ +int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) +{ + struct xsave_struct *xsave = &current->thread.fpu.state.xsave; + struct task_struct *tsk = current; + int ia32_fxstate = (buf != buf_fx); + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!access_ok(VERIFY_WRITE, buf, size)) + return -EACCES; + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(current, NULL, 0, + sizeof(struct user_i387_ia32_struct), NULL, + (struct _fpstate_ia32 __user *) buf) ? -1 : 1; + + if (fpregs_active()) { + /* Save the live register state to the user directly. */ + if (copy_fpregs_to_sigframe(buf_fx)) + return -1; + /* Update the thread's fxstate to save the fsave header. */ + if (ia32_fxstate) + copy_fxregs_to_kernel(&tsk->thread.fpu); + } else { + fpstate_sanitize_xstate(&tsk->thread.fpu); + if (__copy_to_user(buf_fx, xsave, xstate_size)) + return -1; + } + + /* Save the fsave header for the 32-bit frames. 
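Condensing the function comment above into a picture of the user stack (illustrative; the sizes come from the structures involved):

	buf    ->  struct i387_fsave_struct     (legacy fsave header, 32-bit fxstate frames only)
	buf_fx ->  [f]xsave image, 64-byte aligned, xstate_size bytes
	           __u32 FP_XSTATE_MAGIC2       (appended when XSAVE is in use)

For 64-bit frames and 32-bit fsave-only frames, buf == buf_fx and the header is absent.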
+ */ + if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) + return -1; + + if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) + return -1; + + return 0; +} + +static inline void +sanitize_restored_xstate(struct task_struct *tsk, + struct user_i387_ia32_struct *ia32_env, + u64 xfeatures, int fx_only) +{ + struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; + struct xstate_header *header = &xsave->header; + + if (use_xsave()) { + /* These bits must be zero. */ + memset(header->reserved, 0, 48); + + /* + * Init the state that is not present in the memory + * layout and not enabled by the OS. + */ + if (fx_only) + header->xfeatures = XSTATE_FPSSE; + else + header->xfeatures &= (xfeatures_mask & xfeatures); + } + + if (use_fxsr()) { + /* + * mxcsr reserved bits must be masked to zero for security + * reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + + convert_to_fxsr(tsk, ia32_env); + } +} + +/* + * Restore the extended state if present. Otherwise, restore the FP/SSE state. + */ +static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) +{ + if (use_xsave()) { + if ((unsigned long)buf % 64 || fx_only) { + u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_fxregs(buf); + } else { + u64 init_bv = xfeatures_mask & ~xbv; + if (unlikely(init_bv)) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return copy_user_to_xregs(buf, xbv); + } + } else if (use_fxsr()) { + return copy_user_to_fxregs(buf); + } else + return copy_user_to_fregs(buf); +} + +static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) +{ + int ia32_fxstate = (buf != buf_fx); + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int state_size = xstate_size; + u64 xfeatures = 0; + int fx_only = 0; + + ia32_fxstate &= (config_enabled(CONFIG_X86_32) || + config_enabled(CONFIG_IA32_EMULATION)); + + if (!buf) { + fpu__clear(fpu); + return 0; + } + + if (!access_ok(VERIFY_READ, buf, size)) + return -EACCES; + + fpu__activate_curr(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(current, NULL, + 0, sizeof(struct user_i387_ia32_struct), + NULL, buf) != 0; + + if (use_xsave()) { + struct _fpx_sw_bytes fx_sw_user; + if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { + /* + * Couldn't find the extended state information in the + * memory layout. Restore just the FP/SSE and init all + * the other extended state. + */ + state_size = sizeof(struct i387_fxsave_struct); + fx_only = 1; + } else { + state_size = fx_sw_user.xstate_size; + xfeatures = fx_sw_user.xfeatures; + } + } + + if (ia32_fxstate) { + /* + * For 32-bit frames with fxstate, copy the user state to the + * thread's fpu state, reconstruct fxstate from the fsave + * header. Sanitize the copied state etc. + */ + struct fpu *fpu = &tsk->thread.fpu; + struct user_i387_ia32_struct env; + int err = 0; + + /* + * Drop the current fpu which clears fpu->fpstate_active. This ensures + * that any context-switch during the copy of the new state, + * avoids the intermediate state from getting restored/saved. + * Thus avoiding the new restored state from getting corrupted. + * We will be ready to restore/save the state only after + * fpu->fpstate_active is again set. 
+ */ + fpu__drop(fpu); + + if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || + __copy_from_user(&env, buf, sizeof(env))) { + fpstate_init(&fpu->state); + err = -1; + } else { + sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); + } + + fpu->fpstate_active = 1; + if (use_eager_fpu()) { + preempt_disable(); + fpu__restore(); + preempt_enable(); + } + + return err; + } else { + /* + * For 64-bit frames and 32-bit fsave frames, restore the user + * state to the registers directly (with exceptions handled). + */ + user_fpu_begin(); + if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { + fpu__clear(fpu); + return -1; + } + } + + return 0; +} + +static inline int xstate_sigframe_size(void) +{ + return use_xsave() ? xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; +} + +/* + * Restore FPU state from a sigframe: + */ +int fpu__restore_sig(void __user *buf, int ia32_frame) +{ + void __user *buf_fx = buf; + int size = xstate_sigframe_size(); + + if (ia32_frame && use_fxsr()) { + buf_fx = buf + sizeof(struct i387_fsave_struct); + size += sizeof(struct i387_fsave_struct); + } + + return __fpu__restore_sig(buf, buf_fx, size); +} + +unsigned long +fpu__alloc_mathframe(unsigned long sp, int ia32_frame, + unsigned long *buf_fx, unsigned long *size) +{ + unsigned long frame_size = xstate_sigframe_size(); + + *buf_fx = sp = round_down(sp - frame_size, 64); + if (ia32_frame && use_fxsr()) { + frame_size += sizeof(struct i387_fsave_struct); + sp -= sizeof(struct i387_fsave_struct); + } + + *size = frame_size; + + return sp; +} +/* + * Prepare the SW reserved portion of the fxsave memory layout, indicating + * the presence of the extended state information in the memory layout + * pointed by the fpstate pointer in the sigcontext. + * This will be saved when ever the FP and extended state context is + * saved on the user stack during the signal handler delivery to the user. + */ +void fpu__init_prepare_fx_sw_frame(void) +{ + int fsave_header_size = sizeof(struct i387_fsave_struct); + int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; + + if (config_enabled(CONFIG_X86_32)) + size += fsave_header_size; + + fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; + fx_sw_reserved.extended_size = size; + fx_sw_reserved.xfeatures = xfeatures_mask; + fx_sw_reserved.xstate_size = xstate_size; + + if (config_enabled(CONFIG_IA32_EMULATION)) { + fx_sw_reserved_ia32 = fx_sw_reserved; + fx_sw_reserved_ia32.extended_size += fsave_header_size; + } +} + diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 336b3da..3629e2e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -10,7 +10,7 @@ #include #include #include -#include + #include static const char *xfeature_names[] = @@ -31,7 +31,6 @@ static const char *xfeature_names[] = */ u64 xfeatures_mask __read_mostly; -static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX]; static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8]; @@ -155,395 +154,6 @@ void fpstate_sanitize_xstate(struct fpu *fpu) } /* - * Check for the presence of extended state information in the - * user fpstate pointer in the sigcontext. 
- */ -static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, - void __user *fpstate, - struct _fpx_sw_bytes *fx_sw) -{ - int min_xstate_size = sizeof(struct i387_fxsave_struct) + - sizeof(struct xstate_header); - unsigned int magic2; - - if (__copy_from_user(fx_sw, &buf->sw_reserved[0], sizeof(*fx_sw))) - return -1; - - /* Check for the first magic field and other error scenarios. */ - if (fx_sw->magic1 != FP_XSTATE_MAGIC1 || - fx_sw->xstate_size < min_xstate_size || - fx_sw->xstate_size > xstate_size || - fx_sw->xstate_size > fx_sw->extended_size) - return -1; - - /* - * Check for the presence of second magic word at the end of memory - * layout. This detects the case where the user just copied the legacy - * fpstate layout with out copying the extended state information - * in the memory layout. - */ - if (__get_user(magic2, (__u32 __user *)(fpstate + fx_sw->xstate_size)) - || magic2 != FP_XSTATE_MAGIC2) - return -1; - - return 0; -} - -/* - * Signal frame handlers. - */ -static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) -{ - if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; - struct user_i387_ia32_struct env; - struct _fpstate_ia32 __user *fp = buf; - - convert_from_fxsr(&env, tsk); - - if (__copy_to_user(buf, &env, sizeof(env)) || - __put_user(xsave->i387.swd, &fp->status) || - __put_user(X86_FXSR_MAGIC, &fp->magic)) - return -1; - } else { - struct i387_fsave_struct __user *fp = buf; - u32 swd; - if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) - return -1; - } - - return 0; -} - -static inline int save_xstate_epilog(void __user *buf, int ia32_frame) -{ - struct xsave_struct __user *x = buf; - struct _fpx_sw_bytes *sw_bytes; - u32 xfeatures; - int err; - - /* Setup the bytes not touched by the [f]xsave and reserved for SW. */ - sw_bytes = ia32_frame ? &fx_sw_reserved_ia32 : &fx_sw_reserved; - err = __copy_to_user(&x->i387.sw_reserved, sw_bytes, sizeof(*sw_bytes)); - - if (!use_xsave()) - return err; - - err |= __put_user(FP_XSTATE_MAGIC2, (__u32 *)(buf + xstate_size)); - - /* - * Read the xfeatures which we copied (directly from the cpu or - * from the state in task struct) to the user buffers. - */ - err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures); - - /* - * For legacy compatible, we always set FP/SSE bits in the bit - * vector while saving the state to the user context. This will - * enable us capturing any changes(during sigreturn) to - * the FP/SSE bits by the legacy applications which don't touch - * xfeatures in the xsave header. - * - * xsave aware apps can change the xfeatures in the xsave - * header as well as change any contents in the memory layout. - * xrestore as part of sigreturn will capture all the changes. - */ - xfeatures |= XSTATE_FPSSE; - - err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures); - - return err; -} - -static inline int copy_fpregs_to_sigframe(struct xsave_struct __user *buf) -{ - int err; - - if (use_xsave()) - err = copy_xregs_to_user(buf); - else if (use_fxsr()) - err = copy_fxregs_to_user((struct i387_fxsave_struct __user *) buf); - else - err = copy_fregs_to_user((struct i387_fsave_struct __user *) buf); - - if (unlikely(err) && __clear_user(buf, xstate_size)) - err = -EFAULT; - return err; -} - -/* - * Save the fpu, extended register state to the user signal frame. - * - * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save - * state is copied. - * 'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'. 
- * - * buf == buf_fx for 64-bit frames and 32-bit fsave frame. - * buf != buf_fx for 32-bit frames with fxstate. - * - * If the fpu, extended register state is live, save the state directly - * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise, - * copy the thread's fpu state to the user frame starting at 'buf_fx'. - * - * If this is a 32-bit frame with fxstate, put a fsave header before - * the aligned state at 'buf_fx'. - * - * For [f]xsave state, update the SW reserved fields in the [f]xsave frame - * indicating the absence/presence of the extended state to the user. - */ -int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) -{ - struct xsave_struct *xsave = &current->thread.fpu.state.xsave; - struct task_struct *tsk = current; - int ia32_fxstate = (buf != buf_fx); - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!access_ok(VERIFY_WRITE, buf, size)) - return -EACCES; - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(current, NULL, 0, - sizeof(struct user_i387_ia32_struct), NULL, - (struct _fpstate_ia32 __user *) buf) ? -1 : 1; - - if (fpregs_active()) { - /* Save the live register state to the user directly. */ - if (copy_fpregs_to_sigframe(buf_fx)) - return -1; - /* Update the thread's fxstate to save the fsave header. */ - if (ia32_fxstate) - copy_fxregs_to_kernel(&tsk->thread.fpu); - } else { - fpstate_sanitize_xstate(&tsk->thread.fpu); - if (__copy_to_user(buf_fx, xsave, xstate_size)) - return -1; - } - - /* Save the fsave header for the 32-bit frames. */ - if ((ia32_fxstate || !use_fxsr()) && save_fsave_header(tsk, buf)) - return -1; - - if (use_fxsr() && save_xstate_epilog(buf_fx, ia32_fxstate)) - return -1; - - return 0; -} - -static inline void -sanitize_restored_xstate(struct task_struct *tsk, - struct user_i387_ia32_struct *ia32_env, - u64 xfeatures, int fx_only) -{ - struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; - struct xstate_header *header = &xsave->header; - - if (use_xsave()) { - /* These bits must be zero. */ - memset(header->reserved, 0, 48); - - /* - * Init the state that is not present in the memory - * layout and not enabled by the OS. - */ - if (fx_only) - header->xfeatures = XSTATE_FPSSE; - else - header->xfeatures &= (xfeatures_mask & xfeatures); - } - - if (use_fxsr()) { - /* - * mxcsr reserved bits must be masked to zero for security - * reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - - convert_to_fxsr(tsk, ia32_env); - } -} - -/* - * Restore the extended state if present. Otherwise, restore the FP/SSE state. 
- */ -static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) -{ - if (use_xsave()) { - if ((unsigned long)buf % 64 || fx_only) { - u64 init_bv = xfeatures_mask & ~XSTATE_FPSSE; - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - return copy_user_to_fxregs(buf); - } else { - u64 init_bv = xfeatures_mask & ~xbv; - if (unlikely(init_bv)) - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - return copy_user_to_xregs(buf, xbv); - } - } else if (use_fxsr()) { - return copy_user_to_fxregs(buf); - } else - return copy_user_to_fregs(buf); -} - -static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) -{ - int ia32_fxstate = (buf != buf_fx); - struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - int state_size = xstate_size; - u64 xfeatures = 0; - int fx_only = 0; - - ia32_fxstate &= (config_enabled(CONFIG_X86_32) || - config_enabled(CONFIG_IA32_EMULATION)); - - if (!buf) { - fpu__clear(fpu); - return 0; - } - - if (!access_ok(VERIFY_READ, buf, size)) - return -EACCES; - - fpu__activate_curr(fpu); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(current, NULL, - 0, sizeof(struct user_i387_ia32_struct), - NULL, buf) != 0; - - if (use_xsave()) { - struct _fpx_sw_bytes fx_sw_user; - if (unlikely(check_for_xstate(buf_fx, buf_fx, &fx_sw_user))) { - /* - * Couldn't find the extended state information in the - * memory layout. Restore just the FP/SSE and init all - * the other extended state. - */ - state_size = sizeof(struct i387_fxsave_struct); - fx_only = 1; - } else { - state_size = fx_sw_user.xstate_size; - xfeatures = fx_sw_user.xfeatures; - } - } - - if (ia32_fxstate) { - /* - * For 32-bit frames with fxstate, copy the user state to the - * thread's fpu state, reconstruct fxstate from the fsave - * header. Sanitize the copied state etc. - */ - struct fpu *fpu = &tsk->thread.fpu; - struct user_i387_ia32_struct env; - int err = 0; - - /* - * Drop the current fpu which clears fpu->fpstate_active. This ensures - * that any context-switch during the copy of the new state, - * avoids the intermediate state from getting restored/saved. - * Thus avoiding the new restored state from getting corrupted. - * We will be ready to restore/save the state only after - * fpu->fpstate_active is again set. - */ - fpu__drop(fpu); - - if (__copy_from_user(&fpu->state.xsave, buf_fx, state_size) || - __copy_from_user(&env, buf, sizeof(env))) { - fpstate_init(&fpu->state); - err = -1; - } else { - sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); - } - - fpu->fpstate_active = 1; - if (use_eager_fpu()) { - preempt_disable(); - fpu__restore(); - preempt_enable(); - } - - return err; - } else { - /* - * For 64-bit frames and 32-bit fsave frames, restore the user - * state to the registers directly (with exceptions handled). - */ - user_fpu_begin(); - if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) { - fpu__clear(fpu); - return -1; - } - } - - return 0; -} - -static inline int xstate_sigframe_size(void) -{ - return use_xsave() ? 
xstate_size + FP_XSTATE_MAGIC2_SIZE : xstate_size; -} - -/* - * Restore FPU state from a sigframe: - */ -int fpu__restore_sig(void __user *buf, int ia32_frame) -{ - void __user *buf_fx = buf; - int size = xstate_sigframe_size(); - - if (ia32_frame && use_fxsr()) { - buf_fx = buf + sizeof(struct i387_fsave_struct); - size += sizeof(struct i387_fsave_struct); - } - - return __fpu__restore_sig(buf, buf_fx, size); -} - -unsigned long -fpu__alloc_mathframe(unsigned long sp, int ia32_frame, - unsigned long *buf_fx, unsigned long *size) -{ - unsigned long frame_size = xstate_sigframe_size(); - - *buf_fx = sp = round_down(sp - frame_size, 64); - if (ia32_frame && use_fxsr()) { - frame_size += sizeof(struct i387_fsave_struct); - sp -= sizeof(struct i387_fsave_struct); - } - - *size = frame_size; - - return sp; -} -/* - * Prepare the SW reserved portion of the fxsave memory layout, indicating - * the presence of the extended state information in the memory layout - * pointed by the fpstate pointer in the sigcontext. - * This will be saved when ever the FP and extended state context is - * saved on the user stack during the signal handler delivery to the user. - */ -static void prepare_fx_sw_frame(void) -{ - int fsave_header_size = sizeof(struct i387_fsave_struct); - int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; - fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = xfeatures_mask; - fx_sw_reserved.xstate_size = xstate_size; - - if (config_enabled(CONFIG_IA32_EMULATION)) { - fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; - } -} - -/* * Enable the extended processor state save/restore feature. * Called once per CPU onlining. */ @@ -741,7 +351,7 @@ void fpu__init_system_xstate(void) init_xstate_size(); update_regset_xstate_info(xstate_size, xfeatures_mask); - prepare_fx_sw_frame(); + fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", -- cgit v0.10.2 From 0c306bcfba288296dc34d00d514546915234bc90 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 12:59:30 +0200 Subject: x86/fpu: Factor out the FPU regset code into fpu/regset.c So much of fpu/core.c is the regset code, but it just obscures the generic FPU state machine logic. Factor out the regset code into fpu/regset.c, where it can be read in isolation. This affects one API: fpu__activate_stopped() has to be made available from the core to fpu/regset.c. No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index f23ea10..db6c24b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -29,8 +29,6 @@ extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpu__activate_curr(struct fpu *fpu); - extern void fpstate_init(union thread_xstate *state); #ifdef CONFIG_MATH_EMULATION extern void fpstate_init_soft(struct i387_soft_struct *soft); @@ -49,6 +47,8 @@ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); /* * High level FPU state handling functions: */ +extern void fpu__activate_curr(struct fpu *fpu); +extern void fpu__activate_stopped(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(void); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/kernel/fpu/Makefile b/arch/x86/kernel/fpu/Makefile index 5c697de..68279ef 100644 --- a/arch/x86/kernel/fpu/Makefile +++ b/arch/x86/kernel/fpu/Makefile @@ -2,4 +2,4 @@ # Build rules for the FPU support code: # -obj-y += init.o bugs.o core.o signal.o xstate.o +obj-y += init.o bugs.o core.o regset.o signal.o xstate.o diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 2cb75bd..e22711d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -320,7 +320,7 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr); * the read-only case, it's not strictly necessary for * read-only access to the context. */ -static void fpu__activate_stopped(struct fpu *child_fpu) +void fpu__activate_stopped(struct fpu *child_fpu) { WARN_ON_ONCE(child_fpu == &current->thread.fpu); @@ -426,356 +426,6 @@ void fpu__clear(struct fpu *fpu) } /* - * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, - * as the "regset->n" for the xstate regset will be updated based on the feature - * capabilites supported by the xsave. - */ -int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - struct fpu *target_fpu = &target->thread.fpu; - - return target_fpu->fpstate_active ? regset->n : 0; -} - -int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) -{ - struct fpu *target_fpu = &target->thread.fpu; - - return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; -} - -int xfpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct fpu *fpu = &target->thread.fpu; - - if (!cpu_has_fxsr) - return -ENODEV; - - fpu__activate_stopped(fpu); - fpstate_sanitize_xstate(fpu); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpu->state.fxsave, 0, -1); -} - -int xfpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct fpu *fpu = &target->thread.fpu; - int ret; - - if (!cpu_has_fxsr) - return -ENODEV; - - fpu__activate_stopped(fpu); - fpstate_sanitize_xstate(fpu); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpu->state.fxsave, 0, -1); - - /* - * mxcsr reserved bits must be masked to zero for security reasons.
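The masking that follows is worth a concrete illustration. A minimal sketch of the invariant being enforced (kernel types assumed; mxcsr_feature_mask is probed from a scratch FXSAVE at boot, with 0xffbf a typical value):

/*
 * Illustrative only: setting reserved MXCSR bits raises #GP on
 * FXRSTOR/LDMXCSR, so a user-supplied value must never reach the
 * hardware unmasked:
 */
static inline u32 sanitize_mxcsr(u32 user_mxcsr)
{
	return user_mxcsr & mxcsr_feature_mask;	/* typically: & 0xffbf */
}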
- */ - fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; - - /* - * update the header bits in the xsave header, indicating the - * presence of FP and SSE state. - */ - if (cpu_has_xsave) - fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; - - return ret; -} - -int xstateregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct fpu *fpu = &target->thread.fpu; - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - fpu__activate_stopped(fpu); - - xsave = &fpu->state.xsave; - - /* - * Copy the 48bytes defined by the software first into the xstate - * memory layout in the thread struct, so that we can copy the entire - * xstateregs to the user using one user_regset_copyout(). - */ - memcpy(&xsave->i387.sw_reserved, - xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); - /* - * Copy the xstate memory layout. - */ - ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - return ret; -} - -int xstateregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct fpu *fpu = &target->thread.fpu; - struct xsave_struct *xsave; - int ret; - - if (!cpu_has_xsave) - return -ENODEV; - - fpu__activate_stopped(fpu); - - xsave = &fpu->state.xsave; - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); - /* - * mxcsr reserved bits must be masked to zero for security reasons. - */ - xsave->i387.mxcsr &= mxcsr_feature_mask; - xsave->header.xfeatures &= xfeatures_mask; - /* - * These bits must be zero. - */ - memset(&xsave->header.reserved, 0, 48); - - return ret; -} - -#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION - -/* - * FPU tag word conversions. - */ - -static inline unsigned short twd_i387_to_fxsr(unsigned short twd) -{ - unsigned int tmp; /* to avoid 16 bit prefixes in the code */ - - /* Transform each pair of bits into 01 (valid) or 00 (empty) */ - tmp = ~twd; - tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ - /* and move the valid bits to the lower byte. */ - tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ - tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ - tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ - - return tmp; -} - -#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) -#define FP_EXP_TAG_VALID 0 -#define FP_EXP_TAG_ZERO 1 -#define FP_EXP_TAG_SPECIAL 2 -#define FP_EXP_TAG_EMPTY 3 - -static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) -{ - struct _fpxreg *st; - u32 tos = (fxsave->swd >> 11) & 7; - u32 twd = (unsigned long) fxsave->twd; - u32 tag; - u32 ret = 0xffff0000u; - int i; - - for (i = 0; i < 8; i++, twd >>= 1) { - if (twd & 0x1) { - st = FPREG_ADDR(fxsave, (i - tos) & 7); - - switch (st->exponent & 0x7fff) { - case 0x7fff: - tag = FP_EXP_TAG_SPECIAL; - break; - case 0x0000: - if (!st->significand[0] && - !st->significand[1] && - !st->significand[2] && - !st->significand[3]) - tag = FP_EXP_TAG_ZERO; - else - tag = FP_EXP_TAG_SPECIAL; - break; - default: - if (st->significand[3] & 0x8000) - tag = FP_EXP_TAG_VALID; - else - tag = FP_EXP_TAG_SPECIAL; - break; - } - } else { - tag = FP_EXP_TAG_EMPTY; - } - ret |= tag << (2 * i); - } - return ret; -} - -/* - * FXSR floating point environment conversions. 
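Since the tag-word compression above is fairly dense, here is a standalone user-space demo of the same bit-twiddling with one worked value (0xffe4 encodes st0 valid, st1 zero, st2 special and st3-st7 empty; this test harness is hypothetical, not part of the patch):

#include <stdio.h>

/* same transformation as twd_i387_to_fxsr() above */
static unsigned short twd_i387_to_fxsr(unsigned short twd)
{
	unsigned int tmp = ~twd;

	tmp = (tmp | (tmp >> 1)) & 0x5555;	/* each pair -> 01 if non-empty */
	tmp = (tmp | (tmp >> 1)) & 0x3333;	/* 00VV00VV00VV00VV */
	tmp = (tmp | (tmp >> 2)) & 0x0f0f;	/* 0000VVVV0000VVVV */
	tmp = (tmp | (tmp >> 4)) & 0x00ff;	/* 00000000VVVVVVVV */

	return tmp;
}

int main(void)
{
	printf("%#x\n", twd_i387_to_fxsr(0xffe4));	/* prints 0x7 */
	return 0;
}

So three non-empty registers collapse into the three low bits of the FXSR tag byte. The reverse direction, twd_fxsr_to_i387() above, has to reconstruct the full 2-bit tags by inspecting each register's exponent and significand, which is why it is so much longer.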
- */ - -void -convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; - struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - env->cwd = fxsave->cwd | 0xffff0000u; - env->swd = fxsave->swd | 0xffff0000u; - env->twd = twd_fxsr_to_i387(fxsave); - -#ifdef CONFIG_X86_64 - env->fip = fxsave->rip; - env->foo = fxsave->rdp; - /* - * should be actually ds/cs at fpu exception time, but - * that information is not available in 64bit mode. - */ - env->fcs = task_pt_regs(tsk)->cs; - if (tsk == current) { - savesegment(ds, env->fos); - } else { - env->fos = tsk->thread.ds; - } - env->fos |= 0xffff0000; -#else - env->fip = fxsave->fip; - env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); - env->foo = fxsave->foo; - env->fos = fxsave->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(to[0])); -} - -void convert_to_fxsr(struct task_struct *tsk, - const struct user_i387_ia32_struct *env) - -{ - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; - struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; - struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; - int i; - - fxsave->cwd = env->cwd; - fxsave->swd = env->swd; - fxsave->twd = twd_i387_to_fxsr(env->twd); - fxsave->fop = (u16) ((u32) env->fcs >> 16); -#ifdef CONFIG_X86_64 - fxsave->rip = env->fip; - fxsave->rdp = env->foo; - /* cs and ds ignored */ -#else - fxsave->fip = env->fip; - fxsave->fcs = (env->fcs & 0xffff); - fxsave->foo = env->foo; - fxsave->fos = env->fos; -#endif - - for (i = 0; i < 8; ++i) - memcpy(&to[i], &from[i], sizeof(from[0])); -} - -int fpregs_get(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - void *kbuf, void __user *ubuf) -{ - struct fpu *fpu = &target->thread.fpu; - struct user_i387_ia32_struct env; - - fpu__activate_stopped(fpu); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &fpu->state.fsave, 0, - -1); - - fpstate_sanitize_xstate(fpu); - - if (kbuf && pos == 0 && count == sizeof(env)) { - convert_from_fxsr(kbuf, target); - return 0; - } - - convert_from_fxsr(&env, target); - - return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); -} - -int fpregs_set(struct task_struct *target, const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - struct fpu *fpu = &target->thread.fpu; - struct user_i387_ia32_struct env; - int ret; - - fpu__activate_stopped(fpu); - fpstate_sanitize_xstate(fpu); - - if (!static_cpu_has(X86_FEATURE_FPU)) - return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - - if (!cpu_has_fxsr) - return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &fpu->state.fsave, 0, - -1); - - if (pos > 0 || count < sizeof(env)) - convert_from_fxsr(&env, target); - - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); - if (!ret) - convert_to_fxsr(target, &env); - - /* - * update the header bit in the xsave header, indicating the - * presence of FP. - */ - if (cpu_has_xsave) - fpu->state.xsave.header.xfeatures |= XSTATE_FP; - return ret; -} - -/* - * FPU state for core dumps. - * This is only used for a.out dumps now. 
- * It is declared generically using elf_fpregset_t (which is - * struct user_i387_struct) but is in fact only used for 32-bit - * dumps, so on 64-bit it is really struct user_i387_ia32_struct. - */ -int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) -{ - struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - int fpvalid; - - fpvalid = fpu->fpstate_active; - if (fpvalid) - fpvalid = !fpregs_get(tsk, NULL, - 0, sizeof(struct user_i387_ia32_struct), - ufpu, NULL); - - return fpvalid; -} -EXPORT_SYMBOL(dump_fpu); - -#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ - -/* * x87 math exception handling: */ diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c new file mode 100644 index 0000000..1f58a1c --- /dev/null +++ b/arch/x86/kernel/fpu/regset.c @@ -0,0 +1,356 @@ +/* + * FPU register's regset abstraction, for ptrace, core dumps, etc. + */ +#include +#include +#include + +/* + * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, + * as the "regset->n" for the xstate regset will be updated based on the feature + * capabilites supported by the xsave. + */ +int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + struct fpu *target_fpu = &target->thread.fpu; + + return target_fpu->fpstate_active ? regset->n : 0; +} + +int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + struct fpu *target_fpu = &target->thread.fpu; + + return (cpu_has_fxsr && target_fpu->fpstate_active) ? regset->n : 0; +} + +int xfpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + + if (!cpu_has_fxsr) + return -ENODEV; + + fpu__activate_stopped(fpu); + fpstate_sanitize_xstate(fpu); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fpu->state.fxsave, 0, -1); +} + +int xfpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + int ret; + + if (!cpu_has_fxsr) + return -ENODEV; + + fpu__activate_stopped(fpu); + fpstate_sanitize_xstate(fpu); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu->state.fxsave, 0, -1); + + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + fpu->state.fxsave.mxcsr &= mxcsr_feature_mask; + + /* + * update the header bits in the xsave header, indicating the + * presence of FP and SSE state. + */ + if (cpu_has_xsave) + fpu->state.xsave.header.xfeatures |= XSTATE_FPSSE; + + return ret; +} + +int xstateregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct xsave_struct *xsave; + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + fpu__activate_stopped(fpu); + + xsave = &fpu->state.xsave; + + /* + * Copy the 48bytes defined by the software first into the xstate + * memory layout in the thread struct, so that we can copy the entire + * xstateregs to the user using one user_regset_copyout(). + */ + memcpy(&xsave->i387.sw_reserved, + xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes)); + /* + * Copy the xstate memory layout. 
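For context: these routines back the generic regset interface, which user space reaches through PTRACE_GETREGSET/PTRACE_SETREGSET. A minimal sketch of the consumer side that ends up in xstateregs_get() (tracee assumed already stopped; buffer sizing and error handling omitted):

#include <elf.h>		/* NT_X86_XSTATE */
#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/uio.h>

static long read_xstate(pid_t pid, void *buf, size_t len)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };

	/* the kernel fills 'buf' via xstateregs_get() */
	return ptrace(PTRACE_GETREGSET, pid, (void *)NT_X86_XSTATE, &iov);
}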
+ */ + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + return ret; +} + +int xstateregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct xsave_struct *xsave; + int ret; + + if (!cpu_has_xsave) + return -ENODEV; + + fpu__activate_stopped(fpu); + + xsave = &fpu->state.xsave; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); + /* + * mxcsr reserved bits must be masked to zero for security reasons. + */ + xsave->i387.mxcsr &= mxcsr_feature_mask; + xsave->header.xfeatures &= xfeatures_mask; + /* + * These bits must be zero. + */ + memset(&xsave->header.reserved, 0, 48); + + return ret; +} + +#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION + +/* + * FPU tag word conversions. + */ + +static inline unsigned short twd_i387_to_fxsr(unsigned short twd) +{ + unsigned int tmp; /* to avoid 16 bit prefixes in the code */ + + /* Transform each pair of bits into 01 (valid) or 00 (empty) */ + tmp = ~twd; + tmp = (tmp | (tmp>>1)) & 0x5555; /* 0V0V0V0V0V0V0V0V */ + /* and move the valid bits to the lower byte. */ + tmp = (tmp | (tmp >> 1)) & 0x3333; /* 00VV00VV00VV00VV */ + tmp = (tmp | (tmp >> 2)) & 0x0f0f; /* 0000VVVV0000VVVV */ + tmp = (tmp | (tmp >> 4)) & 0x00ff; /* 00000000VVVVVVVV */ + + return tmp; +} + +#define FPREG_ADDR(f, n) ((void *)&(f)->st_space + (n) * 16) +#define FP_EXP_TAG_VALID 0 +#define FP_EXP_TAG_ZERO 1 +#define FP_EXP_TAG_SPECIAL 2 +#define FP_EXP_TAG_EMPTY 3 + +static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) +{ + struct _fpxreg *st; + u32 tos = (fxsave->swd >> 11) & 7; + u32 twd = (unsigned long) fxsave->twd; + u32 tag; + u32 ret = 0xffff0000u; + int i; + + for (i = 0; i < 8; i++, twd >>= 1) { + if (twd & 0x1) { + st = FPREG_ADDR(fxsave, (i - tos) & 7); + + switch (st->exponent & 0x7fff) { + case 0x7fff: + tag = FP_EXP_TAG_SPECIAL; + break; + case 0x0000: + if (!st->significand[0] && + !st->significand[1] && + !st->significand[2] && + !st->significand[3]) + tag = FP_EXP_TAG_ZERO; + else + tag = FP_EXP_TAG_SPECIAL; + break; + default: + if (st->significand[3] & 0x8000) + tag = FP_EXP_TAG_VALID; + else + tag = FP_EXP_TAG_SPECIAL; + break; + } + } else { + tag = FP_EXP_TAG_EMPTY; + } + ret |= tag << (2 * i); + } + return ret; +} + +/* + * FXSR floating point environment conversions. + */ + +void +convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) +{ + struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; + struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + env->cwd = fxsave->cwd | 0xffff0000u; + env->swd = fxsave->swd | 0xffff0000u; + env->twd = twd_fxsr_to_i387(fxsave); + +#ifdef CONFIG_X86_64 + env->fip = fxsave->rip; + env->foo = fxsave->rdp; + /* + * should be actually ds/cs at fpu exception time, but + * that information is not available in 64bit mode. 
+ */ + env->fcs = task_pt_regs(tsk)->cs; + if (tsk == current) { + savesegment(ds, env->fos); + } else { + env->fos = tsk->thread.ds; + } + env->fos |= 0xffff0000; +#else + env->fip = fxsave->fip; + env->fcs = (u16) fxsave->fcs | ((u32) fxsave->fop << 16); + env->foo = fxsave->foo; + env->fos = fxsave->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(to[0])); +} + +void convert_to_fxsr(struct task_struct *tsk, + const struct user_i387_ia32_struct *env) + +{ + struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; + struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; + struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; + int i; + + fxsave->cwd = env->cwd; + fxsave->swd = env->swd; + fxsave->twd = twd_i387_to_fxsr(env->twd); + fxsave->fop = (u16) ((u32) env->fcs >> 16); +#ifdef CONFIG_X86_64 + fxsave->rip = env->fip; + fxsave->rdp = env->foo; + /* cs and ds ignored */ +#else + fxsave->fip = env->fip; + fxsave->fcs = (env->fcs & 0xffff); + fxsave->foo = env->foo; + fxsave->fos = env->fos; +#endif + + for (i = 0; i < 8; ++i) + memcpy(&to[i], &from[i], sizeof(from[0])); +} + +int fpregs_get(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct user_i387_ia32_struct env; + + fpu__activate_stopped(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &fpu->state.fsave, 0, + -1); + + fpstate_sanitize_xstate(fpu); + + if (kbuf && pos == 0 && count == sizeof(env)) { + convert_from_fxsr(kbuf, target); + return 0; + } + + convert_from_fxsr(&env, target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, &env, 0, -1); +} + +int fpregs_set(struct task_struct *target, const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct fpu *fpu = &target->thread.fpu; + struct user_i387_ia32_struct env; + int ret; + + fpu__activate_stopped(fpu); + fpstate_sanitize_xstate(fpu); + + if (!static_cpu_has(X86_FEATURE_FPU)) + return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); + + if (!cpu_has_fxsr) + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &fpu->state.fsave, 0, + -1); + + if (pos > 0 || count < sizeof(env)) + convert_from_fxsr(&env, target); + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1); + if (!ret) + convert_to_fxsr(target, &env); + + /* + * update the header bit in the xsave header, indicating the + * presence of FP. + */ + if (cpu_has_xsave) + fpu->state.xsave.header.xfeatures |= XSTATE_FP; + return ret; +} + +/* + * FPU state for core dumps. + * This is only used for a.out dumps now. + * It is declared generically using elf_fpregset_t (which is + * struct user_i387_struct) but is in fact only used for 32-bit + * dumps, so on 64-bit it is really struct user_i387_ia32_struct. 
+ */ +int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu) +{ + struct task_struct *tsk = current; + struct fpu *fpu = &tsk->thread.fpu; + int fpvalid; + + fpvalid = fpu->fpstate_active; + if (fpvalid) + fpvalid = !fpregs_get(tsk, NULL, + 0, sizeof(struct user_i387_ia32_struct), + ufpu, NULL); + + return fpvalid; +} +EXPORT_SYMBOL(dump_fpu); + +#endif /* CONFIG_X86_32 || CONFIG_IA32_EMULATION */ -- cgit v0.10.2 From c47ada305de3803517ae64aa50686f644c5456fa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 30 Apr 2015 17:15:32 +0200 Subject: x86/fpu: Harmonize FPU register state types Use these consistent names: struct fregs_state # was: i387_fsave_struct struct fxregs_state # was: i387_fxsave_struct struct swregs_state # was: i387_soft_struct struct xregs_state # was: xsave_struct union fpregs_state # was: thread_xstate Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index db6c24b..7fdc90b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -22,20 +22,20 @@ extern unsigned int mxcsr_feature_mask; -extern union thread_xstate init_fpstate; +extern union fpregs_state init_fpstate; extern void fpu__init_cpu(void); extern void fpu__init_system_xstate(void); extern void fpu__init_cpu_xstate(void); extern void fpu__init_system(struct cpuinfo_x86 *c); -extern void fpstate_init(union thread_xstate *state); +extern void fpstate_init(union fpregs_state *state); #ifdef CONFIG_MATH_EMULATION -extern void fpstate_init_soft(struct i387_soft_struct *soft); +extern void fpstate_init_soft(struct swregs_state *soft); #else -static inline void fpstate_init_soft(struct i387_soft_struct *soft) {} +static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif -static inline void fpstate_init_fxstate(struct i387_fxsave_struct *fx) +static inline void fpstate_init_fxstate(struct fxregs_state *fx) { fx->cwd = 0x37f; fx->mxcsr = MXCSR_DEFAULT; @@ -133,12 +133,12 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu); err; \ }) -static inline int copy_fregs_to_user(struct i387_fsave_struct __user *fx) +static inline int copy_fregs_to_user(struct fregs_state __user *fx) { return user_insn(fnsave %[fx]; fwait, [fx] "=m" (*fx), "m" (*fx)); } -static inline int copy_fxregs_to_user(struct i387_fxsave_struct __user *fx) +static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) { if (config_enabled(CONFIG_X86_32)) return user_insn(fxsave %[fx], [fx] "=m" (*fx), "m" (*fx)); @@ -149,7 +149,7 @@ static inline int copy_fxregs_to_user(struct i387_fxsave_struct __user *fx) return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline int copy_kernel_to_fxregs(struct i387_fxsave_struct *fx) +static inline int copy_kernel_to_fxregs(struct fxregs_state *fx) { if (config_enabled(CONFIG_X86_32)) return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -161,7 +161,7 @@ static inline int copy_kernel_to_fxregs(struct i387_fxsave_struct *fx) "m" (*fx)); } -static inline int copy_user_to_fxregs(struct i387_fxsave_struct __user *fx) +static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) { if (config_enabled(CONFIG_X86_32)) return user_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); @@ -173,12 +173,12 @@ static inline int copy_user_to_fxregs(struct i387_fxsave_struct __user *fx) "m" 
(*fx)); } -static inline int copy_kernel_to_fregs(struct i387_fsave_struct *fx) +static inline int copy_kernel_to_fregs(struct fregs_state *fx) { return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } -static inline int copy_user_to_fregs(struct i387_fsave_struct __user *fx) +static inline int copy_user_to_fregs(struct fregs_state __user *fx) { return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); } diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 006ec29..fe2ce32 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -4,7 +4,7 @@ #ifndef _ASM_X86_FPU_H #define _ASM_X86_FPU_H -struct i387_fsave_struct { +struct fregs_state { u32 cwd; /* FPU Control Word */ u32 swd; /* FPU Status Word */ u32 twd; /* FPU Tag Word */ @@ -20,7 +20,7 @@ struct i387_fsave_struct { u32 status; }; -struct i387_fxsave_struct { +struct fxregs_state { u16 cwd; /* Control Word */ u16 swd; /* Status Word */ u16 twd; /* Tag Word */ @@ -58,7 +58,7 @@ struct i387_fxsave_struct { /* * Software based FPU emulation state: */ -struct i387_soft_struct { +struct swregs_state { u32 cwd; u32 swd; u32 twd; @@ -109,7 +109,7 @@ enum xfeature_bit { /* * There are 16x 256-bit AVX registers named YMM0-YMM15. * The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15) - * and are stored in 'struct i387_fxsave_struct::xmm_space[]'. + * and are stored in 'struct fxregs_state::xmm_space[]'. * * The high 128 bits are stored here: * 16x 128 bits == 256 bytes. @@ -140,8 +140,8 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); -struct xsave_struct { - struct i387_fxsave_struct i387; +struct xregs_state { + struct fxregs_state i387; struct xstate_header header; struct ymmh_struct ymmh; struct lwp_struct lwp; @@ -150,11 +150,11 @@ struct xsave_struct { /* New processor state extensions will go here. */ } __attribute__ ((packed, aligned (64))); -union thread_xstate { - struct i387_fsave_struct fsave; - struct i387_fxsave_struct fxsave; - struct i387_soft_struct soft; - struct xsave_struct xsave; +union fpregs_state { + struct fregs_state fsave; + struct fxregs_state fxsave; + struct swregs_state soft; + struct xregs_state xsave; }; struct fpu { @@ -171,7 +171,7 @@ struct fpu { unsigned int last_cpu; unsigned int fpregs_active; - union thread_xstate state; + union fpregs_state state; /* * This counter contains the number of consecutive context switches * during which the FPU stays used. If this is over a threshold, the diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 7f59480..a6181b9 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -58,7 +58,7 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int copy_xregs_to_kernel_booting(struct xsave_struct *fx) +static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) { u64 mask = -1; u32 lmask = mask; @@ -86,7 +86,7 @@ static inline int copy_xregs_to_kernel_booting(struct xsave_struct *fx) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. 
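The lmask/hmask split that recurs in all of these helpers mirrors the instruction encoding: XSAVE and XRSTOR take their 64-bit component bitmap in EDX:EAX. A stripped-down sketch of the core sequence (kernel types assumed; the alternatives patching and exception fixups of the real helpers are omitted):

static inline void xsave_sketch(struct xregs_state *fx, u64 mask)
{
	u32 lmask = mask;		/* mask bits 31:0  -> EAX */
	u32 hmask = mask >> 32;		/* mask bits 63:32 -> EDX */

	asm volatile("xsave %[fx]"
		     : [fx] "+m" (*fx)
		     : "a" (lmask), "d" (hmask)
		     : "memory");
}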
*/ -static inline int copy_kernel_to_xregs_booting(struct xsave_struct *fx, u64 mask) +static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -112,7 +112,7 @@ static inline int copy_kernel_to_xregs_booting(struct xsave_struct *fx, u64 mask /* * Save processor xstate to xsave area. */ -static inline int copy_xregs_to_kernel(struct xsave_struct *fx) +static inline int copy_xregs_to_kernel(struct xregs_state *fx) { u64 mask = -1; u32 lmask = mask; @@ -151,7 +151,7 @@ static inline int copy_xregs_to_kernel(struct xsave_struct *fx) /* * Restore processor xstate from xsave area. */ -static inline int copy_kernel_to_xregs(struct xsave_struct *fx, u64 mask) +static inline int copy_kernel_to_xregs(struct xregs_state *fx, u64 mask) { int err = 0; u32 lmask = mask; @@ -186,7 +186,7 @@ static inline int copy_kernel_to_xregs(struct xsave_struct *fx, u64 mask) * backward compatibility for old applications which don't understand * compacted format of xsave area. */ -static inline int copy_xregs_to_user(struct xsave_struct __user *buf) +static inline int copy_xregs_to_user(struct xregs_state __user *buf) { int err; @@ -210,10 +210,10 @@ static inline int copy_xregs_to_user(struct xsave_struct __user *buf) /* * Restore xstate from user space xsave area. */ -static inline int copy_user_to_xregs(struct xsave_struct __user *buf, u64 mask) +static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) { int err = 0; - struct xsave_struct *xstate = ((__force struct xsave_struct *)buf); + struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; u32 hmask = mask >> 32; @@ -226,7 +226,7 @@ static inline int copy_user_to_xregs(struct xsave_struct __user *buf, u64 mask) return err; } -void *get_xsave_addr(struct xsave_struct *xsave, int xstate); +void *get_xsave_addr(struct xregs_state *xsave, int xstate); void setup_xstate_comp(void); #endif diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index a952a13d..f3c1b71 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -60,8 +60,8 @@ #ifdef CONFIG_X86_INTEL_MPX siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf); -int mpx_handle_bd_fault(struct xsave_struct *xsave_buf); + struct xregs_state *xsave_buf); +int mpx_handle_bd_fault(struct xregs_state *xsave_buf); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); @@ -78,11 +78,11 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end); #else static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf) + struct xregs_state *xsave_buf) { return NULL; } -static inline int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +static inline int mpx_handle_bd_fault(struct xregs_state *xsave_buf) { return -EINVAL; } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e22711d..ac39616 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -16,7 +16,7 @@ * Represents the initial FPU state. 
It's mostly (but not completely) zeroes, * depending on the FPU hardware format: */ -union thread_xstate init_fpstate __read_mostly; +union fpregs_state init_fpstate __read_mostly; /* * Track whether the kernel is using the FPU state @@ -200,7 +200,7 @@ EXPORT_SYMBOL_GPL(fpu__save); /* * Legacy x87 fpstate state init: */ -static inline void fpstate_init_fstate(struct i387_fsave_struct *fp) +static inline void fpstate_init_fstate(struct fregs_state *fp) { fp->cwd = 0xffff037fu; fp->swd = 0xffff0000u; @@ -208,7 +208,7 @@ static inline void fpstate_init_fstate(struct i387_fsave_struct *fp) fp->fos = 0xffff0000u; } -void fpstate_init(union thread_xstate *state) +void fpstate_init(union fpregs_state *state) { if (!cpu_has_fpu) { fpstate_init_soft(&state->soft); diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 93bc11a..5043706 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -95,7 +95,7 @@ static void fpu__init_system_mxcsr(void) unsigned int mask = 0; if (cpu_has_fxsr) { - struct i387_fxsave_struct fx_tmp __aligned(32) = { }; + struct fxregs_state fx_tmp __aligned(32) = { }; asm volatile("fxsave %0" : "+m" (fx_tmp)); @@ -155,12 +155,12 @@ static void fpu__init_system_xstate_size_legacy(void) */ setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - xstate_size = sizeof(struct i387_soft_struct); + xstate_size = sizeof(struct swregs_state); } else { if (cpu_has_fxsr) - xstate_size = sizeof(struct i387_fxsave_struct); + xstate_size = sizeof(struct fxregs_state); else - xstate_size = sizeof(struct i387_fsave_struct); + xstate_size = sizeof(struct fregs_state); } } diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 1f58a1c..297b3da 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -76,7 +76,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct xsave_struct *xsave; + struct xregs_state *xsave; int ret; if (!cpu_has_xsave) @@ -105,7 +105,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, const void *kbuf, const void __user *ubuf) { struct fpu *fpu = &target->thread.fpu; - struct xsave_struct *xsave; + struct xregs_state *xsave; int ret; if (!cpu_has_xsave) @@ -156,7 +156,7 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd) #define FP_EXP_TAG_SPECIAL 2 #define FP_EXP_TAG_EMPTY 3 -static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) +static inline u32 twd_fxsr_to_i387(struct fxregs_state *fxsave) { struct _fpxreg *st; u32 tos = (fxsave->swd >> 11) & 7; @@ -204,7 +204,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; + struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -242,7 +242,7 @@ void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env) { - struct i387_fxsave_struct *fxsave = &tsk->thread.fpu.state.fxsave; + struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; diff --git a/arch/x86/kernel/fpu/signal.c 
b/arch/x86/kernel/fpu/signal.c index 8d0c26a..99f7309 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -17,11 +17,11 @@ static struct _fpx_sw_bytes fx_sw_reserved, fx_sw_reserved_ia32; * Check for the presence of extended state information in the * user fpstate pointer in the sigcontext. */ -static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, +static inline int check_for_xstate(struct fxregs_state __user *buf, void __user *fpstate, struct _fpx_sw_bytes *fx_sw) { - int min_xstate_size = sizeof(struct i387_fxsave_struct) + + int min_xstate_size = sizeof(struct fxregs_state) + sizeof(struct xstate_header); unsigned int magic2; @@ -54,7 +54,7 @@ static inline int check_for_xstate(struct i387_fxsave_struct __user *buf, static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) { if (use_fxsr()) { - struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; struct user_i387_ia32_struct env; struct _fpstate_ia32 __user *fp = buf; @@ -65,7 +65,7 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) __put_user(X86_FXSR_MAGIC, &fp->magic)) return -1; } else { - struct i387_fsave_struct __user *fp = buf; + struct fregs_state __user *fp = buf; u32 swd; if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status)) return -1; @@ -76,7 +76,7 @@ static inline int save_fsave_header(struct task_struct *tsk, void __user *buf) static inline int save_xstate_epilog(void __user *buf, int ia32_frame) { - struct xsave_struct __user *x = buf; + struct xregs_state __user *x = buf; struct _fpx_sw_bytes *sw_bytes; u32 xfeatures; int err; @@ -114,16 +114,16 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame) return err; } -static inline int copy_fpregs_to_sigframe(struct xsave_struct __user *buf) +static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf) { int err; if (use_xsave()) err = copy_xregs_to_user(buf); else if (use_fxsr()) - err = copy_fxregs_to_user((struct i387_fxsave_struct __user *) buf); + err = copy_fxregs_to_user((struct fxregs_state __user *) buf); else - err = copy_fregs_to_user((struct i387_fsave_struct __user *) buf); + err = copy_fregs_to_user((struct fregs_state __user *) buf); if (unlikely(err) && __clear_user(buf, xstate_size)) err = -EFAULT; @@ -152,7 +152,7 @@ static inline int copy_fpregs_to_sigframe(struct xsave_struct __user *buf) */ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size) { - struct xsave_struct *xsave = &current->thread.fpu.state.xsave; + struct xregs_state *xsave = &current->thread.fpu.state.xsave; struct task_struct *tsk = current; int ia32_fxstate = (buf != buf_fx); @@ -195,7 +195,7 @@ sanitize_restored_xstate(struct task_struct *tsk, struct user_i387_ia32_struct *ia32_env, u64 xfeatures, int fx_only) { - struct xsave_struct *xsave = &tsk->thread.fpu.state.xsave; + struct xregs_state *xsave = &tsk->thread.fpu.state.xsave; struct xstate_header *header = &xsave->header; if (use_xsave()) { @@ -280,7 +280,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) * memory layout. Restore just the FP/SSE and init all * the other extended state.
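The layout probe that failed in this branch, check_for_xstate(), boils down to two magic-number comparisons plus size sanity checks; a simplified sketch with the uaccess plumbing stripped out (kernel types assumed):

static int frame_has_xstate(void *fpstate, struct _fpx_sw_bytes *fx_sw)
{
	/* MAGIC1 lives in the sw_reserved bytes of the fxsave area ... */
	if (fx_sw->magic1 != FP_XSTATE_MAGIC1)
		return 0;

	/* ... MAGIC2 sits directly behind the xstate image on the stack: */
	return *(u32 *)(fpstate + fx_sw->xstate_size) == FP_XSTATE_MAGIC2;
}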
*/ - state_size = sizeof(struct i387_fxsave_struct); + state_size = sizeof(struct fxregs_state); fx_only = 1; } else { state_size = fx_sw_user.xstate_size; @@ -353,8 +353,8 @@ int fpu__restore_sig(void __user *buf, int ia32_frame) int size = xstate_sigframe_size(); if (ia32_frame && use_fxsr()) { - buf_fx = buf + sizeof(struct i387_fsave_struct); - size += sizeof(struct i387_fsave_struct); + buf_fx = buf + sizeof(struct fregs_state); + size += sizeof(struct fregs_state); } return __fpu__restore_sig(buf, buf_fx, size); @@ -368,8 +368,8 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, *buf_fx = sp = round_down(sp - frame_size, 64); if (ia32_frame && use_fxsr()) { - frame_size += sizeof(struct i387_fsave_struct); - sp -= sizeof(struct i387_fsave_struct); + frame_size += sizeof(struct fregs_state); + sp -= sizeof(struct fregs_state); } *size = frame_size; @@ -385,7 +385,7 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int fsave_header_size = sizeof(struct i387_fsave_struct); + int fsave_header_size = sizeof(struct fregs_state); int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; if (config_enabled(CONFIG_X86_32)) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 3629e2e..733a8ae 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -91,7 +91,7 @@ EXPORT_SYMBOL_GPL(cpu_has_xfeatures); */ void fpstate_sanitize_xstate(struct fpu *fpu) { - struct i387_fxsave_struct *fx = &fpu->state.fxsave; + struct fxregs_state *fx = &fpu->state.fxsave; int feature_bit; u64 xfeatures; @@ -231,7 +231,7 @@ void setup_xstate_comp(void) * or standard form. */ xstate_comp_offsets[0] = 0; - xstate_comp_offsets[1] = offsetof(struct i387_fxsave_struct, xmm_space); + xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space); if (!cpu_has_xsaves) { for (i = 2; i < xfeatures_nr; i++) { @@ -386,7 +386,7 @@ void fpu__resume_cpu(void) * Output: * address of the state in the xsave area. 
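A typical caller, for reference: the MPX fault paths retrieve a single component this way (the shape of the do_bounds() usage in traps.c, slightly condensed):

	fpu__save(&tsk->thread.fpu);	/* make the in-memory copy current */

	bndcsr = get_xsave_addr(&tsk->thread.fpu.state.xsave, XSTATE_BNDCSR);
	if (!bndcsr)
		goto exit_trap;		/* component unsupported or in init state */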
*/ -void *get_xsave_addr(struct xsave_struct *xsave, int xstate) +void *get_xsave_addr(struct xregs_state *xsave, int xstate) { int feature = fls64(xstate) - 1; if (!test_bit(feature, (unsigned long *)&xfeatures_mask)) diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cab397d..6f581c6 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -371,7 +371,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - struct xsave_struct *xsave_buf; + struct xregs_state *xsave_buf; enum ctx_state prev_state; struct bndcsr *bndcsr; siginfo_t *info; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3d811bb..e14a7a6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3195,7 +3195,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state.xsave; + struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; u64 xstate_bv = xsave->header.xfeatures; u64 valid; @@ -3231,7 +3231,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) static void load_xsave(struct kvm_vcpu *vcpu, u8 *src) { - struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state.xsave; + struct xregs_state *xsave = &vcpu->arch.guest_fpu.state.xsave; u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET); u64 valid; @@ -3277,7 +3277,7 @@ static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, } else { memcpy(guest_xsave->region, &vcpu->arch.guest_fpu.state.fxsave, - sizeof(struct i387_fxsave_struct)); + sizeof(struct fxregs_state)); *(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] = XSTATE_FPSSE; } @@ -3302,7 +3302,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, if (xstate_bv & ~XSTATE_FPSSE) return -EINVAL; memcpy(&vcpu->arch.guest_fpu.state.fxsave, - guest_xsave->region, sizeof(struct i387_fxsave_struct)); + guest_xsave->region, sizeof(struct fxregs_state)); } return 0; } @@ -6970,7 +6970,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct i387_fxsave_struct *fxsave = + struct fxregs_state *fxsave = &vcpu->arch.guest_fpu.state.fxsave; memcpy(fpu->fpr, fxsave->st_space, 128); @@ -6987,7 +6987,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) { - struct i387_fxsave_struct *fxsave = + struct fxregs_state *fxsave = &vcpu->arch.guest_fpu.state.fxsave; memcpy(fxsave->st_space, fpu->fpr, 128); diff --git a/arch/x86/math-emu/fpu_aux.c b/arch/x86/math-emu/fpu_aux.c index 768b2b8..dd76a05 100644 --- a/arch/x86/math-emu/fpu_aux.c +++ b/arch/x86/math-emu/fpu_aux.c @@ -30,7 +30,7 @@ static void fclex(void) } /* Needs to be externally visible */ -void fpstate_init_soft(struct i387_soft_struct *soft) +void fpstate_init_soft(struct swregs_state *soft) { struct address *oaddr, *iaddr; memset(soft, 0, sizeof(*soft)); diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 5b85051..f37e84a 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -669,7 +669,7 @@ void math_abort(struct math_emu_info *info, unsigned int signal) #endif /* PARANOID */ } -#define S387 ((struct i387_soft_struct *)s387) +#define S387 ((struct swregs_state *)s387) #define sstatus_word() \ 
((S387->swd & ~SW_Top & 0xffff) | ((S387->ftop << SW_Top_Shift) & SW_Top)) @@ -678,14 +678,14 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state.soft; + struct swregs_state *s387 = &target->thread.fpu.state.soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; RE_ENTRANT_CHECK_OFF; ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, s387, 0, - offsetof(struct i387_soft_struct, st_space)); + offsetof(struct swregs_state, st_space)); RE_ENTRANT_CHECK_ON; if (ret) @@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.fpu.state.soft; + struct swregs_state *s387 = &target->thread.fpu.state.soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; @@ -748,7 +748,7 @@ int fpregs_soft_get(struct task_struct *target, #endif /* PECULIAR_486 */ ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, s387, 0, - offsetof(struct i387_soft_struct, st_space)); + offsetof(struct swregs_state, st_space)); /* Copy all registers in stack order. */ if (!ret) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5e20bac..2e0dfd3 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -272,7 +272,7 @@ bad_opcode: * The caller is expected to kfree() the returned siginfo_t. */ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xsave_struct *xsave_buf) + struct xregs_state *xsave_buf) { struct bndreg *bndregs, *bndreg; siginfo_t *info = NULL; @@ -497,7 +497,7 @@ out_unmap: * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, * and the size of each bound table is 4MB. */ -static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) +static int do_mpx_bt_fault(struct xregs_state *xsave_buf) { unsigned long bd_entry, bd_base; struct bndcsr *bndcsr; @@ -525,7 +525,7 @@ static int do_mpx_bt_fault(struct xsave_struct *xsave_buf) return allocate_bt((long __user *)bd_entry); } -int mpx_handle_bd_fault(struct xsave_struct *xsave_buf) +int mpx_handle_bd_fault(struct xregs_state *xsave_buf) { /* * Userspace never asked us to manage the bounds tables, -- cgit v0.10.2 From aeb997b9f2a2199c72b89b7a304cafc394e4202b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 1 May 2015 09:59:04 +0200 Subject: x86/fpu: Change fpu->fpregs_active from 'int' to 'char', add lazy switching comments Improve the memory layout of 'struct fpu': - change ->fpregs_active from 'int' to 'char' - it's just a single flag and modern x86 CPUs can do efficient byte accesses. - pack related fields closer to each other: often 'fpu->state' will not be touched, while the other fields will - so pack them into a group. Also add comments to each field, describing their purpose, and add some background information about lazy restores. Also fix an obsolete, lazy switching related comment in fpu_copy()'s description. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index fe2ce32..261cfb7 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -159,8 +159,44 @@ union fpregs_state { struct fpu { /* + * @state: + * + * In-memory copy of all FPU registers that we save/restore + * over context switches. If the task is using the FPU then + * the registers in the FPU are more recent than this state + * copy. If the task context-switches away then they get + * saved here and represent the FPU state. + * + * After context switches there may be a (short) time period + * during which the in-FPU hardware registers are unchanged + * and still perfectly match this state, if the tasks + * scheduled afterwards are not using the FPU. + * + * This is the 'lazy restore' window of optimization, which + * we track though 'fpu_fpregs_owner_ctx' and 'fpu->last_cpu'. + * + * We detect whether a subsequent task uses the FPU via setting + * CR0::TS to 1, which causes any FPU use to raise a #NM fault. + * + * During this window, if the task gets scheduled again, we + * might be able to skip having to do a restore from this + * memory buffer to the hardware registers - at the cost of + * incurring the overhead of #NM fault traps. + * + * Note that on modern CPUs that support the XSAVEOPT (or other + * optimized XSAVE instructions), we don't use #NM traps anymore, + * as the hardware can track whether FPU registers need saving + * or not. On such CPUs we activate the non-lazy ('eagerfpu') + * logic, which unconditionally saves/restores all FPU state + * across context switches. (if FPU state exists.) + */ + union fpregs_state state; + + /* + * @last_cpu: + * * Records the last CPU on which this context was loaded into - * FPU registers. (In the lazy-switching case we might be + * FPU registers. (In the lazy-restore case we might be * able to reuse FPU registers across multiple context switches * this way, if no intermediate task used the FPU.) * @@ -170,23 +206,49 @@ struct fpu { */ unsigned int last_cpu; - unsigned int fpregs_active; - union fpregs_state state; /* + * @fpstate_active: + * + * This flag indicates whether this context is active: if the task + * is not running then we can restore from this context, if the task + * is running then we should save into this context. + */ + unsigned char fpstate_active; + + /* + * @fpregs_active: + * + * This flag determines whether a given context is actively + * loaded into the FPU's registers and that those registers + * represent the task's current FPU state. + * + * Note the interaction with fpstate_active: + * + * # task does not use the FPU: + * fpstate_active == 0 + * + * # task uses the FPU and regs are active: + * fpstate_active == 1 && fpregs_active == 1 + * + * # the regs are inactive but still match fpstate: + * fpstate_active == 1 && fpregs_active == 0 && fpregs_owner == fpu + * + * The third state is what we use for the lazy restore optimization + * on lazy-switching CPUs. + */ + unsigned char fpregs_active; + + /* + * @counter: + * * This counter contains the number of consecutive context switches * during which the FPU stays used. If this is over a threshold, the - * lazy fpu saving logic becomes unlazy, to save the trap overhead. + * lazy FPU restore logic becomes eager, to save the trap overhead. 
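The lazy-restore window documented above comes down to a two-part test at context-switch time; in this series it has roughly the following shape (the helper name and exact primitives are illustrative; fpu_fpregs_owner_ctx is the per-CPU owner pointer referred to in the @state comment):

static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
{
	/* regs still belong to this context and this CPU never lost them: */
	return fpu == this_cpu_read(fpu_fpregs_owner_ctx) &&
	       cpu == fpu->last_cpu;
}

If the test fails, the registers must be reloaded from fpu->state; if it succeeds, the restore can be skipped entirely.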
* This is an unsigned char so that after 256 iterations the counter * wraps and the context switch behavior turns lazy again; this is to * deal with bursty apps that only use the FPU for a short time: */ unsigned char counter; - /* - * This flag indicates whether this context is fpstate_active: if the task is - * not running then we can restore from this context, if the task - * is running then we should save into this context. - */ - unsigned char fpstate_active; }; #endif /* _ASM_X86_FPU_H */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index ac39616..97df457 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -227,10 +227,8 @@ EXPORT_SYMBOL_GPL(fpstate_init); /* * Copy the current task's FPU state to a new task's FPU context. * - * In the 'eager' case we just save to the destination context. - * - * In the 'lazy' case we save to the source context, mark the FPU lazy - * via stts() and copy the source context into the destination context. + * In both the 'eager' and the 'lazy' case we save hardware registers + * directly to the destination buffer. */ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) { diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 733a8ae..cd7f1a6 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -76,10 +76,11 @@ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) EXPORT_SYMBOL_GPL(cpu_has_xfeatures); /* - * When executing XSAVEOPT (optimized XSAVE), if a processor implementation - * detects that an FPU state component is still (or is again) in its - * initialized state, it may clear the corresponding bit in the header.xfeatures - * field, and can skip the writeout of registers to the corresponding memory layout. + * When executing XSAVEOPT (or other optimized XSAVE instructions), if + * a processor implementation detects that an FPU state component is still + * (or is again) in its initialized state, it may clear the corresponding + * bit in the header.xfeatures field, and can skip the writeout of registers + * to the corresponding memory layout. * * This means that when the bit is zero, the state component might still contain * some previous - non-initialized register state. -- cgit v0.10.2 From bdf80d104021d3405e5ec8ecc08f189b74ae022a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 2 May 2015 10:22:45 +0200 Subject: x86/fpu: Document the various fpregs state formats Document all the structures that make up 'struct fpu'. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 261cfb7..4c4eceb 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -4,6 +4,10 @@ #ifndef _ASM_X86_FPU_H #define _ASM_X86_FPU_H +/* + * The legacy x87 FPU state format, as saved by FSAVE and + * restored by the FRSTOR instructions: + */ struct fregs_state { u32 cwd; /* FPU Control Word */ u32 swd; /* FPU Status Word */ @@ -16,10 +20,16 @@ struct fregs_state { /* 8*10 bytes for each FP-reg = 80 bytes: */ u32 st_space[20]; - /* Software status information [not touched by FSAVE ]: */ + /* Software status information [not touched by FSAVE]: */ u32 status; }; +/* + * The legacy fx SSE/MMX FPU state format, as saved by FXSAVE and + * restored by the FXRSTOR instructions. 
It's similar to the FSAVE + format, but differs in some areas, plus has extensions at + * the end for the XMM registers. + */ struct fxregs_state { u16 cwd; /* Control Word */ u16 swd; /* Status Word */ @@ -56,7 +66,8 @@ struct fxregs_state { } __attribute__((aligned(16))); /* - * Software based FPU emulation state: + * Software based FPU emulation state. This is arbitrary really, + * it matches the x87 format to make it easier to understand: */ struct swregs_state { u32 cwd; @@ -140,6 +151,14 @@ struct xstate_header { u64 reserved[6]; } __attribute__((packed)); +/* + * This is our most modern FPU state format, as saved by the XSAVE + * and restored by the XRSTOR instructions. + * + * It consists of a legacy fxregs portion, an xstate header and + * subsequent fixed size areas as defined by the xstate header. + * Not all CPUs support all the extensions. + */ struct xregs_state { struct fxregs_state i387; struct xstate_header header; @@ -150,6 +169,13 @@ struct xregs_state { /* New processor state extensions will go here. */ } __attribute__ ((packed, aligned (64))); +/* + * This is a union of all the possible FPU state formats + * put together, so that we can pick the right one runtime. + * + * The size of the structure is determined by the largest + * member - which is the xsave area: + */ union fpregs_state { struct fregs_state fsave; struct fxregs_state fxsave; @@ -157,6 +183,11 @@ union fpregs_state { struct xregs_state xsave; }; +/* + * Highest level per task FPU state data structure that + * contains the FPU register state plus various FPU + * state fields: + */ struct fpu { /* * @state: -- cgit v0.10.2 From 63c6680cd09b8803f428ec606d764229cb78eca6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 1 May 2015 10:54:22 +0200 Subject: x86/fpu: Move debugging check from kernel_fpu_begin() to __kernel_fpu_begin() kernel_fpu_begin() is __kernel_fpu_begin() with a preempt_disable(). Move the kernel_fpu_begin() debugging check into __kernel_fpu_begin(), so that users of __kernel_fpu_begin() may benefit from it as well. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 97df457..d67558c 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -109,6 +109,8 @@ void __kernel_fpu_begin(void) { struct fpu *fpu = &current->thread.fpu; + WARN_ON_ONCE(!irq_fpu_usable()); + kernel_fpu_disable(); if (fpu->fpregs_active) { @@ -138,7 +140,6 @@ EXPORT_SYMBOL(__kernel_fpu_end); void kernel_fpu_begin(void) { preempt_disable(); - WARN_ON_ONCE(!irq_fpu_usable()); __kernel_fpu_begin(); } EXPORT_SYMBOL_GPL(kernel_fpu_begin); -- cgit v0.10.2 From 39f1acd243725b4b20caa37a81b6c8f641b505b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2015 07:37:47 +0200 Subject: x86/fpu/xstate: Don't assume the first zero xfeatures zero bit means the end MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current xstate code in setup_xstate_features() assumes that the first zero bit means the end of xfeatures - but that is not so, the SDM clearly states that an arbitrary set of xfeatures might be enabled - and it is also clear from the description of the compaction feature that holes are possible: "13-6 Vol. 1MANAGING STATE USING THE XSAVE FEATURE SET [...] Compacted format.
Each state component i (i ≥ 2) is located at a byte offset from the base address of the XSAVE area based on the XCOMP_BV field in the XSAVE header: — If XCOMP_BV[i] = 0, state component i is not in the XSAVE area. — If XCOMP_BV[i] = 1, the following items apply: • If XCOMP_BV[j] = 0 for every j, 2 ≤ j < i, state component i is located at a byte offset 576 from the base address of the XSAVE area. (This item applies if i is the first bit set in bits 62:2 of the XCOMP_BV; it implies that state component i is located at the beginning of the extended region.) • Otherwise, let j, 2 ≤ j < i, be the greatest value such that XCOMP_BV[j] = 1. Then state component i is located at a byte offset X from the location of state component j, where X is the number of bytes required for state component j as enumerated in CPUID.(EAX=0DH,ECX=j):EAX. (This item implies that state component i immediately follows the preceding state component whose bit is set in XCOMP_BV.)" So don't assume that the first zero xfeatures bit means the end of all xfeatures - iterate through all of them. I'm not aware of hardware that triggers this currently. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index cd7f1a6..a024fa5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -168,26 +168,27 @@ void fpu__init_cpu_xstate(void) } /* - * Record the offsets and sizes of different state managed by the xsave - * memory layout. + * Record the offsets and sizes of various xstates contained + * in the XSAVE state memory layout. + * + * ( Note that certain features might be non-present, for them + * we'll have 0 offset and 0 size. ) */ static void __init setup_xstate_features(void) { - int eax, ebx, ecx, edx, leaf = 0x2; + u32 eax, ebx, ecx, edx, leaf; xfeatures_nr = fls64(xfeatures_mask); - do { + for (leaf = 2; leaf < xfeatures_nr; leaf++) { cpuid_count(XSTATE_CPUID, leaf, &eax, &ebx, &ecx, &edx); - if (eax == 0) - break; - xstate_offsets[leaf] = ebx; xstate_sizes[leaf] = eax; + printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax); leaf++; - } while (1); + } } static void print_xstate_feature(u64 xstate_mask) -- cgit v0.10.2 From 489e9c018854158722eb7123f9efefe67af5c4bd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2015 09:04:56 +0200 Subject: x86/fpu: Clean up xstate feature reservation Put MPX support into its separate high level structure, and also replace the fixed YMM, LWP and MPX structures in xregs_state with just reservations - their exact offsets in the structure will depend on the CPU and no code actually relies on those fields. No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
From 489e9c018854158722eb7123f9efefe67af5c4bd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2015 09:04:56 +0200 Subject: x86/fpu: Clean up xstate feature reservation Put MPX support into its own separate high level structure, and also replace the fixed YMM, LWP and MPX structures in xregs_state with just reservations - their exact offsets in the structure will depend on the CPU and no code actually relies on those fields. No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 4c4eceb..02241c2 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -145,12 +145,21 @@ struct bndcsr { u64 bndstatus; } __packed; +struct mpx_struct { + struct bndreg bndreg[4]; + struct bndcsr bndcsr; +}; + struct xstate_header { u64 xfeatures; u64 xcomp_bv; u64 reserved[6]; } __attribute__((packed)); +/* New processor state extensions should be added here: */ +#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \ + sizeof(struct lwp_struct) + \ + sizeof(struct mpx_struct) ) /* * This is our most modern FPU state format, as saved by the XSAVE * and restored by the XRSTOR instructions. @@ -162,11 +171,7 @@ struct xstate_header { struct xregs_state { struct fxregs_state i387; struct xstate_header header; - struct ymmh_struct ymmh; - struct lwp_struct lwp; - struct bndreg bndreg[4]; - struct bndcsr bndcsr; - /* New processor state extensions will go here. */ + u8 __reserved[XSTATE_RESERVE]; } __attribute__ ((packed, aligned (64))); /* -- cgit v0.10.2 From 5fd402dfa7fc97f8e8d74c92d24abadbdc4002ca Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2015 09:43:55 +0200 Subject: x86/fpu/xstate: Clean up setup_xstate_comp() call So call setup_xstate_comp() from the xstate init code, not from the generic fpu__init_system() code. This allows us to remove the prototype from xstate.h as well. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index a6181b9..8f336d2 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -227,6 +227,5 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) } void *get_xsave_addr(struct xregs_state *xsave, int xstate); -void setup_xstate_comp(void); #endif diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 5043706..88902521 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -262,7 +262,6 @@ void fpu__init_system(struct cpuinfo_x86 *c) fpu__init_system_generic(); fpu__init_system_xstate_size_legacy(); fpu__init_system_xstate(); - setup_xstate_comp(); fpu__init_system_ctx_switch(); } diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a024fa5..9e77332 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -218,11 +218,8 @@ static void print_xstate_features(void) * This function sets up offsets and sizes of all extended states in * xsave area. This supports both standard format and compacted format * of the xsave area. 
- * - * Input: void - * Output: void */ -void setup_xstate_comp(void) +static void setup_xstate_comp(void) { unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; int i; @@ -355,6 +352,7 @@ void fpu__init_system_xstate(void) update_regset_xstate_info(xstate_size, xfeatures_mask); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); + setup_xstate_comp(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is 0x%x bytes, using '%s' format.\n", xfeatures_mask, -- cgit v0.10.2 From 32231879f66162352fc6f3041c5c2b1d965879b2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2015 09:52:42 +0200 Subject: x86/fpu/init: Propagate __init annotations Now that all the FPU init function call dependencies are cleaned up we can propagate __init annotations deeper. This shrinks the runtime size of the kernel a bit, and also addresses a few section warnings. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/bugs.c b/arch/x86/kernel/fpu/bugs.c index 449b5f3..dd9ca9b6 100644 --- a/arch/x86/kernel/fpu/bugs.c +++ b/arch/x86/kernel/fpu/bugs.c @@ -60,7 +60,7 @@ static void __init check_fpu(void) } } -void fpu__init_check_bugs(void) +void __init fpu__init_check_bugs(void) { /* * kernel_fpu_begin/end() in check_fpu() relies on the patched diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 88902521..a9e506a 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -90,7 +90,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) */ unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; -static void fpu__init_system_mxcsr(void) +static void __init fpu__init_system_mxcsr(void) { unsigned int mask = 0; @@ -115,7 +115,7 @@ static void fpu__init_system_mxcsr(void) /* * Once per bootup FPU initialization sequences that will run on most x86 CPUs: */ -static void fpu__init_system_generic(void) +static void __init fpu__init_system_generic(void) { /* * Set up the legacy init FPU context. (xstate init might overwrite this @@ -141,7 +141,7 @@ EXPORT_SYMBOL_GPL(xstate_size); * We set this up first, and later it will be overwritten by * fpu__init_system_xstate() if the CPU knows about xstates. */ -static void fpu__init_system_xstate_size_legacy(void) +static void __init fpu__init_system_xstate_size_legacy(void) { /* * Note that xstate_size might be overwriten later during @@ -212,7 +212,7 @@ __setup("eagerfpu=", eager_fpu_setup); /* * Pick the FPU context switching strategy: */ -static void fpu__init_system_ctx_switch(void) +static void __init fpu__init_system_ctx_switch(void) { WARN_ON(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; @@ -234,14 +234,14 @@ static void fpu__init_system_ctx_switch(void) if (eagerfpu == ENABLE) setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); - printk_once(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); + printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? 
"eager" : "lazy"); } /* * Called on the boot CPU once per system bootup, to set up the initial * FPU state that is later cloned into all processes: */ -void fpu__init_system(struct cpuinfo_x86 *c) +void __init fpu__init_system(struct cpuinfo_x86 *c) { fpu__init_system_early_generic(c); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 9e77332..201f08f 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -191,7 +191,7 @@ static void __init setup_xstate_features(void) } } -static void print_xstate_feature(u64 xstate_mask) +static void __init print_xstate_feature(u64 xstate_mask) { const char *feature_name; @@ -202,7 +202,7 @@ static void print_xstate_feature(u64 xstate_mask) /* * Print out all the supported xstate features: */ -static void print_xstate_features(void) +static void __init print_xstate_features(void) { print_xstate_feature(XSTATE_FP); print_xstate_feature(XSTATE_SSE); @@ -219,7 +219,7 @@ static void print_xstate_features(void) * xsave area. This supports both standard format and compacted format * of the xsave aread. */ -static void setup_xstate_comp(void) +static void __init setup_xstate_comp(void) { unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8]; int i; @@ -260,7 +260,7 @@ static void setup_xstate_comp(void) /* * setup the xstate image representing the init state */ -static void setup_init_fpu_buf(void) +static void __init setup_init_fpu_buf(void) { if (!cpu_has_xsave) return; @@ -314,7 +314,7 @@ static void __init init_xstate_size(void) * * ( Not marked __init because of false positive section warnings. ) */ -void fpu__init_system_xstate(void) +void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; -- cgit v0.10.2 From e1884d69f643c743806ebb9bc9292863ef39e894 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 May 2015 11:49:58 +0200 Subject: x86/fpu: Pass 'struct fpu' to fpu__restore() This cleans up the call sites and the function a bit, and also makes it more symmetric with the other high level FPU state handling functions. It's still only valid for the current task, as we copy to the FPU registers of the current CPU. No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 7fdc90b..a4c1b7d 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -50,7 +50,7 @@ extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern void fpu__activate_curr(struct fpu *fpu); extern void fpu__activate_stopped(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); -extern void fpu__restore(void); +extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index d67558c..c066160 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -343,11 +343,8 @@ void fpu__activate_stopped(struct fpu *child_fpu) * with local interrupts disabled, as it is in the case of * do_device_not_available()). 
*/ -void fpu__restore(void) +void fpu__restore(struct fpu *fpu) { - struct task_struct *tsk = current; - struct fpu *fpu = &tsk->thread.fpu; - fpu__activate_curr(fpu); /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ @@ -355,9 +352,9 @@ fpregs_activate(fpu); if (unlikely(copy_fpstate_to_fpregs(fpu))) { fpu__clear(fpu); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, tsk); + force_sig_info(SIGSEGV, SEND_SIG_PRIV, current); } else { - tsk->thread.fpu.counter++; + fpu->counter++; } kernel_fpu_enable(); } diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 99f7309..50ec9af 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -319,7 +319,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) fpu->fpstate_active = 1; if (use_eager_fpu()) { preempt_disable(); - fpu__restore(); + fpu__restore(fpu); preempt_enable(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 6f581c6..a2510f2 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -803,7 +803,7 @@ do_device_not_available(struct pt_regs *regs, long error_code) return; } #endif - fpu__restore(); /* interrupts still off */ + fpu__restore(&current->thread.fpu); /* interrupts still off */ #ifdef CONFIG_X86_32 conditional_sti(regs); #endif diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 99bb300..6a4cd77 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -302,7 +302,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * before this. */ else if (cpu->regs->trapnum == 7 && !fpregs_active()) - fpu__restore(); + fpu__restore(&current->thread.fpu); } /*H:130 -- cgit v0.10.2 From d364a7656c1855c940dfa4baf4ebcc3c6a9e6fd2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 5 May 2015 11:43:02 +0200 Subject: x86/fpu: Fix the 'nofxsr' boot parameter to also clear X86_FEATURE_FXSR_OPT I tried to simulate an ancient CPU via this option, and found that it still has fxsr_opt enabled, confusing the FPU code. Make the 'nofxsr' option also clear the FXSR_OPT flag. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d15610b..d6fe512 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -171,6 +171,15 @@ static int __init x86_xsaves_setup(char *s) } __setup("noxsaves", x86_xsaves_setup); +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; @@ -182,14 +191,6 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - static int __init x86_sep_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_SEP); -- cgit v0.10.2 From e97131a8391e9fce5126ed54dc66c9d8965d3b4e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 5 May 2015 11:34:49 +0200 Subject: x86/fpu: Add CONFIG_X86_DEBUG_FPU=y FPU debugging code There are various internal FPU state debugging checks that never trigger in practice, but which are useful for FPU code development. Separate these out into CONFIG_X86_DEBUG_FPU=y, and also add a couple of new ones. The size difference is about 0.5K of code on defconfig: text data bss filename 15028906 2578816 1638400 vmlinux 15029430 2578816 1638400 vmlinux ( Keep this enabled by default until the new FPU code is debugged. ) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 72484a6..2fd3ebb 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -332,4 +332,16 @@ config X86_DEBUG_STATIC_CPU_HAS If unsure, say N. +config X86_DEBUG_FPU + bool "Debug the x86 FPU code" + depends on DEBUG_KERNEL + default y + ---help--- + If this option is enabled then there will be extra sanity + checks and (boot time) debug printouts added to the kernel. + This debugging adds some small amount of runtime overhead + to the kernel. + + If unsure, say N. + endmenu diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a4c1b7d..d2a281b 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -59,6 +59,15 @@ extern void fpu__clear(struct fpu *fpu); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); +/* + * Debugging facility: + */ +#ifdef CONFIG_X86_DEBUG_FPU +# define WARN_ON_FPU(x) WARN_ON_ONCE(x) +#else +# define WARN_ON_FPU(x) ({ 0; }) +#endif + DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); /* @@ -296,6 +305,8 @@ static inline void __fpregs_deactivate_hw(void) /* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */ static inline void __fpregs_deactivate(struct fpu *fpu) { + WARN_ON_FPU(!fpu->fpregs_active); + fpu->fpregs_active = 0; this_cpu_write(fpu_fpregs_owner_ctx, NULL); } @@ -303,6 +314,8 @@ static inline void __fpregs_deactivate(struct fpu *fpu) /* Must be paired with a 'clts' (fpregs_activate_hw()) before! 
*/ static inline void __fpregs_activate(struct fpu *fpu) { + WARN_ON_FPU(fpu->fpregs_active); + fpu->fpregs_active = 1; this_cpu_write(fpu_fpregs_owner_ctx, fpu); } @@ -433,8 +446,10 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) { if (fpu_switch.preload) { - if (unlikely(copy_fpstate_to_fpregs(new_fpu))) + if (unlikely(copy_fpstate_to_fpregs(new_fpu))) { + WARN_ON_FPU(1); fpu__clear(new_fpu); + } } } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index c066160..01a1550 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -38,13 +38,13 @@ DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); static void kernel_fpu_disable(void) { - WARN_ON(this_cpu_read(in_kernel_fpu)); + WARN_ON_FPU(this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, true); } static void kernel_fpu_enable(void) { - WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu)); + WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); this_cpu_write(in_kernel_fpu, false); } @@ -109,7 +109,7 @@ void __kernel_fpu_begin(void) { struct fpu *fpu = &current->thread.fpu; - WARN_ON_ONCE(!irq_fpu_usable()); + WARN_ON_FPU(!irq_fpu_usable()); kernel_fpu_disable(); @@ -127,7 +127,7 @@ void __kernel_fpu_end(void) struct fpu *fpu = &current->thread.fpu; if (fpu->fpregs_active) { - if (WARN_ON(copy_fpstate_to_fpregs(fpu))) + if (WARN_ON_FPU(copy_fpstate_to_fpregs(fpu))) fpu__clear(fpu); } else { __fpregs_deactivate_hw(); @@ -187,7 +187,7 @@ EXPORT_SYMBOL_GPL(irq_ts_restore); */ void fpu__save(struct fpu *fpu) { - WARN_ON(fpu != &current->thread.fpu); + WARN_ON_FPU(fpu != &current->thread.fpu); preempt_disable(); if (fpu->fpregs_active) { @@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(fpstate_init); */ static void fpu_copy(struct fpu *dst_fpu, struct fpu *src_fpu) { - WARN_ON(src_fpu != &current->thread.fpu); + WARN_ON_FPU(src_fpu != &current->thread.fpu); /* * Don't let 'init optimized' areas of the XSAVE area @@ -284,7 +284,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu) */ void fpu__activate_curr(struct fpu *fpu) { - WARN_ON_ONCE(fpu != &current->thread.fpu); + WARN_ON_FPU(fpu != &current->thread.fpu); if (!fpu->fpstate_active) { fpstate_init(&fpu->state); @@ -321,7 +321,7 @@ EXPORT_SYMBOL_GPL(fpu__activate_curr); */ void fpu__activate_stopped(struct fpu *child_fpu) { - WARN_ON_ONCE(child_fpu == &current->thread.fpu); + WARN_ON_FPU(child_fpu == &current->thread.fpu); if (child_fpu->fpstate_active) { child_fpu->last_cpu = -1; @@ -407,7 +407,7 @@ static inline void copy_init_fpstate_to_fpregs(void) */ void fpu__clear(struct fpu *fpu) { - WARN_ON_ONCE(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ + WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */ if (!use_eager_fpu()) { /* FPU state will be reallocated lazily at the first use. */ diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index a9e506a..e9f1d6e 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -143,6 +143,11 @@ EXPORT_SYMBOL_GPL(xstate_size); */ static void __init fpu__init_system_xstate_size_legacy(void) { + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + /* * Note that xstate_size might be overwritten later during * fpu__init_system_xstate(). 
@@ -214,7 +219,12 @@ __setup("eagerfpu=", eager_fpu_setup); */ static void __init fpu__init_system_ctx_switch(void) { - WARN_ON(current->thread.fpu.fpstate_active); + static bool on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + + WARN_ON_FPU(current->thread.fpu.fpstate_active); current_thread_info()->status = 0; /* Auto enable eagerfpu for xsaveopt */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 201f08f..5724098 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -262,6 +262,11 @@ static void __init setup_xstate_comp(void) */ static void __init setup_init_fpu_buf(void) { + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; + if (!cpu_has_xsave) return; @@ -317,6 +322,10 @@ static void __init init_xstate_size(void) void __init fpu__init_system_xstate(void) { unsigned int eax, ebx, ecx, edx; + static int on_boot_cpu = 1; + + WARN_ON_FPU(!on_boot_cpu); + on_boot_cpu = 0; if (!cpu_has_xsave) { pr_info("x86/fpu: Legacy x87 FPU detected.\n"); @@ -324,7 +333,7 @@ void __init fpu__init_system_xstate(void) } if (boot_cpu_data.cpuid_level < XSTATE_CPUID) { - WARN(1, "x86/fpu: XSTATE_CPUID missing!\n"); + WARN_ON_FPU(1); return; } -- cgit v0.10.2 From b1b64dc3558b7bde2917f9fc4f573f6a4787a95c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 5 May 2015 15:56:33 +0200 Subject: x86/fpu: Reorganize fpu/internal.h fpu/internal.h has grown organically, with not much high level structure, which hurts its readability. Organize the various definitions into 5 sections: - high level FPU state functions - FPU/CPU feature flag helpers - fpstate handling functions - FPU context switching helpers - misc helper functions Other related changes: - Move MXCSR_DEFAULT to fpu/types.h. - drop the unused X87_FSW_ES define No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d2a281b..a98a08d 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -18,32 +18,6 @@ #include #include -#define MXCSR_DEFAULT 0x1f80 - -extern unsigned int mxcsr_feature_mask; - -extern union fpregs_state init_fpstate; - -extern void fpu__init_cpu(void); -extern void fpu__init_system_xstate(void); -extern void fpu__init_cpu_xstate(void); -extern void fpu__init_system(struct cpuinfo_x86 *c); - -extern void fpstate_init(union fpregs_state *state); -#ifdef CONFIG_MATH_EMULATION -extern void fpstate_init_soft(struct swregs_state *soft); -#else -static inline void fpstate_init_soft(struct swregs_state *soft) {} -#endif -static inline void fpstate_init_fxstate(struct fxregs_state *fx) -{ - fx->cwd = 0x37f; - fx->mxcsr = MXCSR_DEFAULT; -} - -extern int dump_fpu(struct pt_regs *, struct user_i387_struct *); -extern int fpu__exception_code(struct fpu *fpu, int trap_nr); - /* * High level FPU state handling functions: */ @@ -55,7 +29,16 @@ extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu); extern void fpu__clear(struct fpu *fpu); +extern int fpu__exception_code(struct fpu *fpu, int trap_nr); +extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); +/* + * Boot time FPU initialization functions: + */ +extern void fpu__init_cpu(void); +extern void fpu__init_system_xstate(void); +extern void fpu__init_cpu_xstate(void); +extern void fpu__init_system(struct cpuinfo_x86 *c); extern void fpu__init_check_bugs(void); extern void fpu__resume_cpu(void); @@ -68,27 +51,9 @@ extern void fpu__resume_cpu(void); # define WARN_ON_FPU(x) ({ 0; }) #endif -DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); - /* - * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, - * on this CPU. - * - * This will disable any lazy FPU state restore of the current FPU state, - * but if the current thread owns the FPU, it will still be saved by. + * FPU related CPU feature flag helper routines: */ -static inline void __cpu_disable_lazy_restore(unsigned int cpu) -{ - per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; -} - -static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) -{ - return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; -} - -#define X87_FSW_ES (1 << 7) /* Exception Summary */ - static __always_inline __pure bool use_eager_fpu(void) { return static_cpu_has_safe(X86_FEATURE_EAGER_FPU); @@ -109,6 +74,23 @@ static __always_inline __pure bool use_fxsr(void) return static_cpu_has_safe(X86_FEATURE_FXSR); } +/* + * fpstate handling functions: + */ + +extern union fpregs_state init_fpstate; + +extern void fpstate_init(union fpregs_state *state); +#ifdef CONFIG_MATH_EMULATION +extern void fpstate_init_soft(struct swregs_state *soft); +#else +static inline void fpstate_init_soft(struct swregs_state *soft) {} +#endif +static inline void fpstate_init_fxstate(struct fxregs_state *fx) +{ + fx->cwd = 0x37f; + fx->mxcsr = MXCSR_DEFAULT; +} extern void fpstate_sanitize_xstate(struct fpu *fpu); #define user_insn(insn, output, input...) 
\ @@ -285,6 +267,32 @@ static inline int copy_fpstate_to_fpregs(struct fpu *fpu) return __copy_fpstate_to_fpregs(fpu); } +extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size); + +/* + * FPU context switch related helper methods: + */ + +DECLARE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx); + +/* + * Must be run with preemption disabled: this clears the fpu_fpregs_owner_ctx, + * on this CPU. + * + * This will disable any lazy FPU state restore of the current FPU state, + * but if the current thread owns the FPU, it will still be saved by. + */ +static inline void __cpu_disable_lazy_restore(unsigned int cpu) +{ + per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL; +} + +static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu) +{ + return fpu == this_cpu_read_stable(fpu_fpregs_owner_ctx) && cpu == fpu->last_cpu; +} + + /* * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation' * idiom, which is then paired with the sw-flag (fpregs_active) later on: @@ -355,31 +363,6 @@ static inline void fpregs_deactivate(struct fpu *fpu) } /* - * Definitions for the eXtended Control Register instructions - */ - -#define XCR_XFEATURE_ENABLED_MASK 0x00000000 - -static inline u64 xgetbv(u32 index) -{ - u32 eax, edx; - - asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ - : "=a" (eax), "=d" (edx) - : "c" (index)); - return eax + ((u64)edx << 32); -} - -static inline void xsetbv(u32 index, u64 value) -{ - u32 eax = value; - u32 edx = value >> 32; - - asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ - : : "a" (eax), "d" (edx), "c" (index)); -} - -/* * FPU state switching for scheduling. * * This is a two-stage process: @@ -438,6 +421,10 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) } /* + * Misc helper functions: + */ + +/* * By the time this gets called, we've already cleared CR0.TS and * given the process the FPU if we are going to preload the FPU * state - all we need to do is to conditionally restore the register @@ -454,11 +441,6 @@ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switc } /* - * Signal frame handlers... - */ -extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size); - -/* * Needs to be preemption-safe. * * NOTE! user_fpu_begin() must be used only immediately before restoring @@ -476,4 +458,31 @@ static inline void user_fpu_begin(void) preempt_enable(); } +/* + * MXCSR and XCR definitions: + */ + +extern unsigned int mxcsr_feature_mask; + +#define XCR_XFEATURE_ENABLED_MASK 0x00000000 + +static inline u64 xgetbv(u32 index) +{ + u32 eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((u64)edx << 32); +} + +static inline void xsetbv(u32 index, u64 value) +{ + u32 eax = value; + u32 edx = value >> 32; + + asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ + : : "a" (eax), "d" (edx), "c" (index)); +} + #endif /* _ASM_X86_FPU_INTERNAL_H */ diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h index 02241c2..0637826 100644 --- a/arch/x86/include/asm/fpu/types.h +++ b/arch/x86/include/asm/fpu/types.h @@ -65,6 +65,9 @@ struct fxregs_state { } __attribute__((aligned(16))); +/* Default value for fxregs_state.mxcsr: */ +#define MXCSR_DEFAULT 0x1f80 + /* * Software based FPU emulation state. 
This is arbitrary really, * it matches the x87 format to make it easier to understand: -- cgit v0.10.2 From b11ca7fbc735aca5ab78ce8130603bbf4fefd3f7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 May 2015 09:59:30 +0200 Subject: x86/fpu/xstate: Use explicit parameter in xstate_fault() While looking at xstate.h it took me some time to realize that 'xstate_fault' uses 'err' as a silent parameter. This is not obvious at the call site, at all. Make it an explicit macro argument, so that the syntactic connection is easier to see. Also explain xstate_fault() a bit. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 8f336d2..a6e07bb 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -47,12 +47,18 @@ extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); #define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" #define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" -#define xstate_fault ".section .fixup,\"ax\"\n" \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - ".previous\n" \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (err) +/* xstate instruction fault handler: */ +#define xstate_fault(__err) \ + \ + ".section .fixup,\"ax\"\n" \ + \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + \ + ".previous\n" \ + \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (__err) /* * This function is called only during boot time when x86 caps are not set @@ -70,13 +76,13 @@ static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) if (boot_cpu_has(X86_FEATURE_XSAVES)) asm volatile("1:"XSAVES"\n\t" "2:\n\t" - xstate_fault + xstate_fault(err) : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XSAVE"\n\t" "2:\n\t" - xstate_fault + xstate_fault(err) : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); return err; @@ -97,13 +103,13 @@ static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) if (boot_cpu_has(X86_FEATURE_XSAVES)) asm volatile("1:"XRSTORS"\n\t" "2:\n\t" - xstate_fault + xstate_fault(err) : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XRSTOR"\n\t" "2:\n\t" - xstate_fault + xstate_fault(err) : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) : "memory"); return err; @@ -141,7 +147,7 @@ static inline int copy_xregs_to_kernel(struct xregs_state *fx) [fx] "D" (fx), "a" (lmask), "d" (hmask) : "memory"); asm volatile("2:\n\t" - xstate_fault + xstate_fault(err) : "0" (0) : "memory"); @@ -169,7 +175,7 @@ static inline int copy_kernel_to_xregs(struct xregs_state *fx, u64 mask) : "memory"); asm volatile("2:\n" - xstate_fault + xstate_fault(err) : "0" (0) : "memory"); @@ -201,7 +207,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) __asm__ __volatile__(ASM_STAC "\n" "1:"XSAVE"\n" "2: " ASM_CLAC "\n" - xstate_fault + xstate_fault(err) : "D" (buf), "a" (-1), "d" (-1), "0" (0) : "memory"); return err; @@ -220,7 +226,7 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) __asm__ __volatile__(ASM_STAC "\n" "1:"XRSTOR"\n" "2: " ASM_CLAC "\n" - xstate_fault + xstate_fault(err) : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) : "memory"); /* memory required? 
*/ return err; -- cgit v0.10.2 From 7cf82d33b613780a79fda91babf7b9e6c5c82d75 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 May 2015 11:35:02 +0200 Subject: x86/fpu/init: Move __setup() functions to fpu/init.c We had a number of FPU init related boot option handlers in arch/x86/kernel/cpu/common.c - move them over into arch/x86/kernel/fpu/init.c to have them all in a single place. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d6fe512..401ccb03 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -144,42 +144,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); -static int __init x86_xsave_setup(char *s) -{ - if (strlen(s)) - return 0; - setup_clear_cpu_cap(X86_FEATURE_XSAVE); - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - setup_clear_cpu_cap(X86_FEATURE_AVX); - setup_clear_cpu_cap(X86_FEATURE_AVX2); - return 1; -} -__setup("noxsave", x86_xsave_setup); - -static int __init x86_xsaveopt_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); - return 1; -} -__setup("noxsaveopt", x86_xsaveopt_setup); - -static int __init x86_xsaves_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_XSAVES); - return 1; -} -__setup("noxsaves", x86_xsaves_setup); - -static int __init x86_fxsr_setup(char *s) -{ - setup_clear_cpu_cap(X86_FEATURE_FXSR); - setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); - setup_clear_cpu_cap(X86_FEATURE_XMM); - return 1; -} -__setup("nofxsr", x86_fxsr_setup); - #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index e9f1d6e..fe8cce7 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -286,3 +286,40 @@ static int __init no_387(char *s) } __setup("no387", no_387); + +static int __init x86_xsave_setup(char *s) +{ + if (strlen(s)) + return 0; + setup_clear_cpu_cap(X86_FEATURE_XSAVE); + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + setup_clear_cpu_cap(X86_FEATURE_AVX); + setup_clear_cpu_cap(X86_FEATURE_AVX2); + return 1; +} +__setup("noxsave", x86_xsave_setup); + +static int __init x86_xsaveopt_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + return 1; +} +__setup("noxsaveopt", x86_xsaveopt_setup); + +static int __init x86_xsaves_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_XSAVES); + return 1; +} +__setup("noxsaves", x86_xsaves_setup); + +static int __init x86_fxsr_setup(char *s) +{ + setup_clear_cpu_cap(X86_FEATURE_FXSR); + setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); + setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; +} +__setup("nofxsr", x86_fxsr_setup); + -- cgit v0.10.2 From 5856afed0c8c419286d9f0c8e57e83e2875eec4b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 May 2015 11:39:35 +0200 Subject: x86/fpu/init: Clean up and comment the __setup() functions Explain the functions and also standardize their style and naming. No change in functionality. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index fe8cce7..9da740e 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -284,42 +284,57 @@ static int __init no_387(char *s) setup_clear_cpu_cap(X86_FEATURE_FPU); return 1; } - __setup("no387", no_387); -static int __init x86_xsave_setup(char *s) +/* + * Disable all xstate CPU features: + */ +static int __init x86_noxsave_setup(char *s) { if (strlen(s)) return 0; + setup_clear_cpu_cap(X86_FEATURE_XSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); setup_clear_cpu_cap(X86_FEATURE_XSAVES); setup_clear_cpu_cap(X86_FEATURE_AVX); setup_clear_cpu_cap(X86_FEATURE_AVX2); + return 1; } -__setup("noxsave", x86_xsave_setup); +__setup("noxsave", x86_noxsave_setup); -static int __init x86_xsaveopt_setup(char *s) +/* + * Disable the XSAVEOPT instruction specifically: + */ +static int __init x86_noxsaveopt_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); + return 1; } -__setup("noxsaveopt", x86_xsaveopt_setup); +__setup("noxsaveopt", x86_noxsaveopt_setup); -static int __init x86_xsaves_setup(char *s) +/* + * Disable the XSAVES instruction: + */ +static int __init x86_noxsaves_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_XSAVES); + return 1; } -__setup("noxsaves", x86_xsaves_setup); +__setup("noxsaves", x86_noxsaves_setup); -static int __init x86_fxsr_setup(char *s) +/* + * Disable FX save/restore and SSE support: + */ +static int __init x86_nofxsr_setup(char *s) { setup_clear_cpu_cap(X86_FEATURE_FXSR); setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); setup_clear_cpu_cap(X86_FEATURE_XMM); + return 1; } -__setup("nofxsr", x86_fxsr_setup); - +__setup("nofxsr", x86_nofxsr_setup); -- cgit v0.10.2 From b54b4bbbf5e9313acc681129ab332e33a397cd13 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 22 May 2015 10:58:45 +0200 Subject: x86/fpu, crypto: Fix AVX2 feature tests For some CPU models I broke the AVX2 feature detection in: 7bc371faa9cd ("x86/fpu, crypto x86/camellia_aesni_avx2: Simplify the camellia_aesni_init() xfeature checks") 534ff06e3929 ("x86/fpu, crypto x86/serpent_avx2: Simplify the init() xfeature checks") ... because I did not realize that it's possible for a CPU to support the xstate necessary for AVX2 execution (XSTATE_YMM), but not have the AVX2 instructions themselves. Restore the necessary CPUID checks as well. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c index 76ea7df..4c65c70 100644 --- a/arch/x86/crypto/camellia_aesni_avx2_glue.c +++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c @@ -562,6 +562,11 @@ static int __init camellia_aesni_init(void) { const char *feature_name; + if (!cpu_has_avx2 || !cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) { + pr_info("AVX2 or AES-NI instructions are not detected.\n"); + return -ENODEV; + } + if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c index f226ad4..7d838dc 100644 --- a/arch/x86/crypto/serpent_avx2_glue.c +++ b/arch/x86/crypto/serpent_avx2_glue.c @@ -538,6 +538,10 @@ static int __init init(void) { const char *feature_name; + if (!cpu_has_avx2 || !cpu_has_osxsave) { + pr_info("AVX2 instructions are not detected.\n"); + return -ENODEV; + } if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) { pr_info("CPU feature '%s' is not supported.\n", feature_name); return -ENODEV; -- cgit v0.10.2
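( Illustration - not kernel code: the two layers of capability the fix above distinguishes, written as a user-space probe. CPUID says whether the AVX2 instructions exist; XCR0 (read via XGETBV, valid only once CPUID.1:ECX.OSXSAVE confirms the OS enabled XSAVE) says whether the OS actually context-switches the YMM state. 'can_use_avx2' is a made-up name and the sketch assumes GCC/clang's <cpuid.h>: )

#include <stdbool.h>
#include <cpuid.h>

static bool can_use_avx2(void)
{
	unsigned int eax, ebx, ecx, edx;

	if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
		return false;
	if (!(ecx & (1u << 27)))		/* OSXSAVE: XGETBV is usable */
		return false;

	/* XGETBV, ECX=0: read XCR0 - are SSE (bit 1) and YMM (bit 2) enabled? */
	__asm__ volatile(".byte 0x0f,0x01,0xd0" : "=a" (eax), "=d" (edx) : "c" (0));
	if ((eax & 0x6) != 0x6)
		return false;			/* the xstate exists, OS support does not */

	/* Finally the instruction set itself (assumes max basic leaf >= 7): */
	__cpuid_count(7, 0, eax, ebx, ecx, edx);
	return !!(ebx & (1u << 5));		/* CPUID.(EAX=7,ECX=0):EBX bit 5 = AVX2 */
}

( The inverse case is what bit the kernel here: XCR0 can have YMM set - so cpu_has_xfeatures(XSTATE_YMM) passes - on a CPU that never implemented AVX2, hence the restored CPUID checks. )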
From b8c1b8ea7b219a7ba6d58d97bfdf1403b741f8d5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 May 2015 09:58:12 +0200 Subject: x86/fpu: Fix FPU state save area alignment bug On most configs task-struct is cache line aligned, which makes the XSAVE area's 64-byte required alignment work out fine. But on some .config's task_struct is aligned only to 16 bytes (enforced by ARCH_MIN_TASKALIGN), which makes things like fpu__copy() (that XSAVEOPT uses) not work so well. I broke this in: 7366ed771f6e ("x86/fpu: Simplify FPU handling by embedding the fpstate in task_struct (again)") which embedded the fpstate in the task_struct. The alignment requirements of the FPU code were originally present in ARCH_MIN_TASKALIGN, which still has a value of 16, which was the alignment requirement of the FPU state area prior to XSAVE. But this link was not documented (and not required) and the link got lost when the FPU state area was made dynamic years ago. With XSAVEOPT the minimum alignment requirement went up to 64 bytes, and the embedding of the FPU state area in task_struct exposed it again - and '16' was not increased to '64'. So fix this bug, but also try to address the underlying lost link of information that made it easier to happen: - document ARCH_MIN_TASKALIGN a bit better - use alignof() to recover the current alignment requirements. This would work in the future as well, should the alignment requirements go up to 128 bytes with things like AVX512. ( We should probably also use the vSMP alignment rules for all of x86, but that's for another patch. ) Reported-by: Peter Zijlstra Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b9e4874..8e04f51 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -53,11 +53,16 @@ static inline void *current_text_addr(void) return pc; } +/* + * These alignment constraints are for performance in the vSMP case, + * but in the task_struct case we must also meet hardware imposed + * alignment requirements of the FPU state: + */ #ifdef CONFIG_X86_VSMP # define ARCH_MIN_TASKALIGN (1 << INTERNODE_CACHE_SHIFT) # define ARCH_MIN_MMSTRUCT_ALIGN (1 << INTERNODE_CACHE_SHIFT) #else -# define ARCH_MIN_TASKALIGN 16 +# define ARCH_MIN_TASKALIGN __alignof__(union fpregs_state) # define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif -- cgit v0.10.2 From fd169b05415485bf47606d820194c39cb2f53201 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 09:55:39 +0200 Subject: x86/fpu: Move the xstate copying functions into fpu/internal.h All the other register <-> memory copying functions are defined in fpu/internal.h, so move the xstate variants there too. Beyond being more consistent, this also allows FPU debugging checks to be added to them. (Because they can now use the macros defined in fpu/internal.h.) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index a98a08d..bcc4173 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -208,6 +208,198 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) } } +/* These macros all use (%edi)/(%rdi) as the single memory argument. */ +#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" +#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" +#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" +#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" +#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" + +/* xstate instruction fault handler: */ +#define xstate_fault(__err) \ + \ + ".section .fixup,\"ax\"\n" \ + \ + "3: movl $-1,%[err]\n" \ + " jmp 2b\n" \ + \ + ".previous\n" \ + \ + _ASM_EXTABLE(1b, 3b) \ + : [err] "=r" (__err) + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. + */ +static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XSAVES"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XSAVE"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + return err; +} + +/* + * This function is called only during boot time when x86 caps are not set + * up and alternative can not be used yet. 
+ */ +static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) +{ + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(system_state != SYSTEM_BOOTING); + + if (boot_cpu_has(X86_FEATURE_XSAVES)) + asm volatile("1:"XRSTORS"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + else + asm volatile("1:"XRSTOR"\n\t" + "2:\n\t" + xstate_fault(err) + : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + return err; +} + +/* + * Save processor xstate to xsave area. + */ +static inline int copy_xregs_to_kernel(struct xregs_state *fx) +{ + u64 mask = -1; + u32 lmask = mask; + u32 hmask = mask >> 32; + int err = 0; + + WARN_ON(!alternatives_patched); + + /* + * If xsaves is enabled, xsaves replaces xsaveopt because + * it supports compact format and supervisor states in addition to + * modified optimization in xsaveopt. + * + * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave + * because xsaveopt supports modified optimization which is not + * supported by xsave. + * + * If none of xsaves and xsaveopt is enabled, use xsave. + */ + alternative_input_2( + "1:"XSAVE, + XSAVEOPT, + X86_FEATURE_XSAVEOPT, + XSAVES, + X86_FEATURE_XSAVES, + [fx] "D" (fx), "a" (lmask), "d" (hmask) : + "memory"); + asm volatile("2:\n\t" + xstate_fault(err) + : "0" (0) + : "memory"); + + return err; +} + +/* + * Restore processor xstate from xsave area. + */ +static inline int copy_kernel_to_xregs(struct xregs_state *fx, u64 mask) +{ + int err = 0; + u32 lmask = mask; + u32 hmask = mask >> 32; + + /* + * Use xrstors to restore context if it is enabled. xrstors supports + * compacted format of xsave area which is not supported by xrstor. + */ + alternative_input( + "1: " XRSTOR, + XRSTORS, + X86_FEATURE_XSAVES, + "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); + + asm volatile("2:\n" + xstate_fault(err) + : "0" (0) + : "memory"); + + return err; +} + +/* + * Save xstate to user space xsave area. + * + * We don't use modified optimization because xrstor/xrstors might track + * a different application. + * + * We don't use compacted format xsave area for + * backward compatibility for old applications which don't understand + * compacted format of xsave area. + */ +static inline int copy_xregs_to_user(struct xregs_state __user *buf) +{ + int err; + + /* + * Clear the xsave header first, so that reserved fields are + * initialized to zero. + */ + err = __clear_user(&buf->header, sizeof(buf->header)); + if (unlikely(err)) + return -EFAULT; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XSAVE"\n" + "2: " ASM_CLAC "\n" + xstate_fault(err) + : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "memory"); + return err; +} + +/* + * Restore xstate from user space xsave area. + */ +static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) +{ + int err = 0; + struct xregs_state *xstate = ((__force struct xregs_state *)buf); + u32 lmask = mask; + u32 hmask = mask >> 32; + + __asm__ __volatile__(ASM_STAC "\n" + "1:"XRSTOR"\n" + "2: " ASM_CLAC "\n" + xstate_fault(err) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "memory"); /* memory required? */ + return err; +} + /* * These must be called with preempt disabled. 
Returns * 'true' if the FPU state is still intact and we can diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index a6e07bb..3398946 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -40,198 +40,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); -/* These macros all use (%edi)/(%rdi) as the single memory argument. */ -#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27" -#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37" -#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f" -#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f" -#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f" - -/* xstate instruction fault handler: */ -#define xstate_fault(__err) \ - \ - ".section .fixup,\"ax\"\n" \ - \ - "3: movl $-1,%[err]\n" \ - " jmp 2b\n" \ - \ - ".previous\n" \ - \ - _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (__err) - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XSAVES"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XSAVE"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * This function is called only during boot time when x86 caps are not set - * up and alternative can not be used yet. - */ -static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) -{ - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(system_state != SYSTEM_BOOTING); - - if (boot_cpu_has(X86_FEATURE_XSAVES)) - asm volatile("1:"XRSTORS"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - else - asm volatile("1:"XRSTOR"\n\t" - "2:\n\t" - xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - return err; -} - -/* - * Save processor xstate to xsave area. - */ -static inline int copy_xregs_to_kernel(struct xregs_state *fx) -{ - u64 mask = -1; - u32 lmask = mask; - u32 hmask = mask >> 32; - int err = 0; - - WARN_ON(!alternatives_patched); - - /* - * If xsaves is enabled, xsaves replaces xsaveopt because - * it supports compact format and supervisor states in addition to - * modified optimization in xsaveopt. - * - * Otherwise, if xsaveopt is enabled, xsaveopt replaces xsave - * because xsaveopt supports modified optimization which is not - * supported by xsave. - * - * If none of xsaves and xsaveopt is enabled, use xsave. - */ - alternative_input_2( - "1:"XSAVE, - XSAVEOPT, - X86_FEATURE_XSAVEOPT, - XSAVES, - X86_FEATURE_XSAVES, - [fx] "D" (fx), "a" (lmask), "d" (hmask) : - "memory"); - asm volatile("2:\n\t" - xstate_fault(err) - : "0" (0) - : "memory"); - - return err; -} - -/* - * Restore processor xstate from xsave area. - */ -static inline int copy_kernel_to_xregs(struct xregs_state *fx, u64 mask) -{ - int err = 0; - u32 lmask = mask; - u32 hmask = mask >> 32; - - /* - * Use xrstors to restore context if it is enabled. xrstors supports - * compacted format of xsave area which is not supported by xrstor. 
- */ - alternative_input( - "1: " XRSTOR, - XRSTORS, - X86_FEATURE_XSAVES, - "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) - : "memory"); - - asm volatile("2:\n" - xstate_fault(err) - : "0" (0) - : "memory"); - - return err; -} - -/* - * Save xstate to user space xsave area. - * - * We don't use modified optimization because xrstor/xrstors might track - * a different application. - * - * We don't use compacted format xsave area for - * backward compatibility for old applications which don't understand - * compacted format of xsave area. - */ -static inline int copy_xregs_to_user(struct xregs_state __user *buf) -{ - int err; - - /* - * Clear the xsave header first, so that reserved fields are - * initialized to zero. - */ - err = __clear_user(&buf->header, sizeof(buf->header)); - if (unlikely(err)) - return -EFAULT; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XSAVE"\n" - "2: " ASM_CLAC "\n" - xstate_fault(err) - : "D" (buf), "a" (-1), "d" (-1), "0" (0) - : "memory"); - return err; -} - -/* - * Restore xstate from user space xsave area. - */ -static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) -{ - int err = 0; - struct xregs_state *xstate = ((__force struct xregs_state *)buf); - u32 lmask = mask; - u32 hmask = mask >> 32; - - __asm__ __volatile__(ASM_STAC "\n" - "1:"XRSTOR"\n" - "2: " ASM_CLAC "\n" - xstate_fault(err) - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) - : "memory"); /* memory required? */ - return err; -} - void *get_xsave_addr(struct xregs_state *xsave, int xstate); #endif -- cgit v0.10.2 From 6e5535940fcc2608e58af997236ce5e0391ccfc6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 May 2015 09:58:12 +0200 Subject: x86/fpu: Fix fpu__init_system_xstate() comments Remove obsolete comment about __init limitations: in the new code there aren't any. Also standardize the comment style in the function while at it. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 5724098..ad4dd26 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -316,8 +316,6 @@ static void __init init_xstate_size(void) /* * Enable and initialize the xsave feature. * Called once per system bootup. - * - * ( Not marked __init because of false positive section warnings. ) */ void __init fpu__init_system_xstate(void) { @@ -345,17 +343,13 @@ void __init fpu__init_system_xstate(void) BUG(); } - /* - * Support only the state known to OS. - */ + /* Support only the state known to the OS: */ xfeatures_mask = xfeatures_mask & XCNTXT_MASK; /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); - /* - * Recompute the context size for enabled features - */ + /* Recompute the context size for enabled features: */ init_xstate_size(); update_regset_xstate_info(xstate_size, xfeatures_mask); -- cgit v0.10.2 From 87dafd41a4423c6f730e5f4b0d56a1aa3fdcf3fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 10:57:06 +0200 Subject: x86/fpu: Rename xstate related 'fx' references to 'xstate' So the xstate code was probably first copied from the fxregs code, hence it carried over the 'fx' naming for the state pointer variable. 
But this is slightly confusing, as we usually only call the (legacy) MMX/SSE state 'fx', both in data structures and in the functions built around FXSAVE/FXRSTOR. So rename it to 'xstate' to make it more apparent what it is related to. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index bcc4173..d142ecb 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -232,7 +232,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) +static inline int copy_xregs_to_kernel_booting(struct xregs_state *xstate) { u64 mask = -1; u32 lmask = mask; @@ -245,13 +245,13 @@ static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) asm volatile("1:"XSAVES"\n\t" "2:\n\t" xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XSAVE"\n\t" "2:\n\t" xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) : "memory"); return err; } @@ -260,7 +260,7 @@ static inline int copy_xregs_to_kernel_booting(struct xregs_state *fx) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) +static inline int copy_kernel_to_xregs_booting(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -272,13 +272,13 @@ static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) asm volatile("1:"XRSTORS"\n\t" "2:\n\t" xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) : "memory"); else asm volatile("1:"XRSTOR"\n\t" "2:\n\t" xstate_fault(err) - : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) : "memory"); return err; } @@ -286,7 +286,7 @@ static inline int copy_kernel_to_xregs_booting(struct xregs_state *fx, u64 mask) /* * Save processor xstate to xsave area. */ -static inline int copy_xregs_to_kernel(struct xregs_state *fx) +static inline int copy_xregs_to_kernel(struct xregs_state *xstate) { u64 mask = -1; u32 lmask = mask; @@ -312,7 +312,7 @@ static inline int copy_xregs_to_kernel(struct xregs_state *fx) X86_FEATURE_XSAVEOPT, XSAVES, X86_FEATURE_XSAVES, - [fx] "D" (fx), "a" (lmask), "d" (hmask) : + [xstate] "D" (xstate), "a" (lmask), "d" (hmask) : "memory"); asm volatile("2:\n\t" xstate_fault(err) @@ -325,7 +325,7 @@ static inline int copy_xregs_to_kernel(struct xregs_state *fx) /* * Restore processor xstate from xsave area. */ -static inline int copy_kernel_to_xregs(struct xregs_state *fx, u64 mask) +static inline int copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { int err = 0; u32 lmask = mask; @@ -339,7 +339,7 @@ static inline int copy_kernel_to_xregs(struct xregs_state *fx, u64 mask) "1: " XRSTOR, XRSTORS, X86_FEATURE_XSAVES, - "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) : "memory"); asm volatile("2:\n" @@ -426,7 +426,7 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) * Legacy FPU register saving, FNSAVE always clears FPU registers, * so we have to mark them inactive: */ - asm volatile("fnsave %[fx]; fwait" : [fx] "=m" (fpu->state.fsave)); + asm volatile("fnsave %[fp]; fwait" : [fp] "=m" (fpu->state.fsave)); return 0; } @@ -459,7 +459,7 @@ static inline int copy_fpstate_to_fpregs(struct fpu *fpu) return __copy_fpstate_to_fpregs(fpu); } -extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fx, int size); +extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); /* * FPU context switch related helper methods: -- cgit v0.10.2
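( Illustration - standalone, with made-up identifiers: what an inline-asm operand name like '[fx]' or '[xstate]' actually is. The bracketed name in the constraint list is referenced as '%[name]' inside the template, so renaming it is purely cosmetic - which is why the patch above is a functional no-op: )

#include <stdio.h>

int main(void)
{
	unsigned long dst;
	unsigned long src = 42;

	/* '%[out]' and '%[in]' bind to the operands named in the lists below: */
	asm("mov %[in], %[out]"
	    : [out] "=r" (dst)
	    : [in] "r" (src));

	printf("%lu\n", dst);
	return 0;
}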
From 87b6559d0a37cd82b4b2ffe38f88c0d4ac6ee7e2 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 11:27:46 +0200 Subject: x86/fpu: Improve xstate_fault() handling There are two problems with xstate_fault handling: - The xstate_fault() macro takes an argument, but that's propagated into the assembly named label as well. This is technically correct currently but might result in failures if a more complex argument is ever used. So use a separate '_err' name instead for the label. - All the functions using xstate_fault() have an error variable named 'err', which is an output variable of the asm() they are using. The problem is, it's not always set by the asm(), in which case the compiler might optimize out its initialization, so that the C variable 'err' might become corrupted after the asm() - confusing anyone who tries to take advantage of this variable after the asm(). Mark it an input variable as well. This is a latent bug currently, but an upcoming debug patch will make use of 'err'. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d142ecb..5370500 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -220,13 +220,13 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) \ ".section .fixup,\"ax\"\n" \ \ - "3: movl $-1,%[err]\n" \ + "3: movl $-2,%[_err]\n" \ " jmp 2b\n" \ \ ".previous\n" \ \ _ASM_EXTABLE(1b, 3b) \ - : [err] "=r" (__err) + : [_err] "=r" (__err) /* * This function is called only during boot time when x86 caps are not set @@ -245,14 +245,14 @@ static inline int copy_xregs_to_kernel_booting(struct xregs_state *xstate) asm volatile("1:"XSAVES"\n\t" "2:\n\t" xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) - : "memory"); + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); else asm volatile("1:"XSAVE"\n\t" "2:\n\t" xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) - : "memory"); + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); return err; } @@ -272,14 +272,14 @@ static inline int copy_kernel_to_xregs_booting(struct xregs_state *xstate, u64 m asm volatile("1:"XRSTORS"\n\t" "2:\n\t" xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) - : "memory"); + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); else asm volatile("1:"XRSTOR"\n\t" "2:\n\t" xstate_fault(err) - : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask) - : "memory"); + : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) + : "memory"); return err; } -- cgit v0.10.2 From 685c9616248c4f0d57e0d81d3236c80bdce1af46 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 11:59:35 +0200 Subject: x86/fpu: Improve the initialization logic of 'err' around xstate_fault() constraints There's a confusing aspect of how xstate_fault() constraints are handled by the FPU register/memory copying functions in fpu/internal.h: they use "0" (0) to signal that the asm code will not always set 'err' to a valid value. But 'err' is already initialized to 0 in C code, which is duplicated by the asm() constraint. Should the initialization value ever be changed, it might become subtly inconsistent with the not too clear asm() constraint. Use 'err' as the value of the input variable instead, to clarify this all. Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5370500..1352d38 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -316,7 +316,7 @@ static inline int copy_xregs_to_kernel(struct xregs_state *xstate) "memory"); asm volatile("2:\n\t" xstate_fault(err) - : "0" (0) + : "0" (err) : "memory"); return err; @@ -327,9 +327,9 @@ static inline int copy_xregs_to_kernel(struct xregs_state *xstate) */ static inline int copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { - int err = 0; u32 lmask = mask; u32 hmask = mask >> 32; + int err = 0; /* * Use xrstors to restore context if it is enabled. 
xrstors supports @@ -344,7 +344,7 @@ static inline int copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) asm volatile("2:\n" xstate_fault(err) - : "0" (0) + : "0" (err) : "memory"); return err; @@ -376,7 +376,7 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) "1:"XSAVE"\n" "2: " ASM_CLAC "\n" xstate_fault(err) - : "D" (buf), "a" (-1), "d" (-1), "0" (0) + : "D" (buf), "a" (-1), "d" (-1), "0" (err) : "memory"); return err; } @@ -386,16 +386,16 @@ static inline int copy_xregs_to_user(struct xregs_state __user *buf) */ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask) { - int err = 0; struct xregs_state *xstate = ((__force struct xregs_state *)buf); u32 lmask = mask; u32 hmask = mask >> 32; + int err = 0; __asm__ __volatile__(ASM_STAC "\n" "1:"XRSTOR"\n" "2: " ASM_CLAC "\n" xstate_fault(err) - : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0) + : "D" (xstate), "a" (lmask), "d" (hmask), "0" (err) : "memory"); /* memory required? */ return err; } -- cgit v0.10.2 From 8c05f05edb7795ecd1fa95d5d44bc5b22fd85287 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 May 2015 09:23:25 +0200 Subject: x86/fpu: Micro-optimize the copy_xregs_to_kernel*() and copy_kernel_to_xregs*() functions The copy_xregs_to_kernel*() and copy_kernel_to_xregs*() functions are used to copy FPU registers to kernel memory and vice versa. They are never expected to fail, yet they have a return code, mostly because that way they can share the assembly macros with the copy*user*() functions. This error code is then silently ignored by the context switching and other code - which made the bug in: b8c1b8ea7b21 ("x86/fpu: Fix FPU state save area alignment bug") harder to fix than necessary. So remove the return values and check for no faults when FPU debugging is enabled in the .config. This improves the eagerfpu context switching fast path by a couple of instructions, when FPU debugging is disabled: ffffffff810407fa: 89 c2 mov %eax,%edx ffffffff810407fc: 48 0f ae 2f xrstor64 (%rdi) ffffffff81040800: 31 c0 xor %eax,%eax -ffffffff81040802: eb 0a jmp ffffffff8104080e <__switch_to+0x321> +ffffffff81040802: eb 16 jmp ffffffff8104081a <__switch_to+0x32d> ffffffff81040804: 31 c0 xor %eax,%eax ffffffff81040806: 48 0f ae 8b c0 05 00 fxrstor64 0x5c0(%rbx) ffffffff8104080d: 00 Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 1352d38..99690be 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -232,7 +232,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline int copy_xregs_to_kernel_booting(struct xregs_state *xstate) +static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) { u64 mask = -1; u32 lmask = mask; @@ -253,14 +253,16 @@ static inline int copy_xregs_to_kernel_booting(struct xregs_state *xstate) xstate_fault(err) : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) : "memory"); - return err; + + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); } /* * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. 
*/ -static inline int copy_kernel_to_xregs_booting(struct xregs_state *xstate, u64 mask) +static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -280,13 +282,15 @@ static inline int copy_kernel_to_xregs_booting(struct xregs_state *xstate, u64 m xstate_fault(err) : "D" (xstate), "m" (*xstate), "a" (lmask), "d" (hmask), "0" (err) : "memory"); - return err; + + /* We should never fault when copying from a kernel buffer: */ + WARN_ON_FPU(err); } /* * Save processor xstate to xsave area. */ -static inline int copy_xregs_to_kernel(struct xregs_state *xstate) +static inline void copy_xregs_to_kernel(struct xregs_state *xstate) { u64 mask = -1; u32 lmask = mask; @@ -319,13 +323,14 @@ static inline int copy_xregs_to_kernel(struct xregs_state *xstate) : "0" (err) : "memory"); - return err; + /* We should never fault when copying to a kernel buffer: */ + WARN_ON_FPU(err); } /* * Restore processor xstate from xsave area. */ -static inline int copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) +static inline void copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) { u32 lmask = mask; u32 hmask = mask >> 32; @@ -347,7 +352,8 @@ static inline int copy_kernel_to_xregs(struct xregs_state *xstate, u64 mask) : "0" (err) : "memory"); - return err; + /* We should never fault when copying from a kernel buffer: */ + WARN_ON_FPU(err); } /* @@ -433,12 +439,15 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) static inline int __copy_fpstate_to_fpregs(struct fpu *fpu) { - if (use_xsave()) - return copy_kernel_to_xregs(&fpu->state.xsave, -1); - else if (use_fxsr()) - return copy_kernel_to_fxregs(&fpu->state.fxsave); - else - return copy_kernel_to_fregs(&fpu->state.fsave); + if (use_xsave()) { + copy_kernel_to_xregs(&fpu->state.xsave, -1); + return 0; + } else { + if (use_fxsr()) + return copy_kernel_to_fxregs(&fpu->state.fxsave); + else + return copy_kernel_to_fregs(&fpu->state.fsave); + } } static inline int copy_fpstate_to_fpregs(struct fpu *fpu) -- cgit v0.10.2 From 47f01e8cc23f3d041f6b9fb97627369eaf75ba7f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 12:22:29 +0200 Subject: x86/fpu: Fix FPU register read access to the current task Bobby Powers reported the following FPU warning during ELF coredumping: WARNING: CPU: 0 PID: 27452 at arch/x86/kernel/fpu/core.c:324 fpu__activate_stopped+0x8a/0xa0() This warning unearthed an invalid assumption about fpu__activate_stopped() that I added in: 67e97fc2ec57 ("x86/fpu: Rename init_fpu() to fpu__unlazy_stopped() and add debugging check") the old init_fpu() function had an (intentional but obscure) side effect: when FPU registers are accessed for the current task, for reading, then it synchronized live in-register FPU state with the fpstate by saving it. So fix this bug by saving the FPU if we are the current task. We'll still warn in fpu__save() if this is called for not yet stopped child tasks, so the debugging check is still preserved. Also rename the function to fpu__activate_fpstate(), because it's not exclusively used for stopped tasks, but for the current task as well. ( Note that this bug calls for a cleaner separation of access-for-read and access-for-modification FPU methods, but we'll do that in separate patches. ) Reported-by: Bobby Powers Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 99690be..62d13d5 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -22,7 +22,7 @@ * High level FPU state handling functions: */ extern void fpu__activate_curr(struct fpu *fpu); -extern void fpu__activate_stopped(struct fpu *fpu); +extern void fpu__activate_fpstate(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 01a1550..b410492 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -296,40 +296,47 @@ void fpu__activate_curr(struct fpu *fpu) EXPORT_SYMBOL_GPL(fpu__activate_curr); /* - * This function must be called before we modify a stopped child's - * fpstate. + * This function must be called before we read or write a task's fpstate. * - * If the child has not used the FPU before then initialize its + * If the task has not used the FPU before then initialize its * fpstate. * - * If the child has used the FPU before then unlazy it. + * If the task has used the FPU before then save and unlazy it. * - * [ After this function call, after registers in the fpstate are + * [ If this function is used for non-current child tasks, then + * after this function call, after registers in the fpstate are * modified and the child task has woken up, the child task will * restore the modified FPU state from the modified context. If we * didn't clear its lazy status here then the lazy in-registers * state pending on its former CPU could be restored, corrupting - * the modifications. ] + * the modifications. * - * This function is also called before we read a stopped child's - * FPU state - to make sure it's initialized if the child has - * no active FPU state. + * This function can be used for the current task as well, but + * only for reading the fpstate. Modifications to the fpstate + * will be lost on eagerfpu systems. ] * * TODO: A future optimization would be to skip the unlazying in * the read-only case, it's not strictly necessary for * read-only access to the context. 
*/ -void fpu__activate_stopped(struct fpu *child_fpu) +void fpu__activate_fpstate(struct fpu *fpu) { - WARN_ON_FPU(child_fpu == &current->thread.fpu); - - if (child_fpu->fpstate_active) { - child_fpu->last_cpu = -1; + /* + * If fpregs are active (in the current CPU), then + * copy them to the fpstate: + */ + if (fpu->fpregs_active) { + fpu__save(fpu); } else { - fpstate_init(&child_fpu->state); - - /* Safe to do for stopped child tasks: */ - child_fpu->fpstate_active = 1; + if (fpu->fpstate_active) { + /* Invalidate any lazy state: */ + fpu->last_cpu = -1; + } else { + fpstate_init(&fpu->state); + + /* Safe to do for current and for stopped child tasks: */ + fpu->fpstate_active = 1; + } } } diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 297b3da..a1f97d9 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -33,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - fpu__activate_stopped(fpu); + fpu__activate_fpstate(fpu); fpstate_sanitize_xstate(fpu); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -50,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - fpu__activate_stopped(fpu); + fpu__activate_fpstate(fpu); fpstate_sanitize_xstate(fpu); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -82,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - fpu__activate_stopped(fpu); + fpu__activate_fpstate(fpu); xsave = &fpu->state.xsave; @@ -111,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - fpu__activate_stopped(fpu); + fpu__activate_fpstate(fpu); xsave = &fpu->state.xsave; @@ -273,7 +273,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; - fpu__activate_stopped(fpu); + fpu__activate_fpstate(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); @@ -303,7 +303,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - fpu__activate_stopped(fpu); + fpu__activate_fpstate(fpu); fpstate_sanitize_xstate(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) -- cgit v0.10.2 From 0560281266b313400b622c5ddfafb0ee8e59c702 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 12:22:29 +0200 Subject: x86/fpu: Split out the fpu__activate_fpstate_read() method Currently fpu__activate_fpstate() is used for two distinct purposes: - read access by ptrace and core dumping, where in the core dumping case the current task's FPU state may be examined as well. - write access by ptrace, which modifies FPU registers and expects the modified registers to be reloaded on the next context switch. Split out the reading side into fpu__activate_fpstate_read(). ( Note that this is just a pure duplication of fpu__activate_fpstate() for the time being, we'll optimize the new function in the next patch. ) Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 62d13d5..3cc2086 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -22,6 +22,7 @@ * High level FPU state handling functions: */ extern void fpu__activate_curr(struct fpu *fpu); +extern void fpu__activate_fpstate_read(struct fpu *fpu); extern void fpu__activate_fpstate(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index b410492..174add3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -296,6 +296,35 @@ void fpu__activate_curr(struct fpu *fpu) EXPORT_SYMBOL_GPL(fpu__activate_curr); /* + * This function must be called before we read a task's fpstate. + * + * If the task has not used the FPU before then initialize its + * fpstate. + * + * If the task has used the FPU before then save it. + */ +void fpu__activate_fpstate_read(struct fpu *fpu) +{ + /* + * If fpregs are active (in the current CPU), then + * copy them to the fpstate: + */ + if (fpu->fpregs_active) { + fpu__save(fpu); + } else { + if (fpu->fpstate_active) { + /* Invalidate any lazy state: */ + fpu->last_cpu = -1; + } else { + fpstate_init(&fpu->state); + + /* Safe to do for current and for stopped child tasks: */ + fpu->fpstate_active = 1; + } + } +} + +/* * This function must be called before we read or write a task's fpstate. * * If the task has not used the FPU before then initialize its diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index a1f97d9..4e40585 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -33,7 +33,7 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - fpu__activate_fpstate(fpu); + fpu__activate_fpstate_read(fpu); fpstate_sanitize_xstate(fpu); return user_regset_copyout(&pos, &count, &kbuf, &ubuf, @@ -82,7 +82,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - fpu__activate_fpstate(fpu); + fpu__activate_fpstate_read(fpu); xsave = &fpu->state.xsave; @@ -273,7 +273,7 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, struct fpu *fpu = &target->thread.fpu; struct user_i387_ia32_struct env; - fpu__activate_fpstate(fpu); + fpu__activate_fpstate_read(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); -- cgit v0.10.2 From 9ba6b79102a594293c79f30319cabf476c5e300e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 12:22:29 +0200 Subject: x86/fpu: Optimize fpu__activate_fpstate_read() fpu__activate_fpstate_read() is used before FPU registers are read from the fpstate by ptrace and core dumping. It's not necessary to unlazy non-current child tasks in this case, since the reading of registers is non-destructive. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 174add3..06cb7e3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -312,10 +312,7 @@ void fpu__activate_fpstate_read(struct fpu *fpu) if (fpu->fpregs_active) { fpu__save(fpu); } else { - if (fpu->fpstate_active) { - /* Invalidate any lazy state: */ - fpu->last_cpu = -1; - } else { + if (!fpu->fpstate_active) { fpstate_init(&fpu->state); /* Safe to do for current and for stopped child tasks: */ -- cgit v0.10.2 From 6a81d7eb330479c908dab3a47ac33cfca8af5a67 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 12:22:29 +0200 Subject: x86/fpu: Rename fpu__activate_fpstate() to fpu__activate_fpstate_write() The remaining users of fpu__activate_fpstate() are all places that want to modify FPU registers, so rename the function to fpu__activate_fpstate_write() to match this usage. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 3cc2086..e3bd93c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -23,7 +23,7 @@ */ extern void fpu__activate_curr(struct fpu *fpu); extern void fpu__activate_fpstate_read(struct fpu *fpu); -extern void fpu__activate_fpstate(struct fpu *fpu); +extern void fpu__activate_fpstate_write(struct fpu *fpu); extern void fpu__save(struct fpu *fpu); extern void fpu__restore(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 06cb7e3..6b0955a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -345,7 +345,7 @@ void fpu__activate_fpstate_read(struct fpu *fpu) * the read-only case, it's not strictly necessary for * read-only access to the context. */ -void fpu__activate_fpstate(struct fpu *fpu) +void fpu__activate_fpstate_write(struct fpu *fpu) { /* * If fpregs are active (in the current CPU), then diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index 4e40585..dc60810 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -50,7 +50,7 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - fpu__activate_fpstate(fpu); + fpu__activate_fpstate_write(fpu); fpstate_sanitize_xstate(fpu); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, @@ -111,7 +111,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_xsave) return -ENODEV; - fpu__activate_fpstate(fpu); + fpu__activate_fpstate_write(fpu); xsave = &fpu->state.xsave; @@ -303,7 +303,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, struct user_i387_ia32_struct env; int ret; - fpu__activate_fpstate(fpu); + fpu__activate_fpstate_write(fpu); fpstate_sanitize_xstate(fpu); if (!static_cpu_has(X86_FEATURE_FPU)) -- cgit v0.10.2 From 343763c3b0a4fa2d29f7c3ba405abf8771b90876 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 12:22:29 +0200 Subject: x86/fpu: Optimize fpu__activate_fpstate_write() fpu__activate_fpstate_write() is used before ptrace writes to the fpstate context.
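( For illustration, a minimal sketch of how a ptrace-style accessor is expected to pair the read/write helpers - the function names and the raw memcpy() here are hypothetical placeholders, not the actual regset API:

	/* Read path: non-destructive, also valid for the current task: */
	int example_regs_get(struct fpu *fpu, void *buf, unsigned int size)
	{
		fpu__activate_fpstate_read(fpu);	/* save or init the fpstate */
		memcpy(buf, &fpu->state, size);		/* read-only access to the saved state */
		return 0;
	}

	/* Write path: only valid for stopped child tasks: */
	int example_regs_set(struct fpu *fpu, const void *buf, unsigned int size)
	{
		fpu__activate_fpstate_write(fpu);	/* unlazy or init the fpstate */
		memcpy(&fpu->state, buf, size);		/* reloaded on the next context switch */
		return 0;
	}
)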
Because it expects the modified registers to be reloaded on the next context switch, it's only valid to call this function for stopped child tasks. - add a debugging check for this assumption - remove code that only runs if the current task's FPU state needs to be saved, which cannot occur here - update comments to match the implementation Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 6b0955a..86a9a9a 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -322,47 +322,34 @@ void fpu__activate_fpstate_read(struct fpu *fpu) } /* - * This function must be called before we read or write a task's fpstate. + * This function must be called before we write a task's fpstate. * - * If the task has not used the FPU before then initialize its - * fpstate. - * - * If the task has used the FPU before then save and unlazy it. - * - * [ If this function is used for non-current child tasks, then - * after this function call, after registers in the fpstate are - * modified and the child task has woken up, the child task will - * restore the modified FPU state from the modified context. If we - * didn't clear its lazy status here then the lazy in-registers - * state pending on its former CPU could be restored, corrupting - * the modifications. + * If the task has used the FPU before then unlazy it. + * If the task has not used the FPU before then initialize its fpstate. - * - * This function can be used for the current task as well, but - * only for reading the fpstate. Modifications to the fpstate - * will be lost on eagerfpu systems. ] - * - * TODO: A future optimization would be to skip the unlazying in - * the read-only case, it's not strictly necessary for - * read-only access to the context. + * After this function call, after registers in the fpstate are + * modified and the child task has woken up, the child task will + * restore the modified FPU state from the modified context. If we + * didn't clear its lazy status here then the lazy in-registers + * state pending on its former CPU could be restored, corrupting + * the modifications. */ void fpu__activate_fpstate_write(struct fpu *fpu) { /* - * If fpregs are active (in the current CPU), then - * copy them to the fpstate: + * Only stopped child tasks can be used to modify the FPU + * state in the fpstate buffer: */ - if (fpu->fpregs_active) { - fpu__save(fpu); + WARN_ON_FPU(fpu == &current->thread.fpu); + + if (fpu->fpstate_active) { + /* Invalidate any lazy state: */ + fpu->last_cpu = -1; } else { - if (fpu->fpstate_active) { - /* Invalidate any lazy state: */ - fpu->last_cpu = -1; - } else { - fpstate_init(&fpu->state); + fpstate_init(&fpu->state); - /* Safe to do for current and for stopped child tasks: */ - fpu->fpstate_active = 1; - } + /* Safe to do for stopped child tasks: */ + fpu->fpstate_active = 1; } } -- cgit v0.10.2 From ce2a1e67f1738535b011a7b4bd42cc114b1d805f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 10:57:06 +0200 Subject: x86/fpu: Add debugging check to fpu__restore() The copy_fpstate_to_fpregs() function is never supposed to fail, so add a debugging check to its call site in fpu__restore(). Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 86a9a9a..874ef17 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -371,6 +371,8 @@ void fpu__restore(struct fpu *fpu) kernel_fpu_disable(); fpregs_activate(fpu); if (unlikely(copy_fpstate_to_fpregs(fpu))) { + /* Copying the kernel state to FPU registers should never fail: */ + WARN_ON_FPU(1); fpu__clear(fpu); force_sig_info(SIGSEGV, SEND_SIG_PRIV, current); } else { -- cgit v0.10.2 From 43b287b3f4d8665cd5a4909132259b663cc1c0e3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 10:59:31 +0200 Subject: x86/fpu: Add debugging checks to all copy_kernel_to_*() functions Copying from in-kernel FPU context buffers to FPU registers are never supposed to fault. Add debugging checks to copy_kernel_to_fxregs() and copy_kernel_to_fregs() to double check this assumption. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index e3bd93c..eb8fa0f 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -143,14 +143,22 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) static inline int copy_kernel_to_fxregs(struct fxregs_state *fx) { - if (config_enabled(CONFIG_X86_32)) - return check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); - else if (config_enabled(CONFIG_AS_FXSAVEQ)) - return check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + int err; - /* See comment in copy_fxregs_to_kernel() below. */ - return check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), - "m" (*fx)); + if (config_enabled(CONFIG_X86_32)) { + err = check_insn(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + } else { + if (config_enabled(CONFIG_AS_FXSAVEQ)) { + err = check_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx)); + } else { + /* See comment in copy_fxregs_to_kernel() below. */ + err = check_insn(rex64/fxrstor (%[fx]), "=m" (*fx), [fx] "R" (fx), "m" (*fx)); + } + } + /* Copying from a kernel buffer to FPU registers should never fail: */ + WARN_ON_FPU(err); + + return err; } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) @@ -167,7 +175,11 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) static inline int copy_kernel_to_fregs(struct fregs_state *fx) { - return check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); + + WARN_ON_FPU(err); + + return err; } static inline int copy_user_to_fregs(struct fregs_state __user *fx) -- cgit v0.10.2 From 3e1bf47e5c81c2b895db4bea67f70c3ca8e5b984 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 13:47:01 +0200 Subject: x86/fpu: Rename copy_fpstate_to_fpregs() to copy_kernel_to_fpregs() Bring the __copy_fpstate_to_fpregs() and copy_fpstate_to_fpregs() functions in line with the naming of other kernel-to-FPU-registers copying functions. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index eb8fa0f..6193b7a 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -450,7 +450,7 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline int __copy_fpstate_to_fpregs(struct fpu *fpu) +static inline int __copy_kernel_to_fpregs(struct fpu *fpu) { if (use_xsave()) { copy_kernel_to_xregs(&fpu->state.xsave, -1); @@ -463,7 +463,7 @@ static inline int __copy_fpstate_to_fpregs(struct fpu *fpu) } } -static inline int copy_fpstate_to_fpregs(struct fpu *fpu) +static inline int copy_kernel_to_fpregs(struct fpu *fpu) { /* * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is @@ -478,7 +478,7 @@ static inline int copy_fpstate_to_fpregs(struct fpu *fpu) : : [addr] "m" (fpu->fpregs_active)); } - return __copy_fpstate_to_fpregs(fpu); + return __copy_kernel_to_fpregs(fpu); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -647,7 +647,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) { if (fpu_switch.preload) { - if (unlikely(copy_fpstate_to_fpregs(new_fpu))) { + if (unlikely(copy_kernel_to_fpregs(new_fpu))) { WARN_ON_FPU(1); fpu__clear(new_fpu); } diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 874ef17..e0e0ee5 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -127,7 +127,7 @@ void __kernel_fpu_end(void) struct fpu *fpu = &current->thread.fpu; if (fpu->fpregs_active) { - if (WARN_ON_FPU(copy_fpstate_to_fpregs(fpu))) + if (WARN_ON_FPU(copy_kernel_to_fpregs(fpu))) fpu__clear(fpu); } else { __fpregs_deactivate_hw(); @@ -370,7 +370,7 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); fpregs_activate(fpu); - if (unlikely(copy_fpstate_to_fpregs(fpu))) { + if (unlikely(copy_kernel_to_fpregs(fpu))) { /* Copying the kernel state to FPU registers should never fail: */ WARN_ON_FPU(1); fpu__clear(fpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 989cfc0..66871f4 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7030,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - __copy_fpstate_to_fpregs(&vcpu->arch.guest_fpu); + __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu); trace_kvm_fpu(1); } -- cgit v0.10.2 From 9ccc27a5d297503e485373b69688d038a1d8e662 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 11:27:46 +0200 Subject: x86/fpu: Remove error return values from copy_kernel_to_*regs() functions None of the copy_kernel_to_*regs() FPU register copying functions are supposed to fail, and all of them have debugging checks that enforce this. Remove their return values and simplify their call sites, which have redundant error checks and error handling code paths. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H.
Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 6193b7a..da71d41 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -141,7 +141,7 @@ static inline int copy_fxregs_to_user(struct fxregs_state __user *fx) return user_insn(rex64/fxsave (%[fx]), "=m" (*fx), [fx] "R" (fx)); } -static inline int copy_kernel_to_fxregs(struct fxregs_state *fx) +static inline void copy_kernel_to_fxregs(struct fxregs_state *fx) { int err; @@ -157,8 +157,6 @@ static inline int copy_kernel_to_fxregs(struct fxregs_state *fx) } /* Copying from a kernel buffer to FPU registers should never fail: */ WARN_ON_FPU(err); - - return err; } static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) @@ -173,13 +171,11 @@ static inline int copy_user_to_fxregs(struct fxregs_state __user *fx) "m" (*fx)); } -static inline int copy_kernel_to_fregs(struct fregs_state *fx) +static inline void copy_kernel_to_fregs(struct fregs_state *fx) { int err = check_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx)); WARN_ON_FPU(err); - - return err; } static inline int copy_user_to_fregs(struct fregs_state __user *fx) @@ -450,20 +446,19 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline int __copy_kernel_to_fpregs(struct fpu *fpu) +static inline void __copy_kernel_to_fpregs(struct fpu *fpu) { if (use_xsave()) { copy_kernel_to_xregs(&fpu->state.xsave, -1); - return 0; } else { if (use_fxsr()) - return copy_kernel_to_fxregs(&fpu->state.fxsave); + copy_kernel_to_fxregs(&fpu->state.fxsave); else - return copy_kernel_to_fregs(&fpu->state.fsave); + copy_kernel_to_fregs(&fpu->state.fsave); } } -static inline int copy_kernel_to_fpregs(struct fpu *fpu) +static inline void copy_kernel_to_fpregs(struct fpu *fpu) { /* * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is @@ -478,7 +473,7 @@ static inline int copy_kernel_to_fpregs(struct fpu *fpu) : : [addr] "m" (fpu->fpregs_active)); } - return __copy_kernel_to_fpregs(fpu); + __copy_kernel_to_fpregs(fpu); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -646,12 +641,8 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) */ static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) { - if (fpu_switch.preload) { - if (unlikely(copy_kernel_to_fpregs(new_fpu))) { - WARN_ON_FPU(1); - fpu__clear(new_fpu); - } - } + if (fpu_switch.preload) + copy_kernel_to_fpregs(new_fpu); } /* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e0e0ee5..8470df4 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -126,12 +126,10 @@ void __kernel_fpu_end(void) { struct fpu *fpu = &current->thread.fpu; - if (fpu->fpregs_active) { - if (WARN_ON_FPU(copy_kernel_to_fpregs(fpu))) - fpu__clear(fpu); - } else { + if (fpu->fpregs_active) + copy_kernel_to_fpregs(fpu); + else __fpregs_deactivate_hw(); - } kernel_fpu_enable(); } @@ -370,14 +368,8 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); fpregs_activate(fpu); - if (unlikely(copy_kernel_to_fpregs(fpu))) { - /* Copying the kernel state to FPU registers should never fail: */ - WARN_ON_FPU(1); - fpu__clear(fpu); - force_sig_info(SIGSEGV, SEND_SIG_PRIV, current); - } else { - fpu->counter++; - } + copy_kernel_to_fpregs(fpu); + fpu->counter++;
kernel_fpu_enable(); } EXPORT_SYMBOL_GPL(fpu__restore); -- cgit v0.10.2 From 003e2e8b57e79709e4973f6cb48381b2ba396680 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 25 May 2015 11:59:35 +0200 Subject: x86/fpu: Standardize the parameter type of copy_kernel_to_fpregs() Bring the __copy_kernel_to_fpregs() and copy_kernel_to_fpregs() functions in line with the parameter passing convention of other kernel-to-FPU-registers copying functions: pass around an in-memory FPU register state pointer, instead of struct fpu *. NOTE: This patch also changes the assembly constraint of the FXSAVE-leak workaround from 'fpu->fpregs_active' to 'fpstate' - but that is fine, as we only need a valid memory address there for the FILDL instruction. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index da71d41..12acbb3 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -446,19 +446,19 @@ static inline int copy_fpregs_to_fpstate(struct fpu *fpu) return 0; } -static inline void __copy_kernel_to_fpregs(struct fpu *fpu) +static inline void __copy_kernel_to_fpregs(union fpregs_state *fpstate) { if (use_xsave()) { - copy_kernel_to_xregs(&fpu->state.xsave, -1); + copy_kernel_to_xregs(&fpstate->xsave, -1); } else { if (use_fxsr()) - copy_kernel_to_fxregs(&fpu->state.fxsave); + copy_kernel_to_fxregs(&fpstate->fxsave); else - copy_kernel_to_fregs(&fpu->state.fsave); + copy_kernel_to_fregs(&fpstate->fsave); } } -static inline void copy_kernel_to_fpregs(struct fpu *fpu) +static inline void copy_kernel_to_fpregs(union fpregs_state *fpstate) { /* * AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is @@ -470,10 +470,10 @@ static inline void copy_kernel_to_fpregs(struct fpu *fpu) "fnclex\n\t" "emms\n\t" "fildl %P[addr]" /* set F?P to defined value */ - : : [addr] "m" (fpu->fpregs_active)); + : : [addr] "m" (fpstate)); } - __copy_kernel_to_fpregs(fpu); + __copy_kernel_to_fpregs(fpstate); } extern int copy_fpstate_to_sigframe(void __user *buf, void __user *fp, int size); @@ -642,7 +642,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu) static inline void switch_fpu_finish(struct fpu *new_fpu, fpu_switch_t fpu_switch) { if (fpu_switch.preload) - copy_kernel_to_fpregs(new_fpu); + copy_kernel_to_fpregs(&new_fpu->state); } /* diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8470df4..79de954 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -127,7 +127,7 @@ void __kernel_fpu_end(void) struct fpu *fpu = &current->thread.fpu; if (fpu->fpregs_active) - copy_kernel_to_fpregs(fpu); + copy_kernel_to_fpregs(&fpu->state); else __fpregs_deactivate_hw(); @@ -368,7 +368,7 @@ void fpu__restore(struct fpu *fpu) /* Avoid __kernel_fpu_begin() right after fpregs_activate() */ kernel_fpu_disable(); fpregs_activate(fpu); - copy_kernel_to_fpregs(fpu); + copy_kernel_to_fpregs(&fpu->state); fpu->counter++; kernel_fpu_enable(); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 66871f4..26eaeb5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7030,7 +7030,7 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) kvm_put_guest_xcr0(vcpu); vcpu->guest_fpu_loaded = 1; __kernel_fpu_begin(); - __copy_kernel_to_fpregs(&vcpu->arch.guest_fpu); +
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state); trace_kvm_fpu(1); } -- cgit v0.10.2 From d65fcd608f6a9ac0316bd3efc75956ca329b6571 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 14:04:44 +0200 Subject: x86/fpu: Simplify copy_kernel_to_xregs_booting() copy_kernel_to_xregs_booting() has a second parameter that is the mask of xfeatures that should be copied - but this parameter is always -1. Simplify the call site of this function; this also makes it more similar to the function call signature of the other copy_kernel_to*regs() functions. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 12acbb3..9f38638 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -271,8 +271,9 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate) * This function is called only during boot time when x86 caps are not set * up and alternative can not be used yet. */ -static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate, u64 mask) +static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate) { + u64 mask = -1; u32 lmask = mask; u32 hmask = mask >> 32; int err = 0; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index ad4dd26..a580eb5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -281,7 +281,7 @@ static void __init setup_init_fpu_buf(void) /* * Init all the features state with header_bv being 0x0 */ - copy_kernel_to_xregs_booting(&init_fpstate.xsave, -1); + copy_kernel_to_xregs_booting(&init_fpstate.xsave); /* * Dump the init state again. This is to identify the init state -- cgit v0.10.2 From 83242c515881cc8642d726c3e648e41bf6c24551 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 May 2015 12:22:29 +0200 Subject: x86/fpu: Make WARN_ON_FPU() more robust in the !CONFIG_X86_DEBUG_FPU case Make sure the WARN_ON_FPU() macro consumes the macro argument, to avoid 'unused variable' build warnings if the only use of a variable is in debugging code. Cc: Andy Lutomirski Cc: Bobby Powers Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Thomas Gleixner Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 9f38638..3c3550c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -49,7 +49,7 @@ extern void fpu__resume_cpu(void); #ifdef CONFIG_X86_DEBUG_FPU # define WARN_ON_FPU(x) WARN_ON_ONCE(x) #else -# define WARN_ON_FPU(x) ({ 0; }) +# define WARN_ON_FPU(x) ({ (void)(x); 0; }) #endif /* -- cgit v0.10.2 From 0c4109bec0a6cde471bef3a21cd6f8384a614469 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:00 -0700 Subject: x86/fpu/xstate: Fix up bad get_xsave_addr() assumptions get_xsave_addr() assumes that if an xsave bit is present in the hardware (pcntxt_mask), it is present in a given xsave buffer. Due to a bug in the xsave code on all of the systems that have MPX (and thus all the users of this code), that has been a true assumption. But, the bug is getting fixed, so our assumption is not going to hold any more. It's quite possible (and normal) for an enabled state to be present on 'pcntxt_mask', but *not* in 'xstate_bv'.
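( To make the distinction concrete, a simplified sketch - the field names follow the structures used elsewhere in this series ('xfeatures_mask' is what gets written to XCR0, 'xsave->header.xfeatures' is the buffer's 'xstate_bv'), but the check itself is purely illustrative:

	if (xfeatures_mask & XSTATE_BNDCSR) {
		/* The hardware tracks MPX state and *can* write it out... */
		if (xsave->header.xfeatures & XSTATE_BNDCSR) {
			/* ...and the last XSAVE* wrote valid BNDCSR data
			   into this buffer. */
		} else {
			/* ...but the "init optimization" let the last
			   XSAVE* skip this field: it is in its init state
			   and the buffer contents are stale. */
		}
	}
)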
We need to consult 'xstate_bv'. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183700.1E739B34@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index a580eb5..af3700e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -382,19 +382,48 @@ void fpu__resume_cpu(void) * This is the API that is called to get xstate address in either * standard format or compacted format of xsave area. * + * Note that if there is no data for the field in the xsave buffer + * this will return NULL. + * * Inputs: - * xsave: base address of the xsave area; - * xstate: state which is defined in xsave.h (e.g. XSTATE_FP, XSTATE_SSE, - * etc.) + * xstate: the thread's storage area for all FPU data + * xstate_feature: state which is defined in xsave.h (e.g. + * XSTATE_FP, XSTATE_SSE, etc...) * Output: - * address of the state in the xsave area. + * address of the state in the xsave area, or NULL if the + * field is not present in the xsave buffer. */ -void *get_xsave_addr(struct xregs_state *xsave, int xstate) +void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) { - int feature = fls64(xstate) - 1; - if (!test_bit(feature, (unsigned long *)&xfeatures_mask)) + int feature_nr = fls64(xstate_feature) - 1; + /* + * Do we even *have* xsave state? + */ + if (!boot_cpu_has(X86_FEATURE_XSAVE)) + return NULL; + + xsave = &current->thread.fpu.state.xsave; + /* + * We should not ever be requesting features that we + * have not enabled. Remember that pcntxt_mask is + * what we write to the XCR0 register. + */ + WARN_ONCE(!(xfeatures_mask & xstate_feature), + "get of unsupported state"); + /* + * This assumes the last 'xsave*' instruction to + * have requested that 'xstate_feature' be saved. + * If it did not, we might be seeing an old value + * of the field in the buffer. + * + * This can happen because the last 'xsave' did not + * request that this feature be saved (unlikely) + * or because the "init optimization" caused it + * to not be saved. + */ + if (!(xsave->header.xfeatures & xstate_feature)) return NULL; - return (void *)xsave + xstate_comp_offsets[feature]; + return (void *)xsave + xstate_comp_offsets[feature_nr]; } EXPORT_SYMBOL_GPL(get_xsave_addr); -- cgit v0.10.2 From 04cd027bcba1ead7bfe39e7f1c6f4d993c4c3323 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:00 -0700 Subject: x86/fpu/xstate: Wrap get_xsave_addr() to make it safer The MPX code appears to be calling a low-level FPU function (copy_fpregs_to_fpstate()). This function is not able to be called in all contexts, although it is safe to call directly in some cases. Although probably correct, the current code is ugly and potentially error-prone. So, add a wrapper that calls the (slightly) higher-level fpu__save() (which is preempt-safe) and also ensures that we even *have* an FPU context (in the case that this was called when in lazy FPU mode).
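( A sketch of the intended calling convention for such a wrapper - this mirrors the MPX conversion later in this series, where 'XSTATE_BNDCSR' and 'struct bndcsr' are the names actually used; 'example_check_mpx_status' is a hypothetical caller:

	static int example_check_mpx_status(void)
	{
		const struct bndcsr *bndcsr;

		/* Safe whether or not current's FPU state is live in registers: */
		bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR);
		if (!bndcsr)
			return -EINVAL;	/* no FPU state, or field in its init state */

		/* ... inspect bndcsr->bndstatus / bndcsr->bndcfgu here ... */
		return 0;
	}
)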
Ingo had this to say about the details of when we need preemption disabled: > it's indeed generally unsafe to access/copy FPU registers with preemption enabled, > for two reasons: > > - on older systems that use FSAVE the instruction destroys FPU register > contents, which has to be handled carefully > > - even on newer systems if we copy to FPU registers (which this code doesn't) > then we don't want a context switch to occur in the middle of it, because a > context switch will write to the fpstate, potentially overwriting our new data > with old FPU state. > > But it's safe to access FPU registers with preemption enabled in a couple of > special cases: > > - potentially destructively saving FPU registers: the signal handling code does > this in copy_fpstate_to_sigframe(), because it can rely on the signal restore > side to restore the original FPU state. > > - reading FPU registers on modern systems: we don't do this anywhere at the > moment, mostly to keep symmetry with older systems where FSAVE is > destructive. > > - initializing FPU registers on modern systems: fpu__clear() does this. Here > it's safe because we don't copy from the fpstate. > > - directly writing FPU registers from user-space memory (!). We do this in > fpu__restore_sig(), and it's safe because neither context switches nor > irq-handler FPU use can corrupt the source context of the copy (which is > user-space memory). > > Note that the MPX code's current use of copy_fpregs_to_fpstate() was safe I think, > because: > > - MPX is predicated on eagerfpu, so the destructive F[N]SAVE instruction won't be > used. > > - the code was only reading FPU registers, and was doing it only in places that > guaranteed that an FPU state was already active (i.e. didn't do it in > kthreads) Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Suresh Siddha Cc: bp@alien8.de Link: http://lkml.kernel.org/r/20150607183700.AA881696@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index 3398946..4656b25 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -41,5 +41,6 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask); void *get_xsave_addr(struct xregs_state *xsave, int xstate); +const void *get_xsave_field_ptr(int xstate_field); #endif diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index af3700e..49d0d9b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -427,3 +427,35 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) return (void *)xsave + xstate_comp_offsets[feature_nr]; } EXPORT_SYMBOL_GPL(get_xsave_addr); + +/* + * This wraps up the common operations that need to occur when retrieving + * data from xsave state. It first ensures that the current task was + * using the FPU and retrieves the data into a buffer. It then calculates + * the offset of the requested field in the buffer. + * + * This function is safe to call whether the FPU is in use or not. + * + * Note that this only works on the current task. + * + * Inputs: + * @xsave_state: state which is defined in xsave.h (e.g. XSTATE_FP, + * XSTATE_SSE, etc...)
+ * Output: + * address of the state in the xsave area or NULL if the state + * is not present or is in its 'init state'. + */ +const void *get_xsave_field_ptr(int xsave_state) +{ + struct fpu *fpu = &current->thread.fpu; + + if (!fpu->fpstate_active) + return NULL; + /* + * fpu__save() takes the CPU's xstate registers + * and saves them off to the 'fpu' memory buffer. + */ + fpu__save(fpu); + + return get_xsave_addr(&fpu->state.xsave, xsave_state); +} -- cgit v0.10.2 From a84eeaa96b36a03188e1423349669c108d3a4bd7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:01 -0700 Subject: x86/mpx: Use the new get_xsave_field_ptr() API The MPX registers (bndcsr/bndcfgu/bndstatus) are not directly accessible via normal instructions. They essentially act as if they were floating point registers and are saved/restored along with those registers. There are two main paths in the MPX code where we care about the contents of these registers: 1. #BR (bounds) faults 2. the prctl() code where we are setting MPX up Both of those paths _might_ be called without the FPU having been used. That means that 'tsk->thread.fpu.state' might never be allocated. Also, copy_fpregs_to_fpstate() is not preempt-safe. It was a bug to call it without disabling preemption. The new get_xsave_field_ptr() calls fpu__save() instead and properly disables preemption. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Rik van Riel Cc: Suresh Siddha Cc: bp@alien8.de Link: http://lkml.kernel.org/r/20150607183701.BC0D37CF@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index f3c1b71..39f2d0f 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -60,8 +60,8 @@ #ifdef CONFIG_X86_INTEL_MPX siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xregs_state *xsave_buf); -int mpx_handle_bd_fault(struct xregs_state *xsave_buf); + struct task_struct *tsk); +int mpx_handle_bd_fault(struct task_struct *tsk); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); @@ -78,11 +78,11 @@ void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end); #else static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xregs_state *xsave_buf) + struct task_struct *tsk) { return NULL; } -static inline int mpx_handle_bd_fault(struct xregs_state *xsave_buf) +static inline int mpx_handle_bd_fault(struct task_struct *tsk) { return -EINVAL; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a2510f2..42f1531 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_64 @@ -371,9 +372,8 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - struct xregs_state *xsave_buf; enum ctx_state prev_state; - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; siginfo_t *info; prev_state = exception_enter(); @@ -392,12 +392,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) /* * We need to look at BNDSTATUS to resolve this exception. - * It is not directly accessible, though, so we need to - * do an xsave and then pull it out of the xsave buffer.
+ * A NULL here might mean that it is in its 'init state', + * which is all zeros, indicating that MPX was not + * responsible for the exception. */ - copy_fpregs_to_fpstate(&tsk->thread.fpu); - xsave_buf = &(tsk->thread.fpu.state.xsave); - bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) goto exit_trap; @@ -408,11 +407,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) */ switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { case 2: /* Bound directory has invalid entry. */ - if (mpx_handle_bd_fault(xsave_buf)) + if (mpx_handle_bd_fault(tsk)) goto exit_trap; break; /* Success, it was handled */ case 1: /* Bound violation. */ - info = mpx_generate_siginfo(regs, xsave_buf); + info = mpx_generate_siginfo(regs, tsk); if (IS_ERR(info)) { /* * We failed to decode the MPX instruction. Act as if diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 2e0dfd3..9d67e23 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -272,9 +272,9 @@ bad_opcode: * The caller is expected to kfree() the returned siginfo_t. */ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct xregs_state *xsave_buf) + struct task_struct *tsk) { - struct bndreg *bndregs, *bndreg; + const struct bndreg *bndregs, *bndreg; siginfo_t *info = NULL; struct insn insn; uint8_t bndregno; @@ -294,8 +294,8 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, err = -EINVAL; goto err_out; } - /* get the bndregs _area_ of the xsave structure */ - bndregs = get_xsave_addr(xsave_buf, XSTATE_BNDREGS); + /* get bndregs field from current task's xsave area */ + bndregs = get_xsave_field_ptr(XSTATE_BNDREGS); if (!bndregs) { err = -EINVAL; goto err_out; @@ -342,7 +342,7 @@ err_out: static __user void *task_get_bounds_dir(struct task_struct *tsk) { - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; if (!cpu_feature_enabled(X86_FEATURE_MPX)) return MPX_INVALID_BOUNDS_DIR; @@ -357,8 +357,7 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) * The bounds directory pointer is stored in a register * only accessible if we first do an xsave. */ - copy_fpregs_to_fpstate(&tsk->thread.fpu); - bndcsr = get_xsave_addr(&tsk->thread.fpu.state.xsave, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) return MPX_INVALID_BOUNDS_DIR; @@ -389,9 +388,10 @@ int mpx_enable_management(struct task_struct *tsk) * directory into XSAVE/XRSTOR Save Area and enable MPX through * XRSTOR instruction. * - * copy_xregs_to_kernel() is expected to be very expensive. Storing the bounds - * directory here means that we do not have to do xsave in the unmap - * path; we can just use mm->bd_addr instead. + * The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is + * expected to be relatively expensive. Storing the bounds + * directory here means that we do not have to do xsave in the + * unmap path; we can just use mm->bd_addr instead. */ bd_base = task_get_bounds_dir(tsk); down_write(&mm->mmap_sem); mm->bd_addr = bd_base; if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) @@ -497,12 +497,12 @@ out_unmap: * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, * and the size of each bound table is 4MB.
*/ -static int do_mpx_bt_fault(struct xregs_state *xsave_buf) +static int do_mpx_bt_fault(struct task_struct *tsk) { unsigned long bd_entry, bd_base; - struct bndcsr *bndcsr; + const struct bndcsr *bndcsr; - bndcsr = get_xsave_addr(xsave_buf, XSTATE_BNDCSR); + bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) return -EINVAL; /* @@ -525,7 +525,7 @@ static int do_mpx_bt_fault(struct xregs_state *xsave_buf) return allocate_bt((long __user *)bd_entry); } -int mpx_handle_bd_fault(struct xregs_state *xsave_buf) +int mpx_handle_bd_fault(struct task_struct *tsk) { /* * Userspace never asked us to manage the bounds tables, @@ -534,7 +534,7 @@ int mpx_handle_bd_fault(struct xregs_state *xsave_buf) if (!kernel_managing_mpx_tables(current->mm)) return -EINVAL; - if (do_mpx_bt_fault(xsave_buf)) { + if (do_mpx_bt_fault(tsk)) { force_sig(SIGSEGV, current); /* * The force_sig() is essentially "handling" this -- cgit v0.10.2 From 46a6e0cf1c6665a8e867d8f7798d7a3538633f03 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:02 -0700 Subject: x86/mpx: Clean up the code by not passing a task pointer around when unnecessary The MPX code can only work on the current task. You can not, for instance, enable MPX management in another process or thread. You can also not handle a fault for another process or thread. Despite this, we pass a task_struct around prolifically. This patch removes all of the task struct passing for code paths where the code can not deal with another task (which turns out to be all of them). This has no functional changes. It's just a cleanup. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: bp@alien8.de Link: http://lkml.kernel.org/r/20150607183702.6A81DA2C@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 39f2d0f..0cdd16a 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -59,9 +59,8 @@ MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) #ifdef CONFIG_X86_INTEL_MPX -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct task_struct *tsk); -int mpx_handle_bd_fault(struct task_struct *tsk); +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); +int mpx_handle_bd_fault(void); static inline int kernel_managing_mpx_tables(struct mm_struct *mm) { return (mm->bd_addr != MPX_INVALID_BOUNDS_DIR); @@ -77,12 +76,11 @@ static inline void mpx_mm_init(struct mm_struct *mm) void mpx_notify_unmap(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long start, unsigned long end); #else -static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct task_struct *tsk) +static inline siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { return NULL; } -static inline int mpx_handle_bd_fault(struct task_struct *tsk) +static inline int mpx_handle_bd_fault(void) { return -EINVAL; } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 8e04f51..53dbd2b 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -802,18 +802,18 @@ extern int get_tsc_mode(unsigned long adr); extern int set_tsc_mode(unsigned int val); /* Register/unregister a process' MPX related resource */ -#define MPX_ENABLE_MANAGEMENT(tsk) mpx_enable_management((tsk)) -#define MPX_DISABLE_MANAGEMENT(tsk) mpx_disable_management((tsk)) +#define MPX_ENABLE_MANAGEMENT() mpx_enable_management() +#define MPX_DISABLE_MANAGEMENT() 
mpx_disable_management() #ifdef CONFIG_X86_INTEL_MPX -extern int mpx_enable_management(struct task_struct *tsk); -extern int mpx_disable_management(struct task_struct *tsk); +extern int mpx_enable_management(void); +extern int mpx_disable_management(void); #else -static inline int mpx_enable_management(struct task_struct *tsk) +static inline int mpx_enable_management(void) { return -EINVAL; } -static inline int mpx_disable_management(struct task_struct *tsk) +static inline int mpx_disable_management(void) { return -EINVAL; } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 42f1531..cffff66 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -371,7 +371,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { - struct task_struct *tsk = current; enum ctx_state prev_state; const struct bndcsr *bndcsr; siginfo_t *info; @@ -407,11 +406,11 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) */ switch (bndcsr->bndstatus & MPX_BNDSTA_ERROR_CODE) { case 2: /* Bound directory has invalid entry. */ - if (mpx_handle_bd_fault(tsk)) + if (mpx_handle_bd_fault()) goto exit_trap; break; /* Success, it was handled */ case 1: /* Bound violation. */ - info = mpx_generate_siginfo(regs, tsk); + info = mpx_generate_siginfo(regs); if (IS_ERR(info)) { /* * We failed to decode the MPX instruction. Act as if diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 9d67e23..47e4a856 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -271,8 +271,7 @@ bad_opcode: * * The caller is expected to kfree() the returned siginfo_t. */ -siginfo_t *mpx_generate_siginfo(struct pt_regs *regs, - struct task_struct *tsk) +siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) { const struct bndreg *bndregs, *bndreg; siginfo_t *info = NULL; @@ -340,7 +339,7 @@ err_out: return ERR_PTR(err); } -static __user void *task_get_bounds_dir(struct task_struct *tsk) +static __user void *mpx_get_bounds_dir(void) { const struct bndcsr *bndcsr; @@ -376,10 +375,10 @@ static __user void *task_get_bounds_dir(struct task_struct *tsk) (bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK); } -int mpx_enable_management(struct task_struct *tsk) +int mpx_enable_management(void) { void __user *bd_base = MPX_INVALID_BOUNDS_DIR; - struct mm_struct *mm = tsk->mm; + struct mm_struct *mm = current->mm; int ret = 0; /* @@ -393,7 +392,7 @@ int mpx_enable_management(struct task_struct *tsk) * directory here means that we do not have to do xsave in the * unmap path; we can just use mm->bd_addr instead. */ - bd_base = task_get_bounds_dir(tsk); + bd_base = mpx_get_bounds_dir(); down_write(&mm->mmap_sem); mm->bd_addr = bd_base; if (mm->bd_addr == MPX_INVALID_BOUNDS_DIR) @@ -403,7 +402,7 @@ int mpx_enable_management(struct task_struct *tsk) return ret; } -int mpx_disable_management(struct task_struct *tsk) +int mpx_disable_management(void) { struct mm_struct *mm = current->mm; @@ -497,7 +496,7 @@ out_unmap: * bound table is 16KB. With 64-bit mode, the size of BD is 2GB, * and the size of each bound table is 4MB. 
*/ -static int do_mpx_bt_fault(struct task_struct *tsk) +static int do_mpx_bt_fault(void) { unsigned long bd_entry, bd_base; const struct bndcsr *bndcsr; @@ -525,7 +524,7 @@ static int do_mpx_bt_fault(struct task_struct *tsk) return allocate_bt((long __user *)bd_entry); } -int mpx_handle_bd_fault(struct task_struct *tsk) +int mpx_handle_bd_fault(void) { /* * Userspace never asked us to manage the bounds tables, @@ -534,7 +533,7 @@ int mpx_handle_bd_fault(struct task_struct *tsk) if (!kernel_managing_mpx_tables(current->mm)) return -EINVAL; - if (do_mpx_bt_fault(tsk)) { + if (do_mpx_bt_fault()) { force_sig(SIGSEGV, current); /* * The force_sig() is essentially "handling" this diff --git a/kernel/sys.c b/kernel/sys.c index a4e372b..8571296 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -92,10 +92,10 @@ # define SET_TSC_CTL(a) (-EINVAL) #endif #ifndef MPX_ENABLE_MANAGEMENT -# define MPX_ENABLE_MANAGEMENT(a) (-EINVAL) +# define MPX_ENABLE_MANAGEMENT() (-EINVAL) #endif #ifndef MPX_DISABLE_MANAGEMENT -# define MPX_DISABLE_MANAGEMENT(a) (-EINVAL) +# define MPX_DISABLE_MANAGEMENT() (-EINVAL) #endif #ifndef GET_FP_MODE # define GET_FP_MODE(a) (-EINVAL) @@ -2230,12 +2230,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_MPX_ENABLE_MANAGEMENT: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - error = MPX_ENABLE_MANAGEMENT(me); + error = MPX_ENABLE_MANAGEMENT(); break; case PR_MPX_DISABLE_MANAGEMENT: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; - error = MPX_DISABLE_MANAGEMENT(me); + error = MPX_DISABLE_MANAGEMENT(); break; case PR_SET_FP_MODE: error = SET_FP_MODE(me, arg2); -- cgit v0.10.2 From 3c1d32300920a446c67d697cd6b80f012ad06028 Mon Sep 17 00:00:00 2001 From: Qiaowei Ren Date: Sun, 7 Jun 2015 11:37:02 -0700 Subject: x86/mpx: Remove redundant MPX_BNDCFG_ADDR_MASK MPX_BNDCFG_ADDR_MASK is defined two times, so this patch removes redundant one. Signed-off-by: Qiaowei Ren Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183702.5F129376@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 0cdd16a..871e5e5 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -45,7 +45,6 @@ #define MPX_BNDSTA_TAIL 2 #define MPX_BNDCFG_TAIL 12 #define MPX_BNDSTA_ADDR_MASK (~((1UL< Date: Sun, 7 Jun 2015 11:37:02 -0700 Subject: x86/mpx: Restrict the mmap() size check to bounds tables The comment and code here are confusing. We do not currently allocate the bounds directory in the kernel. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183702.222CEC2A@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 47e4a856..d6e02f3 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -46,8 +46,8 @@ static unsigned long mpx_mmap(unsigned long len) vm_flags_t vm_flags; struct vm_area_struct *vma; - /* Only bounds table and bounds directory can be allocated here */ - if (len != MPX_BD_SIZE_BYTES && len != MPX_BT_SIZE_BYTES) + /* Only bounds table can be allocated here */ + if (len != MPX_BT_SIZE_BYTES) return -EINVAL; down_write(&mm->mmap_sem); -- cgit v0.10.2 From 8c3641e957a948f41f0174290096ed7a3b95e703 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:02 -0700 Subject: x86/mpx: Introduce a boot-time disable flag MPX has the _potential_ to cause some issues. Say part of your init system tried to protect one of its components from buffer overflows with MPX. If there were a false positive, it's possible that MPX could keep a system from booting. MPX could also potentially cause performance issues since it is present in hot paths like the unmap path. Allow it to be disabled at boot time. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150607183702.2E8B77AB@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 61ab162..8b7e5c3 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -937,6 +937,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. Enable debug messages at boot time. See Documentation/dynamic-debug-howto.txt for details. + nompx [X86] Disables Intel Memory Protection Extensions. + See Documentation/x86/intel_mpx.txt for more + information about the feature. + eagerfpu= [X86] on enable eager fpu restore off disable eager fpu restore diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 401ccb03..3956858 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -144,6 +144,22 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { } }; EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); +static int __init x86_mpx_setup(char *s) +{ + /* require an exact match without trailing characters */ + if (strlen(s)) + return 0; + + /* do not emit a message if the feature is not present */ + if (!boot_cpu_has(X86_FEATURE_MPX)) + return 1; + + setup_clear_cpu_cap(X86_FEATURE_MPX); + pr_info("nompx: Intel Memory Protection Extensions (MPX) disabled\n"); + return 1; +} +__setup("nompx", x86_mpx_setup); + #ifdef CONFIG_X86_32 static int cachesize_override = -1; static int disable_x86_serial_nr = 1; -- cgit v0.10.2 From e7126cf5f10aef1555cb99eddb7efff41bdf9566 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:03 -0700 Subject: x86/mpx: Trace #BR exceptions This is the first in a series of MPX tracing patches. I've found these extremely useful in the process of debugging applications and the kernel code itself. This exception hooks in to the bounds (#BR) exception very early and allows capturing the key registers which would influence how the exception is handled. Note that bndcfgu/bndstatus are technically still 64-bit registers even in 32-bit mode. 
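As a rough illustration of what this tracepoint captures, the sketch below decodes a bndstatus value the way the do_bounds() switch in this series does. The struct layout and the low-two-bit error-code mask are assumptions made for the example; the kernel uses 'struct bndcsr' and the MPX_BNDSTA_ERROR_CODE mask for the same job.

#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's 'struct bndcsr'. */
struct bndcsr_example {
	uint64_t bndcfgu;	/* user bounds config: enable bit + directory base */
	uint64_t bndstatus;	/* fault status: error code assumed in bits [1:0] */
};

static void decode_bndstatus(const struct bndcsr_example *csr)
{
	switch (csr->bndstatus & 0x3) {	/* assumed MPX_BNDSTA_ERROR_CODE */
	case 2:	/* bound directory has invalid entry: kernel may allocate a table */
		printf("#BR: bound directory has invalid entry\n");
		break;
	case 1:	/* bound violation: passed up to userspace as a signal */
		printf("#BR: bound violation\n");
		break;
	default:
		printf("#BR: unhandled bndstatus 0x%llx\n",
		       (unsigned long long)csr->bndstatus);
		break;
	}
}

int main(void)
{
	struct bndcsr_example csr = { .bndcfgu = 0x1, .bndstatus = 0x2 };

	decode_bndstatus(&csr);
	return 0;
}
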
Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183703.5FE2619A@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h new file mode 100644 index 0000000..5c03ec8 --- /dev/null +++ b/arch/x86/include/asm/trace/mpx.h @@ -0,0 +1,50 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mpx + +#if !defined(_TRACE_MPX_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_MPX_H + +#include + +#ifdef CONFIG_X86_INTEL_MPX + +TRACE_EVENT(bounds_exception_mpx, + + TP_PROTO(const struct bndcsr *bndcsr), + TP_ARGS(bndcsr), + + TP_STRUCT__entry( + __field(u64, bndcfgu) + __field(u64, bndstatus) + ), + + TP_fast_assign( + /* need to get rid of the 'const' on bndcsr */ + __entry->bndcfgu = (u64)bndcsr->bndcfgu; + __entry->bndstatus = (u64)bndcsr->bndstatus; + ), + + TP_printk("bndcfgu:0x%llx bndstatus:0x%llx", + __entry->bndcfgu, + __entry->bndstatus) +); + +#else + +/* + * This gets used outside of MPX-specific code, so we need a stub. + */ +static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr) +{ +} + +#endif /* CONFIG_X86_INTEL_MPX */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH asm/trace/ +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE mpx +#endif /* _TRACE_MPX_H */ + +/* This part must be outside protection */ +#include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index cffff66..36cb15b 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #ifdef CONFIG_X86_64 @@ -399,6 +400,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) if (!bndcsr) goto exit_trap; + trace_bounds_exception_mpx(bndcsr); /* * The error code field of the BNDSTATUS register communicates status * information of a bound range exception #BR or operation involving diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index d6e02f3..1fef52c 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -17,6 +17,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include + static const char *mpx_mapping_name(struct vm_area_struct *vma) { return "[mpx]"; -- cgit v0.10.2 From 97efebf1bc30a80122af3295ebdb726dbc040ca6 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:03 -0700 Subject: x86/mpx: Trace entry to bounds exception paths There are two basic things that can happen as the result of a bounds exception (#BR): 1. We allocate a new bounds table 2. We pass up a bounds exception to userspace. This patch adds a trace point for the case where we are passing the exception up to userspace with a signal. We are also explicit that we're printing out the inverse of the 'upper' that we encounter. If you want to filter, for instance, you need to ~ the value first. The reason we do this is because of how 'upper' is stored in the bounds table. If a pointer's range is: 0x1000 -> 0x2000 it is stored in the bounds table as (32-bits here for brevity): lower: 0x00001000 upper: 0xffffdfff That is so that an all 0's entry: lower: 0x00000000 upper: 0x00000000 corresponds to the "init" bounds which store a *range* of: 0x00000000 -> 0xffffffff That is, by far, the common case, and that lets us use the zero page, or deduplicate the memory, etc... The 'upper' stored in the table is gibberish to print by itself, so we print ~upper to get the *actual*, logical, human-readable value printed out. 
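The arithmetic above is easy to check in isolation. Here is a minimal, self-contained sketch of the one's-complement convention, using 32-bit values for brevity; 'struct bndreg_example' is a stand-in for the kernel's 'struct bndreg', which stores lower_bound/upper_bound the same way.

#include <assert.h>
#include <stdint.h>

struct bndreg_example {
	uint32_t lower_bound;
	uint32_t upper_bound;	/* stored as ~(logical upper bound) */
};

int main(void)
{
	/* A pointer bounded to 0x1000 -> 0x2000, as in the example above: */
	struct bndreg_example b = {
		.lower_bound = 0x00001000,
		.upper_bound = ~0x00002000u,	/* stored as 0xffffdfff */
	};
	assert(~b.upper_bound == 0x00002000u);

	/* The all-zero 'init' state covers the whole 32-bit range: */
	struct bndreg_example init = { 0, 0 };
	assert(init.lower_bound == 0x00000000u);
	assert(~init.upper_bound == 0xffffffffu);

	return 0;
}

This is also why the changelog warns about filtering: the trace prints ~upper for readability, but a trace filter still has to match the raw stored value.
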
Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183703.027BB9B0@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 5c03ec8..5c3af06 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -8,6 +8,40 @@ #ifdef CONFIG_X86_INTEL_MPX +TRACE_EVENT(mpx_bounds_register_exception, + + TP_PROTO(void *addr_referenced, + const struct bndreg *bndreg), + TP_ARGS(addr_referenced, bndreg), + + TP_STRUCT__entry( + __field(void *, addr_referenced) + __field(u64, lower_bound) + __field(u64, upper_bound) + ), + + TP_fast_assign( + __entry->addr_referenced = addr_referenced; + __entry->lower_bound = bndreg->lower_bound; + __entry->upper_bound = bndreg->upper_bound; + ), + /* + * Note that we are printing out the '~' of the upper + * bounds register here. It is actually stored in its + * one's complement form so that its 'init' state + * corresponds to all 0's. But, that looks like + * gibberish when printed out, so print out the 1's + * complement instead of the actual value here. Note + * though that you still need to specify filters for the + * actual value, not the displayed one. + */ + TP_printk("address referenced: 0x%p bounds: lower: 0x%llx ~upper: 0x%llx", + __entry->addr_referenced, + __entry->lower_bound, + ~__entry->upper_bound + ) +); + TRACE_EVENT(bounds_exception_mpx, TP_PROTO(const struct bndcsr *bndcsr), diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 1fef52c..75e5d70 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -335,6 +335,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs) err = -EINVAL; goto err_out; } + trace_mpx_bounds_register_exception(info->si_addr, bndreg); return info; err_out: /* info might be NULL, but kfree() handles that */ -- cgit v0.10.2 From 2a1dcb1f796ad37028df37d96fc7c5b6b1705a43 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:03 -0700 Subject: x86/mpx: Trace the attempts to find bounds tables There are two different events being traced here. They are doing similar things so share a trace "EVENT_CLASS" and are presented together. 1. Trace when MPX is zapping pages "mpx_unmap_zap": When MPX can not free an entire bounds table, it will instead try to zap unused parts of a bounds table to free the backing memory. This decreases RSS (resident set size) without decreasing the virtual space allocated for bounds tables. 2. Trace attempts to find bounds tables "mpx_unmap_search": This event traces any time we go looking to unmap a bounds table for a given virtual address range. This is useful to ensure that the kernel actually "tried" to free a bounds table versus times it succeeded in finding one. It might try and fail if it realized that a table was shared with an adjacent VMA which is not being unmapped. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183703.B9D2468B@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index 5c3af06..c13c6fa 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -63,6 +63,38 @@ TRACE_EVENT(bounds_exception_mpx, __entry->bndstatus) ); +DECLARE_EVENT_CLASS(mpx_range_trace, + + TP_PROTO(unsigned long start, + unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field(unsigned long, start) + __field(unsigned long, end) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("[0x%p:0x%p]", + (void *)__entry->start, + (void *)__entry->end + ) +); + +DEFINE_EVENT(mpx_range_trace, mpx_unmap_zap, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end) +); + +DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end) +); + #else /* diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 75e5d70..55729ee 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -668,6 +668,7 @@ static int zap_bt_entries(struct mm_struct *mm, len = min(vma->vm_end, end) - addr; zap_page_range(vma, addr, len, NULL); + trace_mpx_unmap_zap(addr, addr+len); vma = vma->vm_next; addr = vma->vm_start; @@ -840,6 +841,7 @@ static int mpx_unmap_tables(struct mm_struct *mm, long __user *bd_entry, *bde_start, *bde_end; unsigned long bt_addr; + trace_mpx_unmap_search(start, end); /* * "Edge" bounds tables are those which are being used by the region * (start -> end), but that may be shared with adjacent areas. If they -- cgit v0.10.2 From cd4996dce18b619bd7b3acf75c91f49c77f05a97 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:04 -0700 Subject: x86/mpx: Trace allocation of new bounds tables Bounds tables are a significant consumer of memory. It is important to know when they are being allocated. Add a trace point to trace whenever an allocation occurs and also its virtual address. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. 
Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183704.EC23A93E@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h index c13c6fa..173dd3b 100644 --- a/arch/x86/include/asm/trace/mpx.h +++ b/arch/x86/include/asm/trace/mpx.h @@ -95,6 +95,22 @@ DEFINE_EVENT(mpx_range_trace, mpx_unmap_search, TP_ARGS(start, end) ); +TRACE_EVENT(mpx_new_bounds_table, + + TP_PROTO(unsigned long table_vaddr), + TP_ARGS(table_vaddr), + + TP_STRUCT__entry( + __field(unsigned long, table_vaddr) + ), + + TP_fast_assign( + __entry->table_vaddr = table_vaddr; + ), + + TP_printk("table vaddr:%p", (void *)__entry->table_vaddr) +); + #else /* diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 55729ee..c17fd27 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -483,6 +483,7 @@ static int allocate_bt(long __user *bd_entry) ret = -EINVAL; goto out_unmap; } + trace_mpx_new_bounds_table(bt_addr); return 0; out_unmap: vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); -- cgit v0.10.2 From b0e9b09b3bd64e67bba862e238d3757b2482b6de Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:04 -0700 Subject: x86: Make is_64bit_mm() widely available The uprobes code has a nice helper, is_64bit_mm(), that consults both the runtime and compile-time flags for 32-bit support. Instead of reinventing the wheel, pull it in to an x86 header so we can use it for MPX. I prefer passing the 'mm' around to test_thread_flag(TIF_IA32) because it makes it explicit where the context is coming from. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183704.F0209999@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 883f6b9..5e8daee 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -142,6 +142,19 @@ static inline void arch_exit_mmap(struct mm_struct *mm) paravirt_arch_exit_mmap(mm); } +#ifdef CONFIG_X86_64 +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return !config_enabled(CONFIG_IA32_EMULATION) || + !(mm->context.ia32_compat == TIF_IA32); +} +#else +static inline bool is_64bit_mm(struct mm_struct *mm) +{ + return false; +} +#endif + static inline void arch_bprm_mm_init(struct mm_struct *mm, struct vm_area_struct *vma) { diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 0b81ad6..6647624 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -29,6 +29,7 @@ #include #include #include +#include /* Post-execution fixups. */ @@ -312,11 +313,6 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool } #ifdef CONFIG_X86_64 -static inline bool is_64bit_mm(struct mm_struct *mm) -{ - return !config_enabled(CONFIG_IA32_EMULATION) || - !(mm->context.ia32_compat == TIF_IA32); -} /* * If arch_uprobe->insn doesn't use rip-relative addressing, return * immediately. 
Otherwise, rewrite the instruction so that it accesses @@ -497,10 +493,6 @@ static void riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) } } #else /* 32-bit: */ -static inline bool is_64bit_mm(struct mm_struct *mm) -{ - return false; -} /* * No RIP-relative addressing on 32-bit */ -- cgit v0.10.2 From a1149fc83a1f97612e72ec24a0bdbabff7b85e77 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:04 -0700 Subject: x86/mpx: Add temporary variable to reduce masking When we allocate a bounds table, we call mmap(), then add a "valid" bit to the value before storing it in to the bounds directory. If we fail along the way, we go and mask that valid bit _back_ out. That seems a little silly, and this makes it much more clear when we have a plain address versus an actual table _entry_. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183704.3D69D5F4@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c17fd27..4f7fb7c 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -429,6 +429,7 @@ static int allocate_bt(long __user *bd_entry) unsigned long expected_old_val = 0; unsigned long actual_old_val = 0; unsigned long bt_addr; + unsigned long bd_new_entry; int ret = 0; /* @@ -441,7 +442,7 @@ static int allocate_bt(long __user *bd_entry) /* * Set the valid flag (kinda like _PAGE_PRESENT in a pte) */ - bt_addr = bt_addr | MPX_BD_ENTRY_VALID_FLAG; + bd_new_entry = bt_addr | MPX_BD_ENTRY_VALID_FLAG; /* * Go poke the address of the new bounds table in to the @@ -455,7 +456,7 @@ static int allocate_bt(long __user *bd_entry) * of the MPX code that have to pagefault_disable(). */ ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, bt_addr); + expected_old_val, bd_new_entry); if (ret) goto out_unmap; @@ -486,7 +487,7 @@ static int allocate_bt(long __user *bd_entry) trace_mpx_new_bounds_table(bt_addr); return 0; out_unmap: - vm_munmap(bt_addr & MPX_BT_ADDR_MASK, MPX_BT_SIZE_BYTES); + vm_munmap(bt_addr, MPX_BT_SIZE_BYTES); return ret; } -- cgit v0.10.2 From 54587653904c552c56b9dec153d7a89063394b09 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:04 -0700 Subject: x86/mpx: Introduce new 'directory entry' to 'addr' helper function Currently, to get from a bounds directory entry to the virtual address of a bounds table, we simply mask off a few low bits. However, the set of bits we mask off is different for 32-bit and 64-bit binaries. This breaks the operation out in to a helper function and also adds a temporary variable to store the result until we are sure we are returning one. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183704.007686CE@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 871e5e5..99d374e 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -45,7 +45,6 @@ #define MPX_BNDSTA_TAIL 2 #define MPX_BNDCFG_TAIL 12 #define MPX_BNDSTA_ADDR_MASK (~((1UL< Date: Sun, 7 Jun 2015 11:37:05 -0700 Subject: x86/mpx: Use 32-bit-only cmpxchg() for 32-bit apps user_atomic_cmpxchg_inatomic() actually looks at sizeof(*ptr) to figure out how many bytes to copy. 
If we run it on a 64-bit kernel with a 64-bit pointer, it will copy a 64-bit bounds directory entry. That's fine, except when we have 32-bit programs with 32-bit bounds directory entries and we only *want* 32-bits. This patch breaks the cmpxchg() operation out in to its own function and performs the 32-bit type swizzling in there. Note, the "64-bit" version of this code _would_ work on a 32-bit-only kernel. The issue this patch addresses is only for when the kernel's 'long' is mismatched from the size of the bounds directory entry of the process we are working on. The new helper modifies 'actual_old_val' or returns an error. But gcc doesn't know this, so it warns about 'actual_old_val' being unused. Shut it up with an uninitialized_var(). Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183705.672B115E@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 8cc7934..294ea20 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -419,6 +419,35 @@ int mpx_disable_management(void) return 0; } +static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, + unsigned long *curval, + unsigned long __user *addr, + unsigned long old_val, unsigned long new_val) +{ + int ret; + /* + * user_atomic_cmpxchg_inatomic() actually uses sizeof() + * the pointer that we pass to it to figure out how much + * data to cmpxchg. We have to be careful here not to + * pass a pointer to a 64-bit data type when we only want + * a 32-bit copy. + */ + if (is_64bit_mm(mm)) { + ret = user_atomic_cmpxchg_inatomic(curval, + addr, old_val, new_val); + } else { + u32 uninitialized_var(curval_32); + u32 old_val_32 = old_val; + u32 new_val_32 = new_val; + u32 __user *addr_32 = (u32 __user *)addr; + + ret = user_atomic_cmpxchg_inatomic(&curval_32, + addr_32, old_val_32, new_val_32); + *curval = curval_32; + } + return ret; +} + /* * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, @@ -426,6 +455,7 @@ int mpx_disable_management(void) */ static int allocate_bt(long __user *bd_entry) { + struct mm_struct *mm = current->mm; unsigned long expected_old_val = 0; unsigned long actual_old_val = 0; unsigned long bt_addr; @@ -455,8 +485,8 @@ static int allocate_bt(long __user *bd_entry) * mmap_sem at this point, unlike some of the other part * of the MPX code that have to pagefault_disable(). 
*/ - ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, bd_new_entry); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry, + expected_old_val, bd_new_entry); if (ret) goto out_unmap; @@ -710,15 +740,16 @@ static int unmap_single_bt(struct mm_struct *mm, long __user *bd_entry, unsigned long bt_addr) { unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - unsigned long actual_old_val = 0; + unsigned long uninitialized_var(actual_old_val); int ret; while (1) { int need_write = 1; + unsigned long cleared_bd_entry = 0; pagefault_disable(); - ret = user_atomic_cmpxchg_inatomic(&actual_old_val, bd_entry, - expected_old_val, 0); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, + bd_entry, expected_old_val, cleared_bd_entry); pagefault_enable(); if (!ret) break; -- cgit v0.10.2 From 613fcb7d3c79ec25b5913a6aa974c9047c31e68c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:05 -0700 Subject: x86/mpx: Support 32-bit binaries on 64-bit kernels Right now, the kernel can only switch between 64-bit and 32-bit binaries at compile time. This patch adds support for 32-bit binaries on 64-bit kernels when we support ia32 emulation. We essentially choose which set of table sizes to use when doing arithmetic for the bounds table calculations. This also uses a different approach for calculating the table indexes than before. I think the new one makes it much more clear what is going on, and allows us to share more code between the 32-bit and 64-bit cases. Based-on-patch-by: Qiaowei Ren Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183705.E01F21E2@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 99d374e..7a35495 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -13,49 +13,47 @@ #define MPX_BNDCFG_ENABLE_FLAG 0x1 #define MPX_BD_ENTRY_VALID_FLAG 0x1 -#ifdef CONFIG_X86_64 - -/* upper 28 bits [47:20] of the virtual address in 64-bit used to - * index into bounds directory (BD). - */ -#define MPX_BD_ENTRY_OFFSET 28 -#define MPX_BD_ENTRY_SHIFT 3 -/* bits [19:3] of the virtual address in 64-bit used to index into - * bounds table (BT). +/* + * The upper 28 bits [47:20] of the virtual address in 64-bit + * are used to index into bounds directory (BD). + * + * The directory is 2G (2^31) in size, and with 8-byte entries + * it has 2^28 entries. */ -#define MPX_BT_ENTRY_OFFSET 17 -#define MPX_BT_ENTRY_SHIFT 5 -#define MPX_IGN_BITS 3 -#define MPX_BD_ENTRY_TAIL 3 +#define MPX_BD_SIZE_BYTES_64 (1UL<<31) +#define MPX_BD_ENTRY_BYTES_64 8 +#define MPX_BD_NR_ENTRIES_64 (MPX_BD_SIZE_BYTES_64/MPX_BD_ENTRY_BYTES_64) -#else - -#define MPX_BD_ENTRY_OFFSET 20 -#define MPX_BD_ENTRY_SHIFT 2 -#define MPX_BT_ENTRY_OFFSET 10 -#define MPX_BT_ENTRY_SHIFT 4 -#define MPX_IGN_BITS 2 -#define MPX_BD_ENTRY_TAIL 2 +/* + * The 32-bit directory is 4MB (2^22) in size, and with 4-byte + * entries it has 2^20 entries. + */ +#define MPX_BD_SIZE_BYTES_32 (1UL<<22) +#define MPX_BD_ENTRY_BYTES_32 4 +#define MPX_BD_NR_ENTRIES_32 (MPX_BD_SIZE_BYTES_32/MPX_BD_ENTRY_BYTES_32) -#endif +/* + * A 64-bit table is 4MB total in size, and an entry is + * 4 64-bit pointers in size. 
+ */ +#define MPX_BT_SIZE_BYTES_64 (1UL<<22) +#define MPX_BT_ENTRY_BYTES_64 32 +#define MPX_BT_NR_ENTRIES_64 (MPX_BT_SIZE_BYTES_64/MPX_BT_ENTRY_BYTES_64) -#define MPX_BD_SIZE_BYTES (1UL<<(MPX_BD_ENTRY_OFFSET+MPX_BD_ENTRY_SHIFT)) -#define MPX_BT_SIZE_BYTES (1UL<<(MPX_BT_ENTRY_OFFSET+MPX_BT_ENTRY_SHIFT)) +/* + * A 32-bit table is 16kB total in size, and an entry is + * 4 32-bit pointers in size. + */ +#define MPX_BT_SIZE_BYTES_32 (1UL<<14) +#define MPX_BT_ENTRY_BYTES_32 16 +#define MPX_BT_NR_ENTRIES_32 (MPX_BT_SIZE_BYTES_32/MPX_BT_ENTRY_BYTES_32) #define MPX_BNDSTA_TAIL 2 #define MPX_BNDCFG_TAIL 12 #define MPX_BNDSTA_ADDR_MASK (~((1UL<>(MPX_BT_ENTRY_OFFSET+ \ - MPX_IGN_BITS)) & MPX_BD_ENTRY_MASK) << MPX_BD_ENTRY_SHIFT) -#define MPX_GET_BT_ENTRY_OFFSET(addr) ((((addr)>>MPX_IGN_BITS) & \ - MPX_BT_ENTRY_MASK) << MPX_BT_ENTRY_SHIFT) - #ifdef CONFIG_X86_INTEL_MPX siginfo_t *mpx_generate_siginfo(struct pt_regs *regs); int mpx_handle_bd_fault(void); diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 294ea20..e323ef6 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -34,6 +34,22 @@ static int is_mpx_vma(struct vm_area_struct *vma) return (vma->vm_ops == &mpx_vma_ops); } +static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BD_SIZE_BYTES_64; + else + return MPX_BD_SIZE_BYTES_32; +} + +static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_SIZE_BYTES_64; + else + return MPX_BT_SIZE_BYTES_32; +} + /* * This is really a simplified "vm_mmap". it only handles MPX * bounds tables (the bounds directory is user-allocated). @@ -50,7 +66,7 @@ static unsigned long mpx_mmap(unsigned long len) struct vm_area_struct *vma; /* Only bounds table can be allocated here */ - if (len != MPX_BT_SIZE_BYTES) + if (len != mpx_bt_size_bytes(mm)) return -EINVAL; down_write(&mm->mmap_sem); @@ -449,13 +465,12 @@ static int mpx_cmpxchg_bd_entry(struct mm_struct *mm, } /* - * With 32-bit mode, MPX_BT_SIZE_BYTES is 4MB, and the size of each - * bounds table is 16KB. With 64-bit mode, MPX_BT_SIZE_BYTES is 2GB, + * With 32-bit mode, a bounds directory is 4MB, and the size of each + * bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB, * and the size of each bounds table is 4MB. */ -static int allocate_bt(long __user *bd_entry) +static int allocate_bt(struct mm_struct *mm, long __user *bd_entry) { - struct mm_struct *mm = current->mm; unsigned long expected_old_val = 0; unsigned long actual_old_val = 0; unsigned long bt_addr; @@ -466,7 +481,7 @@ static int allocate_bt(long __user *bd_entry) * Carve the virtual space out of userspace for the new * bounds table: */ - bt_addr = mpx_mmap(MPX_BT_SIZE_BYTES); + bt_addr = mpx_mmap(mpx_bt_size_bytes(mm)); if (IS_ERR((void *)bt_addr)) return PTR_ERR((void *)bt_addr); /* @@ -517,7 +532,7 @@ static int allocate_bt(long __user *bd_entry) trace_mpx_new_bounds_table(bt_addr); return 0; out_unmap: - vm_munmap(bt_addr, MPX_BT_SIZE_BYTES); + vm_munmap(bt_addr, mpx_bt_size_bytes(mm)); return ret; } @@ -536,6 +551,7 @@ static int do_mpx_bt_fault(void) { unsigned long bd_entry, bd_base; const struct bndcsr *bndcsr; + struct mm_struct *mm = current->mm; bndcsr = get_xsave_field_ptr(XSTATE_BNDCSR); if (!bndcsr) @@ -554,10 +570,10 @@ static int do_mpx_bt_fault(void) * the directory is. 
*/ if ((bd_entry < bd_base) || - (bd_entry >= bd_base + MPX_BD_SIZE_BYTES)) + (bd_entry >= bd_base + mpx_bd_size_bytes(mm))) return -EINVAL; - return allocate_bt((long __user *)bd_entry); + return allocate_bt(mm, (long __user *)bd_entry); } int mpx_handle_bd_fault(void) @@ -789,7 +805,115 @@ static int unmap_single_bt(struct mm_struct *mm, * avoid recursion, do_munmap() will check whether it comes * from one bounds table through VM_MPX flag. */ - return do_munmap(mm, bt_addr, MPX_BT_SIZE_BYTES); + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); +} + +static inline int bt_entry_size_bytes(struct mm_struct *mm) +{ + if (is_64bit_mm(mm)) + return MPX_BT_ENTRY_BYTES_64; + else + return MPX_BT_ENTRY_BYTES_32; +} + +/* + * Take a virtual address and turns it in to the offset in bytes + * inside of the bounds table where the bounds table entry + * controlling 'addr' can be found. + */ +static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, + unsigned long addr) +{ + unsigned long bt_table_nr_entries; + unsigned long offset = addr; + + if (is_64bit_mm(mm)) { + /* Bottom 3 bits are ignored on 64-bit */ + offset >>= 3; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_64; + } else { + /* Bottom 2 bits are ignored on 32-bit */ + offset >>= 2; + bt_table_nr_entries = MPX_BT_NR_ENTRIES_32; + } + /* + * We know the size of the table in to which we are + * indexing, and we have eliminated all the low bits + * which are ignored for indexing. + * + * Mask out all the high bits which we do not need + * to index in to the table. Note that the tables + * are always powers of two so this gives us a proper + * mask. + */ + offset &= (bt_table_nr_entries-1); + /* + * We now have an entry offset in terms of *entries* in + * the table. We need to scale it back up to bytes. + */ + offset *= bt_entry_size_bytes(mm); + return offset; +} + +/* + * How much virtual address space does a single bounds + * directory entry cover? + * + * Note, we need a long long because 4GB doesn't fit in + * to a long on 32-bit. + */ +static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) +{ + unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); + if (is_64bit_mm(mm)) + return virt_space / MPX_BD_NR_ENTRIES_64; + else + return virt_space / MPX_BD_NR_ENTRIES_32; +} + +/* + * Return an offset in terms of bytes in to the bounds + * directory where the bounds directory entry for a given + * virtual address resides. + * + * This has to be in bytes because the directory entries + * are different sizes on 64/32 bit. + */ +static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, + unsigned long addr) +{ + /* + * There are several ways to derive the bd offsets. We + * use the following approach here: + * 1. We know the size of the virtual address space + * 2. We know the number of entries in a bounds table + * 3. We know that each entry covers a fixed amount of + * virtual address space. + * So, we can just divide the virtual address by the + * virtual space used by one entry to determine which + * entry "controls" the given virtual address. + */ + if (is_64bit_mm(mm)) { + int bd_entry_size = 8; /* 64-bit pointer */ + /* + * Take the 64-bit addressing hole in to account. 
+ */ + addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1); + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; + } else { + int bd_entry_size = 4; /* 32-bit pointer */ + /* + * 32-bit has no hole so this case needs no mask + */ + return (addr / bd_entry_virt_space(mm)) * bd_entry_size; + } + /* + * The two return calls above are exact copies. If we + * pull out a single copy and put it in here, gcc won't + * realize that we're doing a power-of-2 divide and use + * shifts. It uses a real divide. If we put them up + * there, it manages to figure it out (gcc 4.8.3). + */ } /* @@ -803,6 +927,7 @@ static int unmap_shared_bt(struct mm_struct *mm, unsigned long end, bool prev_shared, bool next_shared) { unsigned long bt_addr; + unsigned long start_off, end_off; int ret; ret = get_bt_addr(mm, bd_entry, &bt_addr); @@ -814,17 +939,20 @@ static int unmap_shared_bt(struct mm_struct *mm, if (ret) return ret; + start_off = mpx_get_bt_entry_offset_bytes(mm, start); + end_off = mpx_get_bt_entry_offset_bytes(mm, end); + if (prev_shared && next_shared) ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); + bt_addr + start_off, + bt_addr + end_off); else if (prev_shared) ret = zap_bt_entries(mm, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(start), - bt_addr+MPX_BT_SIZE_BYTES); + bt_addr + start_off, + bt_addr + mpx_bt_size_bytes(mm)); else if (next_shared) ret = zap_bt_entries(mm, bt_addr, bt_addr, - bt_addr+MPX_GET_BT_ENTRY_OFFSET(end)); + bt_addr + end_off); else ret = unmap_single_bt(mm, bd_entry, bt_addr); @@ -845,8 +973,8 @@ static int unmap_edge_bts(struct mm_struct *mm, struct vm_area_struct *prev, *next; bool prev_shared = false, next_shared = false; - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); + bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); /* * Check whether bde_start and bde_end are shared with adjacent @@ -858,10 +986,10 @@ static int unmap_edge_bts(struct mm_struct *mm, * in to 'next'. */ next = find_vma_prev(mm, start, &prev); - if (prev && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(prev->vm_end-1)) + if (prev && (mm->bd_addr + mpx_get_bd_entry_offset(mm, prev->vm_end-1)) == bde_start) prev_shared = true; - if (next && (mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(next->vm_start)) + if (next && (mm->bd_addr + mpx_get_bd_entry_offset(mm, next->vm_start)) == bde_end) next_shared = true; @@ -927,8 +1055,8 @@ static int mpx_unmap_tables(struct mm_struct *mm, * 1. fully covered * 2. not at the edges of the mapping, even if full aligned */ - bde_start = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(start); - bde_end = mm->bd_addr + MPX_GET_BD_ENTRY_OFFSET(end-1); + bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { ret = get_bt_addr(mm, bd_entry, &bt_addr); switch (ret) { -- cgit v0.10.2 From 3ceaccdf92073d193f0bfbe24280dd736e3fed86 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:06 -0700 Subject: x86/mpx: Rewrite the unmap code The MPX code needs to clear out bounds tables for memory which is no longer in use. We do this when a userspace mapping is torn down (unmapped). There are two modes: 1. An entire bounds table becomes unused, and can be freed and its pointer removed from the bounds directory. 
This happens either when a large mapping is torn down, or when a small mapping is torn down and it is the last mapping "covered" by a bounds table. 2. Only part of a bounds table becomes unused, in which case we free the backing memory as if MADV_DONTNEED was called. The old code was a spaghetti mess of "edge" bounds tables where the edges were handled specially, even if we were unmapping an entire one. Non-edge bounds tables are always fully unmapped, but share a different code path from the edge ones. The old code had a bug where it was unmapping too much memory. I worked on fixing it for two days and gave up. I didn't write the original code. I didn't particularly like it, but it worked, so I left it. After my debug session, I realized it was undebuggagle *and* buggy, so out it went. I also wrote a new unmapping test program which uncovers bugs pretty nicely. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183706.DCAEC67D@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index e323ef6..18fcf737 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -704,110 +704,6 @@ static int get_bt_addr(struct mm_struct *mm, return 0; } -/* - * Free the backing physical pages of bounds table 'bt_addr'. - * Assume start...end is within that bounds table. - */ -static int zap_bt_entries(struct mm_struct *mm, - unsigned long bt_addr, - unsigned long start, unsigned long end) -{ - struct vm_area_struct *vma; - unsigned long addr, len; - - /* - * Find the first overlapping vma. If vma->vm_start > start, there - * will be a hole in the bounds table. This -EINVAL return will - * cause a SIGSEGV. - */ - vma = find_vma(mm, start); - if (!vma || vma->vm_start > start) - return -EINVAL; - - /* - * A NUMA policy on a VM_MPX VMA could cause this bouds table to - * be split. So we need to look across the entire 'start -> end' - * range of this bounds table, find all of the VM_MPX VMAs, and - * zap only those. - */ - addr = start; - while (vma && vma->vm_start < end) { - /* - * We followed a bounds directory entry down - * here. If we find a non-MPX VMA, that's bad, - * so stop immediately and return an error. This - * probably results in a SIGSEGV. - */ - if (!is_mpx_vma(vma)) - return -EINVAL; - - len = min(vma->vm_end, end) - addr; - zap_page_range(vma, addr, len, NULL); - trace_mpx_unmap_zap(addr, addr+len); - - vma = vma->vm_next; - addr = vma->vm_start; - } - - return 0; -} - -static int unmap_single_bt(struct mm_struct *mm, - long __user *bd_entry, unsigned long bt_addr) -{ - unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; - unsigned long uninitialized_var(actual_old_val); - int ret; - - while (1) { - int need_write = 1; - unsigned long cleared_bd_entry = 0; - - pagefault_disable(); - ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, - bd_entry, expected_old_val, cleared_bd_entry); - pagefault_enable(); - if (!ret) - break; - if (ret == -EFAULT) - ret = mpx_resolve_fault(bd_entry, need_write); - /* - * If we could not resolve the fault, consider it - * userspace's fault and error out. - */ - if (ret) - return ret; - } - /* - * The cmpxchg was performed, check the results. - */ - if (actual_old_val != expected_old_val) { - /* - * Someone else raced with us to unmap the table. - * There was no bounds table pointed to by the - * directory, so declare success. Somebody freed - * it. 
- */ - if (!actual_old_val) - return 0; - /* - * Something messed with the bounds directory - * entry. We hold mmap_sem for read or write - * here, so it could not be a _new_ bounds table - * that someone just allocated. Something is - * wrong, so pass up the error and SIGSEGV. - */ - return -EINVAL; - } - - /* - * Note, we are likely being called under do_munmap() already. To - * avoid recursion, do_munmap() will check whether it comes - * from one bounds table through VM_MPX flag. - */ - return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); -} - static inline int bt_entry_size_bytes(struct mm_struct *mm) { if (is_64bit_mm(mm)) @@ -872,13 +768,69 @@ static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) } /* - * Return an offset in terms of bytes in to the bounds - * directory where the bounds directory entry for a given - * virtual address resides. - * - * This has to be in bytes because the directory entries - * are different sizes on 64/32 bit. + * Free the backing physical pages of bounds table 'bt_addr'. + * Assume start...end is within that bounds table. */ +static noinline int zap_bt_entries_mapping(struct mm_struct *mm, + unsigned long bt_addr, + unsigned long start_mapping, unsigned long end_mapping) +{ + struct vm_area_struct *vma; + unsigned long addr, len; + unsigned long start; + unsigned long end; + + /* + * if we 'end' on a boundary, the offset will be 0 which + * is not what we want. Back it up a byte to get the + * last bt entry. Then once we have the entry itself, + * move 'end' back up by the table entry size. + */ + start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping); + end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1); + /* + * Move end back up by one entry. Among other things + * this ensures that it remains page-aligned and does + * not screw up zap_page_range() + */ + end += bt_entry_size_bytes(mm); + + /* + * Find the first overlapping vma. If vma->vm_start > start, there + * will be a hole in the bounds table. This -EINVAL return will + * cause a SIGSEGV. + */ + vma = find_vma(mm, start); + if (!vma || vma->vm_start > start) + return -EINVAL; + + /* + * A NUMA policy on a VM_MPX VMA could cause this bounds table to + * be split. So we need to look across the entire 'start -> end' + * range of this bounds table, find all of the VM_MPX VMAs, and + * zap only those. + */ + addr = start; + while (vma && vma->vm_start < end) { + /* + * We followed a bounds directory entry down + * here. If we find a non-MPX VMA, that's bad, + * so stop immediately and return an error. This + * probably results in a SIGSEGV. + */ + if (!is_mpx_vma(vma)) + return -EINVAL; + + len = min(vma->vm_end, end) - addr; + zap_page_range(vma, addr, len, NULL); + trace_mpx_unmap_zap(addr, addr+len); + + vma = vma->vm_next; + addr = vma->vm_start; + } + return 0; +} + static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, unsigned long addr) { @@ -916,69 +868,80 @@ static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm, */ } -/* - * If the bounds table pointed by bounds directory 'bd_entry' is - * not shared, unmap this whole bounds table. Otherwise, only free - * those backing physical pages of bounds table entries covered - * in this virtual address region start...end. 
- */ -static int unmap_shared_bt(struct mm_struct *mm, - long __user *bd_entry, unsigned long start, - unsigned long end, bool prev_shared, bool next_shared) +static int unmap_entire_bt(struct mm_struct *mm, + long __user *bd_entry, unsigned long bt_addr) { - unsigned long bt_addr; - unsigned long start_off, end_off; + unsigned long expected_old_val = bt_addr | MPX_BD_ENTRY_VALID_FLAG; + unsigned long uninitialized_var(actual_old_val); int ret; - ret = get_bt_addr(mm, bd_entry, &bt_addr); + while (1) { + int need_write = 1; + unsigned long cleared_bd_entry = 0; + + pagefault_disable(); + ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, + bd_entry, expected_old_val, cleared_bd_entry); + pagefault_enable(); + if (!ret) + break; + if (ret == -EFAULT) + ret = mpx_resolve_fault(bd_entry, need_write); + /* + * If we could not resolve the fault, consider it + * userspace's fault and error out. + */ + if (ret) + return ret; + } /* - * We could see an "error" ret for not-present bounds - * tables (not really an error), or actual errors, but - * stop unmapping either way. + * The cmpxchg was performed, check the results. */ - if (ret) - return ret; - - start_off = mpx_get_bt_entry_offset_bytes(mm, start); - end_off = mpx_get_bt_entry_offset_bytes(mm, end); - - if (prev_shared && next_shared) - ret = zap_bt_entries(mm, bt_addr, - bt_addr + start_off, - bt_addr + end_off); - else if (prev_shared) - ret = zap_bt_entries(mm, bt_addr, - bt_addr + start_off, - bt_addr + mpx_bt_size_bytes(mm)); - else if (next_shared) - ret = zap_bt_entries(mm, bt_addr, bt_addr, - bt_addr + end_off); - else - ret = unmap_single_bt(mm, bd_entry, bt_addr); - - return ret; + if (actual_old_val != expected_old_val) { + /* + * Someone else raced with us to unmap the table. + * That is OK, since we were both trying to do + * the same thing. Declare success. + */ + if (!actual_old_val) + return 0; + /* + * Something messed with the bounds directory + * entry. We hold mmap_sem for read or write + * here, so it could not be a _new_ bounds table + * that someone just allocated. Something is + * wrong, so pass up the error and SIGSEGV. + */ + return -EINVAL; + } + /* + * Note, we are likely being called under do_munmap() already. To + * avoid recursion, do_munmap() will check whether it comes + * from one bounds table through VM_MPX flag. + */ + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); } -/* - * A virtual address region being munmap()ed might share bounds table - * with adjacent VMAs. We only need to free the backing physical - * memory of these shared bounds tables entries covered in this virtual - * address region. - */ -static int unmap_edge_bts(struct mm_struct *mm, - unsigned long start, unsigned long end) +static int try_unmap_single_bt(struct mm_struct *mm, + unsigned long start, unsigned long end) { + struct vm_area_struct *next; + struct vm_area_struct *prev; + /* + * "bta" == Bounds Table Area: the area controlled by the + * bounds table that we are unmapping. + */ + unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1); + unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm); + unsigned long uninitialized_var(bt_addr); + void __user *bde_vaddr; int ret; - long __user *bde_start, *bde_end; - struct vm_area_struct *prev, *next; - bool prev_shared = false, next_shared = false; - - bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); - bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); - /* - * Check whether bde_start and bde_end are shared with adjacent - * VMAs. 
+ * We know 'start' and 'end' lie within an area controlled + * by a single bounds table. See if there are any other + * VMAs controlled by that bounds table. If there are not + * then we can "expand" the are we are unmapping to possibly + * cover the entire table. * * We already unliked the VMAs from the mm's rbtree so 'start' * is guaranteed to be in a hole. This gets us the first VMA @@ -986,102 +949,64 @@ static int unmap_edge_bts(struct mm_struct *mm, * in to 'next'. */ next = find_vma_prev(mm, start, &prev); - if (prev && (mm->bd_addr + mpx_get_bd_entry_offset(mm, prev->vm_end-1)) - == bde_start) - prev_shared = true; - if (next && (mm->bd_addr + mpx_get_bd_entry_offset(mm, next->vm_start)) - == bde_end) - next_shared = true; - - /* - * This virtual address region being munmap()ed is only - * covered by one bounds table. - * - * In this case, if this table is also shared with adjacent - * VMAs, only part of the backing physical memory of the bounds - * table need be freeed. Otherwise the whole bounds table need - * be unmapped. - */ - if (bde_start == bde_end) { - return unmap_shared_bt(mm, bde_start, start, end, - prev_shared, next_shared); + if ((!prev || prev->vm_end <= bta_start_vaddr) && + (!next || next->vm_start >= bta_end_vaddr)) { + /* + * No neighbor VMAs controlled by same bounds + * table. Try to unmap the whole thing + */ + start = bta_start_vaddr; + end = bta_end_vaddr; } + bde_vaddr = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); + ret = get_bt_addr(mm, bde_vaddr, &bt_addr); /* - * If more than one bounds tables are covered in this virtual - * address region being munmap()ed, we need to separately check - * whether bde_start and bde_end are shared with adjacent VMAs. + * No bounds table there, so nothing to unmap. */ - ret = unmap_shared_bt(mm, bde_start, start, end, prev_shared, false); - if (ret) - return ret; - ret = unmap_shared_bt(mm, bde_end, start, end, false, next_shared); + if (ret == -ENOENT) { + ret = 0; + return 0; + } if (ret) return ret; - - return 0; + /* + * We are unmapping an entire table. Either because the + * unmap that started this whole process was large enough + * to cover an entire table, or that the unmap was small + * but was the area covered by a bounds table. + */ + if ((start == bta_start_vaddr) && + (end == bta_end_vaddr)) + return unmap_entire_bt(mm, bde_vaddr, bt_addr); + return zap_bt_entries_mapping(mm, bt_addr, start, end); } static int mpx_unmap_tables(struct mm_struct *mm, unsigned long start, unsigned long end) { - int ret; - long __user *bd_entry, *bde_start, *bde_end; - unsigned long bt_addr; - + unsigned long one_unmap_start; trace_mpx_unmap_search(start, end); - /* - * "Edge" bounds tables are those which are being used by the region - * (start -> end), but that may be shared with adjacent areas. If they - * turn out to be completely unshared, they will be freed. If they are - * shared, we will free the backing store (like an MADV_DONTNEED) for - * areas used by this region. - */ - ret = unmap_edge_bts(mm, start, end); - switch (ret) { - /* non-present tables are OK */ - case 0: - case -ENOENT: - /* Success, or no tables to unmap */ - break; - case -EINVAL: - case -EFAULT: - default: - return ret; - } - - /* - * Only unmap the bounds table that are - * 1. fully covered - * 2. 
not at the edges of the mapping, even if full aligned - */ - bde_start = mm->bd_addr + mpx_get_bd_entry_offset(mm, start); - bde_end = mm->bd_addr + mpx_get_bd_entry_offset(mm, end-1); - for (bd_entry = bde_start + 1; bd_entry < bde_end; bd_entry++) { - ret = get_bt_addr(mm, bd_entry, &bt_addr); - switch (ret) { - case 0: - break; - case -ENOENT: - /* No table here, try the next one */ - continue; - case -EINVAL: - case -EFAULT: - default: - /* - * Note: we are being strict here. - * Any time we run in to an issue - * unmapping tables, we stop and - * SIGSEGV. - */ - return ret; - } - ret = unmap_single_bt(mm, bd_entry, bt_addr); + one_unmap_start = start; + while (one_unmap_start < end) { + int ret; + unsigned long next_unmap_start = ALIGN(one_unmap_start+1, + bd_entry_virt_space(mm)); + unsigned long one_unmap_end = end; + /* + * if the end is beyond the current bounds table, + * move it back so we only deal with a single one + * at a time + */ + if (one_unmap_end > next_unmap_start) + one_unmap_end = next_unmap_start; + ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end); if (ret) return ret; - } + one_unmap_start = next_unmap_start; + } return 0; } -- cgit v0.10.2 From bea03c50b871a2fa922f31ad7c9993bb4fc7b192 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 7 Jun 2015 11:37:06 -0700 Subject: x86/mpx: Do not count MPX VMAs as neighbors when unmapping The comment pretty much says it all. I wrote a test program that does lots of random allocations and forces bounds tables to be created. It came up with a layout like this: .... | BOUNDS DIRECTORY ENTRY COVERS | .... | BOUNDS TABLE COVERS | | BOUNDS TABLE | REAL ALLOC | BOUNDS TABLE | Unmapping "REAL ALLOC" should have been able to free the bounds table "covering" the "REAL ALLOC" because it was the last real user. But, the neighboring VMA bounds tables were found, considered as real neighbors, and we declined to free the bounds table covering the area. Doing this over and over left a small but significant number of these orphans. Handling them is fairly straighforward. All we have to do is walk the VMAs and skip all of the MPX ones when looking for neighbors. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20150607183706.A6BD90BF@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 18fcf737..6233d51 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -937,16 +937,30 @@ static int try_unmap_single_bt(struct mm_struct *mm, void __user *bde_vaddr; int ret; /* + * We already unlinked the VMAs from the mm's rbtree so 'start' + * is guaranteed to be in a hole. This gets us the first VMA + * before the hole in to 'prev' and the next VMA after the hole + * in to 'next'. + */ + next = find_vma_prev(mm, start, &prev); + /* + * Do not count other MPX bounds table VMAs as neighbors. + * Although theoretically possible, we do not allow bounds + * tables for bounds tables so our heads do not explode. + * If we count them as neighbors here, we may end up with + * lots of tables even though we have no actual table + * entries in use. + */ + while (next && is_mpx_vma(next)) + next = next->vm_next; + while (prev && is_mpx_vma(prev)) + prev = prev->vm_prev; + /* * We know 'start' and 'end' lie within an area controlled * by a single bounds table. See if there are any other * VMAs controlled by that bounds table. 
From 97ac46a5087eaf87fd76ff7bb31ec9c896010442 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Sun, 7 Jun 2015 11:37:06 -0700
Subject: x86/mpx: Allow 32-bit binaries on 64-bit kernels again

Now that the bugs in mixed mode MPX handling are fixed, re-allow
32-bit binaries on 64-bit kernels again.

Signed-off-by: Dave Hansen
Reviewed-by: Thomas Gleixner
Cc: Andrew Morton
Cc: Dave Hansen
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/20150607183706.70277DAD@viggo.jf.intel.com
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 6233d51..7a657f5 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -367,12 +367,6 @@ static __user void *mpx_get_bounds_dir(void)
 		return MPX_INVALID_BOUNDS_DIR;
 
 	/*
-	 * 32-bit binaries on 64-bit kernels are currently
-	 * unsupported.
-	 */
-	if (IS_ENABLED(CONFIG_X86_64) && test_thread_flag(TIF_IA32))
-		return MPX_INVALID_BOUNDS_DIR;
-	/*
 	 * The bounds directory pointer is stored in a register
 	 * only accessible if we first do an xsave.
 	 */

-- cgit v0.10.2
From a8424003679e90b9952e20adcd1ff1560d9dd3e9 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Thu, 11 Jun 2015 12:34:00 -0700
Subject: x86/fpu: Fix double-increment in setup_xstate_features()

I noticed that my MPX tracepoints were producing garbage for the
lower and upper bounds:

  mpx_bounds_register_exception: address referenced: 0x00007fffffffccb7 bounds: lower: 0x0 ~upper: 0xffffffffffffffff
  mpx_bounds_register_exception: address referenced: 0x00007fffffffccbf bounds: lower: 0x0 ~upper: 0xffffffffffffffff

This is, of course, bogus because 0x00007fffffffccbf is *within*
the bounds. I assumed that my instruction decoder was bad and
went looking at it. But I eventually realized that I was getting
a '0' offset back from xstate_offsets[BNDREGS].

It was being skipped in the initialization, which is obviously
bogus, so remove the extra leaf++.

This also goes and initializes xstate_offsets/sizes[] to -1 so
that bugs like this will oops instead of silently failing in
interesting ways.

This was introduced by:

  39f1acd ("x86/fpu/xstate: Don't assume the first zero xfeatures zero bit means the end")

Signed-off-by: Dave Hansen
Cc: Andrew Morton
Cc: Andy Lutomirski
Cc: Borislav Petkov
Cc: Brian Gerst
Cc: Denys Vlasenko
Cc: H. Peter Anvin
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: dave@sr71.net
Link: http://lkml.kernel.org/r/20150611193400.2E0B00DB@viggo.jf.intel.com
Signed-off-by: Ingo Molnar

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 49d0d9b..62fc001 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -31,7 +31,8 @@ static const char *xfeature_names[] =
  */
 u64 xfeatures_mask __read_mostly;
 
-static unsigned int xstate_offsets[XFEATURES_NR_MAX], xstate_sizes[XFEATURES_NR_MAX];
+static unsigned int xstate_offsets[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
+static unsigned int xstate_sizes[XFEATURES_NR_MAX] = { [ 0 ... XFEATURES_NR_MAX - 1] = -1};
 static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
 
 /* The number of supported xfeatures in xfeatures_mask: */
@@ -187,7 +188,6 @@ static void __init setup_xstate_features(void)
 		xstate_sizes[leaf] = eax;
 
 		printk(KERN_INFO "x86/fpu: xstate_offset[%d]: %04x, xstate_sizes[%d]: %04x\n", leaf, ebx, leaf, eax);
-		leaf++;
 	}
 }

-- cgit v0.10.2
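
Both halves of this fix can be demonstrated outside the kernel. The sketch below is illustrative only: NR_FEATURES and the fake offset values are invented, and the [0 ... N-1] range designator is a GCC extension, just as in the patch. It poisons the array with -1 and then reproduces the bug: with leaf++ both in the for header and in the body, every other leaf is skipped and keeps its poison value instead of silently reading back as offset 0.

  #include <stdio.h>

  #define NR_FEATURES 8

  /* Every slot starts as -1 (0xffffffff), so a feature that the
   * setup loop skips is caught immediately instead of quietly
   * reporting offset 0: */
  static unsigned int offsets[NR_FEATURES] = {
  	[0 ... NR_FEATURES - 1] = -1
  };

  int main(void)
  {
  	unsigned int leaf;

  	for (leaf = 2; leaf < NR_FEATURES; leaf++) {	/* extended states start at leaf 2 */
  		offsets[leaf] = leaf * 0x40;	/* fake CPUID result */
  		leaf++;		/* BUG: double increment, skips odd leaves */
  	}

  	for (leaf = 0; leaf < NR_FEATURES; leaf++)
  		printf("offsets[%u] = %#x\n", leaf, offsets[leaf]);
  	return 0;
  }

With the poisoned initializer, the skipped entries show up as 0xffffffff in the output, an immediately suspicious value; the zero-initialized arrays the patch replaces would have produced a plausible-looking offset 0 instead.
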