summaryrefslogtreecommitdiff
path: root/arch/x86/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 04:50:26 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2015-11-04 04:50:26 (GMT)
commitce4d72fac16a9540452957b526443b6080030bff (patch)
tree650abdc13edbc6150332c854a7467f06c2bf5ad9 /arch/x86/include
parent0f25f2c1b18f7e47279ec2cf1d24c11c3108873b (diff)
parent158ecc39185b885420e5136b803b29be2bbec7fb (diff)
downloadlinux-ce4d72fac16a9540452957b526443b6080030bff.tar.xz
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fpu changes from Ingo Molnar: "There are two main areas of changes: - Rework of the extended FPU state code to robustify the kernel's usage of cpuid provided xstate sizes - and related changes (Dave Hansen)" - math emulation enhancements: new modern FPU instructions support, with testcases, plus cleanups (Denys Vlasnko)" * 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits) x86/fpu: Fixup uninitialized feature_name warning x86/fpu/math-emu: Add support for FISTTP instructions x86/fpu/math-emu, selftests: Add test for FISTTP instructions x86/fpu/math-emu: Add support for FCMOVcc insns x86/fpu/math-emu: Add support for F[U]COMI[P] insns x86/fpu/math-emu: Remove define layer for undocumented opcodes x86/fpu/math-emu, selftests: Add tests for FCMOV and FCOMI insns x86/fpu/math-emu: Remove !NO_UNDOC_CODE x86/fpu: Check CPU-provided sizes against struct declarations x86/fpu: Check to ensure increasing-offset xstate offsets x86/fpu: Correct and check XSAVE xstate size calculations x86/fpu: Add C structures for AVX-512 state components x86/fpu: Rework YMM definition x86/fpu/mpx: Rework MPX 'xstate' types x86/fpu: Add xfeature_enabled() helper instead of test_bit() x86/fpu: Remove 'xfeature_nr' x86/fpu: Rework XSTATE_* macros to remove magic '2' x86/fpu: Rename XFEATURES_NR_MAX x86/fpu: Rename XSAVE macros x86/fpu: Remove partial LWP support definitions ...
Diffstat (limited to 'arch/x86/include')
-rw-r--r--arch/x86/include/asm/fpu/types.h148
-rw-r--r--arch/x86/include/asm/fpu/xstate.h15
-rw-r--r--arch/x86/include/asm/trace/mpx.h7
3 files changed, 117 insertions, 53 deletions
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index c49c517..1c6f6ac 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -95,63 +95,122 @@ struct swregs_state {
/*
* List of XSAVE features Linux knows about:
*/
-enum xfeature_bit {
- XSTATE_BIT_FP,
- XSTATE_BIT_SSE,
- XSTATE_BIT_YMM,
- XSTATE_BIT_BNDREGS,
- XSTATE_BIT_BNDCSR,
- XSTATE_BIT_OPMASK,
- XSTATE_BIT_ZMM_Hi256,
- XSTATE_BIT_Hi16_ZMM,
-
- XFEATURES_NR_MAX,
+enum xfeature {
+ XFEATURE_FP,
+ XFEATURE_SSE,
+ /*
+ * Values above here are "legacy states".
+ * Those below are "extended states".
+ */
+ XFEATURE_YMM,
+ XFEATURE_BNDREGS,
+ XFEATURE_BNDCSR,
+ XFEATURE_OPMASK,
+ XFEATURE_ZMM_Hi256,
+ XFEATURE_Hi16_ZMM,
+
+ XFEATURE_MAX,
};
-#define XSTATE_FP (1 << XSTATE_BIT_FP)
-#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
-#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
-#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
-#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
-#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
-#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
-#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
+#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
+#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
+#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
+#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
+#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
+#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
+#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
+#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)
+
+#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
+ | XFEATURE_MASK_ZMM_Hi256 \
+ | XFEATURE_MASK_Hi16_ZMM)
+
+#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM
-#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
-#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+struct reg_128_bit {
+ u8 regbytes[128/8];
+};
+struct reg_256_bit {
+ u8 regbytes[256/8];
+};
+struct reg_512_bit {
+ u8 regbytes[512/8];
+};
/*
+ * State component 2:
+ *
* There are 16x 256-bit AVX registers named YMM0-YMM15.
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
- * and are stored in 'struct fxregs_state::xmm_space[]'.
+ * and are stored in 'struct fxregs_state::xmm_space[]' in the
+ * "legacy" area.
*
- * The high 128 bits are stored here:
- * 16x 128 bits == 256 bytes.
+ * The high 128 bits are stored here.
*/
struct ymmh_struct {
- u8 ymmh_space[256];
-};
-
-/* We don't support LWP yet: */
-struct lwp_struct {
- u8 reserved[128];
-};
+ struct reg_128_bit hi_ymm[16];
+} __packed;
/* Intel MPX support: */
-struct bndreg {
+
+struct mpx_bndreg {
u64 lower_bound;
u64 upper_bound;
} __packed;
+/*
+ * State component 3 is used for the 4 128-bit bounds registers
+ */
+struct mpx_bndreg_state {
+ struct mpx_bndreg bndreg[4];
+} __packed;
-struct bndcsr {
+/*
+ * State component 4 is used for the 64-bit user-mode MPX
+ * configuration register BNDCFGU and the 64-bit MPX status
+ * register BNDSTATUS. We call the pair "BNDCSR".
+ */
+struct mpx_bndcsr {
u64 bndcfgu;
u64 bndstatus;
} __packed;
-struct mpx_struct {
- struct bndreg bndreg[4];
- struct bndcsr bndcsr;
-};
+/*
+ * The BNDCSR state is padded out to be 64-bytes in size.
+ */
+struct mpx_bndcsr_state {
+ union {
+ struct mpx_bndcsr bndcsr;
+ u8 pad_to_64_bytes[64];
+ };
+} __packed;
+
+/* AVX-512 Components: */
+
+/*
+ * State component 5 is used for the 8 64-bit opmask registers
+ * k0-k7 (opmask state).
+ */
+struct avx_512_opmask_state {
+ u64 opmask_reg[8];
+} __packed;
+
+/*
+ * State component 6 is used for the upper 256 bits of the
+ * registers ZMM0-ZMM15. These 16 256-bit values are denoted
+ * ZMM0_H-ZMM15_H (ZMM_Hi256 state).
+ */
+struct avx_512_zmm_uppers_state {
+ struct reg_256_bit zmm_upper[16];
+} __packed;
+
+/*
+ * State component 7 is used for the 16 512-bit registers
+ * ZMM16-ZMM31 (Hi16_ZMM state).
+ */
+struct avx_512_hi16_state {
+ struct reg_512_bit hi16_zmm[16];
+} __packed;
struct xstate_header {
u64 xfeatures;
@@ -159,22 +218,19 @@ struct xstate_header {
u64 reserved[6];
} __attribute__((packed));
-/* New processor state extensions should be added here: */
-#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
- sizeof(struct lwp_struct) + \
- sizeof(struct mpx_struct) )
/*
* This is our most modern FPU state format, as saved by the XSAVE
* and restored by the XRSTOR instructions.
*
* It consists of a legacy fxregs portion, an xstate header and
- * subsequent fixed size areas as defined by the xstate header.
- * Not all CPUs support all the extensions.
+ * subsequent areas as defined by the xstate header. Not all CPUs
+ * support all the extensions, so the size of the extended area
+ * can vary quite a bit between CPUs.
*/
struct xregs_state {
struct fxregs_state i387;
struct xstate_header header;
- u8 __reserved[XSTATE_RESERVE];
+ u8 extended_state_area[0];
} __attribute__ ((packed, aligned (64)));
/*
@@ -182,7 +238,9 @@ struct xregs_state {
* put together, so that we can pick the right one runtime.
*
* The size of the structure is determined by the largest
- * member - which is the xsave area:
+ * member - which is the xsave area. The padding is there
+ * to ensure that statically-allocated task_structs (just
+ * the init_task today) have enough space.
*/
union fpregs_state {
struct fregs_state fsave;
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 4656b25..3a6c89b 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -6,7 +6,7 @@
#include <linux/uaccess.h>
/* Bit 63 of XCR0 is reserved for future expansion */
-#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
+#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
#define XSTATE_CPUID 0x0000000d
@@ -19,14 +19,18 @@
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
/* Supported features which support lazy state saving */
-#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
- | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
+#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
+ XFEATURE_MASK_SSE | \
+ XFEATURE_MASK_YMM | \
+ XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
/* Supported features which require eager state saving */
-#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
+#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)
/* All currently supported features */
-#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
+#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
@@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);
diff --git a/arch/x86/include/asm/trace/mpx.h b/arch/x86/include/asm/trace/mpx.h
index 173dd3b..0f492fc 100644
--- a/arch/x86/include/asm/trace/mpx.h
+++ b/arch/x86/include/asm/trace/mpx.h
@@ -11,7 +11,7 @@
TRACE_EVENT(mpx_bounds_register_exception,
TP_PROTO(void *addr_referenced,
- const struct bndreg *bndreg),
+ const struct mpx_bndreg *bndreg),
TP_ARGS(addr_referenced, bndreg),
TP_STRUCT__entry(
@@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception,
TRACE_EVENT(bounds_exception_mpx,
- TP_PROTO(const struct bndcsr *bndcsr),
+ TP_PROTO(const struct mpx_bndcsr *bndcsr),
TP_ARGS(bndcsr),
TP_STRUCT__entry(
@@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table,
/*
* This gets used outside of MPX-specific code, so we need a stub.
*/
-static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
+static inline
+void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
{
}