From e2724e9d969294879936daf7833d4adda26c8efc Mon Sep 17 00:00:00 2001 From: Jeremy Compostella Date: Wed, 11 May 2016 17:23:34 +0200 Subject: x86/tsc: Add missing Cherrytrail frequency to the table Intel Cherrytrail is based on Airmont core so MSR_FSB_FREQ[2:0] = 4 means that the CPU reference clock runs at 80MHz. Add this missing frequency to the table. Signed-off-by: Jeremy Compostella Link: http://lkml.kernel.org/r/87y47gty89.fsf@intel.com Signed-off-by: Thomas Gleixner diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 6aa0f4d..9911a06 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -23,6 +23,7 @@ #include /* CPU reference clock frequency: in KHz */ +#define FREQ_80 80000 #define FREQ_83 83200 #define FREQ_100 99840 #define FREQ_133 133200 @@ -56,6 +57,8 @@ static struct freq_desc freq_desc_tables[] = { { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, /* ANN */ { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, + /* AIRMONT */ + { 6, 0x4c, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, FREQ_80, 0, 0, 0 } }, }; static int match_cpu(u8 family, u8 model) -- cgit v0.10.2 From e8df1a95b685af84a81698199ee206e0e66a8b44 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 13 May 2016 15:13:28 -0700 Subject: x86/cpufeature, x86/mm/pkeys: Fix broken compile-time disabling of pkeys When I added support for the Memory Protection Keys processor feature, I had to reindent the REQUIRED/DISABLED_MASK macros, and also consult the later cpufeature words. I'm not quite sure how I bungled it, but I consulted the wrong word at the end. This only affected required or disabled cpu features in cpufeature words 14, 15 and 16. So, only Protection Keys itself was screwed over here. The result was that if you disabled pkeys in your .config, you might still see some code show up that should have been compiled out. There should be no functional problems, though. In verifying this patch I also realized that the DISABLE_PKU/OSPKE macros were defined backwards and that the cpu_has() check in setup_pku() was not doing the compile-time disabled checks. So also fix the macro for DISABLE_PKU/OSPKE and add a compile-time check for pkeys being enabled in setup_pku(). Signed-off-by: Dave Hansen Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Dave Hansen Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: dfb4a70f20c5 ("x86/cpufeature, x86/mm/pkeys: Add protection keys related CPUID definitions") Link: http://lkml.kernel.org/r/20160513221328.C200930B@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 3636ec0..aeab479 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -63,9 +63,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (((bit)>>5)==11 && (1UL<<((bit)&31) & REQUIRED_MASK11)) || \ (((bit)>>5)==12 && (1UL<<((bit)&31) & REQUIRED_MASK12)) || \ (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK13)) || \ - (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \ - (((bit)>>5)==13 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \ - (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK16)) ) + (((bit)>>5)==14 && (1UL<<((bit)&31) & REQUIRED_MASK14)) || \ + (((bit)>>5)==15 && (1UL<<((bit)&31) & REQUIRED_MASK15)) || \ + (((bit)>>5)==16 && (1UL<<((bit)&31) & REQUIRED_MASK16)) ) #define DISABLED_MASK_BIT_SET(bit) \ ( (((bit)>>5)==0 && (1UL<<((bit)&31) & DISABLED_MASK0 )) || \ @@ -82,9 +82,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; (((bit)>>5)==11 && (1UL<<((bit)&31) & DISABLED_MASK11)) || \ (((bit)>>5)==12 && (1UL<<((bit)&31) & DISABLED_MASK12)) || \ (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK13)) || \ - (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \ - (((bit)>>5)==13 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \ - (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK16)) ) + (((bit)>>5)==14 && (1UL<<((bit)&31) & DISABLED_MASK14)) || \ + (((bit)>>5)==15 && (1UL<<((bit)&31) & DISABLED_MASK15)) || \ + (((bit)>>5)==16 && (1UL<<((bit)&31) & DISABLED_MASK16)) ) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 39343be..911e935 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -29,11 +29,11 @@ #endif /* CONFIG_X86_64 */ #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS -# define DISABLE_PKU (1<<(X86_FEATURE_PKU)) -# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE)) -#else # define DISABLE_PKU 0 # define DISABLE_OSPKE 0 +#else +# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31)) +# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31)) #endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */ /* diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8394b3d..f45a4b9 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -310,6 +310,10 @@ static bool pku_disabled; static __always_inline void setup_pku(struct cpuinfo_x86 *c) { + /* check the boot processor, plus compile options for PKU: */ + if (!cpu_feature_enabled(X86_FEATURE_PKU)) + return; + /* checks the actual processor's cpuid bits: */ if (!cpu_has(c, X86_FEATURE_PKU)) return; if (pku_disabled) -- cgit v0.10.2 From 683ad8092cd262a02d01377dd17a29d492438b90 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 May 2016 13:05:45 -0700 Subject: x86/efi: Fix 7-parameter efi_call()s Alex Thorlton reported that the SGI/UV code crashes in the efi_call() code when invoked with 7 parameters, due to: mov (%rsp), %rax mov 8(%rax), %rax ... mov %rax, 40(%rsp) Offset 8 is only true if CONFIG_FRAME_POINTERS is disabled, with frame pointers enabled it should be 16. Furthermore, the SAVE_XMM code saves the old stack pointer, but that's just crazy. It saves the stack pointer *AFTER* we've done the: FRAME_BEGIN ... which will have *changed* the stack pointer, depending on whether stack frames are enabled or not. So when the code then does: mov (%rsp), %rax ... we now move that old stack pointer into %rax, but the offset off that stack pointer will depend on whether that FRAME_BEGIN saved off %rbp or not. So that whole 8-vs-16 offset confusion depends on the frame pointer! If frame pointers were enabled, it will be 16. If they weren't, it will be 8. The right fix is to just get rid of that silly conditional frame pointer thing, and always use frame pointers in this stub function. And then we don't need that (odd) load to get the old stack pointer into %rax - we can just use the frame pointer. Reported-by: Alex Thorlton Tested-by: Alex Thorlton Signed-off-by: Linus Torvalds Cc: Alexander Shishkin Cc: Andrew Morton Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Matt Fleming Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/CA%2B55aFzBS2v%3DWnEH83cUDg7XkOremFqJ30BJwF40dCYjReBkUQ@mail.gmail.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 92723ae..cd95075 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -11,7 +11,6 @@ #include #include #include -#include #define SAVE_XMM \ mov %rsp, %rax; \ @@ -40,10 +39,10 @@ mov (%rsp), %rsp ENTRY(efi_call) - FRAME_BEGIN + pushq %rbp + movq %rsp, %rbp SAVE_XMM - mov (%rsp), %rax - mov 8(%rax), %rax + mov 16(%rbp), %rax subq $48, %rsp mov %r9, 32(%rsp) mov %rax, 40(%rsp) @@ -53,6 +52,6 @@ ENTRY(efi_call) call *%rdi addq $48, %rsp RESTORE_XMM - FRAME_END + popq %rbp ret ENDPROC(efi_call) -- cgit v0.10.2 From d4bf7078c43e11097e0d6f04d3fb999bf92c4fb0 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 17 May 2016 13:06:06 -0500 Subject: x86/entry/64: Fix stack return address retrieval in thunk With CONFIG_FRAME_POINTER enabled, a thunk can pass a bad return address value to the called function. '9*8(%rsp)' actually gets the frame pointer, not the return address. The only users of the 'put_ret_addr_in_rdi' option are two functions which trace the enabling and disabling of interrupts, so this bug can result in bad debug or tracing information with CONFIG_IRQSOFF_TRACER or CONFIG_PROVE_LOCKING. Fix this by implementing the suggestion of Linus: explicitly push the frame pointer all the time and constify the stack offsets that way. This is both correct and easier to read. Reported-by: Matt Fleming Signed-off-by: Josh Poimboeuf [ Extended the changelog a bit. ] Acked-by: Linus Torvalds Cc: Alex Thorlton Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: 058fb73274f9 ("x86/asm/entry: Create stack frames in thunk functions") Link: http://lkml.kernel.org/r/20160517180606.v5o7wcgdni7443ol@treble Signed-off-by: Ingo Molnar diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 98df1fa..027aec4 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -8,16 +8,15 @@ #include #include "calling.h" #include -#include /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 .globl \name .type \name, @function \name: - FRAME_BEGIN + pushq %rbp + movq %rsp, %rbp - /* this one pushes 9 elems, the next one would be %rIP */ pushq %rdi pushq %rsi pushq %rdx @@ -29,8 +28,8 @@ pushq %r11 .if \put_ret_addr_in_rdi - /* 9*8(%rsp) is return addr on stack */ - movq 9*8(%rsp), %rdi + /* 8(%rbp) is return addr on stack */ + movq 8(%rbp), %rdi .endif call \func @@ -65,7 +64,7 @@ restore: popq %rdx popq %rsi popq %rdi - FRAME_END + popq %rbp ret _ASM_NOKPROBE(restore) #endif -- cgit v0.10.2 From 0f6ff2bce0d4c3e4ff34f5d2ffb7329025b30844 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 12 May 2016 15:04:00 -0700 Subject: x86/mm/mpx: Work around MPX erratum SKD046 This erratum essentially causes the CPU to forget which privilege level it is operating on (kernel vs. user) for the purposes of MPX. This erratum can only be triggered when a system is not using Supervisor Mode Execution Prevention (SMEP). Our workaround for the erratum is to ensure that MPX can only be used in cases where SMEP is present in the processor and is enabled. This erratum only affects Core processors. Atom is unaffected. But, there is no architectural way to determine Atom vs. Core. So, we just apply this workaround to all processors. It's possible that it will mistakenly disable MPX on some Atom processsors or future unaffected Core processors. There are currently no processors that have MPX and not SMEP. It would take something akin to a hypervisor masking SMEP out on an Atom processor for this to present itself on current hardware. More details can be found at: http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf " SKD046 Branch Instructions May Initialize MPX Bound Registers Incorrectly Problem: Depending on the current Intel MPX (Memory Protection Extensions) configuration, execution of certain branch instructions (near CALL, near RET, near JMP, and Jcc instructions) without a BND prefix (F2H) initialize the MPX bound registers. Due to this erratum, such a branch instruction that is executed both with CPL = 3 and with CPL < 3 may not use the correct MPX configuration register (BNDCFGU or BNDCFGS, respectively) for determining whether to initialize the bound registers; it may thus initialize the bound registers when it should not, or fail to initialize them when it should. Implication: A branch instruction that has executed both in user mode and in supervisor mode (from the same linear address) may cause a #BR (bound range fault) when it should not have or may not cause a #BR when it should have. Workaround An operating system can avoid this erratum by setting CR4.SMEP[bit 20] to enable supervisor-mode execution prevention (SMEP). When SMEP is enabled, no code can be executed both with CPL = 3 and with CPL < 3. " Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160512220400.3B35F1BC@viggo.jf.intel.com Signed-off-by: Ingo Molnar diff --git a/arch/x86/include/asm/bugs.h b/arch/x86/include/asm/bugs.h index 08abf63..5490bba 100644 --- a/arch/x86/include/asm/bugs.h +++ b/arch/x86/include/asm/bugs.h @@ -1,8 +1,16 @@ #ifndef _ASM_X86_BUGS_H #define _ASM_X86_BUGS_H +#include + extern void check_bugs(void); +#if defined(CONFIG_CPU_SUP_INTEL) +void check_mpx_erratum(struct cpuinfo_x86 *c); +#else +static inline void check_mpx_erratum(struct cpuinfo_x86 *c) {} +#endif + #if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32) int ppro_with_ram_bug(void); #else diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f45a4b9..62ff525 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -270,6 +271,8 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c) static __init int setup_disable_smep(char *arg) { setup_clear_cpu_cap(X86_FEATURE_SMEP); + /* Check for things that depend on SMEP being enabled: */ + check_mpx_erratum(&boot_cpu_data); return 1; } __setup("nosmep", setup_disable_smep); diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index e4393bf..b47df99 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -25,6 +25,41 @@ #include #endif +/* + * Just in case our CPU detection goes bad, or you have a weird system, + * allow a way to override the automatic disabling of MPX. + */ +static int forcempx; + +static int __init forcempx_setup(char *__unused) +{ + forcempx = 1; + + return 1; +} +__setup("intel-skd-046-workaround=disable", forcempx_setup); + +void check_mpx_erratum(struct cpuinfo_x86 *c) +{ + if (forcempx) + return; + /* + * Turn off the MPX feature on CPUs where SMEP is not + * available or disabled. + * + * Works around Intel Erratum SKD046: "Branch Instructions + * May Initialize MPX Bound Registers Incorrectly". + * + * This might falsely disable MPX on systems without + * SMEP, like Atom processors without SMEP. But there + * is no such hardware known at the moment. + */ + if (cpu_has(c, X86_FEATURE_MPX) && !cpu_has(c, X86_FEATURE_SMEP)) { + setup_clear_cpu_cap(X86_FEATURE_MPX); + pr_warn("x86/mpx: Disabling MPX since SMEP not present\n"); + } +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -173,6 +208,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (edx & (1U << 28)) c->x86_coreid_bits = get_count_order((ebx >> 16) & 0xff); } + + check_mpx_erratum(c); } #ifdef CONFIG_X86_32 -- cgit v0.10.2 From d696ca016d579d43fc043f28ba656d9305fba651 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 10 May 2016 09:18:46 -0700 Subject: x86/fsgsbase/64: Use TASK_SIZE_MAX for FSBASE/GSBASE upper limits The GSBASE upper limit exists to prevent user code from confusing the paranoid idtentry path. The FSBASE upper limit is just for consistency. There's no need to enforce a smaller limit for 32-bit tasks. Just use TASK_SIZE_MAX. This simplifies the logic and will save a few bytes of code. Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/5357f2fe0f103eabf005773b70722451eab09a89.1462897104.git.luto@kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 6b16c36..6e789ca 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -532,7 +532,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) switch (code) { case ARCH_SET_GS: - if (addr >= TASK_SIZE_OF(task)) + if (addr >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.gsindex = 0; @@ -546,7 +546,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) case ARCH_SET_FS: /* Not strictly needed for fs, but do it for symmetry with gs */ - if (addr >= TASK_SIZE_OF(task)) + if (addr >= TASK_SIZE_MAX) return -EPERM; cpu = get_cpu(); task->thread.fsindex = 0; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index e60ef91..600edd2 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -392,7 +392,7 @@ static int putreg(struct task_struct *child, #ifdef CONFIG_X86_64 case offsetof(struct user_regs_struct,fs_base): - if (value >= TASK_SIZE_OF(child)) + if (value >= TASK_SIZE_MAX) return -EIO; /* * When changing the segment base, use do_arch_prctl @@ -406,7 +406,7 @@ static int putreg(struct task_struct *child, /* * Exactly the same here as the %fs handling above. */ - if (value >= TASK_SIZE_OF(child)) + if (value >= TASK_SIZE_MAX) return -EIO; if (child->thread.gsbase != value) return do_arch_prctl(child, ARCH_SET_GS, value); -- cgit v0.10.2 From dc4fac84f8e66b147921ebdc385e767d5def7422 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Tue, 10 May 2016 14:10:29 -0700 Subject: x86/mm: Switch from TASK_SIZE to TASK_SIZE_MAX in the page fault code x86's page fault handlers had two TASK_SIZE uses that should have been TASK_SIZE_MAX. I don't think that either one had a visible effect, but this makes the code clearer and should save a few bytes of text. (And I eventually want to eradicate TASK_SIZE. This will help.) Reported-by: Cyrill Gorcunov Signed-off-by: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Pavel Emelyanov Cc: Peter Zijlstra Cc: Ruslan Kabatsayev Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1242fb23b0d05c3069dbf5758ac55d26bc114bef.1462914565.git.luto@kernel.org Signed-off-by: Ingo Molnar diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 5ce1ed0..7d1fa7c 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -292,7 +292,7 @@ void vmalloc_sync_all(void) return; for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE && address < FIXADDR_TOP; + address >= TASK_SIZE_MAX && address < FIXADDR_TOP; address += PMD_SIZE) { struct page *page; @@ -854,8 +854,13 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, return; } #endif - /* Kernel addresses are always protection faults: */ - if (address >= TASK_SIZE) + + /* + * To avoid leaking information about the kernel page table + * layout, pretend that user-mode accesses to kernel addresses + * are always protection faults. + */ + if (address >= TASK_SIZE_MAX) error_code |= PF_PROT; if (likely(show_unhandled_signals)) -- cgit v0.10.2