summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/boot/compressed/kaslr.c3
-rw-r--r--arch/x86/boot/compressed/misc.c4
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/boot/string.c1
-rw-r--r--arch/x86/boot/string.h9
-rw-r--r--arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S12
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S67
-rw-r--r--arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S12
-rw-r--r--arch/x86/entry/entry_64.S2
-rw-r--r--arch/x86/events/core.c4
-rw-r--r--arch/x86/events/intel/core.c9
-rw-r--r--arch/x86/events/intel/cstate.c2
-rw-r--r--arch/x86/events/intel/rapl.c59
-rw-r--r--arch/x86/events/intel/uncore.c1
-rw-r--r--arch/x86/events/intel/uncore_snbep.c2
-rw-r--r--arch/x86/include/asm/alternative-asm.h4
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/elf.h18
-rw-r--r--arch/x86/include/asm/io.h4
-rw-r--r--arch/x86/include/asm/kvm_emulate.h5
-rw-r--r--arch/x86/include/asm/mmu_context.h4
-rw-r--r--arch/x86/include/asm/msr-index.h2
-rw-r--r--arch/x86/include/asm/pat.h1
-rw-r--r--arch/x86/include/asm/uaccess.h13
-rw-r--r--arch/x86/include/asm/xen/hypercall.h3
-rw-r--r--arch/x86/kernel/acpi/boot.c15
-rw-r--r--arch/x86/kernel/apic/apic.c26
-rw-r--r--arch/x86/kernel/apic/io_apic.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c3
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c19
-rw-r--r--arch/x86/kernel/fpu/regset.c9
-rw-r--r--arch/x86/kernel/fpu/signal.c4
-rw-r--r--arch/x86/kernel/kvm.c9
-rw-r--r--arch/x86/kernel/process_64.c236
-rw-r--r--arch/x86/kernel/setup.c7
-rw-r--r--arch/x86/kernel/smpboot.c11
-rw-r--r--arch/x86/kernel/tsc.c9
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c17
-rw-r--r--arch/x86/kvm/mmu.c15
-rw-r--r--arch/x86/kvm/paging_tmpl.h3
-rw-r--r--arch/x86/kvm/pmu_intel.c2
-rw-r--r--arch/x86/kvm/vmx.c254
-rw-r--r--arch/x86/kvm/x86.c72
-rw-r--r--arch/x86/lib/copy_user_64.S7
-rw-r--r--arch/x86/mm/fault.c47
-rw-r--r--arch/x86/mm/init_64.c10
-rw-r--r--arch/x86/mm/mpx.c12
-rw-r--r--arch/x86/mm/pat.c28
-rw-r--r--arch/x86/mm/tlb.c4
-rw-r--r--arch/x86/oprofile/op_model_ppro.c4
-rw-r--r--arch/x86/pci/fixup.c32
-rw-r--r--arch/x86/platform/intel-mid/device_libs/Makefile2
-rw-r--r--arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c (renamed from arch/x86/platform/intel-mid/device_libs/platform_spidev.c)4
-rw-r--r--arch/x86/purgatory/Makefile1
-rw-r--r--arch/x86/tools/relocs.c3
57 files changed, 644 insertions, 482 deletions
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index a66854d..6de58f1 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -430,9 +430,6 @@ void choose_random_location(unsigned long input,
{
unsigned long random_addr, min_addr;
- /* By default, keep output position unchanged. */
- *virt_addr = *output;
-
if (cmdline_find_option_bool("nokaslr")) {
warn("KASLR disabled: 'nokaslr' on cmdline.");
return;
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index b3c5a5f0..c945acd 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -338,7 +338,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned long output_len)
{
const unsigned long kernel_total_size = VO__end - VO__text;
- unsigned long virt_addr = (unsigned long)output;
+ unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
/* Retain x86 boot parameters pointer passed from startup_32/64. */
boot_params = rmode;
@@ -397,7 +397,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifndef CONFIG_RELOCATABLE
if ((unsigned long)output != LOAD_PHYSICAL_ADDR)
error("Destination address does not match LOAD_PHYSICAL_ADDR");
- if ((unsigned long)output != virt_addr)
+ if (virt_addr != LOAD_PHYSICAL_ADDR)
error("Destination virtual address changed when not relocatable");
#endif
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 1c8355e..766a521 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -81,8 +81,6 @@ static inline void choose_random_location(unsigned long input,
unsigned long output_size,
unsigned long *virt_addr)
{
- /* No change from existing output location. */
- *virt_addr = *output;
}
#endif
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index cc3bd58..9e240fc 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,6 +14,7 @@
#include <linux/types.h>
#include "ctype.h"
+#include "string.h"
int memcmp(const void *s1, const void *s2, size_t len)
{
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h
index 725e820..113588d 100644
--- a/arch/x86/boot/string.h
+++ b/arch/x86/boot/string.h
@@ -18,4 +18,13 @@ int memcmp(const void *s1, const void *s2, size_t len);
#define memset(d,c,l) __builtin_memset(d,c,l)
#define memcmp __builtin_memcmp
+extern int strcmp(const char *str1, const char *str2);
+extern int strncmp(const char *cs, const char *ct, size_t count);
+extern size_t strlen(const char *s);
+extern char *strstr(const char *s1, const char *s2);
+extern size_t strnlen(const char *s, size_t maxlen);
+extern unsigned int atou(const char *s);
+extern unsigned long long simple_strtoull(const char *cp, char **endp,
+ unsigned int base);
+
#endif /* BOOT_STRING_H */
diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
index 96df6a3..a2ae689 100644
--- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S
@@ -157,8 +157,8 @@ LABEL skip_ %I
.endr
# Find min length
- vmovdqa _lens+0*16(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
+ vmovdqu _lens+0*16(state), %xmm0
+ vmovdqu _lens+1*16(state), %xmm1
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
@@ -178,8 +178,8 @@ LABEL skip_ %I
vpsubd %xmm2, %xmm0, %xmm0
vpsubd %xmm2, %xmm1, %xmm1
- vmovdqa %xmm0, _lens+0*16(state)
- vmovdqa %xmm1, _lens+1*16(state)
+ vmovdqu %xmm0, _lens+0*16(state)
+ vmovdqu %xmm1, _lens+1*16(state)
# "state" and "args" are the same address, arg1
# len is arg2
@@ -235,8 +235,8 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2)
jc .return_null
# Find min length
- vmovdqa _lens(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
+ vmovdqu _lens(state), %xmm0
+ vmovdqu _lens+1*16(state), %xmm1
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 1cd792d..1eab79c 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -117,11 +117,10 @@
.set T1, REG_T1
.endm
-#define K_BASE %r8
#define HASH_PTR %r9
+#define BLOCKS_CTR %r8
#define BUFFER_PTR %r10
#define BUFFER_PTR2 %r13
-#define BUFFER_END %r11
#define PRECALC_BUF %r14
#define WK_BUF %r15
@@ -205,14 +204,14 @@
* blended AVX2 and ALU instruction scheduling
* 1 vector iteration per 8 rounds
*/
- vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP
+ vmovdqu (i * 2)(BUFFER_PTR), W_TMP
.elseif ((i & 7) == 1)
- vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\
+ vinsertf128 $1, ((i-1) * 2)(BUFFER_PTR2),\
WY_TMP, WY_TMP
.elseif ((i & 7) == 2)
vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY
.elseif ((i & 7) == 4)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
.elseif ((i & 7) == 7)
vmovdqu WY_TMP, PRECALC_WK(i&~7)
@@ -255,7 +254,7 @@
vpxor WY, WY_TMP, WY_TMP
.elseif ((i & 7) == 7)
vpxor WY_TMP2, WY_TMP, WY
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -291,7 +290,7 @@
vpsrld $30, WY, WY
vpor WY, WY_TMP, WY
.elseif ((i & 7) == 7)
- vpaddd K_XMM(K_BASE), WY, WY_TMP
+ vpaddd K_XMM + K_XMM_AR(%rip), WY, WY_TMP
vmovdqu WY_TMP, PRECALC_WK(i&~7)
PRECALC_ROTATE_WY
@@ -446,6 +445,16 @@
.endm
+/* Add constant only if (%2 > %3) condition met (uses RTA as temp)
+ * %1 + %2 >= %3 ? %4 : 0
+ */
+.macro ADD_IF_GE a, b, c, d
+ mov \a, RTA
+ add $\d, RTA
+ cmp $\c, \b
+ cmovge RTA, \a
+.endm
+
/*
* macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining
*/
@@ -463,13 +472,16 @@
lea (2*4*80+32)(%rsp), WK_BUF
# Precalc WK for first 2 blocks
- PRECALC_OFFSET = 0
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 2, 64
.set i, 0
.rept 160
PRECALC i
.set i, i + 1
.endr
- PRECALC_OFFSET = 128
+
+ /* Go to next block if needed */
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 3, 128
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
xchg WK_BUF, PRECALC_BUF
.align 32
@@ -479,8 +491,8 @@ _loop:
* we use K_BASE value as a signal of a last block,
* it is set below by: cmovae BUFFER_PTR, K_BASE
*/
- cmp K_BASE, BUFFER_PTR
- jne _begin
+ test BLOCKS_CTR, BLOCKS_CTR
+ jnz _begin
.align 32
jmp _end
.align 32
@@ -512,10 +524,10 @@ _loop0:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */
- cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
-
+ /* Update Counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR, BLOCKS_CTR, 4, 128
/*
* rounds
* 60,62,64,66,68
@@ -532,8 +544,8 @@ _loop0:
UPDATE_HASH 12(HASH_PTR), D
UPDATE_HASH 16(HASH_PTR), E
- cmp K_BASE, BUFFER_PTR /* is current block the last one? */
- je _loop
+ test BLOCKS_CTR, BLOCKS_CTR
+ jz _loop
mov TB, B
@@ -575,10 +587,10 @@ _loop2:
.set j, j+2
.endr
- add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */
-
- cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */
- cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */
+ /* update counter */
+ sub $1, BLOCKS_CTR
+ /* Move to the next block only if needed*/
+ ADD_IF_GE BUFFER_PTR2, BLOCKS_CTR, 4, 128
jmp _loop3
_loop3:
@@ -641,19 +653,12 @@ _loop3:
avx2_zeroupper
- lea K_XMM_AR(%rip), K_BASE
-
+ /* Setup initial values */
mov CTX, HASH_PTR
mov BUF, BUFFER_PTR
- lea 64(BUF), BUFFER_PTR2
-
- shl $6, CNT /* mul by 64 */
- add BUF, CNT
- add $64, CNT
- mov CNT, BUFFER_END
- cmp BUFFER_END, BUFFER_PTR2
- cmovae K_BASE, BUFFER_PTR2
+ mov BUF, BUFFER_PTR2
+ mov CNT, BLOCKS_CTR
xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP
diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
index a78a069..ec9bee6 100644
--- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
+++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S
@@ -155,8 +155,8 @@ LABEL skip_ %I
.endr
# Find min length
- vmovdqa _lens+0*16(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
+ vmovdqu _lens+0*16(state), %xmm0
+ vmovdqu _lens+1*16(state), %xmm1
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
@@ -176,8 +176,8 @@ LABEL skip_ %I
vpsubd %xmm2, %xmm0, %xmm0
vpsubd %xmm2, %xmm1, %xmm1
- vmovdqa %xmm0, _lens+0*16(state)
- vmovdqa %xmm1, _lens+1*16(state)
+ vmovdqu %xmm0, _lens+0*16(state)
+ vmovdqu %xmm1, _lens+1*16(state)
# "state" and "args" are the same address, arg1
# len is arg2
@@ -234,8 +234,8 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2)
jc .return_null
# Find min length
- vmovdqa _lens(state), %xmm0
- vmovdqa _lens+1*16(state), %xmm1
+ vmovdqu _lens(state), %xmm0
+ vmovdqu _lens+1*16(state), %xmm1
vpminud %xmm1, %xmm0, %xmm2 # xmm2 has {D,C,B,A}
vpalignr $8, %xmm2, %xmm3, %xmm3 # xmm3 has {x,x,D,C}
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ef766a3..e7b0e7f 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1215,6 +1215,8 @@ ENTRY(nmi)
* other IST entries.
*/
+ ASM_CLAC
+
/* Use %rdx as our temp variable throughout */
pushq %rdx
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 38623e2..9604b25 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -505,6 +505,10 @@ int x86_pmu_hw_config(struct perf_event *event)
if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
+
+ /* There's no sense in having PEBS for non sampling events: */
+ if (!is_sampling_event(event))
+ return -EINVAL;
}
/*
* check that PEBS LBR correction does not conflict with
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 3bdb917..f0f197f 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3164,13 +3164,16 @@ static void intel_pmu_cpu_starting(int cpu)
if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
for_each_cpu(i, topology_sibling_cpumask(cpu)) {
+ struct cpu_hw_events *sibling;
struct intel_excl_cntrs *c;
- c = per_cpu(cpu_hw_events, i).excl_cntrs;
+ sibling = &per_cpu(cpu_hw_events, i);
+ c = sibling->excl_cntrs;
if (c && c->core_id == core_id) {
cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
cpuc->excl_cntrs = c;
- cpuc->excl_thread_id = 1;
+ if (!sibling->excl_thread_id)
+ cpuc->excl_thread_id = 1;
break;
}
}
@@ -3975,7 +3978,7 @@ __init int intel_pmu_init(void)
x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
}
- x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1;
+ x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index fec8a46..1076c9a 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -434,6 +434,7 @@ static struct pmu cstate_core_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
};
static struct pmu cstate_pkg_pmu = {
@@ -447,6 +448,7 @@ static struct pmu cstate_pkg_pmu = {
.stop = cstate_pmu_event_stop,
.read = cstate_pmu_event_update,
.capabilities = PERF_PMU_CAP_NO_INTERRUPT,
+ .module = THIS_MODULE,
};
static const struct cstate_model nhm_cstates __initconst = {
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 8b902b6..4c1b7ea 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -161,7 +161,13 @@ static u64 rapl_timer_ms;
static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
{
- return rapl_pmus->pmus[topology_logical_package_id(cpu)];
+ unsigned int pkgid = topology_logical_package_id(cpu);
+
+ /*
+ * The unsigned check also catches the '-1' return value for non
+ * existent mappings in the topology map.
+ */
+ return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL;
}
static inline u64 rapl_read_counter(struct perf_event *event)
@@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event)
/* must be done before validate_group */
pmu = cpu_to_rapl_pmu(event->cpu);
+ if (!pmu)
+ return -EINVAL;
event->cpu = pmu->cpu;
event->pmu_private = pmu;
event->hw.event_base = msr;
@@ -585,6 +593,19 @@ static int rapl_cpu_online(unsigned int cpu)
struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
int target;
+ if (!pmu) {
+ pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
+ if (!pmu)
+ return -ENOMEM;
+
+ raw_spin_lock_init(&pmu->lock);
+ INIT_LIST_HEAD(&pmu->active_list);
+ pmu->pmu = &rapl_pmus->pmu;
+ pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
+ rapl_hrtimer_init(pmu);
+
+ rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
+ }
/*
* Check if there is an online cpu in the package which collects rapl
* events already.
@@ -598,27 +619,6 @@ static int rapl_cpu_online(unsigned int cpu)
return 0;
}
-static int rapl_cpu_prepare(unsigned int cpu)
-{
- struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
-
- if (pmu)
- return 0;
-
- pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
- if (!pmu)
- return -ENOMEM;
-
- raw_spin_lock_init(&pmu->lock);
- INIT_LIST_HEAD(&pmu->active_list);
- pmu->pmu = &rapl_pmus->pmu;
- pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
- pmu->cpu = -1;
- rapl_hrtimer_init(pmu);
- rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu;
- return 0;
-}
-
static int rapl_check_hw_unit(bool apply_quirk)
{
u64 msr_rapl_power_unit_bits;
@@ -697,6 +697,7 @@ static int __init init_rapl_pmus(void)
rapl_pmus->pmu.start = rapl_pmu_event_start;
rapl_pmus->pmu.stop = rapl_pmu_event_stop;
rapl_pmus->pmu.read = rapl_pmu_event_read;
+ rapl_pmus->pmu.module = THIS_MODULE;
return 0;
}
@@ -803,28 +804,21 @@ static int __init rapl_pmu_init(void)
* Install callbacks. Core will call them for each online cpu.
*/
- ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "PERF_X86_RAPL_PREP",
- rapl_cpu_prepare, NULL);
- if (ret)
- goto out;
-
ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
"AP_PERF_X86_RAPL_ONLINE",
rapl_cpu_online, rapl_cpu_offline);
if (ret)
- goto out1;
+ goto out;
ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
if (ret)
- goto out2;
+ goto out1;
rapl_advertise();
return 0;
-out2:
- cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out1:
- cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP);
+ cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
out:
pr_warn("Initialization failed (%d), disabled\n", ret);
cleanup_rapl_pmus();
@@ -835,7 +829,6 @@ module_init(rapl_pmu_init);
static void __exit intel_rapl_exit(void)
{
cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
- cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP);
perf_pmu_unregister(&rapl_pmus->pmu);
cleanup_rapl_pmus();
}
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 19d646a..aec6cc9 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -733,6 +733,7 @@ static int uncore_pmu_register(struct intel_uncore_pmu *pmu)
.start = uncore_pmu_event_start,
.stop = uncore_pmu_event_stop,
.read = uncore_pmu_event_read,
+ .module = THIS_MODULE,
};
} else {
pmu->pmu = *pmu->type->pmu;
diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c
index 2724277..afe8024 100644
--- a/arch/x86/events/intel/uncore_snbep.c
+++ b/arch/x86/events/intel/uncore_snbep.c
@@ -2686,7 +2686,7 @@ static struct intel_uncore_type *hswep_msr_uncores[] = {
void hswep_uncore_cpu_init(void)
{
- int pkg = topology_phys_to_logical_pkg(0);
+ int pkg = boot_cpu_data.logical_proc_id;
if (hswep_uncore_cbox.num_boxes > boot_cpu_data.x86_max_cores)
hswep_uncore_cbox.num_boxes = boot_cpu_data.x86_max_cores;
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index e7636ba..6c98821 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -62,8 +62,10 @@
#define new_len2 145f-144f
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b02038..d4aea31 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
/*
* Pad the second replacement alternative with additional NOPs if it is
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 94aad63..7bcd138 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -204,6 +204,7 @@ void set_personality_ia32(bool);
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
+ unsigned long base; \
unsigned v; \
(pr_reg)[0] = (regs)->r15; \
(pr_reg)[1] = (regs)->r14; \
@@ -226,8 +227,8 @@ do { \
(pr_reg)[18] = (regs)->flags; \
(pr_reg)[19] = (regs)->sp; \
(pr_reg)[20] = (regs)->ss; \
- (pr_reg)[21] = current->thread.fsbase; \
- (pr_reg)[22] = current->thread.gsbase; \
+ rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \
+ rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \
asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
@@ -245,12 +246,13 @@ extern int force_personality32;
#define CORE_DUMP_USE_REGSET
#define ELF_EXEC_PAGESIZE 4096
-/* This is the location that an ET_DYN program is loaded if exec'ed. Typical
- use of this is to invoke "./ld.so someprog" to test out a new version of
- the loader. We need to make sure that it is out of the way of the program
- that it will "exec", and that there is sufficient room for the brk. */
-
-#define ELF_ET_DYN_BASE (TASK_SIZE / 3 * 2)
+/*
+ * This is the base location for PIE (ET_DYN with INTERP) loads. On
+ * 64-bit, this is above 4GB to leave the entire 32-bit address
+ * space open for things that want to use the area for 32-bit pointers.
+ */
+#define ELF_ET_DYN_BASE (mmap_is_ia32() ? 0x000400000UL : \
+ (TASK_SIZE / 3 * 2))
/* This yields a mask that user programs can use to figure out what
instruction set this CPU supports. This could be done in user space,
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index d34bd37..6c50201 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -304,13 +304,13 @@ static inline unsigned type in##bwl##_p(int port) \
static inline void outs##bwl(int port, const void *addr, unsigned long count) \
{ \
asm volatile("rep; outs" #bwl \
- : "+S"(addr), "+c"(count) : "d"(port)); \
+ : "+S"(addr), "+c"(count) : "d"(port) : "memory"); \
} \
\
static inline void ins##bwl(int port, void *addr, unsigned long count) \
{ \
asm volatile("rep; ins" #bwl \
- : "+D"(addr), "+c"(count) : "d"(port)); \
+ : "+D"(addr), "+c"(count) : "d"(port) : "memory"); \
}
BUILDIO(b, b, char)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index e9cd7be..fc3c7e4 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -221,6 +221,9 @@ struct x86_emulate_ops {
void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
+
+ unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
+ void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -290,10 +293,10 @@ struct x86_emulate_ctxt {
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
- int emul_flags;
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8e0a9fe..f9dd224 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -116,9 +116,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1;
}
#endif
- init_new_context_ldt(tsk, mm);
-
- return 0;
+ return init_new_context_ldt(tsk, mm);
}
static inline void destroy_context(struct mm_struct *mm)
{
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 78f3760..b601dda 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -405,6 +405,8 @@
#define MSR_IA32_TSC_ADJUST 0x0000003b
#define MSR_IA32_BNDCFGS 0x00000d90
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
#define MSR_IA32_XSS 0x00000da0
#define FEATURE_CONTROL_LOCKED (1<<0)
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index 0b1ff4c..fffb279 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -7,6 +7,7 @@
bool pat_enabled(void);
void pat_disable(const char *reason);
extern void pat_init(void);
+extern void init_cache_modes(void);
extern int reserve_memtype(u64 start, u64 end,
enum page_cache_mode req_pcm, enum page_cache_mode *ret_pcm);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a300aa1..dead0f3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -68,6 +68,12 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
__chk_range_not_ok((unsigned long __force)(addr), size, limit); \
})
+#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
+# define WARN_ON_IN_IRQ() WARN_ON_ONCE(!in_task())
+#else
+# define WARN_ON_IN_IRQ()
+#endif
+
/**
* access_ok: - Checks if a user space pointer is valid
* @type: Type of access: %VERIFY_READ or %VERIFY_WRITE. Note that
@@ -88,8 +94,11 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
* checks that the pointer is in the user space range - after calling
* this function, memory access functions may still return -EFAULT.
*/
-#define access_ok(type, addr, size) \
- likely(!__range_not_ok(addr, size, user_addr_max()))
+#define access_ok(type, addr, size) \
+({ \
+ WARN_ON_IN_IRQ(); \
+ likely(!__range_not_ok(addr, size, user_addr_max())); \
+})
/*
* These are the main single-value transfer routines. They automatically
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index a12a047..8b678af 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -43,6 +43,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/smap.h>
#include <xen/interface/xen.h>
#include <xen/interface/sched.h>
@@ -214,10 +215,12 @@ privcmd_call(unsigned call,
__HYPERCALL_DECLS;
__HYPERCALL_5ARG(a1, a2, a3, a4, a5);
+ stac();
asm volatile("call *%[call]"
: __HYPERCALL_5PARAM
: [call] "a" (&hypercall_page[call])
: __HYPERCALL_CLOBBER5);
+ clac();
return (long)__res;
}
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 931ced8..b89bef9 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -176,10 +176,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
return -EINVAL;
}
+ if (!enabled) {
+ ++disabled_cpus;
+ return -EINVAL;
+ }
+
if (boot_cpu_physical_apicid != -1U)
ver = boot_cpu_apic_version;
- cpu = __generic_processor_info(id, ver, enabled);
+ cpu = generic_processor_info(id, ver);
if (cpu >= 0)
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
@@ -338,6 +343,14 @@ static void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger,
struct mpc_intsrc mp_irq;
/*
+ * Check bus_irq boundary.
+ */
+ if (bus_irq >= NR_IRQS_LEGACY) {
+ pr_warn("Invalid bus_irq %u for legacy override\n", bus_irq);
+ return;
+ }
+
+ /*
* Convert 'gsi' to 'ioapic.pin'.
*/
ioapic = mp_find_ioapic(gsi);
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f223491..e2ead34 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2070,7 +2070,7 @@ static int allocate_logical_cpuid(int apicid)
return nr_logical_cpuids++;
}
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
{
int cpu, max = nr_cpu_ids;
bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2128,11 +2128,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
if (num_processors >= nr_cpu_ids) {
int thiscpu = max + disabled_cpus;
- if (enabled) {
- pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
- "reached. Processor %d/0x%x ignored.\n",
- max, thiscpu, apicid);
- }
+ pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+ "reached. Processor %d/0x%x ignored.\n",
+ max, thiscpu, apicid);
disabled_cpus++;
return -EINVAL;
@@ -2184,23 +2182,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
apic->x86_32_early_logical_apicid(cpu);
#endif
set_cpu_possible(cpu, true);
-
- if (enabled) {
- num_processors++;
- physid_set(apicid, phys_cpu_present_map);
- set_cpu_present(cpu, true);
- } else {
- disabled_cpus++;
- }
+ physid_set(apicid, phys_cpu_present_map);
+ set_cpu_present(cpu, true);
+ num_processors++;
return cpu;
}
-int generic_processor_info(int apicid, int version)
-{
- return __generic_processor_info(apicid, version, true);
-}
-
int hard_smp_processor_id(void)
{
return read_apic_id();
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7249f15..cf89928 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2116,7 +2116,7 @@ static inline void __init check_timer(void)
int idx;
idx = find_irq_entry(apic1, pin1, mp_INT);
if (idx != -1 && irq_trigger(idx))
- unmask_ioapic_irq(irq_get_chip_data(0));
+ unmask_ioapic_irq(irq_get_irq_data(0));
}
irq_domain_deactivate_irq(irq_data);
irq_domain_activate_irq(irq_data);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 3dfca7b..a5b47c1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -955,6 +955,9 @@ static int threshold_create_bank(unsigned int cpu, unsigned int bank)
const char *name = get_name(bank, NULL);
int err = 0;
+ if (!dev)
+ return -ENODEV;
+
if (is_shared_bank(bank)) {
nb = node_to_amd_nb(amd_get_nb_id(cpu));
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index cdc0dea..13dbcc0 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -34,6 +34,7 @@
#include <linux/mm.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
@@ -1046,6 +1047,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
return 0;
}
+static bool is_blacklisted(unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
+ pr_err_once("late loading on model 79 is disabled.\n");
+ return true;
+ }
+
+ return false;
+}
+
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
@@ -1054,6 +1067,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
const struct firmware *firmware;
enum ucode_state ret;
+ if (is_blacklisted(cpu))
+ return UCODE_NFOUND;
+
sprintf(name, "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
@@ -1078,6 +1094,9 @@ static int get_ucode_user(void *to, const void *from, size_t n)
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
+ if (is_blacklisted(cpu))
+ return UCODE_NFOUND;
+
return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
}
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index c114b13..7052d9a 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -130,11 +130,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
fpu__activate_fpstate_write(fpu);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
ret = copyin_to_xsaves(kbuf, ubuf, xsave);
- else
+ } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+ /* xcomp_bv must be 0 when using uncompacted format */
+ if (!ret && xsave->header.xcomp_bv)
+ ret = -EINVAL;
+ }
+
/*
* In case of failure, mark all states as init:
*/
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a184c21..3ec0d2d 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -329,6 +329,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
} else {
err = __copy_from_user(&fpu->state.xsave,
buf_fx, state_size);
+
+ /* xcomp_bv must be 0 when using uncompacted format */
+ if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv)
+ err = -EINVAL;
}
if (err || __copy_from_user(&env, buf, sizeof(env))) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 9cf697c..77f17cb 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -141,7 +141,8 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) || preempt_count() > 1;
+ n.halted = is_idle_task(current) || preempt_count() > 1 ||
+ rcu_preempt_depth();
init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock);
@@ -152,6 +153,8 @@ void kvm_async_pf_task_wait(u32 token)
if (hlist_unhashed(&n.link))
break;
+ rcu_irq_exit();
+
if (!n.halted) {
local_irq_enable();
schedule();
@@ -160,11 +163,11 @@ void kvm_async_pf_task_wait(u32 token)
/*
* We cannot reschedule. So halt.
*/
- rcu_irq_exit();
native_safe_halt();
local_irq_disable();
- rcu_irq_enter();
}
+
+ rcu_irq_enter();
}
if (!n.halted)
finish_swait(&n.wq, &wait);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3..0887d2a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,6 +136,123 @@ void release_thread(struct task_struct *dead_task)
}
}
+enum which_selector {
+ FS,
+ GS
+};
+
+/*
+ * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
+ * not available. The goal is to be reasonably fast on non-FSGSBASE systems.
+ * It's forcibly inlined because it'll generate better code and this function
+ * is hot.
+ */
+static __always_inline void save_base_legacy(struct task_struct *prev_p,
+ unsigned short selector,
+ enum which_selector which)
+{
+ if (likely(selector == 0)) {
+ /*
+ * On Intel (without X86_BUG_NULL_SEG), the segment base could
+ * be the pre-existing saved base or it could be zero. On AMD
+ * (with X86_BUG_NULL_SEG), the segment base could be almost
+ * anything.
+ *
+ * This branch is very hot (it's hit twice on almost every
+ * context switch between 64-bit programs), and avoiding
+ * the RDMSR helps a lot, so we just assume that whatever
+ * value is already saved is correct. This matches historical
+ * Linux behavior, so it won't break existing applications.
+ *
+ * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
+ * report that the base is zero, it needs to actually be zero:
+ * see the corresponding logic in load_seg_legacy.
+ */
+ } else {
+ /*
+ * If the selector is 1, 2, or 3, then the base is zero on
+ * !X86_BUG_NULL_SEG CPUs and could be anything on
+ * X86_BUG_NULL_SEG CPUs. In the latter case, Linux
+ * has never attempted to preserve the base across context
+ * switches.
+ *
+ * If selector > 3, then it refers to a real segment, and
+ * saving the base isn't necessary.
+ */
+ if (which == FS)
+ prev_p->thread.fsbase = 0;
+ else
+ prev_p->thread.gsbase = 0;
+ }
+}
+
+static __always_inline void save_fsgs(struct task_struct *task)
+{
+ savesegment(fs, task->thread.fsindex);
+ savesegment(gs, task->thread.gsindex);
+ save_base_legacy(task, task->thread.fsindex, FS);
+ save_base_legacy(task, task->thread.gsindex, GS);
+}
+
+static __always_inline void loadseg(enum which_selector which,
+ unsigned short sel)
+{
+ if (which == FS)
+ loadsegment(fs, sel);
+ else
+ load_gs_index(sel);
+}
+
+static __always_inline void load_seg_legacy(unsigned short prev_index,
+ unsigned long prev_base,
+ unsigned short next_index,
+ unsigned long next_base,
+ enum which_selector which)
+{
+ if (likely(next_index <= 3)) {
+ /*
+ * The next task is using 64-bit TLS, is not using this
+ * segment at all, or is having fun with arcane CPU features.
+ */
+ if (next_base == 0) {
+ /*
+ * Nasty case: on AMD CPUs, we need to forcibly zero
+ * the base.
+ */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ loadseg(which, __USER_DS);
+ loadseg(which, next_index);
+ } else {
+ /*
+ * We could try to exhaustively detect cases
+ * under which we can skip the segment load,
+ * but there's really only one case that matters
+ * for performance: if both the previous and
+ * next states are fully zeroed, we can skip
+ * the load.
+ *
+ * (This assumes that prev_base == 0 has no
+ * false positives. This is the case on
+ * Intel-style CPUs.)
+ */
+ if (likely(prev_index | next_index | prev_base))
+ loadseg(which, next_index);
+ }
+ } else {
+ if (prev_index != next_index)
+ loadseg(which, next_index);
+ wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
+ next_base);
+ }
+ } else {
+ /*
+ * The next task is using a real segment. Loading the selector
+ * is sufficient.
+ */
+ loadseg(which, next_index);
+ }
+}
+
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -216,10 +333,19 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp,
unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
+ WARN_ON_ONCE(regs != current_pt_regs());
+
+ if (static_cpu_has(X86_BUG_NULL_SEG)) {
+ /* Loading zero below won't clear the base. */
+ loadsegment(fs, __USER_DS);
+ load_gs_index(__USER_DS);
+ }
+
loadsegment(fs, 0);
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
+
regs->ip = new_ip;
regs->sp = new_sp;
regs->cs = _cs;
@@ -264,7 +390,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- unsigned prev_fsindex, prev_gsindex;
fpu_switch_t fpu_switch;
fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -274,8 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*
* (e.g. xen_load_tls())
*/
- savesegment(fs, prev_fsindex);
- savesegment(gs, prev_gsindex);
+ save_fsgs(prev_p);
/*
* Load TLS before restoring any segments so that segment loads
@@ -314,108 +438,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
- /*
- * Switch FS and GS.
- *
- * These are even more complicated than DS and ES: they have
- * 64-bit bases are that controlled by arch_prctl. The bases
- * don't necessarily match the selectors, as user code can do
- * any number of things to cause them to be inconsistent.
- *
- * We don't promise to preserve the bases if the selectors are
- * nonzero. We also don't promise to preserve the base if the
- * selector is zero and the base doesn't match whatever was
- * most recently passed to ARCH_SET_FS/GS. (If/when the
- * FSGSBASE instructions are enabled, we'll need to offer
- * stronger guarantees.)
- *
- * As an invariant,
- * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
- * impossible.
- */
- if (next->fsindex) {
- /* Loading a nonzero value into FS sets the index and base. */
- loadsegment(fs, next->fsindex);
- } else {
- if (next->fsbase) {
- /* Next index is zero but next base is nonzero. */
- if (prev_fsindex)
- loadsegment(fs, 0);
- wrmsrl(MSR_FS_BASE, next->fsbase);
- } else {
- /* Next base and index are both zero. */
- if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
- /*
- * We don't know the previous base and can't
- * find out without RDMSR. Forcibly clear it.
- */
- loadsegment(fs, __USER_DS);
- loadsegment(fs, 0);
- } else {
- /*
- * If the previous index is zero and ARCH_SET_FS
- * didn't change the base, then the base is
- * also zero and we don't need to do anything.
- */
- if (prev->fsbase || prev_fsindex)
- loadsegment(fs, 0);
- }
- }
- }
- /*
- * Save the old state and preserve the invariant.
- * NB: if prev_fsindex == 0, then we can't reliably learn the base
- * without RDMSR because Intel user code can zero it without telling
- * us and AMD user code can program any 32-bit value without telling
- * us.
- */
- if (prev_fsindex)
- prev->fsbase = 0;
- prev->fsindex = prev_fsindex;
-
- if (next->gsindex) {
- /* Loading a nonzero value into GS sets the index and base. */
- load_gs_index(next->gsindex);
- } else {
- if (next->gsbase) {
- /* Next index is zero but next base is nonzero. */
- if (prev_gsindex)
- load_gs_index(0);
- wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
- } else {
- /* Next base and index are both zero. */
- if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
- /*
- * We don't know the previous base and can't
- * find out without RDMSR. Forcibly clear it.
- *
- * This contains a pointless SWAPGS pair.
- * Fixing it would involve an explicit check
- * for Xen or a new pvop.
- */
- load_gs_index(__USER_DS);
- load_gs_index(0);
- } else {
- /*
- * If the previous index is zero and ARCH_SET_GS
- * didn't change the base, then the base is
- * also zero and we don't need to do anything.
- */
- if (prev->gsbase || prev_gsindex)
- load_gs_index(0);
- }
- }
- }
- /*
- * Save the old state and preserve the invariant.
- * NB: if prev_gsindex == 0, then we can't reliably learn the base
- * without RDMSR because Intel user code can zero it without telling
- * us and AMD user code can program any 32-bit value without telling
- * us.
- */
- if (prev_gsindex)
- prev->gsbase = 0;
- prev->gsindex = prev_gsindex;
+ load_seg_legacy(prev->fsindex, prev->fsbase,
+ next->fsindex, next->fsbase, FS);
+ load_seg_legacy(prev->gsindex, prev->gsbase,
+ next->gsindex, next->gsbase, GS);
switch_fpu_finish(next_fpu, fpu_switch);
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9c337b0..feaab07 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1054,6 +1054,13 @@ void __init setup_arch(char **cmdline_p)
max_possible_pfn = max_pfn;
/*
+ * This call is required when the CPU does not support PAT. If
+ * mtrr_bp_init() invoked it already via pat_init() the call has no
+ * effect.
+ */
+ init_cache_modes();
+
+ /*
* Define random base addresses for memory sections after max_pfn is
* defined and before each memory section base is used.
*/
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 36171bc..9fe7b9e 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -181,6 +181,12 @@ static void smp_callin(void)
smp_store_cpu_info(cpuid);
/*
+ * The topology information must be up to date before
+ * calibrate_delay() and notify_cpu_starting().
+ */
+ set_cpu_sibling_map(raw_smp_processor_id());
+
+ /*
* Get our bogomips.
* Update loops_per_jiffy in cpu_data. Previous call to
* smp_store_cpu_info() stored a value that is close but not as
@@ -190,11 +196,6 @@ static void smp_callin(void)
cpu_data(cpuid).loops_per_jiffy = loops_per_jiffy;
pr_debug("Stack at about %p\n", &cpuid);
- /*
- * This must be done before setting cpu_online_mask
- * or calling notify_cpu_starting.
- */
- set_cpu_sibling_map(raw_smp_processor_id());
wmb();
notify_cpu_starting(cpuid);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index eea88fe..44bf5cf 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -694,6 +694,7 @@ unsigned long native_calibrate_tsc(void)
crystal_khz = 24000; /* 24.0 MHz */
break;
case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ATOM_DENVERTON:
crystal_khz = 25000; /* 25.0 MHz */
break;
case INTEL_FAM6_ATOM_GOLDMONT:
@@ -1381,12 +1382,10 @@ void __init tsc_init(void)
unsigned long calibrate_delay_is_known(void)
{
int sibling, cpu = smp_processor_id();
- struct cpumask *mask = topology_core_cpumask(cpu);
+ int constant_tsc = cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC);
+ const struct cpumask *mask = topology_core_cpumask(cpu);
- if (!tsc_disabled && !cpu_has(&cpu_data(cpu), X86_FEATURE_CONSTANT_TSC))
- return 0;
-
- if (!mask)
+ if (tsc_disabled || !constant_tsc || !mask)
return 0;
sibling = cpumask_any_but(mask, cpu);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 649d8f2..91af75e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -456,7 +456,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
cpuid_mask(&entry->ecx, CPUID_7_ECX);
/* PKU is not yet implemented for shadow paging. */
- if (!tdp_enabled)
+ if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
} else {
entry->ebx = 0;
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 35058c2..9368fec 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -144,6 +144,14 @@ static inline bool guest_cpuid_has_rtm(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_RTM));
}
+static inline bool guest_cpuid_has_mpx(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_MPX));
+}
+
static inline bool guest_cpuid_has_rdtscp(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9f676ad..72b737b 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2543,7 +2543,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
u64 smbase;
int ret;
- if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
/*
@@ -2592,11 +2592,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
ctxt->ops->set_nmi_mask(ctxt, false);
- ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
- ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
@@ -2738,6 +2738,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
@@ -5312,6 +5313,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
const struct x86_emulate_ops *ops = ctxt->ops;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = ctxt->dst.type;
+ unsigned emul_flags;
ctxt->mem_read.pos = 0;
@@ -5326,6 +5328,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
+ emul_flags = ctxt->ops->get_hflags(ctxt);
if (unlikely(ctxt->d &
(No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
@@ -5359,7 +5362,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(ctxt, &ctxt->dst);
}
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5388,7 +5391,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5442,7 +5445,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5f24127..d29c745 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3649,19 +3649,19 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
unsigned level, unsigned gpte)
{
/*
- * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
- * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
- * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
- */
- gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
-
- /*
* The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
* If it is clear, there are no large pages at this level, so clear
* PT_PAGE_SIZE_MASK in gpte if that is the case.
*/
gpte &= level - mmu->last_nonleaf_level;
+ /*
+ * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
+ * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
+ * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+ */
+ gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
+
return gpte & PT_PAGE_SIZE_MASK;
}
@@ -4169,6 +4169,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
+ update_last_nonleaf_level(vcpu, context);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a011054..3736390 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -324,10 +324,11 @@ retry_walk:
--walker->level;
index = PT_INDEX(addr, walker->level);
-
table_gfn = gpte_to_gfn(pte);
offset = index * sizeof(pt_element_t);
pte_gpa = gfn_to_gpa(table_gfn) + offset;
+
+ BUG_ON(walker->level < 1);
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index 9d4a850..5ab4a36 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -294,7 +294,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
((u64)1 << edx.split.bit_width_fixed) - 1;
}
- pmu->global_ctrl = ((1 << pmu->nr_arch_gp_counters) - 1) |
+ pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
(((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
pmu->global_ctrl_mask = ~pmu->global_ctrl;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 89b98e0..a8ae57a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2167,46 +2167,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
struct pi_desc old, new;
unsigned int dest;
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ /*
+ * In case of hot-plug or hot-unplug, we may have to undo
+ * vmx_vcpu_pi_put even if there is no assigned device. And we
+ * always keep PI.NDST up to date for simplicity: it makes the
+ * code easier, and CPU migration is not a fast path.
+ */
+ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ return;
+
+ /*
+ * First handle the simple case where no cmpxchg is necessary; just
+ * allow posting non-urgent interrupts.
+ *
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+ * expects the VCPU to be on the blocked_vcpu_list that matches
+ * PI.NDST.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+ vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
return;
+ }
+ /* The full case. */
do {
old.control = new.control = pi_desc->control;
- /*
- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
- * are two possible cases:
- * 1. After running 'pre_block', context switch
- * happened. For this case, 'sn' was set in
- * vmx_vcpu_put(), so we need to clear it here.
- * 2. After running 'pre_block', we were blocked,
- * and woken up by some other guy. For this case,
- * we don't need to do anything, 'pi_post_block'
- * will do everything for us. However, we cannot
- * check whether it is case #1 or case #2 here
- * (maybe, not needed), so we also clear sn here,
- * I think it is not a big deal.
- */
- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
- if (vcpu->cpu != cpu) {
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
- }
+ dest = cpu_physical_id(cpu);
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- }
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
- /* Allow posting non-urgent interrupts */
new.sn = 0;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
}
static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -2455,7 +2453,7 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
if (!(vmcs12->exception_bitmap & (1u << nr)))
return 0;
- nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+ nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
vmcs_read32(VM_EXIT_INTR_INFO),
vmcs_readl(EXIT_QUALIFICATION));
return 1;
@@ -2987,7 +2985,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
+ if (!kvm_mpx_supported() ||
+ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
return 1;
msr_info->data = vmcs_read64(GUEST_BNDCFGS);
break;
@@ -3069,7 +3068,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_BNDCFGS:
- if (!kvm_mpx_supported())
+ if (!kvm_mpx_supported() ||
+ (!msr_info->host_initiated && !guest_cpuid_has_mpx(vcpu)))
+ return 1;
+ if (is_noncanonical_address(data & PAGE_MASK) ||
+ (data & MSR_IA32_BNDCFGS_RSVD))
return 1;
vmcs_write64(GUEST_BNDCFGS, data);
break;
@@ -4756,21 +4759,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
/*
- * Currently, we don't support urgent interrupt,
- * all interrupts are recognized as non-urgent
- * interrupt, so we cannot post interrupts when
- * 'SN' is set.
+ * The vector of interrupt to be delivered to vcpu had
+ * been set in PIR before this function.
+ *
+ * Following cases will be reached in this block, and
+ * we always send a notification event in all cases as
+ * explained below.
+ *
+ * Case 1: vcpu keeps in non-root mode. Sending a
+ * notification event posts the interrupt to vcpu.
*
- * If the vcpu is in guest mode, it means it is
- * running instead of being scheduled out and
- * waiting in the run queue, and that's the only
- * case when 'SN' is set currently, warning if
- * 'SN' is set.
+ * Case 2: vcpu exits to root mode and is still
+ * runnable. PIR will be synced to vIRR before the
+ * next vcpu entry. Sending a notification event in
+ * this case has no effect, as vcpu is not in root
+ * mode.
+ *
+ * Case 3: vcpu exits to root mode and is blocked.
+ * vcpu_block() has already synced PIR to vIRR and
+ * never blocks vcpu if vIRR is not cleared. Therefore,
+ * a blocked vcpu here does not wait for any requested
+ * interrupts in PIR, and sending a notification event
+ * which has no effect is safe here.
*/
- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
@@ -6474,7 +6486,6 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
- vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
@@ -9183,6 +9194,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+ /*
+ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+ * or POSTED_INTR_WAKEUP_VECTOR.
+ */
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+ vmx->pi_desc.sn = 1;
+
return &vmx->vcpu;
free_vmcs:
@@ -9992,6 +10010,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
page_to_phys(vmx->nested.virtual_apic_page));
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ } else {
+#ifdef CONFIG_X86_64
+ exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING;
+#endif
}
if (cpu_has_vmx_msr_bitmap() &&
@@ -10667,7 +10690,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
*/
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
- kvm_set_cr4(vcpu, vmcs12->host_cr4);
+ vmx_set_cr4(vcpu, vmcs12->host_cr4);
nested_ept_uninit_mmu_context(vcpu);
@@ -10996,6 +11019,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ do {
+ old.control = new.control = pi_desc->control;
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+}
+
/*
* This routine does the following things for vCPU which is going
* to be blocked if VT-d PI is enabled.
@@ -11011,7 +11065,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
*/
static int pi_pre_block(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
unsigned int dest;
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11021,34 +11074,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
!kvm_vcpu_apicv_active(vcpu))
return 0;
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
do {
old.control = new.control = pi_desc->control;
- /*
- * We should not block the vCPU if
- * an interrupt is posted for it.
- */
- if (pi_test_on(pi_desc) == 1) {
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
-
- return 1;
- }
-
WARN((pi_desc->sn == 1),
"Warning: SN field of posted-interrupts "
"is set before blocking\n");
@@ -11070,10 +11109,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
- return 0;
+ /* We should not block the vCPU if an interrupt is posted for it. */
+ if (pi_test_on(pi_desc) == 1)
+ __pi_post_block(vcpu);
+
+ local_irq_enable();
+ return (vcpu->pre_pcpu == -1);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11089,44 +11133,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
static void pi_post_block(struct kvm_vcpu *vcpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
- unsigned long flags;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (vcpu->pre_pcpu == -1)
return;
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if(vcpu->pre_pcpu != -1) {
- spin_lock_irqsave(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
- }
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ __pi_post_block(vcpu);
+ local_irq_enable();
}
static void vmx_post_block(struct kvm_vcpu *vcpu)
@@ -11154,7 +11167,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
struct kvm_lapic_irq irq;
struct kvm_vcpu *vcpu;
struct vcpu_data vcpu_info;
- int idx, ret = -EINVAL;
+ int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
@@ -11163,7 +11176,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
idx = srcu_read_lock(&kvm->irq_srcu);
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+ if (guest_irq >= irq_rt->nr_rt_entries ||
+ hlist_empty(&irq_rt->map[guest_irq])) {
+ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+ guest_irq, irq_rt->nr_rt_entries);
+ goto out;
+ }
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
if (e->type != KVM_IRQ_ROUTING_MSI)
@@ -11206,12 +11224,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
if (set)
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else {
- /* suppress notification event before unposting */
- pi_set_sn(vcpu_to_pi_desc(vcpu));
+ else
ret = irq_set_vcpu_affinity(host_irq, NULL);
- pi_clear_sn(vcpu_to_pi_desc(vcpu));
- }
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 62cde4f..595f814 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4999,6 +4999,8 @@ static bool emulator_get_segment(struct x86_emulate_ctxt *ctxt, u16 *selector,
if (var.unusable) {
memset(desc, 0, sizeof(*desc));
+ if (base3)
+ *base3 = 0;
return false;
}
@@ -5154,6 +5156,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
}
+static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
+{
+ return emul_to_vcpu(ctxt)->arch.hflags;
+}
+
+static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
+{
+ kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
@@ -5193,6 +5205,8 @@ static const struct x86_emulate_ops emulate_ops = {
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
+ .get_hflags = emulator_get_hflags,
+ .set_hflags = emulator_set_hflags,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -5236,6 +5250,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5245,7 +5261,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
- ctxt->emul_flags = vcpu->arch.hflags;
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@ -5452,37 +5467,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
@@ -5636,11 +5640,10 @@ restart:
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
- if (vcpu->arch.hflags != ctxt->emul_flags)
- kvm_set_hflags(vcpu, ctxt->emul_flags);
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);
@@ -6111,7 +6114,8 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt)
kvm_x86_ops->patch_hypercall(vcpu, instruction);
- return emulator_write_emulated(ctxt, rip, instruction, 3, NULL);
+ return emulator_write_emulated(ctxt, rip, instruction, 3,
+ &ctxt->exception);
}
static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index d376e4b..04c067b 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -84,7 +84,7 @@ ENTRY(copy_user_generic_unrolled)
movl %edx,%ecx
andl $63,%edx
shrl $6,%ecx
- jz 17f
+ jz .L_copy_short_string
1: movq (%rsi),%r8
2: movq 1*8(%rsi),%r9
3: movq 2*8(%rsi),%r10
@@ -105,7 +105,8 @@ ENTRY(copy_user_generic_unrolled)
leaq 64(%rdi),%rdi
decl %ecx
jnz 1b
-17: movl %edx,%ecx
+.L_copy_short_string:
+ movl %edx,%ecx
andl $7,%edx
shrl $3,%ecx
jz 20f
@@ -221,6 +222,8 @@ EXPORT_SYMBOL(copy_user_generic_string)
*/
ENTRY(copy_user_enhanced_fast_string)
ASM_STAC
+ cmpl $64,%edx
+ jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
movl %edx,%ecx
1: rep
movsb
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9f72ca3..1dd7960 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -191,8 +191,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
- struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -208,7 +207,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* valid VMA, so we should never reach this without a
* valid VMA.
*/
- if (!vma) {
+ if (!pkey) {
WARN_ONCE(1, "PKU fault with no VMA passed in");
info->si_pkey = 0;
return;
@@ -218,13 +217,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* absolutely guranteed to be 100% accurate because of
* the race explained above.
*/
- info->si_pkey = vma_pkey(vma);
+ info->si_pkey = *pkey;
}
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk, struct vm_area_struct *vma,
- int fault)
+ struct task_struct *tsk, u32 *pkey, int fault)
{
unsigned lsb = 0;
siginfo_t info;
@@ -239,7 +237,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, vma);
+ fill_sig_info_pkey(si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
@@ -718,8 +716,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
unsigned long flags;
int sig;
- /* No context means no VMA to pass down */
- struct vm_area_struct *vma = NULL;
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -744,7 +740,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
/* XXX: hwpoison faults will set the wrong code. */
force_sig_info_fault(signal, si_code, address,
- tsk, vma, 0);
+ tsk, NULL, 0);
}
/*
@@ -853,8 +849,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- int si_code)
+ unsigned long address, u32 *pkey, int si_code)
{
struct task_struct *tsk = current;
@@ -902,7 +897,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
return;
}
@@ -915,9 +910,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, u32 *pkey)
{
- __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
}
static void
@@ -925,6 +920,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct vm_area_struct *vma, int si_code)
{
struct mm_struct *mm = current->mm;
+ u32 pkey;
+
+ if (vma)
+ pkey = vma_pkey(vma);
/*
* Something tried to access memory that isn't in our memory map..
@@ -932,7 +931,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
*/
up_read(&mm->mmap_sem);
- __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+ __bad_area_nosemaphore(regs, error_code, address,
+ (vma) ? &pkey : NULL, si_code);
}
static noinline void
@@ -975,7 +975,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- struct vm_area_struct *vma, unsigned int fault)
+ u32 *pkey, unsigned int fault)
{
struct task_struct *tsk = current;
int code = BUS_ADRERR;
@@ -1002,13 +1002,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
}
static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- unsigned int fault)
+ unsigned long address, u32 *pkey, unsigned int fault)
{
if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
no_context(regs, error_code, address, 0, 0);
@@ -1032,9 +1031,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
VM_FAULT_HWPOISON_LARGE))
- do_sigbus(regs, error_code, address, vma, fault);
+ do_sigbus(regs, error_code, address, pkey, fault);
else if (fault & VM_FAULT_SIGSEGV)
- bad_area_nosemaphore(regs, error_code, address, vma);
+ bad_area_nosemaphore(regs, error_code, address, pkey);
else
BUG();
}
@@ -1220,6 +1219,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
int fault, major = 0;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ u32 pkey;
tsk = current;
mm = tsk->mm;
@@ -1420,9 +1420,10 @@ good_area:
return;
}
+ pkey = vma_pkey(vma);
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) {
- mm_fault_error(regs, error_code, address, vma, fault);
+ mm_fault_error(regs, error_code, address, &pkey, fault);
return;
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 14b9dd7..3e27ded 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -94,10 +94,10 @@ __setup("noexec32=", nonx32_setup);
*/
void sync_global_pgds(unsigned long start, unsigned long end, int removed)
{
- unsigned long address;
+ unsigned long addr;
- for (address = start; address <= end; address += PGDIR_SIZE) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
+ for (addr = start; addr <= end; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
+ const pgd_t *pgd_ref = pgd_offset_k(addr);
struct page *page;
/*
@@ -113,7 +113,7 @@ void sync_global_pgds(unsigned long start, unsigned long end, int removed)
pgd_t *pgd;
spinlock_t *pgt_lock;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ pgd = (pgd_t *)page_address(page) + pgd_index(addr);
/* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
@@ -689,7 +689,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
- magic = (unsigned long)page->lru.next;
+ magic = (unsigned long)page->freelist;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e4f8009..a75103e 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -293,7 +293,7 @@ siginfo_t *mpx_generate_siginfo(struct pt_regs *regs)
* We were not able to extract an address from the instruction,
* probably because there was something invalid in it.
*/
- if (info->si_addr == (void *)-1) {
+ if (info->si_addr == (void __user *)-1) {
err = -EINVAL;
goto err_out;
}
@@ -525,15 +525,7 @@ int mpx_handle_bd_fault(void)
if (!kernel_managing_mpx_tables(current->mm))
return -EINVAL;
- if (do_mpx_bt_fault()) {
- force_sig(SIGSEGV, current);
- /*
- * The force_sig() is essentially "handling" this
- * exception, so we do not pass up the error
- * from do_mpx_bt_fault().
- */
- }
- return 0;
+ return do_mpx_bt_fault();
}
/*
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 83e701f..89d7907 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -36,14 +36,14 @@
#undef pr_fmt
#define pr_fmt(fmt) "" fmt
-static bool boot_cpu_done;
-
-static int __read_mostly __pat_enabled = IS_ENABLED(CONFIG_X86_PAT);
-static void init_cache_modes(void);
+static bool __read_mostly boot_cpu_done;
+static bool __read_mostly pat_disabled = !IS_ENABLED(CONFIG_X86_PAT);
+static bool __read_mostly pat_initialized;
+static bool __read_mostly init_cm_done;
void pat_disable(const char *reason)
{
- if (!__pat_enabled)
+ if (pat_disabled)
return;
if (boot_cpu_done) {
@@ -51,10 +51,8 @@ void pat_disable(const char *reason)
return;
}
- __pat_enabled = 0;
+ pat_disabled = true;
pr_info("x86/PAT: %s\n", reason);
-
- init_cache_modes();
}
static int __init nopat(char *str)
@@ -66,7 +64,7 @@ early_param("nopat", nopat);
bool pat_enabled(void)
{
- return !!__pat_enabled;
+ return pat_initialized;
}
EXPORT_SYMBOL_GPL(pat_enabled);
@@ -204,6 +202,8 @@ static void __init_cache_modes(u64 pat)
update_cache_mode_entry(i, cache);
}
pr_info("x86/PAT: Configuration [0-7]: %s\n", pat_msg);
+
+ init_cm_done = true;
}
#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
@@ -224,6 +224,7 @@ static void pat_bsp_init(u64 pat)
}
wrmsrl(MSR_IA32_CR_PAT, pat);
+ pat_initialized = true;
__init_cache_modes(pat);
}
@@ -241,10 +242,9 @@ static void pat_ap_init(u64 pat)
wrmsrl(MSR_IA32_CR_PAT, pat);
}
-static void init_cache_modes(void)
+void init_cache_modes(void)
{
u64 pat = 0;
- static int init_cm_done;
if (init_cm_done)
return;
@@ -286,8 +286,6 @@ static void init_cache_modes(void)
}
__init_cache_modes(pat);
-
- init_cm_done = 1;
}
/**
@@ -305,10 +303,8 @@ void pat_init(void)
u64 pat;
struct cpuinfo_x86 *c = &boot_cpu_data;
- if (!pat_enabled()) {
- init_cache_modes();
+ if (pat_disabled)
return;
- }
if ((c->x86_vendor == X86_VENDOR_INTEL) &&
(((c->x86 == 0x6) && (c->x86_model <= 0xd)) ||
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a7655f6..75fb011 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -263,8 +263,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
{
struct flush_tlb_info info;
- if (end == 0)
- end = start + PAGE_SIZE;
info.flush_mm = mm;
info.flush_start = start;
info.flush_end = end;
@@ -393,7 +391,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
}
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), mm, start, 0UL);
+ flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
preempt_enable();
}
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 350f709..7913b69 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -212,8 +212,8 @@ static void arch_perfmon_setup_counters(void)
eax.full = cpuid_eax(0xa);
/* Workaround for BIOS bugs in 6/15. Taken from perfmon2 */
- if (eax.split.version_id == 0 && __this_cpu_read(cpu_info.x86) == 6 &&
- __this_cpu_read(cpu_info.x86_model) == 15) {
+ if (eax.split.version_id == 0 && boot_cpu_data.x86 == 6 &&
+ boot_cpu_data.x86_model == 15) {
eax.split.version_id = 2;
eax.split.num_counters = 2;
eax.split.bit_width = 40;
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 6d52b94..20fa7c8 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -571,3 +571,35 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, pci_invalid_bar);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6f60, pci_invalid_bar);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fa0, pci_invalid_bar);
DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, pci_invalid_bar);
+
+/*
+ * Apple MacBook Pro: Avoid [mem 0x7fa00000-0x7fbfffff]
+ *
+ * Using the [mem 0x7fa00000-0x7fbfffff] region, e.g., by assigning it to
+ * the 00:1c.0 Root Port, causes a conflict with [io 0x1804], which is used
+ * for soft poweroff and suspend-to-RAM.
+ *
+ * As far as we know, this is related to the address space, not to the Root
+ * Port itself. Attaching the quirk to the Root Port is a convenience, but
+ * it could probably also be a standalone DMI quirk.
+ *
+ * https://bugzilla.kernel.org/show_bug.cgi?id=103211
+ */
+static void quirk_apple_mbp_poweroff(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct resource *res;
+
+ if ((!dmi_match(DMI_PRODUCT_NAME, "MacBookPro11,4") &&
+ !dmi_match(DMI_PRODUCT_NAME, "MacBookPro11,5")) ||
+ pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x1c, 0))
+ return;
+
+ res = request_mem_region(0x7fa00000, 0x200000,
+ "MacBook Pro poweroff workaround");
+ if (res)
+ dev_info(dev, "claimed %s %pR\n", res->name, res);
+ else
+ dev_info(dev, "can't work around MacBook Pro poweroff issue\n");
+}
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x8c10, quirk_apple_mbp_poweroff);
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
index dd6cfa4..75029d0 100644
--- a/arch/x86/platform/intel-mid/device_libs/Makefile
+++ b/arch/x86/platform/intel-mid/device_libs/Makefile
@@ -15,7 +15,7 @@ obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o
obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
# SPI Devices
-obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_spidev.o
+obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o
# I2C Devices
obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
index 30c601b..27186ad 100644
--- a/arch/x86/platform/intel-mid/device_libs/platform_spidev.c
+++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
@@ -11,6 +11,7 @@
* of the License.
*/
+#include <linux/err.h>
#include <linux/init.h>
#include <linux/sfi.h>
#include <linux/spi/pxa2xx_spi.h>
@@ -34,6 +35,9 @@ static void __init *spidev_platform_data(void *info)
{
struct spi_board_info *spi_info = info;
+ if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
+ return ERR_PTR(-ENODEV);
+
spi_info->mode = SPI_MODE_0;
spi_info->controller_data = &spidev_spi_chip;
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 555b9fa..7dbdb78 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
targets += purgatory.ro
+KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index 0c2fae8..73eb7fd 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -992,11 +992,12 @@ static void emit_relocs(int as_text, int use_real_mode)
die("Segment relocations found but --realmode not specified\n");
/* Order the relocations for more efficient processing */
- sort_relocs(&relocs16);
sort_relocs(&relocs32);
#if ELF_BITS == 64
sort_relocs(&relocs32neg);
sort_relocs(&relocs64);
+#else
+ sort_relocs(&relocs16);
#endif
/* Print the relocations */