summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig3
-rw-r--r--arch/x86/boot/compressed/Makefile5
-rw-r--r--arch/x86/boot/compressed/eboot.c47
-rw-r--r--arch/x86/include/asm/bootparam_utils.h20
-rw-r--r--arch/x86/include/asm/efi.h7
-rw-r--r--arch/x86/include/asm/hugetlb.h1
-rw-r--r--arch/x86/include/asm/kprobes.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h4
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/syscall.h4
-rw-r--r--arch/x86/include/asm/tlb.h2
-rw-r--r--arch/x86/include/asm/xen/hypercall.h4
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h1
-rw-r--r--arch/x86/include/uapi/asm/msr-index.h1
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl48
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh41
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c24
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_ds.c13
-rw-r--r--arch/x86/kernel/kprobes/core.c5
-rw-r--r--arch/x86/kernel/microcode_core_early.c38
-rw-r--r--arch/x86/kernel/microcode_intel_early.c30
-rw-r--r--arch/x86/kernel/paravirt.c25
-rw-r--r--arch/x86/kernel/setup.c55
-rw-r--r--arch/x86/kernel/smpboot.c3
-rw-r--r--arch/x86/kvm/lapic.c2
-rw-r--r--arch/x86/kvm/x86.c69
-rw-r--r--arch/x86/lguest/boot.c1
-rw-r--r--arch/x86/lib/usercopy_64.c4
-rw-r--r--arch/x86/mm/fault.c6
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/init.c10
-rw-r--r--arch/x86/mm/init_32.c10
-rw-r--r--arch/x86/mm/init_64.c73
-rw-r--r--arch/x86/mm/ioremap.c7
-rw-r--r--arch/x86/mm/numa.c9
-rw-r--r--arch/x86/mm/pageattr-test.c2
-rw-r--r--arch/x86/mm/pageattr.c12
-rw-r--r--arch/x86/mm/pat.c7
-rw-r--r--arch/x86/mm/pgtable.c7
-rw-r--r--arch/x86/pci/common.c11
-rw-r--r--arch/x86/pci/xen.c6
-rw-r--r--arch/x86/platform/efi/efi.c168
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/xen/enlighten.c57
-rw-r--r--arch/x86/xen/mmu.c16
-rw-r--r--arch/x86/xen/smp.c21
-rw-r--r--arch/x86/xen/spinlock.c25
-rw-r--r--arch/x86/xen/time.c13
51 files changed, 681 insertions, 269 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a4f24f5..15b5cef 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -112,7 +112,7 @@ config X86
select GENERIC_STRNLEN_USER
select HAVE_CONTEXT_TRACKING if X86_64
select HAVE_IRQ_TIME_ACCOUNTING
- select HAVE_VIRT_TO_BUS
+ select VIRT_TO_BUS
select MODULES_USE_ELF_REL if X86_32
select MODULES_USE_ELF_RELA if X86_64
select CLONE_BACKWARDS if X86_32
@@ -1549,6 +1549,7 @@ config X86_SMAP
config EFI
bool "EFI runtime service support"
depends on ACPI
+ select UCS2_STRING
---help---
This enables the kernel to use EFI runtime services that are
available (such as the EFI variable services).
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 8a84501..5ef205c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
+targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -29,7 +29,6 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/piggy.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
-$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone
ifeq ($(CONFIG_EFI_STUB), y)
VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
@@ -43,7 +42,7 @@ OBJCOPYFLAGS_vmlinux.bin := -R .comment -S
$(obj)/vmlinux.bin: vmlinux FORCE
$(call if_changed,objcopy)
-targets += vmlinux.bin.all vmlinux.relocs
+targets += $(patsubst $(obj)/%,%,$(VMLINUX_OBJS)) vmlinux.bin.all vmlinux.relocs
CMD_RELOCS = arch/x86/tools/relocs
quiet_cmd_relocs = RELOCS $@
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index c205035..35ee62f 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -251,6 +251,51 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size)
*size = len;
}
+static efi_status_t setup_efi_vars(struct boot_params *params)
+{
+ struct setup_data *data;
+ struct efi_var_bootdata *efidata;
+ u64 store_size, remaining_size, var_size;
+ efi_status_t status;
+
+ if (sys_table->runtime->hdr.revision < EFI_2_00_SYSTEM_TABLE_REVISION)
+ return EFI_UNSUPPORTED;
+
+ data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
+
+ while (data && data->next)
+ data = (struct setup_data *)(unsigned long)data->next;
+
+ status = efi_call_phys4((void *)sys_table->runtime->query_variable_info,
+ EFI_VARIABLE_NON_VOLATILE |
+ EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ EFI_VARIABLE_RUNTIME_ACCESS, &store_size,
+ &remaining_size, &var_size);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ status = efi_call_phys3(sys_table->boottime->allocate_pool,
+ EFI_LOADER_DATA, sizeof(*efidata), &efidata);
+
+ if (status != EFI_SUCCESS)
+ return status;
+
+ efidata->data.type = SETUP_EFI_VARS;
+ efidata->data.len = sizeof(struct efi_var_bootdata) -
+ sizeof(struct setup_data);
+ efidata->data.next = 0;
+ efidata->store_size = store_size;
+ efidata->remaining_size = remaining_size;
+ efidata->max_var_size = var_size;
+
+ if (data)
+ data->next = (unsigned long)efidata;
+ else
+ params->hdr.setup_data = (unsigned long)efidata;
+
+}
+
static efi_status_t setup_efi_pci(struct boot_params *params)
{
efi_pci_io_protocol *pci;
@@ -1157,6 +1202,8 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table,
setup_graphics(boot_params);
+ setup_efi_vars(boot_params);
+
setup_efi_pci(boot_params);
status = efi_call_phys3(sys_table->boottime->allocate_pool,
diff --git a/arch/x86/include/asm/bootparam_utils.h b/arch/x86/include/asm/bootparam_utils.h
index 5b5e9cb..653668d 100644
--- a/arch/x86/include/asm/bootparam_utils.h
+++ b/arch/x86/include/asm/bootparam_utils.h
@@ -14,13 +14,29 @@
* analysis of kexec-tools; if other broken bootloaders initialize a
* different set of fields we will need to figure out how to disambiguate.
*
+ * Note: efi_info is commonly left uninitialized, but that field has a
+ * private magic, so it is better to leave it unchanged.
*/
static void sanitize_boot_params(struct boot_params *boot_params)
{
+ /*
+ * IMPORTANT NOTE TO BOOTLOADER AUTHORS: do not simply clear
+ * this field. The purpose of this field is to guarantee
+ * compliance with the x86 boot spec located in
+ * Documentation/x86/boot.txt . That spec says that the
+ * *whole* structure should be cleared, after which only the
+ * portion defined by struct setup_header (boot_params->hdr)
+ * should be copied in.
+ *
+ * If you're having an issue because the sentinel is set, you
+ * need to change the whole structure to be cleared, not this
+ * (or any other) individual field, or you will soon have
+ * problems again.
+ */
if (boot_params->sentinel) {
- /*fields in boot_params are not valid, clear them */
+ /* fields in boot_params are left uninitialized, clear them */
memset(&boot_params->olpc_ofw_header, 0,
- (char *)&boot_params->alt_mem_k -
+ (char *)&boot_params->efi_info -
(char *)&boot_params->olpc_ofw_header);
memset(&boot_params->kbd_status, 0,
(char *)&boot_params->hdr -
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 60c89f3..2fb5d58 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -102,6 +102,13 @@ extern void efi_call_phys_epilog(void);
extern void efi_unmap_memmap(void);
extern void efi_memory_uc(u64 addr, unsigned long size);
+struct efi_var_bootdata {
+ struct setup_data data;
+ u64 store_size;
+ u64 remaining_size;
+ u64 max_var_size;
+};
+
#ifdef CONFIG_EFI
static inline bool efi_is_native(void)
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index bdd35db..a809121 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
#define _ASM_X86_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index d3ddd17..5a6d287 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -77,6 +77,7 @@ struct arch_specific_insn {
* a post_handler or break_handler).
*/
int boostable;
+ bool if_modifier;
};
struct arch_optimized_insn {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 635a74d..4979778 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -414,8 +414,8 @@ struct kvm_vcpu_arch {
gpa_t time;
struct pvclock_vcpu_time_info hv_clock;
unsigned int hw_tsc_khz;
- unsigned int time_offset;
- struct page *time_page;
+ struct gfn_to_hva_cache pv_time;
+ bool pv_time_enabled;
/* set guest stopped flag in pvclock flags field */
bool pvclock_set_guest_stopped_request;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 5edd174..7361e47 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -703,7 +703,10 @@ static inline void arch_leave_lazy_mmu_mode(void)
PVOP_VCALL0(pv_mmu_ops.lazy_mode.leave);
}
-void arch_flush_lazy_mmu_mode(void);
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+ PVOP_VCALL0(pv_mmu_ops.lazy_mode.flush);
+}
static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 142236e..b3b0ec1 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -91,6 +91,7 @@ struct pv_lazy_ops {
/* Set deferred update mode, used for batching operations. */
void (*enter)(void);
void (*leave)(void);
+ void (*flush)(void);
};
struct pv_time_ops {
@@ -679,6 +680,7 @@ void paravirt_end_context_switch(struct task_struct *next);
void paravirt_enter_lazy_mmu(void);
void paravirt_leave_lazy_mmu(void);
+void paravirt_flush_lazy_mmu(void);
void _paravirt_nop(void);
u32 _paravirt_ident_32(u32);
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 1ace47b..2e188d6 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -29,13 +29,13 @@ extern const unsigned long sys_call_table[];
*/
static inline int syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
- return regs->orig_ax & __SYSCALL_MASK;
+ return regs->orig_ax;
}
static inline void syscall_rollback(struct task_struct *task,
struct pt_regs *regs)
{
- regs->ax = regs->orig_ax & __SYSCALL_MASK;
+ regs->ax = regs->orig_ax;
}
static inline long syscall_get_error(struct task_struct *task,
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 4fef207..c779730 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -7,7 +7,7 @@
#define tlb_flush(tlb) \
{ \
- if (tlb->fullmm == 0) \
+ if (!tlb->fullmm && !tlb->need_flush_all) \
flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL); \
else \
flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL); \
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index c20d1ce..e709884 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -382,14 +382,14 @@ HYPERVISOR_console_io(int cmd, int count, char *str)
return _hypercall3(int, console_io, cmd, count, str);
}
-extern int __must_check HYPERVISOR_physdev_op_compat(int, void *);
+extern int __must_check xen_physdev_op_compat(int, void *);
static inline int
HYPERVISOR_physdev_op(int cmd, void *arg)
{
int rc = _hypercall2(int, physdev_op, cmd, arg);
if (unlikely(rc == -ENOSYS))
- rc = HYPERVISOR_physdev_op_compat(cmd, arg);
+ rc = xen_physdev_op_compat(cmd, arg);
return rc;
}
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c15ddaf..0874424 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -6,6 +6,7 @@
#define SETUP_E820_EXT 1
#define SETUP_DTB 2
#define SETUP_PCI 3
+#define SETUP_EFI_VARS 4
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 892ce40..7a060f4 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -44,6 +44,7 @@
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_MTRRcap 0x000000fe
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a0e067d..c9496313 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -43,10 +43,10 @@ obj-$(CONFIG_MTRR) += mtrr/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
quiet_cmd_mkcapflags = MKCAP $@
- cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
+ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
cpufeature = $(src)/../../include/asm/cpufeature.h
targets += capflags.c
-$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
+$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
deleted file mode 100644
index 091972e..0000000
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
-#
-
-($in, $out) = @ARGV;
-
-open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
-open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
-
-print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
-print OUT "#include <asm/cpufeature.h>\n";
-print OUT "#endif\n";
-print OUT "\n";
-print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
-
-%features = ();
-$err = 0;
-
-while (defined($line = <IN>)) {
- if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
- $macro = $1;
- $feature = "\L$2";
- $tail = $3;
- if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
- $feature = "\L$1";
- }
-
- next if ($feature eq '');
-
- if ($features{$feature}++) {
- print STDERR "$in: duplicate feature name: $feature\n";
- $err++;
- }
- printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature;
- }
-}
-print OUT "};\n";
-
-close(IN);
-close(OUT);
-
-if ($err) {
- unlink($out);
- exit(1);
-}
-
-exit(0);
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
new file mode 100644
index 0000000..2bf6165
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Generate the x86_cap_flags[] array from include/asm/cpufeature.h
+#
+
+IN=$1
+OUT=$2
+
+TABS="$(printf '\t\t\t\t\t')"
+trap 'rm "$OUT"' EXIT
+
+(
+ echo "#ifndef _ASM_X86_CPUFEATURE_H"
+ echo "#include <asm/cpufeature.h>"
+ echo "#endif"
+ echo ""
+ echo "const char * const x86_cap_flags[NCAPINTS*32] = {"
+
+ # Iterate through any input lines starting with #define X86_FEATURE_
+ sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN |
+ while read i
+ do
+ # Name is everything up to the first whitespace
+ NAME="$(echo "$i" | sed 's/ .*//')"
+
+ # If the /* comment */ starts with a quote string, grab that.
+ VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')"
+ [ -z "$VALUE" ] && VALUE="\"$NAME\""
+ [ "$VALUE" == '""' ] && continue
+
+ # Name is uppercase, VALUE is all lowercase
+ VALUE="$(echo "$VALUE" | tr A-Z a-z)"
+
+ TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 ))
+ printf "\t[%s]%.*s = %s,\n" \
+ "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE"
+ done
+ echo "};"
+) > $OUT
+
+trap - EXIT
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index a7d26d8..8f4be53 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -35,13 +35,6 @@ static bool __init ms_hyperv_platform(void)
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return false;
- /*
- * Xen emulates Hyper-V to support enlightened Windows.
- * Check to see first if we are on a Xen Hypervisor.
- */
- if (xen_cpuid_base())
- return false;
-
cpuid(HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS,
&eax, &hyp_signature[0], &hyp_signature[1], &hyp_signature[2]);
@@ -82,12 +75,6 @@ static void __init ms_hyperv_init_platform(void)
if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE)
clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100);
-#if IS_ENABLED(CONFIG_HYPERV)
- /*
- * Setup the IDT for hypervisor callback.
- */
- alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
-#endif
}
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
@@ -103,6 +90,11 @@ static irq_handler_t vmbus_isr;
void hv_register_vmbus_handler(int irq, irq_handler_t handler)
{
+ /*
+ * Setup the IDT for hypervisor callback.
+ */
+ alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector);
+
vmbus_irq = irq;
vmbus_isr = handler;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 529c893..cc45deb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -101,6 +101,10 @@ static struct event_constraint intel_snb_event_constraints[] __read_mostly =
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
+ INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
+ INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
@@ -149,8 +153,14 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly =
};
static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
- INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffffffffull, RSP_0),
- INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffffffffull, RSP_1),
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
+static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
+ INTEL_EVENT_EXTRA_REG(0xb7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
+ INTEL_EVENT_EXTRA_REG(0xbb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
EVENT_EXTRA_END
};
@@ -2093,7 +2103,10 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_snb_event_constraints;
x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- x86_pmu.extra_regs = intel_snb_extra_regs;
+ if (boot_cpu_data.x86_model == 45)
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ else
+ x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
@@ -2119,7 +2132,10 @@ __init int intel_pmu_init(void)
x86_pmu.event_constraints = intel_ivb_event_constraints;
x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
- x86_pmu.extra_regs = intel_snb_extra_regs;
+ if (boot_cpu_data.x86_model == 62)
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ else
+ x86_pmu.extra_regs = intel_snb_extra_regs;
/* all extra regs are per-cpu when HT is on */
x86_pmu.er_flags |= ERF_HAS_RSP_1;
x86_pmu.er_flags |= ERF_NO_HT_SHARING;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 826054a..26830f3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -314,10 +314,11 @@ int intel_pmu_drain_bts_buffer(void)
if (top <= at)
return 0;
+ memset(&regs, 0, sizeof(regs));
+
ds->bts_index = ds->bts_buffer_base;
perf_sample_data_init(&data, 0, event->hw.last_period);
- regs.ip = 0;
/*
* Prepare a generic sample, i.e. fill in the invariant fields.
@@ -729,3 +730,13 @@ void intel_ds_init(void)
}
}
}
+
+void perf_restore_debug_store(void)
+{
+ struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
+
+ if (!x86_pmu.bts && !x86_pmu.pebs)
+ return;
+
+ wrmsrl(MSR_IA32_DS_AREA, (unsigned long)ds);
+}
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 3f06e61..7bfe318 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -375,6 +375,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p)
else
p->ainsn.boostable = -1;
+ /* Check whether the instruction modifies Interrupt Flag or not */
+ p->ainsn.if_modifier = is_IF_modifier(p->ainsn.insn);
+
/* Also, displacement change doesn't affect the first byte */
p->opcode = p->ainsn.insn[0];
}
@@ -434,7 +437,7 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
__this_cpu_write(current_kprobe, p);
kcb->kprobe_saved_flags = kcb->kprobe_old_flags
= (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
- if (is_IF_modifier(p->ainsn.insn))
+ if (p->ainsn.if_modifier)
kcb->kprobe_saved_flags &= ~X86_EFLAGS_IF;
}
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/microcode_core_early.c
index 577db84..833d51d 100644
--- a/arch/x86/kernel/microcode_core_early.c
+++ b/arch/x86/kernel/microcode_core_early.c
@@ -45,9 +45,6 @@ static int __cpuinit x86_vendor(void)
u32 eax = 0x00000000;
u32 ebx, ecx = 0, edx;
- if (!have_cpuid_p())
- return X86_VENDOR_UNKNOWN;
-
native_cpuid(&eax, &ebx, &ecx, &edx);
if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx))
@@ -59,18 +56,45 @@ static int __cpuinit x86_vendor(void)
return X86_VENDOR_UNKNOWN;
}
+static int __cpuinit x86_family(void)
+{
+ u32 eax = 0x00000001;
+ u32 ebx, ecx = 0, edx;
+ int x86;
+
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+
+ x86 = (eax >> 8) & 0xf;
+ if (x86 == 15)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
void __init load_ucode_bsp(void)
{
- int vendor = x86_vendor();
+ int vendor, x86;
+
+ if (!have_cpuid_p())
+ return;
- if (vendor == X86_VENDOR_INTEL)
+ vendor = x86_vendor();
+ x86 = x86_family();
+
+ if (vendor == X86_VENDOR_INTEL && x86 >= 6)
load_ucode_intel_bsp();
}
void __cpuinit load_ucode_ap(void)
{
- int vendor = x86_vendor();
+ int vendor, x86;
+
+ if (!have_cpuid_p())
+ return;
+
+ vendor = x86_vendor();
+ x86 = x86_family();
- if (vendor == X86_VENDOR_INTEL)
+ if (vendor == X86_VENDOR_INTEL && x86 >= 6)
load_ucode_intel_ap();
}
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/microcode_intel_early.c
index 7890bc8..d893e8e 100644
--- a/arch/x86/kernel/microcode_intel_early.c
+++ b/arch/x86/kernel/microcode_intel_early.c
@@ -90,13 +90,13 @@ microcode_phys(struct microcode_intel **mc_saved_tmp,
struct microcode_intel ***mc_saved;
mc_saved = (struct microcode_intel ***)
- __pa_symbol(&mc_saved_data->mc_saved);
+ __pa_nodebug(&mc_saved_data->mc_saved);
for (i = 0; i < mc_saved_data->mc_saved_count; i++) {
struct microcode_intel *p;
p = *(struct microcode_intel **)
- __pa(mc_saved_data->mc_saved + i);
- mc_saved_tmp[i] = (struct microcode_intel *)__pa(p);
+ __pa_nodebug(mc_saved_data->mc_saved + i);
+ mc_saved_tmp[i] = (struct microcode_intel *)__pa_nodebug(p);
}
}
#endif
@@ -562,7 +562,7 @@ scan_microcode(unsigned long start, unsigned long end,
struct cpio_data cd;
long offset = 0;
#ifdef CONFIG_X86_32
- char *p = (char *)__pa_symbol(ucode_name);
+ char *p = (char *)__pa_nodebug(ucode_name);
#else
char *p = ucode_name;
#endif
@@ -630,8 +630,8 @@ static void __cpuinit print_ucode(struct ucode_cpu_info *uci)
if (mc_intel == NULL)
return;
- delay_ucode_info_p = (int *)__pa_symbol(&delay_ucode_info);
- current_mc_date_p = (int *)__pa_symbol(&current_mc_date);
+ delay_ucode_info_p = (int *)__pa_nodebug(&delay_ucode_info);
+ current_mc_date_p = (int *)__pa_nodebug(&current_mc_date);
*delay_ucode_info_p = 1;
*current_mc_date_p = mc_intel->hdr.date;
@@ -659,8 +659,8 @@ static inline void __cpuinit print_ucode(struct ucode_cpu_info *uci)
}
#endif
-static int apply_microcode_early(struct mc_saved_data *mc_saved_data,
- struct ucode_cpu_info *uci)
+static int __cpuinit apply_microcode_early(struct mc_saved_data *mc_saved_data,
+ struct ucode_cpu_info *uci)
{
struct microcode_intel *mc_intel;
unsigned int val[2];
@@ -741,15 +741,15 @@ load_ucode_intel_bsp(void)
#ifdef CONFIG_X86_32
struct boot_params *boot_params_p;
- boot_params_p = (struct boot_params *)__pa_symbol(&boot_params);
+ boot_params_p = (struct boot_params *)__pa_nodebug(&boot_params);
ramdisk_image = boot_params_p->hdr.ramdisk_image;
ramdisk_size = boot_params_p->hdr.ramdisk_size;
initrd_start_early = ramdisk_image;
initrd_end_early = initrd_start_early + ramdisk_size;
_load_ucode_intel_bsp(
- (struct mc_saved_data *)__pa_symbol(&mc_saved_data),
- (unsigned long *)__pa_symbol(&mc_saved_in_initrd),
+ (struct mc_saved_data *)__pa_nodebug(&mc_saved_data),
+ (unsigned long *)__pa_nodebug(&mc_saved_in_initrd),
initrd_start_early, initrd_end_early, &uci);
#else
ramdisk_image = boot_params.hdr.ramdisk_image;
@@ -772,10 +772,10 @@ void __cpuinit load_ucode_intel_ap(void)
unsigned long *initrd_start_p;
mc_saved_in_initrd_p =
- (unsigned long *)__pa_symbol(mc_saved_in_initrd);
- mc_saved_data_p = (struct mc_saved_data *)__pa_symbol(&mc_saved_data);
- initrd_start_p = (unsigned long *)__pa_symbol(&initrd_start);
- initrd_start_addr = (unsigned long)__pa_symbol(*initrd_start_p);
+ (unsigned long *)__pa_nodebug(mc_saved_in_initrd);
+ mc_saved_data_p = (struct mc_saved_data *)__pa_nodebug(&mc_saved_data);
+ initrd_start_p = (unsigned long *)__pa_nodebug(&initrd_start);
+ initrd_start_addr = (unsigned long)__pa_nodebug(*initrd_start_p);
#else
mc_saved_data_p = &mc_saved_data;
mc_saved_in_initrd_p = mc_saved_in_initrd;
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 17fff18..8bfb335 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -263,6 +263,18 @@ void paravirt_leave_lazy_mmu(void)
leave_lazy(PARAVIRT_LAZY_MMU);
}
+void paravirt_flush_lazy_mmu(void)
+{
+ preempt_disable();
+
+ if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
+ arch_leave_lazy_mmu_mode();
+ arch_enter_lazy_mmu_mode();
+ }
+
+ preempt_enable();
+}
+
void paravirt_start_context_switch(struct task_struct *prev)
{
BUG_ON(preemptible());
@@ -292,18 +304,6 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
return this_cpu_read(paravirt_lazy_mode);
}
-void arch_flush_lazy_mmu_mode(void)
-{
- preempt_disable();
-
- if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
- arch_leave_lazy_mmu_mode();
- arch_enter_lazy_mmu_mode();
- }
-
- preempt_enable();
-}
-
struct pv_info pv_info = {
.name = "bare hardware",
.paravirt_enabled = 0,
@@ -475,6 +475,7 @@ struct pv_mmu_ops pv_mmu_ops = {
.lazy_mode = {
.enter = paravirt_nop,
.leave = paravirt_nop,
+ .flush = paravirt_nop,
},
.set_fixmap = native_set_fixmap,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 84d3285..fae9134 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -171,9 +171,15 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
/* cpu data as detected by the assembly code in head.S */
-struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
+struct cpuinfo_x86 new_cpu_data __cpuinitdata = {
+ .wp_works_ok = -1,
+ .fdiv_bug = -1,
+};
/* common cpu data for all cpus */
-struct cpuinfo_x86 boot_cpu_data __read_mostly = {0, 0, 0, 0, -1, 1, 0, 0, -1};
+struct cpuinfo_x86 boot_cpu_data __read_mostly = {
+ .wp_works_ok = -1,
+ .fdiv_bug = -1,
+};
EXPORT_SYMBOL(boot_cpu_data);
unsigned int def_to_bigsmp;
@@ -501,11 +507,14 @@ static void __init memblock_x86_reserve_range_setup_data(void)
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
-# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+# define CRASH_KERNEL_ADDR_LOW_MAX (512 << 20)
+# define CRASH_KERNEL_ADDR_HIGH_MAX (512 << 20)
#else
-# define CRASH_KERNEL_ADDR_MAX MAXMEM
+# define CRASH_KERNEL_ADDR_LOW_MAX (896UL<<20)
+# define CRASH_KERNEL_ADDR_HIGH_MAX MAXMEM
#endif
static void __init reserve_crashkernel_low(void)
@@ -515,19 +524,35 @@ static void __init reserve_crashkernel_low(void)
unsigned long long low_base = 0, low_size = 0;
unsigned long total_low_mem;
unsigned long long base;
+ bool auto_set = false;
int ret;
total_low_mem = memblock_mem_size(1UL<<(32-PAGE_SHIFT));
+ /* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem,
&low_size, &base);
- if (ret != 0 || low_size <= 0)
- return;
+ if (ret != 0) {
+ /*
+ * two parts from lib/swiotlb.c:
+ * swiotlb size: user specified with swiotlb= or default.
+ * swiotlb overflow buffer: now is hardcoded to 32k.
+ * We round it to 8M for other buffers that
+ * may need to stay low too.
+ */
+ low_size = swiotlb_size_or_default() + (8UL<<20);
+ auto_set = true;
+ } else {
+ /* passed with crashkernel=0,low ? */
+ if (!low_size)
+ return;
+ }
low_base = memblock_find_in_range(low_size, (1ULL<<32),
low_size, alignment);
if (!low_base) {
- pr_info("crashkernel low reservation failed - No suitable area found.\n");
+ if (!auto_set)
+ pr_info("crashkernel low reservation failed - No suitable area found.\n");
return;
}
@@ -548,14 +573,22 @@ static void __init reserve_crashkernel(void)
const unsigned long long alignment = 16<<20; /* 16M */
unsigned long long total_mem;
unsigned long long crash_size, crash_base;
+ bool high = false;
int ret;
total_mem = memblock_phys_mem_size();
+ /* crashkernel=XM */
ret = parse_crashkernel(boot_command_line, total_mem,
&crash_size, &crash_base);
- if (ret != 0 || crash_size <= 0)
- return;
+ if (ret != 0 || crash_size <= 0) {
+ /* crashkernel=X,high */
+ ret = parse_crashkernel_high(boot_command_line, total_mem,
+ &crash_size, &crash_base);
+ if (ret != 0 || crash_size <= 0)
+ return;
+ high = true;
+ }
/* 0 means: find the address automatically */
if (crash_base <= 0) {
@@ -563,7 +596,9 @@ static void __init reserve_crashkernel(void)
* kexec want bzImage is below CRASH_KERNEL_ADDR_MAX
*/
crash_base = memblock_find_in_range(alignment,
- CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
+ high ? CRASH_KERNEL_ADDR_HIGH_MAX :
+ CRASH_KERNEL_ADDR_LOW_MAX,
+ crash_size, alignment);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index a6ceaed..9f190a2 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1365,9 +1365,8 @@ static inline void mwait_play_dead(void)
unsigned int eax, ebx, ecx, edx;
unsigned int highest_cstate = 0;
unsigned int highest_subcstate = 0;
- int i;
void *mwait_ptr;
- struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
+ int i;
if (!this_cpu_has(X86_FEATURE_MWAIT))
return;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 02b51dd..f77df1c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1857,7 +1857,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
if (!pv_eoi_enabled(vcpu))
return 0;
return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
- addr);
+ addr, sizeof(u8));
}
void kvm_lapic_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f71500a..e172132 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1406,25 +1406,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
unsigned long flags, this_tsc_khz;
struct kvm_vcpu_arch *vcpu = &v->arch;
struct kvm_arch *ka = &v->kvm->arch;
- void *shared_kaddr;
s64 kernel_ns, max_kernel_ns;
u64 tsc_timestamp, host_tsc;
- struct pvclock_vcpu_time_info *guest_hv_clock;
+ struct pvclock_vcpu_time_info guest_hv_clock;
u8 pvclock_flags;
bool use_master_clock;
kernel_ns = 0;
host_tsc = 0;
- /* Keep irq disabled to prevent changes to the clock */
- local_irq_save(flags);
- this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
- if (unlikely(this_tsc_khz == 0)) {
- local_irq_restore(flags);
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
- return 1;
- }
-
/*
* If the host uses TSC clock, then passthrough TSC as stable
* to the guest.
@@ -1436,6 +1426,15 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
kernel_ns = ka->master_kernel_ns;
}
spin_unlock(&ka->pvclock_gtod_sync_lock);
+
+ /* Keep irq disabled to prevent changes to the clock */
+ local_irq_save(flags);
+ this_tsc_khz = __get_cpu_var(cpu_tsc_khz);
+ if (unlikely(this_tsc_khz == 0)) {
+ local_irq_restore(flags);
+ kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
+ return 1;
+ }
if (!use_master_clock) {
host_tsc = native_read_tsc();
kernel_ns = get_kernel_ns();
@@ -1463,7 +1462,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
local_irq_restore(flags);
- if (!vcpu->time_page)
+ if (!vcpu->pv_time_enabled)
return 0;
/*
@@ -1525,12 +1524,12 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
*/
vcpu->hv_clock.version += 2;
- shared_kaddr = kmap_atomic(vcpu->time_page);
-
- guest_hv_clock = shared_kaddr + vcpu->time_offset;
+ if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+ &guest_hv_clock, sizeof(guest_hv_clock))))
+ return 0;
/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
- pvclock_flags = (guest_hv_clock->flags & PVCLOCK_GUEST_STOPPED);
+ pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
if (vcpu->pvclock_set_guest_stopped_request) {
pvclock_flags |= PVCLOCK_GUEST_STOPPED;
@@ -1543,12 +1542,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->hv_clock.flags = pvclock_flags;
- memcpy(shared_kaddr + vcpu->time_offset, &vcpu->hv_clock,
- sizeof(vcpu->hv_clock));
-
- kunmap_atomic(shared_kaddr);
-
- mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT);
+ kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
+ &vcpu->hv_clock,
+ sizeof(vcpu->hv_clock));
return 0;
}
@@ -1827,7 +1823,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
return 0;
}
- if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa))
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa,
+ sizeof(u32)))
return 1;
vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
@@ -1837,10 +1834,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
static void kvmclock_reset(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.time_page) {
- kvm_release_page_dirty(vcpu->arch.time_page);
- vcpu->arch.time_page = NULL;
- }
+ vcpu->arch.pv_time_enabled = false;
}
static void accumulate_steal_time(struct kvm_vcpu *vcpu)
@@ -1947,6 +1941,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_KVM_SYSTEM_TIME_NEW:
case MSR_KVM_SYSTEM_TIME: {
+ u64 gpa_offset;
kvmclock_reset(vcpu);
vcpu->arch.time = data;
@@ -1956,14 +1951,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
if (!(data & 1))
break;
- /* ...but clean it before doing the actual write */
- vcpu->arch.time_offset = data & ~(PAGE_MASK | 1);
-
- vcpu->arch.time_page =
- gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
+ gpa_offset = data & ~(PAGE_MASK | 1);
- if (is_error_page(vcpu->arch.time_page))
- vcpu->arch.time_page = NULL;
+ if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.pv_time, data & ~1ULL,
+ sizeof(struct pvclock_vcpu_time_info)))
+ vcpu->arch.pv_time_enabled = false;
+ else
+ vcpu->arch.pv_time_enabled = true;
break;
}
@@ -1980,7 +1975,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime,
- data & KVM_STEAL_VALID_BITS))
+ data & KVM_STEAL_VALID_BITS,
+ sizeof(struct kvm_steal_time)))
return 1;
vcpu->arch.st.msr_val = data;
@@ -2967,7 +2963,7 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
*/
static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
{
- if (!vcpu->arch.time_page)
+ if (!vcpu->arch.pv_time_enabled)
return -EINVAL;
vcpu->arch.pvclock_set_guest_stopped_request = true;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -6718,6 +6714,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
goto fail_free_wbinvd_dirty_mask;
vcpu->arch.ia32_tsc_adjust_msr = 0x0;
+ vcpu->arch.pv_time_enabled = false;
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index 1cbd89c..7114c63 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -1334,6 +1334,7 @@ __init void lguest_init(void)
pv_mmu_ops.read_cr3 = lguest_read_cr3;
pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode;
+ pv_mmu_ops.lazy_mode.flush = paravirt_flush_lazy_mmu;
pv_mmu_ops.pte_update = lguest_pte_update;
pv_mmu_ops.pte_update_defer = lguest_pte_update;
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 05928aa..906fea3 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -74,10 +74,10 @@ copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
char c;
unsigned zero_len;
- for (; len; --len) {
+ for (; len; --len, to++) {
if (__get_user_nocheck(c, from++, sizeof(char)))
break;
- if (__put_user_nocheck(c, to++, sizeof(char)))
+ if (__put_user_nocheck(c, to, sizeof(char)))
break;
}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 2b97525..0e88336 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -378,10 +378,12 @@ static noinline __kprobes int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_ref))
return -1;
- if (pgd_none(*pgd))
+ if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
- else
+ arch_flush_lazy_mmu_mode();
+ } else {
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ }
/*
* Below here mismatches are bugs because these lower tables
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 6f31ee5..252b8f5 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -137,5 +137,4 @@ void __init set_highmem_pages_init(void)
add_highpages_with_active_regions(nid, zone_start_pfn,
zone_end_pfn);
}
- totalram_pages += totalhigh_pages;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 4903a03..fdc5dca 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -410,9 +410,8 @@ void __init init_mem_mapping(void)
/* the ISA range is always mapped regardless of memory holes */
init_memory_mapping(0, ISA_END_ADDRESS);
- /* xen has big range in reserved near end of ram, skip it at first */
- addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE,
- PAGE_SIZE);
+ /* xen has big range in reserved near end of ram, skip it at first.*/
+ addr = memblock_find_in_range(ISA_END_ADDRESS, end, PMD_SIZE, PMD_SIZE);
real_end = addr + PMD_SIZE;
/* step_size need to be small so pgt_buf from BRK could cover it */
@@ -516,11 +515,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
for (; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
+ free_reserved_page(virt_to_page(addr));
}
#endif
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2d19001..3ac7e31 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,14 +427,6 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-static void __init add_one_highpage_init(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
-}
-
void __init add_highpages_with_active_regions(int nid,
unsigned long start_pfn, unsigned long end_pfn)
{
@@ -448,7 +440,7 @@ void __init add_highpages_with_active_regions(int nid,
start_pfn, end_pfn);
for ( ; pfn < e_pfn; pfn++)
if (pfn_valid(pfn))
- add_one_highpage_init(pfn_to_page(pfn));
+ free_highmem_page(pfn_to_page(pfn));
}
}
#else
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 474e28f..71ff55a 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1011,11 +1011,8 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
flush_tlb_all();
}
-void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
+void __ref vmemmap_free(unsigned long start, unsigned long end)
{
- unsigned long start = (unsigned long)memmap;
- unsigned long end = (unsigned long)(memmap + nr_pages);
-
remove_pagetable(start, end, false);
}
@@ -1067,10 +1064,9 @@ void __init mem_init(void)
/* clear_bss() already clear the empty_zero_page */
- reservedpages = 0;
-
- /* this will put all low memory onto the freelists */
register_page_bootmem_info();
+
+ /* this will put all memory onto the freelists */
totalram_pages = free_all_bootmem();
absent_pages = absent_pages_in_range(0, max_pfn);
@@ -1285,18 +1281,17 @@ static long __meminitdata addr_start, addr_end;
static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;
-int __meminit
-vmemmap_populate(struct page *start_page, unsigned long size, int node)
+static int __meminit vmemmap_populate_hugepages(unsigned long start,
+ unsigned long end, int node)
{
- unsigned long addr = (unsigned long)start_page;
- unsigned long end = (unsigned long)(start_page + size);
+ unsigned long addr;
unsigned long next;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
- for (; addr < end; addr = next) {
- void *p = NULL;
+ for (addr = start; addr < end; addr = next) {
+ next = pmd_addr_end(addr, end);
pgd = vmemmap_pgd_populate(addr, node);
if (!pgd)
@@ -1306,31 +1301,14 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
if (!pud)
return -ENOMEM;
- if (!cpu_has_pse) {
- next = (addr + PAGE_SIZE) & PAGE_MASK;
- pmd = vmemmap_pmd_populate(pud, addr, node);
-
- if (!pmd)
- return -ENOMEM;
-
- p = vmemmap_pte_populate(pmd, addr, node);
+ pmd = pmd_offset(pud, addr);
+ if (pmd_none(*pmd)) {
+ void *p;
- if (!p)
- return -ENOMEM;
-
- addr_end = addr + PAGE_SIZE;
- p_end = p + PAGE_SIZE;
- } else {
- next = pmd_addr_end(addr, end);
-
- pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd)) {
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+ if (p) {
pte_t entry;
- p = vmemmap_alloc_block_buf(PMD_SIZE, node);
- if (!p)
- return -ENOMEM;
-
entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
PAGE_KERNEL_LARGE);
set_pmd(pmd, __pmd(pte_val(entry)));
@@ -1347,15 +1325,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
addr_end = addr + PMD_SIZE;
p_end = p + PMD_SIZE;
- } else
- vmemmap_verify((pte_t *)pmd, node, addr, next);
+ continue;
+ }
+ } else if (pmd_large(*pmd)) {
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
+ continue;
}
-
+ pr_warn_once("vmemmap: falling back to regular page backing\n");
+ if (vmemmap_populate_basepages(addr, next, node))
+ return -ENOMEM;
}
- sync_global_pgds((unsigned long)start_page, end - 1);
return 0;
}
+int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
+{
+ int err;
+
+ if (cpu_has_pse)
+ err = vmemmap_populate_hugepages(start, end, node);
+ else
+ err = vmemmap_populate_basepages(start, end, node);
+ if (!err)
+ sync_global_pgds(start, end - 1);
+ return err;
+}
+
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
void register_page_bootmem_memmap(unsigned long section_nr,
struct page *start_page, unsigned long size)
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 78fe3f1..9a1e658 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -282,12 +282,7 @@ void iounmap(volatile void __iomem *addr)
in parallel. Reuse of the virtual address is prevented by
leaving it in the global lists until we're done with it.
cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == (void __force *)addr)
- break;
- }
- read_unlock(&vmlist_lock);
+ p = find_vm_area((void __force *)addr);
if (!p) {
printk(KERN_ERR "iounmap: bad address %p\n", addr);
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 72fe01e..a71c4e2 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -114,14 +114,11 @@ void numa_clear_node(int cpu)
*/
void __init setup_node_to_cpumask_map(void)
{
- unsigned int node, num = 0;
+ unsigned int node;
/* setup nr_node_ids if not done yet */
- if (nr_node_ids == MAX_NUMNODES) {
- for_each_node_mask(node, node_possible_map)
- num = node;
- nr_node_ids = num + 1;
- }
+ if (nr_node_ids == MAX_NUMNODES)
+ setup_nr_node_ids();
/* allocate the map */
for (node = 0; node < nr_node_ids; node++)
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index b008656..0e38951 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -68,7 +68,7 @@ static int print_split(struct split_state *s)
s->gpg++;
i += GPS/PAGE_SIZE;
} else if (level == PG_LEVEL_2M) {
- if (!(pte_val(*pte) & _PAGE_PSE)) {
+ if ((pte_val(*pte) & _PAGE_PRESENT) && !(pte_val(*pte) & _PAGE_PSE)) {
printk(KERN_ERR
"%lx level %d but not PSE %Lx\n",
addr, level, (u64)pte_val(*pte));
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 091934e..fb4e73e 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -467,7 +467,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* We are safe now. Check whether the new pgprot is the same:
*/
old_pte = *kpte;
- old_prot = new_prot = req_prot = pte_pgprot(old_pte);
+ old_prot = req_prot = pte_pgprot(old_pte);
pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr);
pgprot_val(req_prot) |= pgprot_val(cpa->mask_set);
@@ -478,12 +478,12 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
* a non present pmd. The canon_pgprot will clear _PAGE_GLOBAL
* for the ancient hardware that doesn't support it.
*/
- if (pgprot_val(new_prot) & _PAGE_PRESENT)
- pgprot_val(new_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
+ if (pgprot_val(req_prot) & _PAGE_PRESENT)
+ pgprot_val(req_prot) |= _PAGE_PSE | _PAGE_GLOBAL;
else
- pgprot_val(new_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
+ pgprot_val(req_prot) &= ~(_PAGE_PSE | _PAGE_GLOBAL);
- new_prot = canon_pgprot(new_prot);
+ req_prot = canon_pgprot(req_prot);
/*
* old_pte points to the large page base address. So we need
@@ -1413,6 +1413,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
* but that can deadlock->flush only current cpu:
*/
__flush_tlb_all();
+
+ arch_flush_lazy_mmu_mode();
}
#ifdef CONFIG_HIBERNATION
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 2610bd9..6574388 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -563,6 +563,13 @@ int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags)
if (base > __pa(high_memory-1))
return 0;
+ /*
+ * some areas in the middle of the kernel identity range
+ * are not mapped, like the PCI space.
+ */
+ if (!page_is_ram(base >> PAGE_SHIFT))
+ return 0;
+
id_sz = (__pa(high_memory-1) <= base + size) ?
__pa(high_memory) - base :
size;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 193350b..17fda6a 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -58,6 +58,13 @@ void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
+ /*
+ * NOTE! For PAE, any changes to the top page-directory-pointer-table
+ * entries need a full cr3 reload to flush.
+ */
+#ifdef CONFIG_X86_PAE
+ tlb->need_flush_all = 1;
+#endif
tlb_remove_page(tlb, virt_to_page(pmd));
}
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 901177d..305c68b 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -6,6 +6,7 @@
#include <linux/sched.h>
#include <linux/pci.h>
+#include <linux/pci-acpi.h>
#include <linux/ioport.h>
#include <linux/init.h>
#include <linux/dmi.h>
@@ -170,6 +171,16 @@ void pcibios_fixup_bus(struct pci_bus *b)
pcibios_fixup_device_resources(dev);
}
+void pcibios_add_bus(struct pci_bus *bus)
+{
+ acpi_pci_add_bus(bus);
+}
+
+void pcibios_remove_bus(struct pci_bus *bus)
+{
+ acpi_pci_remove_bus(bus);
+}
+
/*
* Only use DMI information to set this if nothing was passed
* on the kernel command line (which was parsed earlier).
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 94e7662..4a9be6d 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -177,7 +177,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
goto error;
i = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], 0,
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i],
(type == PCI_CAP_ID_MSIX) ?
"pcifront-msi-x" :
"pcifront-msi",
@@ -244,7 +244,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
dev_dbg(&dev->dev,
"xen: msi already bound to pirq=%d\n", pirq);
}
- irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, 0,
+ irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq,
(type == PCI_CAP_ID_MSIX) ?
"msi-x" : "msi",
DOMID_SELF);
@@ -326,7 +326,7 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
}
ret = xen_bind_pirq_msi_to_irq(dev, msidesc,
- map_irq.pirq, map_irq.index,
+ map_irq.pirq,
(type == PCI_CAP_ID_MSIX) ?
"msi-x" : "msi",
domid);
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 5f2ecaf..e4a86a6 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -41,6 +41,7 @@
#include <linux/io.h>
#include <linux/reboot.h>
#include <linux/bcd.h>
+#include <linux/ucs2_string.h>
#include <asm/setup.h>
#include <asm/efi.h>
@@ -51,6 +52,13 @@
#define EFI_DEBUG 1
+/*
+ * There's some additional metadata associated with each
+ * variable. Intel's reference implementation is 60 bytes - bump that
+ * to account for potential alignment constraints
+ */
+#define VAR_METADATA_SIZE 64
+
struct efi __read_mostly efi = {
.mps = EFI_INVALID_TABLE_ADDR,
.acpi = EFI_INVALID_TABLE_ADDR,
@@ -69,6 +77,13 @@ struct efi_memory_map memmap;
static struct efi efi_phys __initdata;
static efi_system_table_t efi_systab __initdata;
+static u64 efi_var_store_size;
+static u64 efi_var_remaining_size;
+static u64 efi_var_max_var_size;
+static u64 boot_used_size;
+static u64 boot_var_size;
+static u64 active_size;
+
unsigned long x86_efi_facility;
/*
@@ -98,6 +113,15 @@ static int __init setup_add_efi_memmap(char *arg)
}
early_param("add_efi_memmap", setup_add_efi_memmap);
+static bool efi_no_storage_paranoia;
+
+static int __init setup_storage_paranoia(char *arg)
+{
+ efi_no_storage_paranoia = true;
+ return 0;
+}
+early_param("efi_no_storage_paranoia", setup_storage_paranoia);
+
static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc)
{
@@ -162,8 +186,53 @@ static efi_status_t virt_efi_get_next_variable(unsigned long *name_size,
efi_char16_t *name,
efi_guid_t *vendor)
{
- return efi_call_virt3(get_next_variable,
- name_size, name, vendor);
+ efi_status_t status;
+ static bool finished = false;
+ static u64 var_size;
+
+ status = efi_call_virt3(get_next_variable,
+ name_size, name, vendor);
+
+ if (status == EFI_NOT_FOUND) {
+ finished = true;
+ if (var_size < boot_used_size) {
+ boot_var_size = boot_used_size - var_size;
+ active_size += boot_var_size;
+ } else {
+ printk(KERN_WARNING FW_BUG "efi: Inconsistent initial sizes\n");
+ }
+ }
+
+ if (boot_used_size && !finished) {
+ unsigned long size;
+ u32 attr;
+ efi_status_t s;
+ void *tmp;
+
+ s = virt_efi_get_variable(name, vendor, &attr, &size, NULL);
+
+ if (s != EFI_BUFFER_TOO_SMALL || !size)
+ return status;
+
+ tmp = kmalloc(size, GFP_ATOMIC);
+
+ if (!tmp)
+ return status;
+
+ s = virt_efi_get_variable(name, vendor, &attr, &size, tmp);
+
+ if (s == EFI_SUCCESS && (attr & EFI_VARIABLE_NON_VOLATILE)) {
+ var_size += size;
+ var_size += ucs2_strsize(name, 1024);
+ active_size += size;
+ active_size += VAR_METADATA_SIZE;
+ active_size += ucs2_strsize(name, 1024);
+ }
+
+ kfree(tmp);
+ }
+
+ return status;
}
static efi_status_t virt_efi_set_variable(efi_char16_t *name,
@@ -172,9 +241,34 @@ static efi_status_t virt_efi_set_variable(efi_char16_t *name,
unsigned long data_size,
void *data)
{
- return efi_call_virt5(set_variable,
- name, vendor, attr,
- data_size, data);
+ efi_status_t status;
+ u32 orig_attr = 0;
+ unsigned long orig_size = 0;
+
+ status = virt_efi_get_variable(name, vendor, &orig_attr, &orig_size,
+ NULL);
+
+ if (status != EFI_BUFFER_TOO_SMALL)
+ orig_size = 0;
+
+ status = efi_call_virt5(set_variable,
+ name, vendor, attr,
+ data_size, data);
+
+ if (status == EFI_SUCCESS) {
+ if (orig_size) {
+ active_size -= orig_size;
+ active_size -= ucs2_strsize(name, 1024);
+ active_size -= VAR_METADATA_SIZE;
+ }
+ if (data_size) {
+ active_size += data_size;
+ active_size += ucs2_strsize(name, 1024);
+ active_size += VAR_METADATA_SIZE;
+ }
+ }
+
+ return status;
}
static efi_status_t virt_efi_query_variable_info(u32 attr,
@@ -682,6 +776,9 @@ void __init efi_init(void)
char vendor[100] = "unknown";
int i = 0;
void *tmp;
+ struct setup_data *data;
+ struct efi_var_bootdata *efi_var_data;
+ u64 pa_data;
#ifdef CONFIG_X86_32
if (boot_params.efi_info.efi_systab_hi ||
@@ -699,6 +796,22 @@ void __init efi_init(void)
if (efi_systab_init(efi_phys.systab))
return;
+ pa_data = boot_params.hdr.setup_data;
+ while (pa_data) {
+ data = early_ioremap(pa_data, sizeof(*efi_var_data));
+ if (data->type == SETUP_EFI_VARS) {
+ efi_var_data = (struct efi_var_bootdata *)data;
+
+ efi_var_store_size = efi_var_data->store_size;
+ efi_var_remaining_size = efi_var_data->remaining_size;
+ efi_var_max_var_size = efi_var_data->max_var_size;
+ }
+ pa_data = data->next;
+ early_iounmap(data, sizeof(*efi_var_data));
+ }
+
+ boot_used_size = efi_var_store_size - efi_var_remaining_size;
+
set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility);
/*
@@ -999,3 +1112,48 @@ u64 efi_mem_attributes(unsigned long phys_addr)
}
return 0;
}
+
+/*
+ * Some firmware has serious problems when using more than 50% of the EFI
+ * variable store, i.e. it triggers bugs that can brick machines. Ensure that
+ * we never use more than this safe limit.
+ *
+ * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable
+ * store.
+ */
+efi_status_t efi_query_variable_store(u32 attributes, unsigned long size)
+{
+ efi_status_t status;
+ u64 storage_size, remaining_size, max_size;
+
+ status = efi.query_variable_info(attributes, &storage_size,
+ &remaining_size, &max_size);
+ if (status != EFI_SUCCESS)
+ return status;
+
+ if (!max_size && remaining_size > size)
+ printk_once(KERN_ERR FW_BUG "Broken EFI implementation"
+ " is returning MaxVariableSize=0\n");
+ /*
+ * Some firmware implementations refuse to boot if there's insufficient
+ * space in the variable store. We account for that by refusing the
+ * write if permitting it would reduce the available space to under
+ * 50%. However, some firmware won't reclaim variable space until
+ * after the used (not merely the actively used) space drops below
+ * a threshold. We can approximate that case with the value calculated
+ * above. If both the firmware and our calculations indicate that the
+ * available space would drop below 50%, refuse the write.
+ */
+
+ if (!storage_size || size > remaining_size ||
+ (max_size && size > max_size))
+ return EFI_OUT_OF_RESOURCES;
+
+ if (!efi_no_storage_paranoia &&
+ ((active_size + size + VAR_METADATA_SIZE > storage_size / 2) &&
+ (remaining_size - size < storage_size / 2)))
+ return EFI_OUT_OF_RESOURCES;
+
+ return EFI_SUCCESS;
+}
+EXPORT_SYMBOL_GPL(efi_query_variable_store);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 120cee1..3c68768 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -11,6 +11,7 @@
#include <linux/suspend.h>
#include <linux/export.h>
#include <linux/smp.h>
+#include <linux/perf_event.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
@@ -228,6 +229,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
do_fpu_end();
x86_platform.restore_sched_clock_state();
mtrr_bp_restore();
+ perf_restore_debug_store();
}
/* Needed by apm.c */
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8e1c7b..ddbd54a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
+#include <linux/edd.h>
#include <xen/xen.h>
#include <xen/events.h>
@@ -1306,6 +1307,55 @@ static const struct machine_ops xen_machine_ops __initconst = {
.emergency_restart = xen_emergency_restart,
};
+static void __init xen_boot_params_init_edd(void)
+{
+#if IS_ENABLED(CONFIG_EDD)
+ struct xen_platform_op op;
+ struct edd_info *edd_info;
+ u32 *mbr_signature;
+ unsigned nr;
+ int ret;
+
+ edd_info = boot_params.eddbuf;
+ mbr_signature = boot_params.edd_mbr_sig_buffer;
+
+ op.cmd = XENPF_firmware_info;
+
+ op.u.firmware_info.type = XEN_FW_DISK_INFO;
+ for (nr = 0; nr < EDDMAXNR; nr++) {
+ struct edd_info *info = edd_info + nr;
+
+ op.u.firmware_info.index = nr;
+ info->params.length = sizeof(info->params);
+ set_xen_guest_handle(op.u.firmware_info.u.disk_info.edd_params,
+ &info->params);
+ ret = HYPERVISOR_dom0_op(&op);
+ if (ret)
+ break;
+
+#define C(x) info->x = op.u.firmware_info.u.disk_info.x
+ C(device);
+ C(version);
+ C(interface_support);
+ C(legacy_max_cylinder);
+ C(legacy_max_head);
+ C(legacy_sectors_per_track);
+#undef C
+ }
+ boot_params.eddbuf_entries = nr;
+
+ op.u.firmware_info.type = XEN_FW_DISK_MBR_SIGNATURE;
+ for (nr = 0; nr < EDD_MBR_SIG_MAX; nr++) {
+ op.u.firmware_info.index = nr;
+ ret = HYPERVISOR_dom0_op(&op);
+ if (ret)
+ break;
+ mbr_signature[nr] = op.u.firmware_info.u.disk_mbr_signature.mbr_signature;
+ }
+ boot_params.edd_mbr_sig_buf_entries = nr;
+#endif
+}
+
/*
* Set up the GDT and segment registers for -fstack-protector. Until
* we do this, we have to be careful not to call any stack-protected
@@ -1508,6 +1558,8 @@ asmlinkage void __init xen_start_kernel(void)
/* Avoid searching for BIOS MP tables */
x86_init.mpparse.find_smp_config = x86_init_noop;
x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+
+ xen_boot_params_init_edd();
}
#ifdef CONFIG_PCI
/* PCI BIOS service won't work from a PV guest. */
@@ -1589,8 +1641,11 @@ static int __cpuinit xen_hvm_cpu_notify(struct notifier_block *self,
switch (action) {
case CPU_UP_PREPARE:
xen_vcpu_setup(cpu);
- if (xen_have_vector_callback)
+ if (xen_have_vector_callback) {
xen_init_lock_cpu(cpu);
+ if (xen_feature(XENFEAT_hvm_safe_pvclock))
+ xen_setup_timer(cpu);
+ }
break;
default:
break;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index e8e3493..e006c18 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1467,8 +1467,6 @@ static void __init xen_write_cr3_init(unsigned long cr3)
__xen_write_cr3(true, cr3);
xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */
-
- pv_mmu_ops.write_cr3 = &xen_write_cr3;
}
#endif
@@ -1750,14 +1748,18 @@ static void *m2v(phys_addr_t maddr)
}
/* Set the page permissions on an identity-mapped pages */
-static void set_page_prot(void *addr, pgprot_t prot)
+static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags)
{
unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
pte_t pte = pfn_pte(pfn, prot);
- if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
+ if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
BUG();
}
+static void set_page_prot(void *addr, pgprot_t prot)
+{
+ return set_page_prot_flags(addr, prot, UVMF_NONE);
+}
#ifdef CONFIG_X86_32
static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
{
@@ -1841,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
unsigned long addr)
{
if (*pt_base == PFN_DOWN(__pa(addr))) {
- set_page_prot((void *)addr, PAGE_KERNEL);
+ set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
clear_page((void *)addr);
(*pt_base)++;
}
if (*pt_end == PFN_DOWN(__pa(addr))) {
- set_page_prot((void *)addr, PAGE_KERNEL);
+ set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
clear_page((void *)addr);
(*pt_end)--;
}
@@ -2122,6 +2124,7 @@ static void __init xen_post_allocator_init(void)
#endif
#ifdef CONFIG_X86_64
+ pv_mmu_ops.write_cr3 = &xen_write_cr3;
SetPagePinned(virt_to_page(level3_user_vsyscall));
#endif
xen_mark_init_mm_pinned();
@@ -2197,6 +2200,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
.lazy_mode = {
.enter = paravirt_enter_lazy_mmu,
.leave = xen_leave_lazy_mmu,
+ .flush = paravirt_flush_lazy_mmu,
},
.set_fixmap = xen_set_fixmap,
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 09ea61d..0d466d7 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -144,6 +144,13 @@ static int xen_smp_intr_init(unsigned int cpu)
goto fail;
per_cpu(xen_callfuncsingle_irq, cpu) = rc;
+ /*
+ * The IRQ worker on PVHVM goes through the native path and uses the
+ * IPI mechanism.
+ */
+ if (xen_hvm_domain())
+ return 0;
+
callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
cpu,
@@ -167,6 +174,9 @@ static int xen_smp_intr_init(unsigned int cpu)
if (per_cpu(xen_callfuncsingle_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu),
NULL);
+ if (xen_hvm_domain())
+ return rc;
+
if (per_cpu(xen_irq_work, cpu) >= 0)
unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
@@ -418,7 +428,7 @@ static int xen_cpu_disable(void)
static void xen_cpu_die(unsigned int cpu)
{
- while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
+ while (xen_pv_domain() && HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
current->state = TASK_UNINTERRUPTIBLE;
schedule_timeout(HZ/10);
}
@@ -426,7 +436,8 @@ static void xen_cpu_die(unsigned int cpu)
unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+ if (!xen_hvm_domain())
+ unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
xen_uninit_lock_cpu(cpu);
xen_teardown_timer(cpu);
}
@@ -657,11 +668,7 @@ static int __cpuinit xen_hvm_cpu_up(unsigned int cpu, struct task_struct *tidle)
static void xen_hvm_cpu_die(unsigned int cpu)
{
- unbind_from_irqhandler(per_cpu(xen_resched_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfunc_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_debug_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_callfuncsingle_irq, cpu), NULL);
- unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
+ xen_cpu_die(cpu);
native_cpu_die(cpu);
}
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index f7a080e..8b54603 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -364,6 +364,16 @@ void __cpuinit xen_init_lock_cpu(int cpu)
int irq;
const char *name;
+ WARN(per_cpu(lock_kicker_irq, cpu) > 0, "spinlock on CPU%d exists on IRQ%d!\n",
+ cpu, per_cpu(lock_kicker_irq, cpu));
+
+ /*
+ * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+ * (xen: disable PV spinlocks on HVM)
+ */
+ if (xen_hvm_domain())
+ return;
+
name = kasprintf(GFP_KERNEL, "spinlock%d", cpu);
irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR,
cpu,
@@ -382,11 +392,26 @@ void __cpuinit xen_init_lock_cpu(int cpu)
void xen_uninit_lock_cpu(int cpu)
{
+ /*
+ * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+ * (xen: disable PV spinlocks on HVM)
+ */
+ if (xen_hvm_domain())
+ return;
+
unbind_from_irqhandler(per_cpu(lock_kicker_irq, cpu), NULL);
+ per_cpu(lock_kicker_irq, cpu) = -1;
}
void __init xen_init_spinlocks(void)
{
+ /*
+ * See git commit f10cd522c5fbfec9ae3cc01967868c9c2401ed23
+ * (xen: disable PV spinlocks on HVM)
+ */
+ if (xen_hvm_domain())
+ return;
+
BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
pv_lock_ops.spin_is_locked = xen_spin_is_locked;
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 0296a95..3d88bfd 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -377,7 +377,7 @@ static const struct clock_event_device xen_vcpuop_clockevent = {
static const struct clock_event_device *xen_clockevent =
&xen_timerop_clockevent;
-static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);
+static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events) = { .irq = -1 };
static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
@@ -401,6 +401,9 @@ void xen_setup_timer(int cpu)
struct clock_event_device *evt;
int irq;
+ evt = &per_cpu(xen_clock_events, cpu);
+ WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
+
printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
name = kasprintf(GFP_KERNEL, "timer%d", cpu);
@@ -413,7 +416,6 @@ void xen_setup_timer(int cpu)
IRQF_FORCE_RESUME,
name, NULL);
- evt = &per_cpu(xen_clock_events, cpu);
memcpy(evt, xen_clockevent, sizeof(*evt));
evt->cpumask = cpumask_of(cpu);
@@ -426,6 +428,7 @@ void xen_teardown_timer(int cpu)
BUG_ON(cpu == 0);
evt = &per_cpu(xen_clock_events, cpu);
unbind_from_irqhandler(evt->irq, NULL);
+ evt->irq = -1;
}
void xen_setup_cpu_clockevents(void)
@@ -497,7 +500,11 @@ static void xen_hvm_setup_cpu_clockevents(void)
{
int cpu = smp_processor_id();
xen_setup_runstate_info(cpu);
- xen_setup_timer(cpu);
+ /*
+ * xen_setup_timer(cpu) - snprintf is bad in atomic context. Hence
+ * doing it xen_hvm_cpu_notify (which gets called by smp_init during
+ * early bootup and also during CPU hotplug events).
+ */
xen_setup_cpu_clockevents();
}