summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/boot/compressed/eboot.c8
-rw-r--r--arch/x86/crypto/camellia_aesni_avx_glue.c5
-rw-r--r--arch/x86/include/asm/cpufeature.h2
-rw-r--r--arch/x86/include/asm/efi.h2
-rw-r--r--arch/x86/include/asm/hpet.h6
-rw-r--r--arch/x86/include/asm/kvm_host.h6
-rw-r--r--arch/x86/include/asm/pvclock-abi.h1
-rw-r--r--arch/x86/include/asm/string_64.h5
-rw-r--r--arch/x86/include/asm/xen/hypercall.h4
-rw-r--r--arch/x86/include/uapi/asm/bitsperlong.h2
-rw-r--r--arch/x86/kernel/apic/io_apic.c8
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c12
-rw-r--r--arch/x86/kernel/cpu/scattered.c2
-rw-r--r--arch/x86/kernel/crash.c7
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/hpet.c29
-rw-r--r--arch/x86/kernel/pci-dma.c5
-rw-r--r--arch/x86/kernel/process.c58
-rw-r--r--arch/x86/kernel/process_32.c28
-rw-r--r--arch/x86/kernel/process_64.c24
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/setup.c8
-rw-r--r--arch/x86/kernel/smpboot.c15
-rw-r--r--arch/x86/kvm/emulate.c10
-rw-r--r--arch/x86/kvm/svm.c125
-rw-r--r--arch/x86/kvm/vmx.c37
-rw-r--r--arch/x86/kvm/x86.c139
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/platform/efi/efi.c67
-rw-r--r--arch/x86/um/ldt.c5
-rw-r--r--arch/x86/xen/enlighten.c24
-rw-r--r--arch/x86/xen/p2m.c19
-rw-r--r--arch/x86/xen/setup.c4
34 files changed, 364 insertions, 310 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 328c835..96d058a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1308,6 +1308,7 @@ config HIGHMEM
config X86_PAE
bool "PAE (Physical Address Extension) Support"
depends on X86_32 && !HIGHMEM4G
+ select SWIOTLB
---help---
PAE is required for NX support, and furthermore enables
larger swapspace support for non-overcommit purposes. It
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index ee1b6d3..db51c1f 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -667,6 +667,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u32 h = handles[i];
+ u32 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop32);
@@ -678,7 +679,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query32(gop32, &info, &size, &fb_base);
+ status = __gop_query32(gop32, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -692,6 +693,7 @@ setup_gop32(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
@@ -770,6 +772,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
bool conout_found = false;
void *dummy = NULL;
u64 h = handles[i];
+ u32 current_fb_base;
status = efi_call_early(handle_protocol, h,
proto, (void **)&gop64);
@@ -781,7 +784,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query64(gop64, &info, &size, &fb_base);
+ status = __gop_query64(gop64, &info, &size, &current_fb_base);
if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
/*
* Systems that use the UEFI Console Splitter may
@@ -795,6 +798,7 @@ setup_gop64(struct screen_info *si, efi_guid_t *proto,
pixel_format = info->pixel_format;
pixel_info = info->pixel_information;
pixels_per_scan_line = info->pixels_per_scan_line;
+ fb_base = current_fb_base;
/*
* Once we've found a GOP supporting ConOut,
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 80a0e43..bacaa13 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -554,6 +554,11 @@ static int __init camellia_aesni_init(void)
{
const char *feature_name;
+ if (!cpu_has_avx || !cpu_has_aes || !cpu_has_osxsave) {
+ pr_info("AVX or AES-NI instructions are not detected.\n");
+ return -ENODEV;
+ }
+
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index e6cf2ad..9727b3b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -193,7 +193,7 @@
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_HWP ( 7*32+ 10) /* "hwp" Intel HWP */
-#define X86_FEATURE_HWP_NOITFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
+#define X86_FEATURE_HWP_NOTIFY ( 7*32+ 11) /* Intel HWP_NOTIFY */
#define X86_FEATURE_HWP_ACT_WINDOW ( 7*32+ 12) /* Intel HWP_ACT_WINDOW */
#define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
#define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index ab5f1d4..ae68be9 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -86,6 +86,7 @@ extern u64 asmlinkage efi_call(void *fp, ...);
extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
u32 type, u64 attribute);
+#ifdef CONFIG_KASAN
/*
* CONFIG_KASAN may redefine memset to __memset. __memset function is present
* only in kernel binary. Since the EFI stub linked into a separate binary it
@@ -95,6 +96,7 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
#undef memcpy
#undef memset
#undef memmove
+#endif
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 5fa9fb0..cc285ec 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -63,10 +63,10 @@
/* hpet memory map physical address */
extern unsigned long hpet_address;
extern unsigned long force_hpet_address;
-extern int boot_hpet_disable;
+extern bool boot_hpet_disable;
extern u8 hpet_blockid;
-extern int hpet_force_user;
-extern u8 hpet_msi_disable;
+extern bool hpet_force_user;
+extern bool hpet_msi_disable;
extern int is_hpet_enabled(void);
extern int hpet_enable(void);
extern void hpet_disable(void);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2beee03..3a36ee7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1226,10 +1226,8 @@ void kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
int kvm_is_in_guest(void);
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem);
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size);
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/pvclock-abi.h b/arch/x86/include/asm/pvclock-abi.h
index 655e07a..67f0823 100644
--- a/arch/x86/include/asm/pvclock-abi.h
+++ b/arch/x86/include/asm/pvclock-abi.h
@@ -41,6 +41,7 @@ struct pvclock_wall_clock {
#define PVCLOCK_TSC_STABLE_BIT (1 << 0)
#define PVCLOCK_GUEST_STOPPED (1 << 1)
+/* PVCLOCK_COUNTS_FROM_ZERO broke ABI and can't be used anymore. */
#define PVCLOCK_COUNTS_FROM_ZERO (1 << 2)
#endif /* __ASSEMBLY__ */
#endif /* _ASM_X86_PVCLOCK_ABI_H */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index e466119..ff8b9a1 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -27,12 +27,11 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t
function. */
#define __HAVE_ARCH_MEMCPY 1
+extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#ifndef CONFIG_KMEMCHECK
-#if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4
-extern void *memcpy(void *to, const void *from, size_t len);
-#else
+#if (__GNUC__ == 4 && __GNUC_MINOR__ < 3) || __GNUC__ < 4
#define memcpy(dst, src, len) \
({ \
size_t __len = (len); \
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 83aea80..4c20dd3 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -336,10 +336,10 @@ HYPERVISOR_update_descriptor(u64 ma, u64 desc)
return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
}
-static inline int
+static inline long
HYPERVISOR_memory_op(unsigned int cmd, void *arg)
{
- return _hypercall2(int, memory_op, cmd, arg);
+ return _hypercall2(long, memory_op, cmd, arg);
}
static inline int
diff --git a/arch/x86/include/uapi/asm/bitsperlong.h b/arch/x86/include/uapi/asm/bitsperlong.h
index b0ae1c4..217909b 100644
--- a/arch/x86/include/uapi/asm/bitsperlong.h
+++ b/arch/x86/include/uapi/asm/bitsperlong.h
@@ -1,7 +1,7 @@
#ifndef __ASM_X86_BITSPERLONG_H
#define __ASM_X86_BITSPERLONG_H
-#ifdef __x86_64__
+#if defined(__x86_64__) && !defined(__ILP32__)
# define __BITS_PER_LONG 64
#else
# define __BITS_PER_LONG 32
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 5c60bb1..4f28215 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2547,7 +2547,9 @@ void __init setup_ioapic_dest(void)
mask = apic->target_cpus();
chip = irq_data_get_irq_chip(idata);
- chip->irq_set_affinity(idata, mask, false);
+ /* Might be lapic_chip for irq 0 */
+ if (chip->irq_set_affinity)
+ chip->irq_set_affinity(idata, mask, false);
}
}
#endif
@@ -2907,6 +2909,7 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
struct irq_data *irq_data;
struct mp_chip_data *data;
struct irq_alloc_info *info = arg;
+ unsigned long flags;
if (!info || nr_irqs > 1)
return -EINVAL;
@@ -2939,11 +2942,14 @@ int mp_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
cfg = irqd_cfg(irq_data);
add_pin_to_irq_node(data, ioapic_alloc_attr_node(info), ioapic, pin);
+
+ local_irq_save(flags);
if (info->ioapic_entry)
mp_setup_entry(cfg, data, info->ioapic_entry);
mp_register_handler(virq, data->trigger);
if (virq < nr_legacy_irqs())
legacy_pic->mask(virq);
+ local_irq_restore(flags);
apic_printk(APIC_VERBOSE, KERN_DEBUG
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i Dest:%d)\n",
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 381c8b9..20e242e 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -34,11 +34,10 @@
struct ms_hyperv_info ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
-static void (*hv_kexec_handler)(void);
-static void (*hv_crash_handler)(struct pt_regs *regs);
-
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
+static void (*hv_kexec_handler)(void);
+static void (*hv_crash_handler)(struct pt_regs *regs);
void hyperv_vector_handler(struct pt_regs *regs)
{
@@ -96,8 +95,8 @@ void hv_remove_crash_handler(void)
hv_crash_handler = NULL;
}
EXPORT_SYMBOL_GPL(hv_remove_crash_handler);
-#endif
+#ifdef CONFIG_KEXEC_CORE
static void hv_machine_shutdown(void)
{
if (kexec_in_progress && hv_kexec_handler)
@@ -111,7 +110,8 @@ static void hv_machine_crash_shutdown(struct pt_regs *regs)
hv_crash_handler(regs);
native_machine_crash_shutdown(regs);
}
-
+#endif /* CONFIG_KEXEC_CORE */
+#endif /* CONFIG_HYPERV */
static uint32_t __init ms_hyperv_platform(void)
{
@@ -186,8 +186,10 @@ static void __init ms_hyperv_init_platform(void)
no_timer_check = 1;
#endif
+#if IS_ENABLED(CONFIG_HYPERV) && defined(CONFIG_KEXEC_CORE)
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
+#endif
mark_tsc_unstable("running on Hyper-V");
}
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 3d423a1..608fb26 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -37,7 +37,7 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c)
{ X86_FEATURE_PLN, CR_EAX, 4, 0x00000006, 0 },
{ X86_FEATURE_PTS, CR_EAX, 6, 0x00000006, 0 },
{ X86_FEATURE_HWP, CR_EAX, 7, 0x00000006, 0 },
- { X86_FEATURE_HWP_NOITFY, CR_EAX, 8, 0x00000006, 0 },
+ { X86_FEATURE_HWP_NOTIFY, CR_EAX, 8, 0x00000006, 0 },
{ X86_FEATURE_HWP_ACT_WINDOW, CR_EAX, 9, 0x00000006, 0 },
{ X86_FEATURE_HWP_EPP, CR_EAX,10, 0x00000006, 0 },
{ X86_FEATURE_HWP_PKG_REQ, CR_EAX,11, 0x00000006, 0 },
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index e068d66..74ca2fe 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -185,10 +185,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
}
#ifdef CONFIG_KEXEC_FILE
-static int get_nr_ram_ranges_callback(unsigned long start_pfn,
- unsigned long nr_pfn, void *arg)
+static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg)
{
- int *nr_ranges = arg;
+ unsigned int *nr_ranges = arg;
(*nr_ranges)++;
return 0;
@@ -214,7 +213,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced,
ced->image = image;
- walk_system_ram_range(0, -1, &nr_ranges,
+ walk_system_ram_res(0, -1, &nr_ranges,
get_nr_ram_ranges_callback);
ced->max_nr_ranges = nr_ranges;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 9f9cc68..db9a675 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -584,7 +584,7 @@ static void __init intel_graphics_stolen(int num, int slot, int func)
static void __init force_disable_hpet(int num, int slot, int func)
{
#ifdef CONFIG_HPET_TIMER
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
pr_info("x86/hpet: Will disable the HPET for this platform because it's not reliable\n");
#endif
}
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 88b4da3..b8e6ff5 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -37,10 +37,10 @@
*/
unsigned long hpet_address;
u8 hpet_blockid; /* OS timer block num */
-u8 hpet_msi_disable;
+bool hpet_msi_disable;
#ifdef CONFIG_PCI_MSI
-static unsigned long hpet_num_timers;
+static unsigned int hpet_num_timers;
#endif
static void __iomem *hpet_virt_address;
@@ -86,9 +86,9 @@ static inline void hpet_clear_mapping(void)
/*
* HPET command line enable / disable
*/
-int boot_hpet_disable;
-int hpet_force_user;
-static int hpet_verbose;
+bool boot_hpet_disable;
+bool hpet_force_user;
+static bool hpet_verbose;
static int __init hpet_setup(char *str)
{
@@ -98,11 +98,11 @@ static int __init hpet_setup(char *str)
if (next)
*next++ = 0;
if (!strncmp("disable", str, 7))
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
if (!strncmp("force", str, 5))
- hpet_force_user = 1;
+ hpet_force_user = true;
if (!strncmp("verbose", str, 7))
- hpet_verbose = 1;
+ hpet_verbose = true;
str = next;
}
return 1;
@@ -111,7 +111,7 @@ __setup("hpet=", hpet_setup);
static int __init disable_hpet(char *str)
{
- boot_hpet_disable = 1;
+ boot_hpet_disable = true;
return 1;
}
__setup("nohpet", disable_hpet);
@@ -124,7 +124,7 @@ static inline int is_hpet_capable(void)
/*
* HPET timer interrupt enable / disable
*/
-static int hpet_legacy_int_enabled;
+static bool hpet_legacy_int_enabled;
/**
* is_hpet_enabled - check whether the hpet timer interrupt is enabled
@@ -230,7 +230,7 @@ static struct clock_event_device hpet_clockevent;
static void hpet_stop_counter(void)
{
- unsigned long cfg = hpet_readl(HPET_CFG);
+ u32 cfg = hpet_readl(HPET_CFG);
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
}
@@ -272,7 +272,7 @@ static void hpet_enable_legacy_int(void)
cfg |= HPET_CFG_LEGACY;
hpet_writel(cfg, HPET_CFG);
- hpet_legacy_int_enabled = 1;
+ hpet_legacy_int_enabled = true;
}
static void hpet_legacy_clockevent_register(void)
@@ -983,7 +983,7 @@ void hpet_disable(void)
cfg = *hpet_boot_cfg;
else if (hpet_legacy_int_enabled) {
cfg &= ~HPET_CFG_LEGACY;
- hpet_legacy_int_enabled = 0;
+ hpet_legacy_int_enabled = false;
}
cfg &= ~HPET_CFG_ENABLE;
hpet_writel(cfg, HPET_CFG);
@@ -1121,8 +1121,7 @@ EXPORT_SYMBOL_GPL(hpet_rtc_timer_init);
static void hpet_disable_rtc_channel(void)
{
- unsigned long cfg;
- cfg = hpet_readl(HPET_T1_CFG);
+ u32 cfg = hpet_readl(HPET_T1_CFG);
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T1_CFG);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index 1b55de1..cd99433 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -131,11 +131,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp)
{
+ if (!*dev)
+ *dev = &x86_dma_fallback_dev;
+
*gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
*gfp = dma_alloc_coherent_gfp_flags(*dev, *gfp);
- if (!*dev)
- *dev = &x86_dma_fallback_dev;
if (!is_device_dma_capable(*dev))
return false;
return true;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d0e62a..9f7c21c 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -84,6 +84,9 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
memcpy(dst, src, arch_task_struct_size);
+#ifdef CONFIG_VM86
+ dst->thread.vm86 = NULL;
+#endif
return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
}
@@ -506,3 +509,58 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}
+/*
+ * Called from fs/proc with a reference on @p to find the function
+ * which called into schedule(). This needs to be done carefully
+ * because the task might wake up and we might look at a stack
+ * changing under us.
+ */
+unsigned long get_wchan(struct task_struct *p)
+{
+ unsigned long start, bottom, top, sp, fp, ip;
+ int count = 0;
+
+ if (!p || p == current || p->state == TASK_RUNNING)
+ return 0;
+
+ start = (unsigned long)task_stack_page(p);
+ if (!start)
+ return 0;
+
+ /*
+ * Layout of the stack page:
+ *
+ * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long)
+ * PADDING
+ * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
+ * stack
+ * ----------- bottom = start + sizeof(thread_info)
+ * thread_info
+ * ----------- start
+ *
+ * The tasks stack pointer points at the location where the
+ * framepointer is stored. The data on the stack is:
+ * ... IP FP ... IP FP
+ *
+ * We need to read FP and IP, so we need to adjust the upper
+ * bound by another unsigned long.
+ */
+ top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
+ top -= 2 * sizeof(unsigned long);
+ bottom = start + sizeof(struct thread_info);
+
+ sp = READ_ONCE(p->thread.sp);
+ if (sp < bottom || sp > top)
+ return 0;
+
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
+ do {
+ if (fp < bottom || fp > top)
+ return 0;
+ ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
+ if (!in_sched_functions(ip))
+ return ip;
+ fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
+ } while (count++ < 16 && p->state != TASK_RUNNING);
+ return 0;
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c13df2c..737527b 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -324,31 +324,3 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
-
-#define top_esp (THREAD_SIZE - sizeof(unsigned long))
-#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
-
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long bp, sp, ip;
- unsigned long stack_page;
- int count = 0;
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
- stack_page = (unsigned long)task_stack_page(p);
- sp = p->thread.sp;
- if (!stack_page || sp < stack_page || sp > top_esp+stack_page)
- return 0;
- /* include/asm-i386/system.h:switch_to() pushes bp last. */
- bp = *(unsigned long *) sp;
- do {
- if (bp < stack_page || bp > top_ebp+stack_page)
- return 0;
- ip = *(unsigned long *) (bp+4);
- if (!in_sched_functions(ip))
- return ip;
- bp = *(unsigned long *) bp;
- } while (count++ < 16);
- return 0;
-}
-
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c1bbcf..b35921a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -499,30 +499,6 @@ void set_personality_ia32(bool x32)
}
EXPORT_SYMBOL_GPL(set_personality_ia32);
-unsigned long get_wchan(struct task_struct *p)
-{
- unsigned long stack;
- u64 fp, ip;
- int count = 0;
-
- if (!p || p == current || p->state == TASK_RUNNING)
- return 0;
- stack = (unsigned long)task_stack_page(p);
- if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
- return 0;
- fp = *(u64 *)(p->thread.sp);
- do {
- if (fp < (unsigned long)stack ||
- fp >= (unsigned long)stack+THREAD_SIZE)
- return 0;
- ip = *(u64 *)(fp+8);
- if (!in_sched_functions(ip))
- return ip;
- fp = *(u64 *)fp;
- } while (count++ < 16);
- return 0;
-}
-
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
int ret = 0;
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 176a0f9..cc457ff 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -524,7 +524,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
*/
static void force_disable_hpet_msi(struct pci_dev *unused)
{
- hpet_msi_disable = 1;
+ hpet_msi_disable = true;
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index fdb7f2a..a3cccbf 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1173,6 +1173,14 @@ void __init setup_arch(char **cmdline_p)
clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
+
+ /*
+ * sync back low identity map too. It is used for example
+ * in the 32-bit EFI stub.
+ */
+ clone_pgd_range(initial_page_table,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
#endif
tboot_probe();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e0c198e..892ee2e5 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -509,7 +509,7 @@ void __inquire_remote_apic(int apicid)
*/
#define UDELAY_10MS_DEFAULT 10000
-static unsigned int init_udelay = UDELAY_10MS_DEFAULT;
+static unsigned int init_udelay = INT_MAX;
static int __init cpu_init_udelay(char *str)
{
@@ -522,13 +522,16 @@ early_param("cpu_init_udelay", cpu_init_udelay);
static void __init smp_quirk_init_udelay(void)
{
/* if cmdline changed it from default, leave it alone */
- if (init_udelay != UDELAY_10MS_DEFAULT)
+ if (init_udelay != INT_MAX)
return;
/* if modern processor, use no delay */
if (((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 == 6)) ||
((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && (boot_cpu_data.x86 >= 0xF)))
init_udelay = 0;
+
+ /* else, use legacy delay */
+ init_udelay = UDELAY_10MS_DEFAULT;
}
/*
@@ -657,7 +660,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(300);
pr_debug("Startup point 1\n");
@@ -668,7 +673,9 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
/*
* Give the other CPU some time to accept the IPI.
*/
- if (init_udelay)
+ if (init_udelay == 0)
+ udelay(10);
+ else
udelay(200);
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b372a75..9da95b9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2418,7 +2418,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
u64 val, cr0, cr4;
u32 base3;
u16 selector;
- int i;
+ int i, r;
for (i = 0; i < 16; i++)
*reg_write(ctxt, i) = GET_SMSTATE(u64, smbase, 0x7ff8 - i * 8);
@@ -2460,13 +2460,17 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
ctxt->ops->set_gdt(ctxt, &dt);
+ r = rsm_enter_protected_mode(ctxt, cr0, cr4);
+ if (r != X86EMUL_CONTINUE)
+ return r;
+
for (i = 0; i < 6; i++) {
- int r = rsm_load_seg_64(ctxt, smbase, i);
+ r = rsm_load_seg_64(ctxt, smbase, i);
if (r != X86EMUL_CONTINUE)
return r;
}
- return rsm_enter_protected_mode(ctxt, cr0, cr4);
+ return X86EMUL_CONTINUE;
}
static int em_rsm(struct x86_emulate_ctxt *ctxt)
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 94b7d15..2f9ed1f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -514,7 +514,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->vmcb->control.next_rip != 0) {
- WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS));
+ WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
}
@@ -866,64 +866,6 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
}
-#define MTRR_TYPE_UC_MINUS 7
-#define MTRR2PROTVAL_INVALID 0xff
-
-static u8 mtrr2protval[8];
-
-static u8 fallback_mtrr_type(int mtrr)
-{
- /*
- * WT and WP aren't always available in the host PAT. Treat
- * them as UC and UC- respectively. Everything else should be
- * there.
- */
- switch (mtrr)
- {
- case MTRR_TYPE_WRTHROUGH:
- return MTRR_TYPE_UNCACHABLE;
- case MTRR_TYPE_WRPROT:
- return MTRR_TYPE_UC_MINUS;
- default:
- BUG();
- }
-}
-
-static void build_mtrr2protval(void)
-{
- int i;
- u64 pat;
-
- for (i = 0; i < 8; i++)
- mtrr2protval[i] = MTRR2PROTVAL_INVALID;
-
- /* Ignore the invalid MTRR types. */
- mtrr2protval[2] = 0;
- mtrr2protval[3] = 0;
-
- /*
- * Use host PAT value to figure out the mapping from guest MTRR
- * values to nested page table PAT/PCD/PWT values. We do not
- * want to change the host PAT value every time we enter the
- * guest.
- */
- rdmsrl(MSR_IA32_CR_PAT, pat);
- for (i = 0; i < 8; i++) {
- u8 mtrr = pat >> (8 * i);
-
- if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
- mtrr2protval[mtrr] = __cm_idx2pte(i);
- }
-
- for (i = 0; i < 8; i++) {
- if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
- u8 fallback = fallback_mtrr_type(i);
- mtrr2protval[i] = mtrr2protval[fallback];
- BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
- }
- }
-}
-
static __init int svm_hardware_setup(void)
{
int cpu;
@@ -990,7 +932,6 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
- build_mtrr2protval();
return 0;
err:
@@ -1145,43 +1086,6 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
return target_tsc - tsc;
}
-static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
-{
- struct kvm_vcpu *vcpu = &svm->vcpu;
-
- /* Unlike Intel, AMD takes the guest's CR0.CD into account.
- *
- * AMD does not have IPAT. To emulate it for the case of guests
- * with no assigned devices, just set everything to WB. If guests
- * have assigned devices, however, we cannot force WB for RAM
- * pages only, so use the guest PAT directly.
- */
- if (!kvm_arch_has_assigned_device(vcpu->kvm))
- *g_pat = 0x0606060606060606;
- else
- *g_pat = vcpu->arch.pat;
-}
-
-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
- u8 mtrr;
-
- /*
- * 1. MMIO: trust guest MTRR, so same as item 3.
- * 2. No passthrough: always map as WB, and force guest PAT to WB as well
- * 3. Passthrough: can't guarantee the result, try to trust guest.
- */
- if (!is_mmio && !kvm_arch_has_assigned_device(vcpu->kvm))
- return 0;
-
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED) &&
- kvm_read_cr0(vcpu) & X86_CR0_CD)
- return _PAGE_NOCACHE;
-
- mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
- return mtrr2protval[mtrr];
-}
-
static void init_vmcb(struct vcpu_svm *svm, bool init_event)
{
struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1278,7 +1182,6 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
clr_cr_intercept(svm, INTERCEPT_CR3_READ);
clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
save->g_pat = svm->vcpu.arch.pat;
- svm_set_guest_pat(svm, &save->g_pat);
save->cr3 = 0;
save->cr4 = 0;
}
@@ -1673,10 +1576,13 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
if (!vcpu->fpu_active)
cr0 |= X86_CR0_TS;
-
- /* These are emulated via page tables. */
- cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
-
+ /*
+ * re-enable caching here because the QEMU bios
+ * does not do it - this results in some delay at
+ * reboot
+ */
+ if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
+ cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
svm->vmcb->save.cr0 = cr0;
mark_dirty(svm->vmcb, VMCB_CR);
update_cr0_intercept(svm);
@@ -3351,16 +3257,6 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
case MSR_VM_IGNNE:
vcpu_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
break;
- case MSR_IA32_CR_PAT:
- if (npt_enabled) {
- if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
- return 1;
- vcpu->arch.pat = data;
- svm_set_guest_pat(svm, &svm->vmcb->save.g_pat);
- mark_dirty(svm->vmcb, VMCB_NPT);
- break;
- }
- /* fall through */
default:
return kvm_set_msr_common(vcpu, msr);
}
@@ -4195,6 +4091,11 @@ static bool svm_has_high_real_mode_segbase(void)
return true;
}
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+ return 0;
+}
+
static void svm_cpuid_update(struct kvm_vcpu *vcpu)
{
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6407674..6a8bc64 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4105,17 +4105,13 @@ static void seg_setup(int seg)
static int alloc_apic_access_page(struct kvm *kvm)
{
struct page *page;
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
mutex_lock(&kvm->slots_lock);
if (kvm->arch.apic_access_page_done)
goto out;
- kvm_userspace_mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr = APIC_DEFAULT_PHYS_BASE;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT,
+ APIC_DEFAULT_PHYS_BASE, PAGE_SIZE);
if (r)
goto out;
@@ -4140,17 +4136,12 @@ static int alloc_identity_pagetable(struct kvm *kvm)
{
/* Called with kvm->slots_lock held. */
- struct kvm_userspace_memory_region kvm_userspace_mem;
int r = 0;
BUG_ON(kvm->arch.ept_identity_pagetable_done);
- kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- kvm_userspace_mem.flags = 0;
- kvm_userspace_mem.guest_phys_addr =
- kvm->arch.ept_identity_map_addr;
- kvm_userspace_mem.memory_size = PAGE_SIZE;
- r = __x86_set_memory_region(kvm, &kvm_userspace_mem);
+ r = __x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT,
+ kvm->arch.ept_identity_map_addr, PAGE_SIZE);
return r;
}
@@ -4949,14 +4940,9 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
{
int ret;
- struct kvm_userspace_memory_region tss_mem = {
- .slot = TSS_PRIVATE_MEMSLOT,
- .guest_phys_addr = addr,
- .memory_size = PAGE_SIZE * 3,
- .flags = 0,
- };
- ret = x86_set_memory_region(kvm, &tss_mem);
+ ret = x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, addr,
+ PAGE_SIZE * 3);
if (ret)
return ret;
kvm->arch.tss_addr = addr;
@@ -8617,17 +8603,22 @@ static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
u64 ipat = 0;
/* For VT-d and EPT combination
- * 1. MMIO: guest may want to apply WC, trust it.
+ * 1. MMIO: always map as UC
* 2. EPT with VT-d:
* a. VT-d without snooping control feature: can't guarantee the
- * result, try to trust guest. So the same as item 1.
+ * result, try to trust guest.
* b. VT-d with snooping control feature: snooping control feature of
* VT-d engine can guarantee the cache correctness. Just set it
* to WB to keep consistent with host. So the same as item 3.
* 3. EPT without VT-d: always map as WB and set IPAT=1 to keep
* consistent with host MTRR
*/
- if (!is_mmio && !kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
+ if (is_mmio) {
+ cache = MTRR_TYPE_UNCACHABLE;
+ goto exit;
+ }
+
+ if (!kvm_arch_has_noncoherent_dma(vcpu->kvm)) {
ipat = VMX_EPT_IPAT_BIT;
cache = MTRR_TYPE_WRBACK;
goto exit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 991466b..9a9a198 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1708,8 +1708,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
vcpu->pvclock_set_guest_stopped_request = false;
}
- pvclock_flags |= PVCLOCK_COUNTS_FROM_ZERO;
-
/* If the host uses TSC clocksource, then it is stable */
if (use_master_clock)
pvclock_flags |= PVCLOCK_TSC_STABLE_BIT;
@@ -2007,8 +2005,6 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
&vcpu->requests);
ka->boot_vcpu_runs_old_kvmclock = tmp;
-
- ka->kvmclock_offset = -get_kernel_ns();
}
vcpu->arch.time = data;
@@ -6457,6 +6453,12 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
return 1;
}
+static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+ !vcpu->arch.apf.halted);
+}
+
static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
@@ -6465,8 +6467,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
for (;;) {
- if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
+ if (kvm_vcpu_running(vcpu))
r = vcpu_enter_guest(vcpu);
else
r = vcpu_block(kvm, vcpu);
@@ -7478,34 +7479,66 @@ void kvm_arch_sync_events(struct kvm *kvm)
kvm_free_pit(kvm);
}
-int __x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int i, r;
+ unsigned long hva;
+ struct kvm_memslots *slots = kvm_memslots(kvm);
+ struct kvm_memory_slot *slot, old;
/* Called with kvm->slots_lock held. */
- BUG_ON(mem->slot >= KVM_MEM_SLOTS_NUM);
+ if (WARN_ON(id >= KVM_MEM_SLOTS_NUM))
+ return -EINVAL;
+ slot = id_to_memslot(slots, id);
+ if (size) {
+ if (WARN_ON(slot->npages))
+ return -EEXIST;
+
+ /*
+ * MAP_SHARED to prevent internal slot pages from being moved
+ * by fork()/COW.
+ */
+ hva = vm_mmap(NULL, 0, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, 0);
+ if (IS_ERR((void *)hva))
+ return PTR_ERR((void *)hva);
+ } else {
+ if (!slot->npages)
+ return 0;
+
+ hva = 0;
+ }
+
+ old = *slot;
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- struct kvm_userspace_memory_region m = *mem;
+ struct kvm_userspace_memory_region m;
- m.slot |= i << 16;
+ m.slot = id | (i << 16);
+ m.flags = 0;
+ m.guest_phys_addr = gpa;
+ m.userspace_addr = hva;
+ m.memory_size = size;
r = __kvm_set_memory_region(kvm, &m);
if (r < 0)
return r;
}
+ if (!size) {
+ r = vm_munmap(old.userspace_addr, old.npages * PAGE_SIZE);
+ WARN_ON(r < 0);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(__x86_set_memory_region);
-int x86_set_memory_region(struct kvm *kvm,
- const struct kvm_userspace_memory_region *mem)
+int x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size)
{
int r;
mutex_lock(&kvm->slots_lock);
- r = __x86_set_memory_region(kvm, mem);
+ r = __x86_set_memory_region(kvm, id, gpa, size);
mutex_unlock(&kvm->slots_lock);
return r;
@@ -7520,16 +7553,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
* unless the the memory map has changed due to process exit
* or fd copying.
*/
- struct kvm_userspace_memory_region mem;
- memset(&mem, 0, sizeof(mem));
- mem.slot = APIC_ACCESS_PAGE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
-
- mem.slot = TSS_PRIVATE_MEMSLOT;
- x86_set_memory_region(kvm, &mem);
+ x86_set_memory_region(kvm, APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, IDENTITY_PAGETABLE_PRIVATE_MEMSLOT, 0, 0);
+ x86_set_memory_region(kvm, TSS_PRIVATE_MEMSLOT, 0, 0);
}
kvm_iommu_unmap_guest(kvm);
kfree(kvm->arch.vpic);
@@ -7632,27 +7658,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
const struct kvm_userspace_memory_region *mem,
enum kvm_mr_change change)
{
- /*
- * Only private memory slots need to be mapped here since
- * KVM_SET_MEMORY_REGION ioctl is no longer supported.
- */
- if ((memslot->id >= KVM_USER_MEM_SLOTS) && (change == KVM_MR_CREATE)) {
- unsigned long userspace_addr;
-
- /*
- * MAP_SHARED to prevent internal slot pages from being moved
- * by fork()/COW.
- */
- userspace_addr = vm_mmap(NULL, 0, memslot->npages * PAGE_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, 0);
-
- if (IS_ERR((void *)userspace_addr))
- return PTR_ERR((void *)userspace_addr);
-
- memslot->userspace_addr = userspace_addr;
- }
-
return 0;
}
@@ -7714,17 +7719,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
{
int nr_mmu_pages = 0;
- if (change == KVM_MR_DELETE && old->id >= KVM_USER_MEM_SLOTS) {
- int ret;
-
- ret = vm_munmap(old->userspace_addr,
- old->npages * PAGE_SIZE);
- if (ret < 0)
- printk(KERN_WARNING
- "kvm_vm_ioctl_set_memory_region: "
- "failed to munmap memory\n");
- }
-
if (!kvm->arch.n_requested_mmu_pages)
nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm);
@@ -7773,19 +7767,36 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
kvm_mmu_invalidate_zap_all_pages(kvm);
}
+static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
+{
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+
+ if (kvm_apic_has_events(vcpu))
+ return true;
+
+ if (vcpu->arch.pv.pv_unhalted)
+ return true;
+
+ if (atomic_read(&vcpu->arch.nmi_queued))
+ return true;
+
+ if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ return true;
+
+ if (kvm_arch_interrupt_allowed(vcpu) &&
+ kvm_cpu_has_interrupt(vcpu))
+ return true;
+
+ return false;
+}
+
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
kvm_x86_ops->check_nested_events(vcpu, false);
- return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
- !vcpu->arch.apf.halted)
- || !list_empty_careful(&vcpu->async_pf.done)
- || kvm_apic_has_events(vcpu)
- || vcpu->arch.pv.pv_unhalted
- || atomic_read(&vcpu->arch.nmi_queued) ||
- (kvm_arch_interrupt_allowed(vcpu) &&
- kvm_cpu_has_interrupt(vcpu));
+ return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu);
}
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30564e2..df48430 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1132,7 +1132,7 @@ void mark_rodata_ro(void)
* has been zapped already via cleanup_highmem().
*/
all_end = roundup((unsigned long)_brk_end, PMD_SIZE);
- set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT);
rodata_test();
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 1db84c0..6a28ded 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -705,6 +705,70 @@ out:
}
/*
+ * Iterate the EFI memory map in reverse order because the regions
+ * will be mapped top-down. The end result is the same as if we had
+ * mapped things forward, but doesn't require us to change the
+ * existing implementation of efi_map_region().
+ */
+static inline void *efi_map_next_entry_reverse(void *entry)
+{
+ /* Initial call */
+ if (!entry)
+ return memmap.map_end - memmap.desc_size;
+
+ entry -= memmap.desc_size;
+ if (entry < memmap.map)
+ return NULL;
+
+ return entry;
+}
+
+/*
+ * efi_map_next_entry - Return the next EFI memory map descriptor
+ * @entry: Previous EFI memory map descriptor
+ *
+ * This is a helper function to iterate over the EFI memory map, which
+ * we do in different orders depending on the current configuration.
+ *
+ * To begin traversing the memory map @entry must be %NULL.
+ *
+ * Returns %NULL when we reach the end of the memory map.
+ */
+static void *efi_map_next_entry(void *entry)
+{
+ if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) {
+ /*
+ * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE
+ * config table feature requires us to map all entries
+ * in the same order as they appear in the EFI memory
+ * map. That is to say, entry N must have a lower
+ * virtual address than entry N+1. This is because the
+ * firmware toolchain leaves relative references in
+ * the code/data sections, which are split and become
+ * separate EFI memory regions. Mapping things
+ * out-of-order leads to the firmware accessing
+ * unmapped addresses.
+ *
+ * Since we need to map things this way whether or not
+ * the kernel actually makes use of
+ * EFI_PROPERTIES_TABLE, let's just switch to this
+ * scheme by default for 64-bit.
+ */
+ return efi_map_next_entry_reverse(entry);
+ }
+
+ /* Initial call */
+ if (!entry)
+ return memmap.map;
+
+ entry += memmap.desc_size;
+ if (entry >= memmap.map_end)
+ return NULL;
+
+ return entry;
+}
+
+/*
* Map the efi memory ranges of the runtime services and update new_mmap with
* virtual addresses.
*/
@@ -714,7 +778,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift)
unsigned long left = 0;
efi_memory_desc_t *md;
- for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+ p = NULL;
+ while ((p = efi_map_next_entry(p))) {
md = p;
if (!(md->attribute & EFI_MEMORY_RUNTIME)) {
#ifdef CONFIG_X86_64
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 9701a4f..836a1eb 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -12,7 +12,10 @@
#include <skas.h>
#include <sysdep/tls.h>
-extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
+static inline int modify_ldt (int func, void *ptr, unsigned long bytecount)
+{
+ return syscall(__NR_modify_ldt, func, ptr, bytecount);
+}
static long write_ldt_entry(struct mm_id *mm_idp, int func,
struct user_desc *desc, void **addr, int done)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 30d12af..993b7a7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,10 @@
#include <linux/memblock.h>
#include <linux/edd.h>
+#ifdef CONFIG_KEXEC_CORE
+#include <linux/kexec.h>
+#endif
+
#include <xen/xen.h>
#include <xen/events.h>
#include <xen/interface/xen.h>
@@ -1077,6 +1081,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
/* Fast syscall setup is all done in hypercalls, so
these are all ignored. Stub them out here to stop
Xen console noise. */
+ break;
default:
if (!pmu_msr_write(msr, low, high, &ret))
@@ -1807,6 +1812,21 @@ static struct notifier_block xen_hvm_cpu_notifier = {
.notifier_call = xen_hvm_cpu_notify,
};
+#ifdef CONFIG_KEXEC_CORE
+static void xen_hvm_shutdown(void)
+{
+ native_machine_shutdown();
+ if (kexec_in_progress)
+ xen_reboot(SHUTDOWN_soft_reset);
+}
+
+static void xen_hvm_crash_shutdown(struct pt_regs *regs)
+{
+ native_machine_crash_shutdown(regs);
+ xen_reboot(SHUTDOWN_soft_reset);
+}
+#endif
+
static void __init xen_hvm_guest_init(void)
{
if (xen_pv_domain())
@@ -1826,6 +1846,10 @@ static void __init xen_hvm_guest_init(void)
x86_init.irqs.intr_init = xen_init_IRQ;
xen_hvm_init_time_ops();
xen_hvm_init_mmu_ops();
+#ifdef CONFIG_KEXEC_CORE
+ machine_ops.shutdown = xen_hvm_shutdown;
+ machine_ops.crash_shutdown = xen_hvm_crash_shutdown;
+#endif
}
#endif
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index bfc08b1..660b3cf 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -112,6 +112,15 @@ static unsigned long *p2m_identity;
static pte_t *p2m_missing_pte;
static pte_t *p2m_identity_pte;
+/*
+ * Hint at last populated PFN.
+ *
+ * Used to set HYPERVISOR_shared_info->arch.max_pfn so the toolstack
+ * can avoid scanning the whole P2M (which may be sized to account for
+ * hotplugged memory).
+ */
+static unsigned long xen_p2m_last_pfn;
+
static inline unsigned p2m_top_index(unsigned long pfn)
{
BUG_ON(pfn >= MAX_P2M_PFN);
@@ -270,7 +279,7 @@ void xen_setup_mfn_list_list(void)
else
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
virt_to_mfn(p2m_top_mfn);
- HYPERVISOR_shared_info->arch.max_pfn = xen_max_p2m_pfn;
+ HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
HYPERVISOR_shared_info->arch.p2m_generation = 0;
HYPERVISOR_shared_info->arch.p2m_vaddr = (unsigned long)xen_p2m_addr;
HYPERVISOR_shared_info->arch.p2m_cr3 =
@@ -406,6 +415,8 @@ void __init xen_vmalloc_p2m_tree(void)
static struct vm_struct vm;
unsigned long p2m_limit;
+ xen_p2m_last_pfn = xen_max_p2m_pfn;
+
p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
vm.flags = VM_ALLOC;
vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
@@ -608,6 +619,12 @@ static bool alloc_p2m(unsigned long pfn)
free_p2m_page(p2m);
}
+ /* Expanded the p2m? */
+ if (pfn > xen_p2m_last_pfn) {
+ xen_p2m_last_pfn = pfn;
+ HYPERVISOR_shared_info->arch.max_pfn = xen_p2m_last_pfn;
+ }
+
return true;
}
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index f5ef674..1c30e4a 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -548,7 +548,7 @@ static unsigned long __init xen_get_max_pages(void)
{
unsigned long max_pages, limit;
domid_t domid = DOMID_SELF;
- int ret;
+ long ret;
limit = xen_get_pages_limit();
max_pages = limit;
@@ -798,7 +798,7 @@ char * __init xen_memory_setup(void)
xen_ignore_unusable();
/* Make sure the Xen-supplied memory map is well-ordered. */
- sanitize_e820_map(xen_e820_map, xen_e820_map_entries,
+ sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map),
&xen_e820_map_entries);
max_pages = xen_get_max_pages();