summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arc/include/asm/barrier.h37
-rw-r--r--arch/arm/boot/dts/spear320-hmi.dts2
-rw-r--r--arch/arm/configs/bcm_defconfig2
-rw-r--r--arch/arm/mach-spear/headsmp.S2
-rw-r--r--arch/arm/mach-spear/platsmp.c2
-rw-r--r--arch/arm/mach-spear/time.c2
-rw-r--r--arch/ia64/kernel/head.S2
-rw-r--r--arch/ia64/kernel/ivt.S2
-rw-r--r--arch/ia64/kvm/vmm_ivt.S2
-rw-r--r--arch/mips/mm/cache.c4
-rw-r--r--arch/parisc/include/asm/shmparam.h5
-rw-r--r--arch/parisc/kernel/cache.c3
-rw-r--r--arch/parisc/kernel/sys_parisc.c14
-rw-r--r--arch/parisc/kernel/syscall_table.S2
-rw-r--r--arch/parisc/lib/memcpy.c2
-rw-r--r--arch/parisc/mm/fault.c2
-rw-r--r--arch/powerpc/kernel/pci_64.c10
-rw-r--r--arch/powerpc/mm/numa.c1
-rw-r--r--arch/s390/include/asm/sigp.h19
-rw-r--r--arch/s390/include/asm/smp.h13
-rw-r--r--arch/s390/include/uapi/asm/unistd.h3
-rw-r--r--arch/s390/kernel/compat_wrapper.c3
-rw-r--r--arch/s390/kernel/dumpstack.c8
-rw-r--r--arch/s390/kernel/ptrace.c2
-rw-r--r--arch/s390/kernel/setup.c32
-rw-r--r--arch/s390/kernel/smp.c15
-rw-r--r--arch/s390/kernel/syscalls.S1
-rw-r--r--arch/s390/lib/uaccess.c5
-rw-r--r--arch/s390/mm/fault.c140
-rw-r--r--arch/um/include/shared/os.h1
-rw-r--r--arch/um/kernel/physmem.c1
-rw-r--r--arch/um/os-Linux/file.c6
-rw-r--r--arch/um/os-Linux/main.c1
-rw-r--r--arch/um/os-Linux/mem.c372
-rw-r--r--arch/x86/Makefile4
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel_rapl.c45
-rw-r--r--arch/x86/kernel/early-quirks.c2
-rw-r--r--arch/x86/kernel/kprobes/core.c16
-rw-r--r--arch/x86/kernel/reboot.c72
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/mmu.c38
-rw-r--r--arch/x86/kvm/mmu.h44
-rw-r--r--arch/x86/kvm/paging_tmpl.h2
-rw-r--r--arch/x86/kvm/vmx.c11
-rw-r--r--arch/x86/kvm/x86.c10
-rw-r--r--arch/x86/syscalls/Makefile2
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/tools/Makefile2
-rw-r--r--arch/x86/vdso/vdso-layout.lds.S19
-rw-r--r--arch/x86/xen/smp.c3
-rw-r--r--arch/x86/xen/spinlock.c5
-rw-r--r--arch/x86/xen/xen-asm_32.S25
56 files changed, 551 insertions, 502 deletions
diff --git a/arch/arc/include/asm/barrier.h b/arch/arc/include/asm/barrier.h
deleted file mode 100644
index c32245c..0000000
--- a/arch/arc/include/asm/barrier.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef __ASM_BARRIER_H
-#define __ASM_BARRIER_H
-
-#ifndef __ASSEMBLY__
-
-/* TODO-vineetg: Need to see what this does, don't we need sync anywhere */
-#define mb() __asm__ __volatile__ ("" : : : "memory")
-#define rmb() mb()
-#define wmb() mb()
-#define set_mb(var, value) do { var = value; mb(); } while (0)
-#define set_wmb(var, value) do { var = value; wmb(); } while (0)
-#define read_barrier_depends() mb()
-
-/* TODO-vineetg verify the correctness of macros here */
-#ifdef CONFIG_SMP
-#define smp_mb() mb()
-#define smp_rmb() rmb()
-#define smp_wmb() wmb()
-#else
-#define smp_mb() barrier()
-#define smp_rmb() barrier()
-#define smp_wmb() barrier()
-#endif
-
-#define smp_read_barrier_depends() do { } while (0)
-
-#endif
-
-#endif
diff --git a/arch/arm/boot/dts/spear320-hmi.dts b/arch/arm/boot/dts/spear320-hmi.dts
index 3075d2d..0aa6fef 100644
--- a/arch/arm/boot/dts/spear320-hmi.dts
+++ b/arch/arm/boot/dts/spear320-hmi.dts
@@ -1,7 +1,7 @@
/*
* DTS file for SPEAr320 Evaluation Baord
*
- * Copyright 2012 Shiraz Hashim <shiraz.hashim@st.com>
+ * Copyright 2012 Shiraz Hashim <shiraz.linux.kernel@gmail.com>
*
* The code contained herein is licensed under the GNU General Public
* License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig
index 0100464..3df3f3a 100644
--- a/arch/arm/configs/bcm_defconfig
+++ b/arch/arm/configs/bcm_defconfig
@@ -132,7 +132,7 @@ CONFIG_CRC_ITU_T=y
CONFIG_CRC7=y
CONFIG_XZ_DEC=y
CONFIG_AVERAGE=y
-CONFIG_PINCTRL_CAPRI=y
+CONFIG_PINCTRL_BCM281XX=y
CONFIG_WATCHDOG=y
CONFIG_BCM_KONA_WDT=y
CONFIG_BCM_KONA_WDT_DEBUG=y
diff --git a/arch/arm/mach-spear/headsmp.S b/arch/arm/mach-spear/headsmp.S
index ed85473..c52192d 100644
--- a/arch/arm/mach-spear/headsmp.S
+++ b/arch/arm/mach-spear/headsmp.S
@@ -3,7 +3,7 @@
*
* Picked from realview
* Copyright (c) 2012 ST Microelectronics Limited
- * Shiraz Hashim <shiraz.hashim@st.com>
+ * Shiraz Hashim <shiraz.linux.kernel@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/arch/arm/mach-spear/platsmp.c b/arch/arm/mach-spear/platsmp.c
index 5c4a198..c19751f 100644
--- a/arch/arm/mach-spear/platsmp.c
+++ b/arch/arm/mach-spear/platsmp.c
@@ -4,7 +4,7 @@
* based upon linux/arch/arm/mach-realview/platsmp.c
*
* Copyright (C) 2012 ST Microelectronics Ltd.
- * Shiraz Hashim <shiraz.hashim@st.com>
+ * Shiraz Hashim <shiraz.linux.kernel@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
index 218ba5b..6479035 100644
--- a/arch/arm/mach-spear/time.c
+++ b/arch/arm/mach-spear/time.c
@@ -2,7 +2,7 @@
* arch/arm/plat-spear/time.c
*
* Copyright (C) 2010 ST Microelectronics
- * Shiraz Hashim<shiraz.hashim@st.com>
+ * Shiraz Hashim<shiraz.linux.kernel@gmail.com>
*
* This file is licensed under the terms of the GNU General Public
* License version 2. This program is licensed "as is" without any
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
index e6f80fc..a4acdda 100644
--- a/arch/ia64/kernel/head.S
+++ b/arch/ia64/kernel/head.S
@@ -259,7 +259,7 @@ start_ap:
* Switch into virtual mode:
*/
movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
- |IA64_PSR_DI|IA64_PSR_AC)
+ |IA64_PSR_DI)
;;
mov cr.ipsr=r16
movl r17=1f
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
index 689ffca..18e794a 100644
--- a/arch/ia64/kernel/ivt.S
+++ b/arch/ia64/kernel/ivt.S
@@ -58,7 +58,7 @@
#include <asm/unistd.h>
#include <asm/errno.h>
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
index 2401848..397e34a 100644
--- a/arch/ia64/kvm/vmm_ivt.S
+++ b/arch/ia64/kvm/vmm_ivt.S
@@ -64,7 +64,7 @@
#include "kvm_minstate.h"
#include "vti.h"
-#if 1
+#if 0
# define PSR_DEFAULT_BITS psr.ac
#else
# define PSR_DEFAULT_BITS 0
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index e422b38..9e67cde 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -29,15 +29,15 @@ void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
void (*flush_cache_page)(struct vm_area_struct *vma, unsigned long page,
unsigned long pfn);
void (*flush_icache_range)(unsigned long start, unsigned long end);
+EXPORT_SYMBOL_GPL(flush_icache_range);
void (*local_flush_icache_range)(unsigned long start, unsigned long end);
void (*__flush_cache_vmap)(void);
void (*__flush_cache_vunmap)(void);
void (*__flush_kernel_vmap_range)(unsigned long vaddr, int size);
-void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size);
-
EXPORT_SYMBOL_GPL(__flush_kernel_vmap_range);
+void (*__invalidate_kernel_vmap_range)(unsigned long vaddr, int size);
/* MIPS specific cache operations */
void (*flush_cache_sigtramp)(unsigned long addr);
diff --git a/arch/parisc/include/asm/shmparam.h b/arch/parisc/include/asm/shmparam.h
index 628ddc2..afe1300 100644
--- a/arch/parisc/include/asm/shmparam.h
+++ b/arch/parisc/include/asm/shmparam.h
@@ -1,8 +1,7 @@
#ifndef _ASMPARISC_SHMPARAM_H
#define _ASMPARISC_SHMPARAM_H
-#define __ARCH_FORCE_SHMLBA 1
-
-#define SHMLBA 0x00400000 /* attach addr needs to be 4 Mb aligned */
+#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */
+#define SHM_COLOUR 0x00400000 /* shared mappings colouring */
#endif /* _ASMPARISC_SHMPARAM_H */
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index a6ffc77..f6448c7 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -323,7 +323,8 @@ void flush_dcache_page(struct page *page)
* specifically accesses it, of course) */
flush_tlb_page(mpnt, addr);
- if (old_addr == 0 || (old_addr & (SHMLBA - 1)) != (addr & (SHMLBA - 1))) {
+ if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1))
+ != (addr & (SHM_COLOUR - 1))) {
__flush_cache_page(mpnt, addr, page_to_phys(page));
if (old_addr)
printk(KERN_ERR "INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %s\n", old_addr, addr, mpnt->vm_file ? (char *)mpnt->vm_file->f_path.dentry->d_name.name : "(null)");
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index b7cadc4..31ffa9b 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -45,7 +45,7 @@
static int get_offset(unsigned int last_mmap)
{
- return (last_mmap & (SHMLBA-1)) >> PAGE_SHIFT;
+ return (last_mmap & (SHM_COLOUR-1)) >> PAGE_SHIFT;
}
static unsigned long shared_align_offset(unsigned int last_mmap,
@@ -57,8 +57,8 @@ static unsigned long shared_align_offset(unsigned int last_mmap,
static inline unsigned long COLOR_ALIGN(unsigned long addr,
unsigned int last_mmap, unsigned long pgoff)
{
- unsigned long base = (addr+SHMLBA-1) & ~(SHMLBA-1);
- unsigned long off = (SHMLBA-1) &
+ unsigned long base = (addr+SHM_COLOUR-1) & ~(SHM_COLOUR-1);
+ unsigned long off = (SHM_COLOUR-1) &
(shared_align_offset(last_mmap, pgoff) << PAGE_SHIFT);
return base + off;
@@ -101,7 +101,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
if (flags & MAP_FIXED) {
if ((flags & MAP_SHARED) && last_mmap &&
(addr - shared_align_offset(last_mmap, pgoff))
- & (SHMLBA - 1))
+ & (SHM_COLOUR - 1))
return -EINVAL;
goto found_addr;
}
@@ -122,7 +122,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
info.length = len;
info.low_limit = mm->mmap_legacy_base;
info.high_limit = mmap_upper_limit();
- info.align_mask = last_mmap ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
info.align_offset = shared_align_offset(last_mmap, pgoff);
addr = vm_unmapped_area(&info);
@@ -161,7 +161,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
if (flags & MAP_FIXED) {
if ((flags & MAP_SHARED) && last_mmap &&
(addr - shared_align_offset(last_mmap, pgoff))
- & (SHMLBA - 1))
+ & (SHM_COLOUR - 1))
return -EINVAL;
goto found_addr;
}
@@ -182,7 +182,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.length = len;
info.low_limit = PAGE_SIZE;
info.high_limit = mm->mmap_base;
- info.align_mask = last_mmap ? (PAGE_MASK & (SHMLBA - 1)) : 0;
+ info.align_mask = last_mmap ? (PAGE_MASK & (SHM_COLOUR - 1)) : 0;
info.align_offset = shared_align_offset(last_mmap, pgoff);
addr = vm_unmapped_area(&info);
if (!(addr & ~PAGE_MASK))
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 80e5dd2..83ead0e 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -392,7 +392,7 @@
ENTRY_COMP(vmsplice)
ENTRY_COMP(move_pages) /* 295 */
ENTRY_SAME(getcpu)
- ENTRY_SAME(epoll_pwait)
+ ENTRY_COMP(epoll_pwait)
ENTRY_COMP(statfs64)
ENTRY_COMP(fstatfs64)
ENTRY_COMP(kexec_load) /* 300 */
diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c
index 413dc17..b2b441b 100644
--- a/arch/parisc/lib/memcpy.c
+++ b/arch/parisc/lib/memcpy.c
@@ -470,7 +470,7 @@ static unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
return 0;
/* if a load or store fault occured we can get the faulty addr */
- d = &__get_cpu_var(exception_data);
+ d = this_cpu_ptr(&exception_data);
fault_addr = d->fault_addr;
/* error in load or store? */
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index 9d08c71..7475507 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -151,7 +151,7 @@ int fixup_exception(struct pt_regs *regs)
fix = search_exception_tables(regs->iaoq[0]);
if (fix) {
struct exception_data *d;
- d = &__get_cpu_var(exception_data);
+ d = this_cpu_ptr(&exception_data);
d->fault_ip = regs->iaoq[0];
d->fault_space = regs->isr;
d->fault_addr = regs->ior;
diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c
index 2a47790..155013d 100644
--- a/arch/powerpc/kernel/pci_64.c
+++ b/arch/powerpc/kernel/pci_64.c
@@ -208,7 +208,7 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
unsigned long in_devfn)
{
struct pci_controller* hose;
- struct pci_bus *bus = NULL;
+ struct pci_bus *tmp_bus, *bus = NULL;
struct device_node *hose_node;
/* Argh ! Please forgive me for that hack, but that's the
@@ -229,10 +229,12 @@ long sys_pciconfig_iobase(long which, unsigned long in_bus,
* used on pre-domains setup. We return the first match
*/
- list_for_each_entry(bus, &pci_root_buses, node) {
- if (in_bus >= bus->number && in_bus <= bus->busn_res.end)
+ list_for_each_entry(tmp_bus, &pci_root_buses, node) {
+ if (in_bus >= tmp_bus->number &&
+ in_bus <= tmp_bus->busn_res.end) {
+ bus = tmp_bus;
break;
- bus = NULL;
+ }
}
if (bus == NULL || bus->dev.of_node == NULL)
return -ENODEV;
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 4ebbb9e..3b181b2 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -232,6 +232,7 @@ int __node_distance(int a, int b)
return distance;
}
+EXPORT_SYMBOL(__node_distance);
static void initialize_distance_lookup_table(int nid,
const __be32 *associativity)
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index d091aa1..bf9c823 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -31,4 +31,23 @@
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL
+#ifndef __ASSEMBLY__
+
+static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+{
+ register unsigned int reg1 asm ("1") = parm;
+ int cc;
+
+ asm volatile(
+ " sigp %1,%2,0(%3)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
+ if (status && cc == 1)
+ *status = reg1;
+ return cc;
+}
+
+#endif /* __ASSEMBLY__ */
+
#endif /* __S390_ASM_SIGP_H */
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 1607793..21703f8 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -7,6 +7,8 @@
#ifndef __ASM_SMP_H
#define __ASM_SMP_H
+#include <asm/sigp.h>
+
#ifdef CONFIG_SMP
#include <asm/lowcore.h>
@@ -50,9 +52,18 @@ static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_yield(void) { }
-static inline void smp_stop_cpu(void) { }
static inline void smp_fill_possible_mask(void) { }
+static inline void smp_stop_cpu(void)
+{
+ u16 pcpu = stap();
+
+ for (;;) {
+ __pcpu_sigp(pcpu, SIGP_STOP, 0, NULL);
+ cpu_relax();
+ }
+}
+
#endif /* CONFIG_SMP */
#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 5eb5c9d..3802d2d 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -282,7 +282,8 @@
#define __NR_finit_module 344
#define __NR_sched_setattr 345
#define __NR_sched_getattr 346
-#define NR_syscalls 345
+#define __NR_renameat2 347
+#define NR_syscalls 348
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 824c39d..45cdb37 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -1,5 +1,5 @@
/*
- * Compat sytem call wrappers.
+ * Compat system call wrappers.
*
* Copyright IBM Corp. 2014
*/
@@ -213,3 +213,4 @@ COMPAT_SYSCALL_WRAP5(kcmp, pid_t, pid1, pid_t, pid2, int, type, unsigned long, i
COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, flags);
COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
+COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index e6af940..acb4124 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -144,10 +144,10 @@ void show_registers(struct pt_regs *regs)
char *mode;
mode = user_mode(regs) ? "User" : "Krnl";
- printk("%s PSW : %p %p (%pSR)\n",
- mode, (void *) regs->psw.mask,
- (void *) regs->psw.addr,
- (void *) regs->psw.addr);
+ printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr);
+ if (!user_mode(regs))
+ printk(" (%pSR)", (void *)regs->psw.addr);
+ printk("\n");
printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x "
"P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER),
mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO),
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 4ac8faf..1c82619 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -64,7 +64,7 @@ void update_cr_regs(struct task_struct *task)
if (task->thread.per_flags & PER_FLAG_NO_TE)
cr_new &= ~(1UL << 55);
if (cr_new != cr)
- __ctl_load(cr, 0, 0);
+ __ctl_load(cr_new, 0, 0);
/* Set or clear transaction execution TDC bits 62 and 63. */
__ctl_store(cr, 2, 2);
cr_new = cr & ~3UL;
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index f70f248..88d1ca8 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -1027,3 +1027,35 @@ void __init setup_arch(char **cmdline_p)
/* Setup zfcpdump support */
setup_zfcpdump();
}
+
+#ifdef CONFIG_32BIT
+static int no_removal_warning __initdata;
+
+static int __init parse_no_removal_warning(char *str)
+{
+ no_removal_warning = 1;
+ return 0;
+}
+__setup("no_removal_warning", parse_no_removal_warning);
+
+static int __init removal_warning(void)
+{
+ if (no_removal_warning)
+ return 0;
+ printk(KERN_ALERT "\n\n");
+ printk(KERN_CONT "Warning - you are using a 31 bit kernel!\n\n");
+ printk(KERN_CONT "We plan to remove 31 bit kernel support from the kernel sources in March 2015.\n");
+ printk(KERN_CONT "Currently we assume that nobody is using the 31 bit kernel on old 31 bit\n");
+ printk(KERN_CONT "hardware anymore. If you think that the code should not be removed and also\n");
+ printk(KERN_CONT "future versions of the Linux kernel should be able to run in 31 bit mode\n");
+ printk(KERN_CONT "please let us know. Please write to:\n");
+ printk(KERN_CONT "linux390@de.ibm.com (mail address) and/or\n");
+ printk(KERN_CONT "linux-s390@vger.kernel.org (mailing list).\n\n");
+ printk(KERN_CONT "Thank you!\n\n");
+ printk(KERN_CONT "If this kernel runs on a 64 bit machine you may consider using a 64 bit kernel.\n");
+ printk(KERN_CONT "This message can be disabled with the \"no_removal_warning\" kernel parameter.\n");
+ schedule_timeout_uninterruptible(300 * HZ);
+ return 0;
+}
+early_initcall(removal_warning);
+#endif
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 512ce1c..86e65ec 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -82,21 +82,6 @@ DEFINE_MUTEX(smp_cpu_state_mutex);
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
-{
- register unsigned int reg1 asm ("1") = parm;
- int cc;
-
- asm volatile(
- " sigp %1,%2,0(%3)\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (cc), "+d" (reg1) : "d" (addr), "a" (order) : "cc");
- if (status && cc == 1)
- *status = reg1;
- return cc;
-}
-
static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
{
int cc;
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 542ef48..fe5cdf2 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -355,3 +355,4 @@ SYSCALL(sys_kcmp,sys_kcmp,compat_sys_kcmp)
SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
+SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
diff --git a/arch/s390/lib/uaccess.c b/arch/s390/lib/uaccess.c
index 23f866b..7416efe 100644
--- a/arch/s390/lib/uaccess.c
+++ b/arch/s390/lib/uaccess.c
@@ -338,9 +338,6 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
register unsigned long reg0 asm("0") = 0;
unsigned long tmp1, tmp2;
- if (unlikely(!size))
- return 0;
- update_primary_asce(current);
asm volatile(
" la %2,0(%1)\n"
" la %3,0(%0,%1)\n"
@@ -359,6 +356,8 @@ static inline unsigned long strnlen_user_srst(const char __user *src,
unsigned long __strnlen_user(const char __user *src, unsigned long size)
{
+ if (unlikely(!size))
+ return 0;
update_primary_asce(current);
return strnlen_user_srst(src, size);
}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 19f623f..2f51a99 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -126,6 +126,133 @@ static inline int user_space_fault(struct pt_regs *regs)
return 0;
}
+static int bad_address(void *p)
+{
+ unsigned long dummy;
+
+ return probe_kernel_address((unsigned long *)p, dummy);
+}
+
+#ifdef CONFIG_64BIT
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%016lx ", asce);
+ switch (asce & _ASCE_TYPE_MASK) {
+ case _ASCE_TYPE_REGION1:
+ table = table + ((address >> 53) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R1:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION2:
+ table = table + ((address >> 42) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R2:%016lx ", *table);
+ if (*table & _REGION_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_REGION3:
+ table = table + ((address >> 31) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("R3:%016lx ", *table);
+ if (*table & (_REGION_ENTRY_INVALID | _REGION3_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ /* fallthrough */
+ case _ASCE_TYPE_SEGMENT:
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont(KERN_CONT "S:%016lx ", *table);
+ if (*table & (_SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_LARGE))
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ }
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%016lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#else /* CONFIG_64BIT */
+
+static void dump_pagetable(unsigned long asce, unsigned long address)
+{
+ unsigned long *table = __va(asce & PAGE_MASK);
+
+ pr_alert("AS:%08lx ", asce);
+ table = table + ((address >> 20) & 0x7ff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("S:%08lx ", *table);
+ if (*table & _SEGMENT_ENTRY_INVALID)
+ goto out;
+ table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+ table = table + ((address >> 12) & 0xff);
+ if (bad_address(table))
+ goto bad;
+ pr_cont("P:%08lx ", *table);
+out:
+ pr_cont("\n");
+ return;
+bad:
+ pr_cont("BAD\n");
+}
+
+#endif /* CONFIG_64BIT */
+
+static void dump_fault_info(struct pt_regs *regs)
+{
+ unsigned long asce;
+
+ pr_alert("Fault in ");
+ switch (regs->int_parm_long & 3) {
+ case 3:
+ pr_cont("home space ");
+ break;
+ case 2:
+ pr_cont("secondary space ");
+ break;
+ case 1:
+ pr_cont("access register ");
+ break;
+ case 0:
+ pr_cont("primary space ");
+ break;
+ }
+ pr_cont("mode while using ");
+ if (!user_space_fault(regs)) {
+ asce = S390_lowcore.kernel_asce;
+ pr_cont("kernel ");
+ }
+#ifdef CONFIG_PGSTE
+ else if ((current->flags & PF_VCPU) && S390_lowcore.gmap) {
+ struct gmap *gmap = (struct gmap *)S390_lowcore.gmap;
+ asce = gmap->asce;
+ pr_cont("gmap ");
+ }
+#endif
+ else {
+ asce = S390_lowcore.user_asce;
+ pr_cont("user ");
+ }
+ pr_cont("ASCE.\n");
+ dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK);
+}
+
static inline void report_user_fault(struct pt_regs *regs, long signr)
{
if ((task_pid_nr(current) > 1) && !show_unhandled_signals)
@@ -138,8 +265,9 @@ static inline void report_user_fault(struct pt_regs *regs, long signr)
regs->int_code);
print_vma_addr(KERN_CONT "in ", regs->psw.addr & PSW_ADDR_INSN);
printk(KERN_CONT "\n");
- printk(KERN_ALERT "failing address: %lX\n",
- regs->int_parm_long & __FAIL_ADDR_MASK);
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
show_regs(regs);
}
@@ -177,11 +305,13 @@ static noinline void do_no_context(struct pt_regs *regs)
address = regs->int_parm_long & __FAIL_ADDR_MASK;
if (!user_space_fault(regs))
printk(KERN_ALERT "Unable to handle kernel pointer dereference"
- " at virtual kernel address %p\n", (void *)address);
+ " in virtual kernel address space\n");
else
printk(KERN_ALERT "Unable to handle kernel paging request"
- " at virtual user address %p\n", (void *)address);
-
+ " in virtual user address space\n");
+ printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n",
+ regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long);
+ dump_fault_info(regs);
die(regs, "Oops");
do_exit(SIGKILL);
}
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 75298d3..08eec0b 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -136,6 +136,7 @@ extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
extern int os_get_ifname(int fd, char *namebuf);
extern int os_set_slip(int fd);
extern int os_mode_fd(int fd, int mode);
+extern int os_fsync_file(int fd);
extern int os_seek_file(int fd, unsigned long long offset);
extern int os_open_file(const char *file, struct openflags flags, int mode);
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index f116db1..30fdd5d 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -103,6 +103,7 @@ void __init setup_physmem(unsigned long start, unsigned long reserve_end,
*/
os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
+ os_fsync_file(physmem_fd);
bootmap_size = init_bootmem(pfn, pfn + delta);
free_bootmem(__pa(reserve_end) + bootmap_size,
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index 07a7501..08d90fb 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -237,6 +237,12 @@ void os_close_file(int fd)
{
close(fd);
}
+int os_fsync_file(int fd)
+{
+ if (fsync(fd) < 0)
+ return -errno;
+ return 0;
+}
int os_seek_file(int fd, unsigned long long offset)
{
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index e1704ff..df9191a 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -151,6 +151,7 @@ int __init main(int argc, char **argv, char **envp)
#endif
do_uml_initcalls();
+ change_sig(SIGPIPE, 0);
ret = linux_main(argc, argv);
/*
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 3c4af77..897e9ad 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -12,337 +12,117 @@
#include <string.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#include <sys/param.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
#include <init.h>
#include <os.h>
-/* Modified by which_tmpdir, which is called during early boot */
-static char *default_tmpdir = "/tmp";
-
-/*
- * Modified when creating the physical memory file and when checking
- * the tmp filesystem for usability, both happening during early boot.
- */
+/* Set by make_tempfile() during early boot. */
static char *tempdir = NULL;
-static void __init find_tempdir(void)
+/* Check if dir is on tmpfs. Return 0 if yes, -1 if no or error. */
+static int __init check_tmpfs(const char *dir)
{
- const char *dirs[] = { "TMP", "TEMP", "TMPDIR", NULL };
- int i;
- char *dir = NULL;
-
- if (tempdir != NULL)
- /* We've already been called */
- return;
- for (i = 0; dirs[i]; i++) {
- dir = getenv(dirs[i]);
- if ((dir != NULL) && (*dir != '\0'))
- break;
- }
- if ((dir == NULL) || (*dir == '\0'))
- dir = default_tmpdir;
+ struct statfs st;
- tempdir = malloc(strlen(dir) + 2);
- if (tempdir == NULL) {
- fprintf(stderr, "Failed to malloc tempdir, "
- "errno = %d\n", errno);
- return;
- }
- strcpy(tempdir, dir);
- strcat(tempdir, "/");
-}
-
-/*
- * Remove bytes from the front of the buffer and refill it so that if there's a
- * partial string that we care about, it will be completed, and we can recognize
- * it.
- */
-static int pop(int fd, char *buf, size_t size, size_t npop)
-{
- ssize_t n;
- size_t len = strlen(&buf[npop]);
-
- memmove(buf, &buf[npop], len + 1);
- n = read(fd, &buf[len], size - len - 1);
- if (n < 0)
- return -errno;
-
- buf[len + n] = '\0';
- return 1;
-}
-
-/*
- * This will return 1, with the first character in buf being the
- * character following the next instance of c in the file. This will
- * read the file as needed. If there's an error, -errno is returned;
- * if the end of the file is reached, 0 is returned.
- */
-static int next(int fd, char *buf, size_t size, char c)
-{
- ssize_t n;
- char *ptr;
-
- while ((ptr = strchr(buf, c)) == NULL) {
- n = read(fd, buf, size - 1);
- if (n == 0)
- return 0;
- else if (n < 0)
- return -errno;
-
- buf[n] = '\0';
+ printf("Checking if %s is on tmpfs...", dir);
+ if (statfs(dir, &st) < 0) {
+ printf("%s\n", strerror(errno));
+ } else if (st.f_type != TMPFS_MAGIC) {
+ printf("no\n");
+ } else {
+ printf("OK\n");
+ return 0;
}
-
- return pop(fd, buf, size, ptr - buf + 1);
+ return -1;
}
/*
- * Decode an octal-escaped and space-terminated path of the form used by
- * /proc/mounts. May be used to decode a path in-place. "out" must be at least
- * as large as the input. The output is always null-terminated. "len" gets the
- * length of the output, excluding the trailing null. Returns 0 if a full path
- * was successfully decoded, otherwise an error.
+ * Choose the tempdir to use. We want something on tmpfs so that our memory is
+ * not subject to the host's vm.dirty_ratio. If a tempdir is specified in the
+ * environment, we use that even if it's not on tmpfs, but we warn the user.
+ * Otherwise, we try common tmpfs locations, and if no tmpfs directory is found
+ * then we fall back to /tmp.
*/
-static int decode_path(const char *in, char *out, size_t *len)
+static char * __init choose_tempdir(void)
{
- char *first = out;
- int c;
+ static const char * const vars[] = {
+ "TMPDIR",
+ "TMP",
+ "TEMP",
+ NULL
+ };
+ static const char fallback_dir[] = "/tmp";
+ static const char * const tmpfs_dirs[] = {
+ "/dev/shm",
+ fallback_dir,
+ NULL
+ };
int i;
- int ret = -EINVAL;
- while (1) {
- switch (*in) {
- case '\0':
- goto out;
-
- case ' ':
- ret = 0;
- goto out;
-
- case '\\':
- in++;
- c = 0;
- for (i = 0; i < 3; i++) {
- if (*in < '0' || *in > '7')
- goto out;
- c = (c << 3) | (*in++ - '0');
- }
- *(unsigned char *)out++ = (unsigned char) c;
- break;
-
- default:
- *out++ = *in++;
- break;
+ const char *dir;
+
+ printf("Checking environment variables for a tempdir...");
+ for (i = 0; vars[i]; i++) {
+ dir = getenv(vars[i]);
+ if ((dir != NULL) && (*dir != '\0')) {
+ printf("%s\n", dir);
+ if (check_tmpfs(dir) >= 0)
+ goto done;
+ else
+ goto warn;
}
}
+ printf("none found\n");
-out:
- *out = '\0';
- *len = out - first;
- return ret;
-}
-
-/*
- * Computes the length of s when encoded with three-digit octal escape sequences
- * for the characters in chars.
- */
-static size_t octal_encoded_length(const char *s, const char *chars)
-{
- size_t len = strlen(s);
- while ((s = strpbrk(s, chars)) != NULL) {
- len += 3;
- s++;
- }
-
- return len;
-}
-
-enum {
- OUTCOME_NOTHING_MOUNTED,
- OUTCOME_TMPFS_MOUNT,
- OUTCOME_NON_TMPFS_MOUNT,
-};
-
-/* Read a line of /proc/mounts data looking for a tmpfs mount at "path". */
-static int read_mount(int fd, char *buf, size_t bufsize, const char *path,
- int *outcome)
-{
- int found;
- int match;
- char *space;
- size_t len;
-
- enum {
- MATCH_NONE,
- MATCH_EXACT,
- MATCH_PARENT,
- };
-
- found = next(fd, buf, bufsize, ' ');
- if (found != 1)
- return found;
-
- /*
- * If there's no following space in the buffer, then this path is
- * truncated, so it can't be the one we're looking for.
- */
- space = strchr(buf, ' ');
- if (space) {
- match = MATCH_NONE;
- if (!decode_path(buf, buf, &len)) {
- if (!strcmp(buf, path))
- match = MATCH_EXACT;
- else if (!strncmp(buf, path, len)
- && (path[len] == '/' || !strcmp(buf, "/")))
- match = MATCH_PARENT;
- }
-
- found = pop(fd, buf, bufsize, space - buf + 1);
- if (found != 1)
- return found;
-
- switch (match) {
- case MATCH_EXACT:
- if (!strncmp(buf, "tmpfs", strlen("tmpfs")))
- *outcome = OUTCOME_TMPFS_MOUNT;
- else
- *outcome = OUTCOME_NON_TMPFS_MOUNT;
- break;
-
- case MATCH_PARENT:
- /* This mount obscures any previous ones. */
- *outcome = OUTCOME_NOTHING_MOUNTED;
- break;
- }
+ for (i = 0; tmpfs_dirs[i]; i++) {
+ dir = tmpfs_dirs[i];
+ if (check_tmpfs(dir) >= 0)
+ goto done;
}
- return next(fd, buf, bufsize, '\n');
+ dir = fallback_dir;
+warn:
+ printf("Warning: tempdir %s is not on tmpfs\n", dir);
+done:
+ /* Make a copy since getenv results may not remain valid forever. */
+ return strdup(dir);
}
-/* which_tmpdir is called only during early boot */
-static int checked_tmpdir = 0;
-
/*
- * Look for a tmpfs mounted at /dev/shm. I couldn't find a cleaner
- * way to do this than to parse /proc/mounts. statfs will return the
- * same filesystem magic number and fs id for both /dev and /dev/shm
- * when they are both tmpfs, so you can't tell if they are different
- * filesystems. Also, there seems to be no other way of finding the
- * mount point of a filesystem from within it.
- *
- * If a /dev/shm tmpfs entry is found, then we switch to using it.
- * Otherwise, we stay with the default /tmp.
+ * Create an unlinked tempfile in a suitable tempdir. template must be the
+ * basename part of the template with a leading '/'.
*/
-static void which_tmpdir(void)
+static int __init make_tempfile(const char *template)
{
+ char *tempname;
int fd;
- int found;
- int outcome;
- char *path;
- char *buf;
- size_t bufsize;
- if (checked_tmpdir)
- return;
-
- checked_tmpdir = 1;
-
- printf("Checking for tmpfs mount on /dev/shm...");
-
- path = realpath("/dev/shm", NULL);
- if (!path) {
- printf("failed to check real path, errno = %d\n", errno);
- return;
- }
- printf("%s...", path);
-
- /*
- * The buffer needs to be able to fit the full octal-escaped path, a
- * space, and a trailing null in order to successfully decode it.
- */
- bufsize = octal_encoded_length(path, " \t\n\\") + 2;
-
- if (bufsize < 128)
- bufsize = 128;
-
- buf = malloc(bufsize);
- if (!buf) {
- printf("malloc failed, errno = %d\n", errno);
- goto out;
- }
- buf[0] = '\0';
-
- fd = open("/proc/mounts", O_RDONLY);
- if (fd < 0) {
- printf("failed to open /proc/mounts, errno = %d\n", errno);
- goto out1;
- }
-
- outcome = OUTCOME_NOTHING_MOUNTED;
- while (1) {
- found = read_mount(fd, buf, bufsize, path, &outcome);
- if (found != 1)
- break;
- }
-
- if (found < 0) {
- printf("read returned errno %d\n", -found);
- } else {
- switch (outcome) {
- case OUTCOME_TMPFS_MOUNT:
- printf("OK\n");
- default_tmpdir = "/dev/shm";
- break;
-
- case OUTCOME_NON_TMPFS_MOUNT:
- printf("not tmpfs\n");
- break;
-
- default:
- printf("nothing mounted on /dev/shm\n");
- break;
+ if (tempdir == NULL) {
+ tempdir = choose_tempdir();
+ if (tempdir == NULL) {
+ fprintf(stderr, "Failed to choose tempdir: %s\n",
+ strerror(errno));
+ return -1;
}
}
- close(fd);
-out1:
- free(buf);
-out:
- free(path);
-}
-
-static int __init make_tempfile(const char *template, char **out_tempname,
- int do_unlink)
-{
- char *tempname;
- int fd;
-
- which_tmpdir();
- tempname = malloc(MAXPATHLEN);
+ tempname = malloc(strlen(tempdir) + strlen(template) + 1);
if (tempname == NULL)
return -1;
- find_tempdir();
- if ((tempdir == NULL) || (strlen(tempdir) >= MAXPATHLEN))
- goto out;
-
- if (template[0] != '/')
- strcpy(tempname, tempdir);
- else
- tempname[0] = '\0';
- strncat(tempname, template, MAXPATHLEN-1-strlen(tempname));
+ strcpy(tempname, tempdir);
+ strcat(tempname, template);
fd = mkstemp(tempname);
if (fd < 0) {
fprintf(stderr, "open - cannot create %s: %s\n", tempname,
strerror(errno));
goto out;
}
- if (do_unlink && (unlink(tempname) < 0)) {
+ if (unlink(tempname) < 0) {
perror("unlink");
goto close;
}
- if (out_tempname) {
- *out_tempname = tempname;
- } else
- free(tempname);
+ free(tempname);
return fd;
close:
close(fd);
@@ -351,14 +131,14 @@ out:
return -1;
}
-#define TEMPNAME_TEMPLATE "vm_file-XXXXXX"
+#define TEMPNAME_TEMPLATE "/vm_file-XXXXXX"
static int __init create_tmp_file(unsigned long long len)
{
int fd, err;
char zero;
- fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1);
+ fd = make_tempfile(TEMPNAME_TEMPLATE);
if (fd < 0)
exit(1);
@@ -402,7 +182,6 @@ int __init create_mem_file(unsigned long long len)
return fd;
}
-
void __init check_tmpexec(void)
{
void *addr;
@@ -410,14 +189,13 @@ void __init check_tmpexec(void)
addr = mmap(NULL, UM_KERN_PAGE_SIZE,
PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE, fd, 0);
- printf("Checking PROT_EXEC mmap in %s...",tempdir);
- fflush(stdout);
+ printf("Checking PROT_EXEC mmap in %s...", tempdir);
if (addr == MAP_FAILED) {
err = errno;
- perror("failed");
+ printf("%s\n", strerror(err));
close(fd);
if (err == EPERM)
- printf("%s must be not mounted noexec\n",tempdir);
+ printf("%s must be not mounted noexec\n", tempdir);
exit(1);
}
printf("OK\n");
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 602f57e..d1b7c37 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -250,8 +250,8 @@ archclean:
PHONY += kvmconfig
kvmconfig:
$(if $(wildcard $(objtree)/.config),, $(error You need an existing .config for this target))
- $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config arch/x86/configs/kvm_guest.config
- $(Q)yes "" | $(MAKE) oldconfig
+ $(Q)$(CONFIG_SHELL) $(srctree)/scripts/kconfig/merge_config.sh -m -O $(objtree) $(objtree)/.config $(srctree)/arch/x86/configs/kvm_guest.config
+ $(Q)yes "" | $(MAKE) -f $(srctree)/Makefile oldconfig
define archhelp
echo '* bzImage - Compressed kernel image (arch/x86/boot/bzImage)'
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fcaf9c9..7de069af 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -60,7 +60,7 @@
| X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
| X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
| X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
+ | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE | X86_CR4_SMAP))
#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index eeee23f..68317c8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -598,7 +598,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
{
struct mce m;
int i;
- unsigned long *v;
this_cpu_inc(mce_poll_count);
@@ -618,8 +617,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
if (!(m.status & MCI_STATUS_VAL))
continue;
- v = &get_cpu_var(mce_polled_error);
- set_bit(0, v);
+ this_cpu_write(mce_polled_error, 1);
/*
* Uncorrected or signalled events are handled by the exception
* handler when it is enabled, so don't process those here.
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 3bdb95a..9a316b2 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -42,7 +42,7 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
* cmci_discover_lock protects against parallel discovery attempts
* which could race against each other.
*/
-static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
+static DEFINE_SPINLOCK(cmci_discover_lock);
#define CMCI_THRESHOLD 1
#define CMCI_POLL_INTERVAL (30 * HZ)
@@ -144,14 +144,14 @@ static void cmci_storm_disable_banks(void)
int bank;
u64 val;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
owned = __get_cpu_var(mce_banks_owned);
for_each_set_bit(bank, owned, MAX_NR_BANKS) {
rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
val &= ~MCI_CTL2_CMCI_EN;
wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
}
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static bool cmci_storm_detect(void)
@@ -211,7 +211,7 @@ static void cmci_discover(int banks)
int i;
int bios_wrong_thresh = 0;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++) {
u64 val;
int bios_zero_thresh = 0;
@@ -266,7 +266,7 @@ static void cmci_discover(int banks)
WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
}
}
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
pr_info_once(
"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
@@ -316,10 +316,10 @@ void cmci_clear(void)
if (!cmci_supported(&banks))
return;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
for (i = 0; i < banks; i++)
__cmci_disable_bank(i);
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void cmci_rediscover_work_func(void *arg)
@@ -360,9 +360,9 @@ void cmci_disable_bank(int bank)
if (!cmci_supported(&banks))
return;
- raw_spin_lock_irqsave(&cmci_discover_lock, flags);
+ spin_lock_irqsave(&cmci_discover_lock, flags);
__cmci_disable_bank(bank);
- raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
}
static void intel_init_cmci(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
index 059218e..7c87424 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c
@@ -59,7 +59,7 @@
#define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */
#define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */
#define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */
-#define RAPL_IDX_PP1_NRG_STAT 3 /* DRAM */
+#define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */
#define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */
/* Clients have PP0, PKG */
@@ -72,6 +72,12 @@
1<<RAPL_IDX_PKG_NRG_STAT|\
1<<RAPL_IDX_RAM_NRG_STAT)
+/* Servers have PP0, PKG, RAM, PP1 */
+#define RAPL_IDX_HSW (1<<RAPL_IDX_PP0_NRG_STAT|\
+ 1<<RAPL_IDX_PKG_NRG_STAT|\
+ 1<<RAPL_IDX_RAM_NRG_STAT|\
+ 1<<RAPL_IDX_PP1_NRG_STAT)
+
/*
* event code: LSB 8 bits, passed in attr->config
* any other bit is reserved
@@ -425,6 +431,24 @@ static struct attribute *rapl_events_cln_attr[] = {
NULL,
};
+static struct attribute *rapl_events_hsw_attr[] = {
+ EVENT_PTR(rapl_cores),
+ EVENT_PTR(rapl_pkg),
+ EVENT_PTR(rapl_gpu),
+ EVENT_PTR(rapl_ram),
+
+ EVENT_PTR(rapl_cores_unit),
+ EVENT_PTR(rapl_pkg_unit),
+ EVENT_PTR(rapl_gpu_unit),
+ EVENT_PTR(rapl_ram_unit),
+
+ EVENT_PTR(rapl_cores_scale),
+ EVENT_PTR(rapl_pkg_scale),
+ EVENT_PTR(rapl_gpu_scale),
+ EVENT_PTR(rapl_ram_scale),
+ NULL,
+};
+
static struct attribute_group rapl_pmu_events_group = {
.name = "events",
.attrs = NULL, /* patched at runtime */
@@ -511,6 +535,7 @@ static int rapl_cpu_prepare(int cpu)
struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu);
int phys_id = topology_physical_package_id(cpu);
u64 ms;
+ u64 msr_rapl_power_unit_bits;
if (pmu)
return 0;
@@ -518,6 +543,9 @@ static int rapl_cpu_prepare(int cpu)
if (phys_id < 0)
return -1;
+ if (!rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits))
+ return -1;
+
pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
if (!pmu)
return -1;
@@ -531,8 +559,7 @@ static int rapl_cpu_prepare(int cpu)
*
* we cache in local PMU instance
*/
- rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit);
- pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL;
+ pmu->hw_unit = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
pmu->pmu = &rapl_pmu_class;
/*
@@ -631,11 +658,14 @@ static int __init rapl_pmu_init(void)
switch (boot_cpu_data.x86_model) {
case 42: /* Sandy Bridge */
case 58: /* Ivy Bridge */
- case 60: /* Haswell */
- case 69: /* Haswell-Celeron */
rapl_cntr_mask = RAPL_IDX_CLN;
rapl_pmu_events_group.attrs = rapl_events_cln_attr;
break;
+ case 60: /* Haswell */
+ case 69: /* Haswell-Celeron */
+ rapl_cntr_mask = RAPL_IDX_HSW;
+ rapl_pmu_events_group.attrs = rapl_events_hsw_attr;
+ break;
case 45: /* Sandy Bridge-EP */
case 62: /* IvyTown */
rapl_cntr_mask = RAPL_IDX_SRV;
@@ -650,7 +680,9 @@ static int __init rapl_pmu_init(void)
cpu_notifier_register_begin();
for_each_online_cpu(cpu) {
- rapl_cpu_prepare(cpu);
+ ret = rapl_cpu_prepare(cpu);
+ if (ret)
+ goto out;
rapl_cpu_init(cpu);
}
@@ -673,6 +705,7 @@ static int __init rapl_pmu_init(void)
hweight32(rapl_cntr_mask),
ktime_to_ms(pmu->timer_interval));
+out:
cpu_notifier_register_done();
return 0;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index b0cc380..6e2537c 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -240,7 +240,7 @@ static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_s
return base;
}
-#define KB(x) ((x) * 1024)
+#define KB(x) ((x) * 1024UL)
#define MB(x) (KB (KB (x)))
#define GB(x) (MB (KB (x)))
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 79a3f96..61b17dc 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -897,9 +897,10 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- switch (kcb->kprobe_status) {
- case KPROBE_HIT_SS:
- case KPROBE_REENTER:
+ if (unlikely(regs->ip == (unsigned long)cur->ainsn.insn)) {
+ /* This must happen on single-stepping */
+ WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS &&
+ kcb->kprobe_status != KPROBE_REENTER);
/*
* We are here because the instruction being single
* stepped caused a page fault. We reset the current
@@ -914,9 +915,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
else
reset_current_kprobe();
preempt_enable_no_resched();
- break;
- case KPROBE_HIT_ACTIVE:
- case KPROBE_HIT_SSDONE:
+ } else if (kcb->kprobe_status == KPROBE_HIT_ACTIVE ||
+ kcb->kprobe_status == KPROBE_HIT_SSDONE) {
/*
* We increment the nmissed count for accounting,
* we can also use npre/npostfault count for accounting
@@ -945,10 +945,8 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* fixup routine could not handle it,
* Let do_page_fault() fix it.
*/
- break;
- default:
- break;
}
+
return 0;
}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 654b465..3399d3a 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -114,8 +114,8 @@ EXPORT_SYMBOL(machine_real_restart);
*/
static int __init set_pci_reboot(const struct dmi_system_id *d)
{
- if (reboot_type != BOOT_CF9) {
- reboot_type = BOOT_CF9;
+ if (reboot_type != BOOT_CF9_FORCE) {
+ reboot_type = BOOT_CF9_FORCE;
pr_info("%s series board detected. Selecting %s-method for reboots.\n",
d->ident, "PCI");
}
@@ -458,20 +458,23 @@ void __attribute__((weak)) mach_reboot_fixups(void)
}
/*
- * Windows compatible x86 hardware expects the following on reboot:
+ * To the best of our knowledge Windows compatible x86 hardware expects
+ * the following on reboot:
*
* 1) If the FADT has the ACPI reboot register flag set, try it
* 2) If still alive, write to the keyboard controller
* 3) If still alive, write to the ACPI reboot register again
* 4) If still alive, write to the keyboard controller again
* 5) If still alive, call the EFI runtime service to reboot
- * 6) If still alive, write to the PCI IO port 0xCF9 to reboot
- * 7) If still alive, inform BIOS to do a proper reboot
+ * 6) If no EFI runtime service, call the BIOS to do a reboot
*
- * If the machine is still alive at this stage, it gives up. We default to
- * following the same pattern, except that if we're still alive after (7) we'll
- * try to force a triple fault and then cycle between hitting the keyboard
- * controller and doing that
+ * We default to following the same pattern. We also have
+ * two other reboot methods: 'triple fault' and 'PCI', which
+ * can be triggered via the reboot= kernel boot option or
+ * via quirks.
+ *
+ * This means that this function can never return, it can misbehave
+ * by not rebooting properly and hanging.
*/
static void native_machine_emergency_restart(void)
{
@@ -492,6 +495,11 @@ static void native_machine_emergency_restart(void)
for (;;) {
/* Could also try the reset bit in the Hammer NB */
switch (reboot_type) {
+ case BOOT_ACPI:
+ acpi_reboot();
+ reboot_type = BOOT_KBD;
+ break;
+
case BOOT_KBD:
mach_reboot_fixups(); /* For board specific fixups */
@@ -509,43 +517,29 @@ static void native_machine_emergency_restart(void)
}
break;
- case BOOT_TRIPLE:
- load_idt(&no_idt);
- __asm__ __volatile__("int3");
-
- /* We're probably dead after this, but... */
- reboot_type = BOOT_KBD;
- break;
-
- case BOOT_BIOS:
- machine_real_restart(MRR_BIOS);
-
- /* We're probably dead after this, but... */
- reboot_type = BOOT_TRIPLE;
- break;
-
- case BOOT_ACPI:
- acpi_reboot();
- reboot_type = BOOT_KBD;
- break;
-
case BOOT_EFI:
if (efi_enabled(EFI_RUNTIME_SERVICES))
efi.reset_system(reboot_mode == REBOOT_WARM ?
EFI_RESET_WARM :
EFI_RESET_COLD,
EFI_SUCCESS, 0, NULL);
- reboot_type = BOOT_CF9_COND;
+ reboot_type = BOOT_BIOS;
+ break;
+
+ case BOOT_BIOS:
+ machine_real_restart(MRR_BIOS);
+
+ /* We're probably dead after this, but... */
+ reboot_type = BOOT_CF9_SAFE;
break;
- case BOOT_CF9:
+ case BOOT_CF9_FORCE:
port_cf9_safe = true;
/* Fall through */
- case BOOT_CF9_COND:
+ case BOOT_CF9_SAFE:
if (port_cf9_safe) {
- u8 reboot_code = reboot_mode == REBOOT_WARM ?
- 0x06 : 0x0E;
+ u8 reboot_code = reboot_mode == REBOOT_WARM ? 0x06 : 0x0E;
u8 cf9 = inb(0xcf9) & ~reboot_code;
outb(cf9|2, 0xcf9); /* Request hard reset */
udelay(50);
@@ -553,7 +547,15 @@ static void native_machine_emergency_restart(void)
outb(cf9|reboot_code, 0xcf9);
udelay(50);
}
- reboot_type = BOOT_BIOS;
+ reboot_type = BOOT_TRIPLE;
+ break;
+
+ case BOOT_TRIPLE:
+ load_idt(&no_idt);
+ __asm__ __volatile__("int3");
+
+ /* We're probably dead after this, but... */
+ reboot_type = BOOT_KBD;
break;
}
}
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index bea6067..f47a104 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -308,7 +308,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
- F(ADX);
+ F(ADX) | F(SMAP);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index a2a1bb7..eeecbed 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -48,6 +48,14 @@ static inline bool guest_cpuid_has_smep(struct kvm_vcpu *vcpu)
return best && (best->ebx & bit(X86_FEATURE_SMEP));
}
+static inline bool guest_cpuid_has_smap(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 7, 0);
+ return best && (best->ebx & bit(X86_FEATURE_SMAP));
+}
+
static inline bool guest_cpuid_has_fsgsbase(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f5704d9..813d310 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3601,20 +3601,27 @@ static void reset_rsvds_bits_mask_ept(struct kvm_vcpu *vcpu,
}
}
-static void update_permission_bitmask(struct kvm_vcpu *vcpu,
+void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
- bool fault, x, w, u, wf, uf, ff, smep;
+ bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
- smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+ cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
+ /*
+ * PFERR_RSVD_MASK bit is set in PFEC if the access is not
+ * subject to SMAP restrictions, and cleared otherwise. The
+ * bit is only meaningful if the SMAP bit is set in CR4.
+ */
+ smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
@@ -3626,12 +3633,33 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
/* Disallow supervisor fetches of user code if cr4.smep */
- x &= !(smep && u && !uf);
+ x &= !(cr4_smep && u && !uf);
+
+ /*
+ * SMAP:kernel-mode data accesses from user-mode
+ * mappings should fault. A fault is considered
+ * as a SMAP violation if all of the following
+ * conditions are ture:
+ * - X86_CR4_SMAP is set in CR4
+ * - An user page is accessed
+ * - Page fault in kernel mode
+ * - if CPL = 3 or X86_EFLAGS_AC is clear
+ *
+ * Here, we cover the first three conditions.
+ * The fourth is computed dynamically in
+ * permission_fault() and is in smapf.
+ *
+ * Also, SMAP does not affect instruction
+ * fetches, add the !ff check here to make it
+ * clearer.
+ */
+ smap = cr4_smap && u && !uf && !ff;
} else
/* Not really needed: no U/S accesses on ept */
u = 1;
- fault = (ff && !x) || (uf && !u) || (wf && !w);
+ fault = (ff && !x) || (uf && !u) || (wf && !w) ||
+ (smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2926152..3842e70 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,11 +44,17 @@
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+
+#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
+#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
+#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask);
@@ -73,6 +79,8 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context);
void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *context,
bool execonly);
+void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ bool ept);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
@@ -110,10 +118,30 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
* Will a fault with a given page-fault error code (pfec) cause a permission
* fault with the given access (in ACC_* format)?
*/
-static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
- unsigned pfec)
+static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
+ unsigned pte_access, unsigned pfec)
{
- return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
+ int cpl = kvm_x86_ops->get_cpl(vcpu);
+ unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
+
+ /*
+ * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
+ *
+ * If CPL = 3, SMAP applies to all supervisor-mode data accesses
+ * (these are implicit supervisor accesses) regardless of the value
+ * of EFLAGS.AC.
+ *
+ * This computes (cpl < 3) && (rflags & X86_EFLAGS_AC), leaving
+ * the result in X86_EFLAGS_AC. We then insert it in place of
+ * the PFERR_RSVD_MASK bit; this bit will always be zero in pfec,
+ * but it will be one in index if SMAP checks are being overridden.
+ * It is important to keep this branchless.
+ */
+ unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
+ int index = (pfec >> 1) +
+ (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
+
+ return (mmu->permissions[index] >> pte_access) & 1;
}
void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b1e6c1b..123efd3 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -353,7 +353,7 @@ retry_walk:
walker->ptes[walker->level - 1] = pte;
} while (!is_last_gpte(mmu, walker->level, pte));
- if (unlikely(permission_fault(mmu, pte_access, access))) {
+ if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
errcode |= PFERR_PRESENT_MASK;
goto error;
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1320e0f..1f68c58 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3484,13 +3484,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
hw_cr4 &= ~X86_CR4_PAE;
hw_cr4 |= X86_CR4_PSE;
/*
- * SMEP is disabled if CPU is in non-paging mode in
- * hardware. However KVM always uses paging mode to
+ * SMEP/SMAP is disabled if CPU is in non-paging mode
+ * in hardware. However KVM always uses paging mode to
* emulate guest non-paging mode with TDP.
- * To emulate this behavior, SMEP needs to be manually
- * disabled when guest switches to non-paging mode.
+ * To emulate this behavior, SMEP/SMAP needs to be
+ * manually disabled when guest switches to non-paging
+ * mode.
*/
- hw_cr4 &= ~X86_CR4_SMEP;
+ hw_cr4 &= ~(X86_CR4_SMEP | X86_CR4_SMAP);
} else if (!(cr4 & X86_CR4_PAE)) {
hw_cr4 &= ~X86_CR4_PAE;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9d1b5cd..8b8fc0b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -652,6 +652,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
if (!guest_cpuid_has_smep(vcpu) && (cr4 & X86_CR4_SMEP))
return 1;
+ if (!guest_cpuid_has_smap(vcpu) && (cr4 & X86_CR4_SMAP))
+ return 1;
+
if (!guest_cpuid_has_fsgsbase(vcpu) && (cr4 & X86_CR4_FSGSBASE))
return 1;
@@ -680,6 +683,9 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
(!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
kvm_mmu_reset_context(vcpu);
+ if ((cr4 ^ old_cr4) & X86_CR4_SMAP)
+ update_permission_bitmask(vcpu, vcpu->arch.walk_mmu, false);
+
if ((cr4 ^ old_cr4) & X86_CR4_OSXSAVE)
kvm_update_cpuid(vcpu);
@@ -1117,7 +1123,6 @@ static inline u64 get_kernel_ns(void)
{
struct timespec ts;
- WARN_ON(preemptible());
ktime_get_ts(&ts);
monotonic_to_bootbased(&ts);
return timespec_to_ns(&ts);
@@ -4164,7 +4169,8 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
| (write ? PFERR_WRITE_MASK : 0);
if (vcpu_match_mmio_gva(vcpu, gva)
- && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
+ && !permission_fault(vcpu, vcpu->arch.walk_mmu,
+ vcpu->arch.access, access)) {
*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
(gva & (PAGE_SIZE - 1));
trace_vcpu_match_mmio(gva, *gpa, write, false);
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index f325af2..3323c27 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -54,5 +54,7 @@ syshdr-$(CONFIG_X86_64) += syscalls_64.h
targets += $(uapisyshdr-y) $(syshdr-y)
+PHONY += all
all: $(addprefix $(uapi)/,$(uapisyshdr-y))
all: $(addprefix $(out)/,$(syshdr-y))
+ @:
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 96bc506..d6b8679 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -359,3 +359,4 @@
350 i386 finit_module sys_finit_module
351 i386 sched_setattr sys_sched_setattr
352 i386 sched_getattr sys_sched_getattr
+353 i386 renameat2 sys_renameat2
diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index e812034..604a37e 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -40,4 +40,6 @@ $(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/ina
HOST_EXTRACFLAGS += -I$(srctree)/tools/include
hostprogs-y += relocs
relocs-objs := relocs_32.o relocs_64.o relocs_common.o
+PHONY += relocs
relocs: $(obj)/relocs
+ @:
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S
index 2e263f3..9df017a 100644
--- a/arch/x86/vdso/vdso-layout.lds.S
+++ b/arch/x86/vdso/vdso-layout.lds.S
@@ -9,12 +9,9 @@ SECTIONS
#ifdef BUILD_VDSO32
#include <asm/vdso32.h>
- .hpet_sect : {
- hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
- } :text :hpet_sect
+ hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE);
- .vvar_sect : {
- vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
+ vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE);
/* Place all vvars at the offsets in asm/vvar.h. */
#define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset;
@@ -22,7 +19,6 @@ SECTIONS
#include <asm/vvar.h>
#undef __VVAR_KERNEL_LDS
#undef EMIT_VVAR
- } :text :vvar_sect
#endif
. = SIZEOF_HEADERS;
@@ -61,7 +57,12 @@ SECTIONS
*/
. = ALIGN(0x100);
- .text : { *(.text*) } :text =0x90909090
+ .text : { *(.text*) } :text =0x90909090,
+
+ /*
+ * The comma above works around a bug in gold:
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=16804
+ */
/DISCARD/ : {
*(.discard)
@@ -84,8 +85,4 @@ PHDRS
dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
note PT_NOTE FLAGS(4); /* PF_R */
eh_frame_hdr PT_GNU_EH_FRAME;
-#ifdef BUILD_VDSO32
- vvar_sect PT_NULL FLAGS(4); /* PF_R */
- hpet_sect PT_NULL FLAGS(4); /* PF_R */
-#endif
}
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index a18eadd..7005974 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -441,10 +441,11 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
irq_ctx_init(cpu);
#else
clear_tsk_thread_flag(idle, TIF_FORK);
+#endif
per_cpu(kernel_stack, cpu) =
(unsigned long)task_stack_page(idle) -
KERNEL_STACK_OFFSET + THREAD_SIZE;
-#endif
+
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
xen_init_lock_cpu(cpu);
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index 4d3acc3..0ba5f3b 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -274,7 +274,7 @@ void __init xen_init_spinlocks(void)
printk(KERN_DEBUG "xen: PV spinlocks disabled\n");
return;
}
-
+ printk(KERN_DEBUG "xen: PV spinlocks enabled\n");
pv_lock_ops.lock_spinning = PV_CALLEE_SAVE(xen_lock_spinning);
pv_lock_ops.unlock_kick = xen_unlock_kick;
}
@@ -290,6 +290,9 @@ static __init int xen_init_spinlocks_jump(void)
if (!xen_pvspin)
return 0;
+ if (!xen_domain())
+ return 0;
+
static_key_slow_inc(&paravirt_ticketlocks_enabled);
return 0;
}
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index 33ca6e4..fd92a64 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -75,6 +75,17 @@ ENDPROC(xen_sysexit)
* stack state in whatever form its in, we keep things simple by only
* using a single register which is pushed/popped on the stack.
*/
+
+.macro POP_FS
+1:
+ popw %fs
+.pushsection .fixup, "ax"
+2: movw $0, (%esp)
+ jmp 1b
+.popsection
+ _ASM_EXTABLE(1b,2b)
+.endm
+
ENTRY(xen_iret)
/* test eflags for special cases */
testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
@@ -83,15 +94,13 @@ ENTRY(xen_iret)
push %eax
ESP_OFFSET=4 # bytes pushed onto stack
- /*
- * Store vcpu_info pointer for easy access. Do it this way to
- * avoid having to reload %fs
- */
+ /* Store vcpu_info pointer for easy access */
#ifdef CONFIG_SMP
- GET_THREAD_INFO(%eax)
- movl %ss:TI_cpu(%eax), %eax
- movl %ss:__per_cpu_offset(,%eax,4), %eax
- mov %ss:xen_vcpu(%eax), %eax
+ pushw %fs
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax, %fs
+ movl %fs:xen_vcpu, %eax
+ POP_FS
#else
movl %ss:xen_vcpu, %eax
#endif