summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig20
-rw-r--r--arch/s390/Makefile17
-rw-r--r--arch/s390/configs/default_defconfig3
-rw-r--r--arch/s390/configs/gcov_defconfig3
-rw-r--r--arch/s390/configs/performance_defconfig3
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/include/asm/Kbuild2
-rw-r--r--arch/s390/include/asm/barrier.h6
-rw-r--r--arch/s390/include/asm/cputime.h28
-rw-r--r--arch/s390/include/asm/dis.h13
-rw-r--r--arch/s390/include/asm/dma-mapping.h31
-rw-r--r--arch/s390/include/asm/elf.h3
-rw-r--r--arch/s390/include/asm/ftrace.h9
-rw-r--r--arch/s390/include/asm/idle.h26
-rw-r--r--arch/s390/include/asm/ipl.h12
-rw-r--r--arch/s390/include/asm/irq.h3
-rw-r--r--arch/s390/include/asm/kprobes.h4
-rw-r--r--arch/s390/include/asm/kvm_host.h37
-rw-r--r--arch/s390/include/asm/lowcore.h21
-rw-r--r--arch/s390/include/asm/nmi.h2
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/include/asm/percpu.h16
-rw-r--r--arch/s390/include/asm/pgalloc.h8
-rw-r--r--arch/s390/include/asm/pgtable.h298
-rw-r--r--arch/s390/include/asm/processor.h14
-rw-r--r--arch/s390/include/asm/ptrace.h6
-rw-r--r--arch/s390/include/asm/qdio.h4
-rw-r--r--arch/s390/include/asm/scatterlist.h3
-rw-r--r--arch/s390/include/asm/setup.h6
-rw-r--r--arch/s390/include/asm/sigp.h6
-rw-r--r--arch/s390/include/asm/smp.h2
-rw-r--r--arch/s390/include/asm/spinlock.h135
-rw-r--r--arch/s390/include/asm/spinlock_types.h1
-rw-r--r--arch/s390/include/asm/switch_to.h61
-rw-r--r--arch/s390/include/asm/syscall.h2
-rw-r--r--arch/s390/include/asm/thread_info.h3
-rw-r--r--arch/s390/include/asm/tlb.h2
-rw-r--r--arch/s390/include/asm/uprobes.h42
-rw-r--r--arch/s390/include/asm/vdso.h18
-rw-r--r--arch/s390/include/asm/vtimer.h2
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/kvm.h10
-rw-r--r--arch/s390/include/uapi/asm/kvm_perf.h25
-rw-r--r--arch/s390/include/uapi/asm/sie.h1
-rw-r--r--arch/s390/include/uapi/asm/sigcontext.h20
-rw-r--r--arch/s390/include/uapi/asm/types.h4
-rw-r--r--arch/s390/include/uapi/asm/ucontext.h15
-rw-r--r--arch/s390/include/uapi/asm/unistd.h5
-rw-r--r--arch/s390/kernel/Makefile8
-rw-r--r--arch/s390/kernel/asm-offsets.c9
-rw-r--r--arch/s390/kernel/compat_linux.h9
-rw-r--r--arch/s390/kernel/compat_signal.c275
-rw-r--r--arch/s390/kernel/compat_wrapper.c3
-rw-r--r--arch/s390/kernel/crash_dump.c58
-rw-r--r--arch/s390/kernel/dis.c245
-rw-r--r--arch/s390/kernel/early.c4
-rw-r--r--arch/s390/kernel/entry.h10
-rw-r--r--arch/s390/kernel/entry64.S17
-rw-r--r--arch/s390/kernel/ftrace.c139
-rw-r--r--arch/s390/kernel/head.S2
-rw-r--r--arch/s390/kernel/idle.c124
-rw-r--r--arch/s390/kernel/ipl.c135
-rw-r--r--arch/s390/kernel/irq.c98
-rw-r--r--arch/s390/kernel/kprobes.c167
-rw-r--r--arch/s390/kernel/machine_kexec.c8
-rw-r--r--arch/s390/kernel/mcount.S90
-rw-r--r--arch/s390/kernel/mcount64.S65
-rw-r--r--arch/s390/kernel/nmi.c26
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c34
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c16
-rw-r--r--arch/s390/kernel/pgm_check.S2
-rw-r--r--arch/s390/kernel/process.c24
-rw-r--r--arch/s390/kernel/processor.c6
-rw-r--r--arch/s390/kernel/ptrace.c256
-rw-r--r--arch/s390/kernel/setup.c34
-rw-r--r--arch/s390/kernel/signal.c364
-rw-r--r--arch/s390/kernel/smp.c80
-rw-r--r--arch/s390/kernel/suspend.c6
-rw-r--r--arch/s390/kernel/syscalls.S3
-rw-r--r--arch/s390/kernel/time.c35
-rw-r--r--arch/s390/kernel/topology.c18
-rw-r--r--arch/s390/kernel/traps.c115
-rw-r--r--arch/s390/kernel/uprobes.c332
-rw-r--r--arch/s390/kernel/vdso.c15
-rw-r--r--arch/s390/kernel/vdso32/clock_getres.S11
-rw-r--r--arch/s390/kernel/vdso32/clock_gettime.S42
-rw-r--r--arch/s390/kernel/vdso32/gettimeofday.S4
-rw-r--r--arch/s390/kernel/vdso64/clock_getres.S8
-rw-r--r--arch/s390/kernel/vdso64/clock_gettime.S42
-rw-r--r--arch/s390/kernel/vdso64/gettimeofday.S4
-rw-r--r--arch/s390/kernel/vtime.c77
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/diag.c31
-rw-r--r--arch/s390/kvm/gaccess.c3
-rw-r--r--arch/s390/kvm/intercept.c32
-rw-r--r--arch/s390/kvm/interrupt.c254
-rw-r--r--arch/s390/kvm/kvm-s390.c258
-rw-r--r--arch/s390/kvm/kvm-s390.h18
-rw-r--r--arch/s390/kvm/priv.c11
-rw-r--r--arch/s390/kvm/sigp.c44
-rw-r--r--arch/s390/lib/Makefile2
-rw-r--r--arch/s390/lib/delay.c4
-rw-r--r--arch/s390/lib/probes.c159
-rw-r--r--arch/s390/lib/spinlock.c105
-rw-r--r--arch/s390/mm/dump_pagetables.c5
-rw-r--r--arch/s390/mm/fault.c25
-rw-r--r--arch/s390/mm/hugetlbpage.c105
-rw-r--r--arch/s390/mm/init.c1
-rw-r--r--arch/s390/mm/pageattr.c38
-rw-r--r--arch/s390/mm/pgtable.c713
-rw-r--r--arch/s390/mm/vmem.c10
-rw-r--r--arch/s390/net/bpf_jit_comp.c88
-rw-r--r--arch/s390/oprofile/hwsampler.c2
-rw-r--r--arch/s390/pci/pci.c4
-rw-r--r--arch/s390/pci/pci_clp.c4
-rw-r--r--arch/s390/pci/pci_debug.c4
-rw-r--r--arch/s390/pci/pci_dma.c50
-rw-r--r--arch/s390/pci/pci_event.c4
-rw-r--r--arch/s390/pci/pci_sysfs.c4
120 files changed, 3718 insertions, 2238 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index bb63499..f2cf1f9 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -58,6 +58,9 @@ config NO_IOPORT_MAP
config PCI_QUIRKS
def_bool n
+config ARCH_SUPPORTS_UPROBES
+ def_bool 64BIT
+
config S390
def_bool y
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
@@ -92,10 +95,12 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
+ select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
select CLONE_BACKWARDS2
+ select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_DEVICES if !SMP
select GENERIC_FIND_FIRST_BIT
@@ -112,11 +117,11 @@ config S390
select HAVE_CMPXCHG_LOCAL
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DYNAMIC_FTRACE
+ select HAVE_DYNAMIC_FTRACE if 64BIT
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if 64BIT
select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_FUNCTION_GRAPH_TRACER
- select HAVE_FUNCTION_TRACER
- select HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ select HAVE_FUNCTION_GRAPH_TRACER if 64BIT
+ select HAVE_FUNCTION_TRACER if 64BIT
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
@@ -137,7 +142,6 @@ config S390
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16 if 32BIT
select HAVE_VIRT_CPU_ACCOUNTING
- select KTIME_SCALAR if 32BIT
select MODULES_USE_ELF_RELA
select NO_BOOTMEM
select OLD_SIGACTION
@@ -146,6 +150,7 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select VIRT_TO_BUS
+ select ARCH_HAS_SG_CHAIN
config SCHED_OMIT_FRAME_POINTER
def_bool y
@@ -496,8 +501,8 @@ config QDIO
menuconfig PCI
bool "PCI support"
- default n
depends on 64BIT
+ select HAVE_DMA_ATTRS
select PCI_MSI
help
Enable PCI support.
@@ -544,9 +549,6 @@ config HAS_DMA
config NEED_SG_DMA_LENGTH
def_bool PCI
-config HAVE_DMA_ATTRS
- def_bool PCI
-
config NEED_DMA_MAP_STATE
def_bool PCI
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 874e6d6..878e679 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -35,13 +35,16 @@ endif
export LD_BFD
-cflags-$(CONFIG_MARCH_G5) += -march=g5
-cflags-$(CONFIG_MARCH_Z900) += -march=z900
-cflags-$(CONFIG_MARCH_Z990) += -march=z990
-cflags-$(CONFIG_MARCH_Z9_109) += -march=z9-109
-cflags-$(CONFIG_MARCH_Z10) += -march=z10
-cflags-$(CONFIG_MARCH_Z196) += -march=z196
-cflags-$(CONFIG_MARCH_ZEC12) += -march=zEC12
+mflags-$(CONFIG_MARCH_G5) := -march=g5
+mflags-$(CONFIG_MARCH_Z900) := -march=z900
+mflags-$(CONFIG_MARCH_Z990) := -march=z990
+mflags-$(CONFIG_MARCH_Z9_109) := -march=z9-109
+mflags-$(CONFIG_MARCH_Z10) := -march=z10
+mflags-$(CONFIG_MARCH_Z196) := -march=z196
+mflags-$(CONFIG_MARCH_ZEC12) := -march=zEC12
+
+aflags-y += $(mflags-y)
+cflags-y += $(mflags-y)
cflags-$(CONFIG_MARCH_G5_TUNE) += -mtune=g5
cflags-$(CONFIG_MARCH_Z900_TUNE) += -mtune=z900
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index fd09a10..9d94fdd 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -63,6 +63,7 @@ CONFIG_CRASH_DUMP=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
@@ -366,7 +367,6 @@ CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
@@ -380,7 +380,6 @@ CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_ISCSI_TCP=m
CONFIG_LIBFCOE=m
CONFIG_SCSI_DEBUG=m
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index b061180..90f514b 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -61,6 +61,7 @@ CONFIG_CRASH_DUMP=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
@@ -363,7 +364,6 @@ CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
@@ -377,7 +377,6 @@ CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_ISCSI_TCP=m
CONFIG_LIBFCOE=m
CONFIG_SCSI_DEBUG=m
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index d279baa..13559d3 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -59,6 +59,7 @@ CONFIG_CRASH_DUMP=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
@@ -361,7 +362,6 @@ CONFIG_VIRTIO_BLK=y
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
-CONFIG_SCSI_TGT=m
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
@@ -375,7 +375,6 @@ CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
-CONFIG_SCSI_SRP_TGT_ATTRS=y
CONFIG_ISCSI_TCP=m
CONFIG_LIBFCOE=m
CONFIG_SCSI_DEBUG=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 948e0e0..e376789 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -23,6 +23,7 @@ CONFIG_CRASH_DUMP=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
# CONFIG_SECCOMP is not set
# CONFIG_IUCV is not set
+CONFIG_NET=y
CONFIG_ATM=y
CONFIG_ATM_LANE=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 2e56498..fab35a8 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -50,6 +50,7 @@ CONFIG_CMA=y
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
+CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_NET_KEY=y
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 57892a8..773f866 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -2,6 +2,8 @@
generic-y += clkdev.h
generic-y += hash.h
+generic-y += irq_work.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
+generic-y += scatterlist.h
generic-y += trace_clock.h
diff --git a/arch/s390/include/asm/barrier.h b/arch/s390/include/asm/barrier.h
index 19ff956..b5dce65 100644
--- a/arch/s390/include/asm/barrier.h
+++ b/arch/s390/include/asm/barrier.h
@@ -15,11 +15,13 @@
#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
/* Fast-BCR without checkpoint synchronization */
-#define mb() do { asm volatile("bcr 14,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 14,0\n"
#else
-#define mb() do { asm volatile("bcr 15,0" : : : "memory"); } while (0)
+#define __ASM_BARRIER "bcr 15,0\n"
#endif
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
#define rmb() mb()
#define wmb() mb()
#define read_barrier_depends() do { } while(0)
diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
index f65bd36..f8c1969 100644
--- a/arch/s390/include/asm/cputime.h
+++ b/arch/s390/include/asm/cputime.h
@@ -8,8 +8,6 @@
#define _S390_CPUTIME_H
#include <linux/types.h>
-#include <linux/percpu.h>
-#include <linux/spinlock.h>
#include <asm/div64.h>
@@ -18,6 +16,8 @@
typedef unsigned long long __nocast cputime_t;
typedef unsigned long long __nocast cputime64_t;
+#define cmpxchg_cputime(ptr, old, new) cmpxchg64(ptr, old, new)
+
static inline unsigned long __div(unsigned long long n, unsigned long base)
{
#ifndef CONFIG_64BIT
@@ -165,28 +165,8 @@ static inline clock_t cputime64_to_clock_t(cputime64_t cputime)
return clock;
}
-struct s390_idle_data {
- int nohz_delay;
- unsigned int sequence;
- unsigned long long idle_count;
- unsigned long long idle_time;
- unsigned long long clock_idle_enter;
- unsigned long long clock_idle_exit;
- unsigned long long timer_idle_enter;
- unsigned long long timer_idle_exit;
-};
-
-DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
-
-cputime64_t s390_get_idle_time(int cpu);
-
-#define arch_idle_time(cpu) s390_get_idle_time(cpu)
-
-static inline int s390_nohz_delay(int cpu)
-{
- return __get_cpu_var(s390_idle).nohz_delay != 0;
-}
+cputime64_t arch_cpu_idle_time(int cpu);
-#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
+#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
#endif /* _S390_CPUTIME_H */
diff --git a/arch/s390/include/asm/dis.h b/arch/s390/include/asm/dis.h
index 04a83f5..60323c2 100644
--- a/arch/s390/include/asm/dis.h
+++ b/arch/s390/include/asm/dis.h
@@ -13,12 +13,13 @@
#define OPERAND_FPR 0x2 /* Operand printed as %fx */
#define OPERAND_AR 0x4 /* Operand printed as %ax */
#define OPERAND_CR 0x8 /* Operand printed as %cx */
-#define OPERAND_DISP 0x10 /* Operand printed as displacement */
-#define OPERAND_BASE 0x20 /* Operand printed as base register */
-#define OPERAND_INDEX 0x40 /* Operand printed as index register */
-#define OPERAND_PCREL 0x80 /* Operand printed as pc-relative symbol */
-#define OPERAND_SIGNED 0x100 /* Operand printed as signed value */
-#define OPERAND_LENGTH 0x200 /* Operand printed as length (+1) */
+#define OPERAND_VR 0x10 /* Operand printed as %vx */
+#define OPERAND_DISP 0x20 /* Operand printed as displacement */
+#define OPERAND_BASE 0x40 /* Operand printed as base register */
+#define OPERAND_INDEX 0x80 /* Operand printed as index register */
+#define OPERAND_PCREL 0x100 /* Operand printed as pc-relative symbol */
+#define OPERAND_SIGNED 0x200 /* Operand printed as signed value */
+#define OPERAND_LENGTH 0x400 /* Operand printed as length (+1) */
struct s390_operand {
diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h
index 3fbc67d..709955d 100644
--- a/arch/s390/include/asm/dma-mapping.h
+++ b/arch/s390/include/asm/dma-mapping.h
@@ -56,24 +56,35 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
return dma_addr == DMA_ERROR_CODE;
}
-static inline void *dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag)
+#define dma_alloc_coherent(d, s, h, f) dma_alloc_attrs(d, s, h, f, NULL)
+
+static inline void *dma_alloc_attrs(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t flags,
+ struct dma_attrs *attrs)
{
struct dma_map_ops *ops = get_dma_ops(dev);
- void *ret;
+ void *cpu_addr;
+
+ BUG_ON(!ops);
- ret = ops->alloc(dev, size, dma_handle, flag, NULL);
- debug_dma_alloc_coherent(dev, size, *dma_handle, ret);
- return ret;
+ cpu_addr = ops->alloc(dev, size, dma_handle, flags, attrs);
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+
+ return cpu_addr;
}
-static inline void dma_free_coherent(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_handle)
+#define dma_free_coherent(d, s, c, h) dma_free_attrs(d, s, c, h, NULL)
+
+static inline void dma_free_attrs(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_handle,
+ struct dma_attrs *attrs)
{
- struct dma_map_ops *dma_ops = get_dma_ops(dev);
+ struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!ops);
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- dma_ops->free(dev, size, cpu_addr, dma_handle, NULL);
+ ops->free(dev, size, cpu_addr, dma_handle, attrs);
}
#endif /* _ASM_S390_DMA_MAPPING_H */
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 78f4f87..f6e43d3 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -102,6 +102,7 @@
#define HWCAP_S390_ETF3EH 256
#define HWCAP_S390_HIGH_GPRS 512
#define HWCAP_S390_TE 1024
+#define HWCAP_S390_VXRS 2048
/*
* These are used to set parameters in the core dumps.
@@ -225,6 +226,6 @@ int arch_setup_additional_pages(struct linux_binprm *, int);
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
#define arch_randomize_brk arch_randomize_brk
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa);
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vxrs);
#endif
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index bf246da..3aef8af 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -4,6 +4,7 @@
#ifndef __ASSEMBLY__
extern void _mcount(void);
+extern char ftrace_graph_caller_end;
struct dyn_arch_ftrace { };
@@ -17,10 +18,8 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
#endif /* __ASSEMBLY__ */
-#ifdef CONFIG_64BIT
-#define MCOUNT_INSN_SIZE 12
-#else
-#define MCOUNT_INSN_SIZE 22
-#endif
+#define MCOUNT_INSN_SIZE 18
+
+#define ARCH_SUPPORTS_FTRACE_OPS 1
#endif /* _ASM_S390_FTRACE_H */
diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h
new file mode 100644
index 0000000..6af037f
--- /dev/null
+++ b/arch/s390/include/asm/idle.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright IBM Corp. 2014
+ *
+ * Author: Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef _S390_IDLE_H
+#define _S390_IDLE_H
+
+#include <linux/types.h>
+#include <linux/device.h>
+
+struct s390_idle_data {
+ unsigned int sequence;
+ unsigned long long idle_count;
+ unsigned long long idle_time;
+ unsigned long long clock_idle_enter;
+ unsigned long long clock_idle_exit;
+ unsigned long long timer_idle_enter;
+ unsigned long long timer_idle_exit;
+};
+
+extern struct device_attribute dev_attr_idle_count;
+extern struct device_attribute dev_attr_idle_time_us;
+
+#endif /* _S390_IDLE_H */
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index 2fcccc0..ece606c 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -17,12 +17,12 @@
#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
sizeof(struct ipl_block_fcp))
-#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 8)
+#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16)
#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
sizeof(struct ipl_block_ccw))
-#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 8)
+#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16)
#define IPL_MAX_SUPPORTED_VERSION (0)
@@ -38,10 +38,11 @@ struct ipl_list_hdr {
u8 pbt;
u8 flags;
u16 reserved2;
+ u8 loadparm[8];
} __attribute__((packed));
struct ipl_block_fcp {
- u8 reserved1[313-1];
+ u8 reserved1[305-1];
u8 opt;
u8 reserved2[3];
u16 reserved3;
@@ -62,7 +63,6 @@ struct ipl_block_fcp {
offsetof(struct ipl_block_fcp, scp_data)))
struct ipl_block_ccw {
- u8 load_parm[8];
u8 reserved1[84];
u8 reserved2[2];
u16 devno;
@@ -89,12 +89,12 @@ extern u32 ipl_flags;
extern u32 dump_prefix_page;
struct dump_save_areas {
- struct save_area **areas;
+ struct save_area_ext **areas;
int count;
};
extern struct dump_save_areas dump_save_areas;
-struct save_area *dump_save_area_create(int cpu);
+struct save_area_ext *dump_save_area_create(int cpu);
extern void do_reipl(void);
extern void do_halt(void);
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index c4dd400..b0d5f0a 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -51,6 +51,7 @@ enum interruption_class {
IRQEXT_CMS,
IRQEXT_CMC,
IRQEXT_CMR,
+ IRQEXT_FTP,
IRQIO_CIO,
IRQIO_QAI,
IRQIO_DAS,
@@ -81,7 +82,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
static __always_inline void inc_irq_stat(enum interruption_class irq)
{
- __get_cpu_var(irq_stat).irqs[irq]++;
+ __this_cpu_inc(irq_stat.irqs[irq]);
}
struct ext_code {
diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h
index 4176dfe..9862917 100644
--- a/arch/s390/include/asm/kprobes.h
+++ b/arch/s390/include/asm/kprobes.h
@@ -84,6 +84,10 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
+int probe_is_prohibited_opcode(u16 *insn);
+int probe_get_fixup_type(u16 *insn);
+int probe_is_insn_relative_long(u16 *insn);
+
#define flush_insn_slot(p) do { } while (0)
#endif /* _ASM_S390_KPROBES_H */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 4181d7b..2175f911 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -13,8 +13,11 @@
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
+
+#include <linux/types.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <asm/debug.h>
@@ -154,7 +157,9 @@ struct kvm_s390_sie_block {
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
- __u8 reservedf0[16]; /* 0x00f0 */
+ __u8 reservedf0[12]; /* 0x00f0 */
+#define CRYCB_FORMAT1 0x00000001
+ __u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
@@ -187,6 +192,7 @@ struct kvm_vcpu_stat {
u32 exit_stop_request;
u32 exit_validity;
u32 exit_instruction;
+ u32 halt_wakeup;
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
@@ -305,7 +311,6 @@ struct kvm_s390_local_interrupt {
struct list_head list;
atomic_t active;
struct kvm_s390_float_interrupt *float_int;
- int timer_due; /* event indicator for waitqueue below */
wait_queue_head_t *wq;
atomic_t *cpuflags;
unsigned int action_bits;
@@ -367,7 +372,6 @@ struct kvm_vcpu_arch {
s390_fp_regs guest_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
- struct tasklet_struct tasklet;
struct kvm_s390_pgm_info pgm;
union {
struct cpuid cpu_id;
@@ -409,6 +413,15 @@ struct s390_io_adapter {
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
#define MAX_S390_ADAPTER_MAPS 256
+struct kvm_s390_crypto {
+ struct kvm_s390_crypto_cb *crycb;
+ __u32 crycbd;
+};
+
+struct kvm_s390_crypto_cb {
+ __u8 reserved00[128]; /* 0x0000 */
+};
+
struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
@@ -418,9 +431,11 @@ struct kvm_arch{
int css_support;
int use_irqchip;
int use_cmma;
+ int user_cpu_state_ctrl;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
spinlock_t start_stop_lock;
+ struct kvm_s390_crypto crypto;
};
#define KVM_HVA_ERR_BAD (-1UL)
@@ -432,8 +447,6 @@ static inline bool kvm_is_error_hva(unsigned long addr)
}
#define ASYNC_PF_PER_VCPU 64
-struct kvm_vcpu;
-struct kvm_async_pf;
struct kvm_arch_async_pf {
unsigned long pfault_token;
};
@@ -451,4 +464,18 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
+
+static inline void kvm_arch_hardware_disable(void) {}
+static inline void kvm_arch_check_processor_compat(void *rtn) {}
+static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_sync_events(struct kvm *kvm) {}
+static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_free_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
+static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
+static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot) {}
+
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 4349197..6cc51fe 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <asm/ptrace.h>
#include <asm/cpu.h>
+#include <asm/types.h>
#ifdef CONFIG_32BIT
@@ -31,6 +32,11 @@ struct save_area {
u32 ctrl_regs[16];
} __packed;
+struct save_area_ext {
+ struct save_area sa;
+ __vector128 vx_regs[32];
+};
+
struct _lowcore {
psw_t restart_psw; /* 0x0000 */
psw_t restart_old_psw; /* 0x0008 */
@@ -183,6 +189,11 @@ struct save_area {
u64 ctrl_regs[16];
} __packed;
+struct save_area_ext {
+ struct save_area sa;
+ __vector128 vx_regs[32];
+};
+
struct _lowcore {
__u8 pad_0x0000[0x0014-0x0000]; /* 0x0000 */
__u32 ipl_parmblock_ptr; /* 0x0014 */
@@ -310,7 +321,10 @@ struct _lowcore {
/* Extended facility list */
__u64 stfle_fac_list[32]; /* 0x0f00 */
- __u8 pad_0x1000[0x11b8-0x1000]; /* 0x1000 */
+ __u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
+
+ /* Pointer to vector register save area */
+ __u64 vector_save_area_addr; /* 0x11b0 */
/* 64 bit extparam used for pfault/diag 250: defined by architecture */
__u64 ext_params2; /* 0x11B8 */
@@ -334,9 +348,10 @@ struct _lowcore {
/* Transaction abort diagnostic block */
__u8 pgm_tdb[256]; /* 0x1800 */
+ __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */
- /* align to the top of the prefix area */
- __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
+ /* Software defined save area for vector registers */
+ __u8 vector_save_area[1024]; /* 0x1c00 */
} __packed;
#endif /* CONFIG_32BIT */
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index 35f8ec1..3027a5a 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -38,7 +38,7 @@ struct mci {
__u32 pm : 1; /* 22 psw program mask and cc validity */
__u32 ia : 1; /* 23 psw instruction address validity */
__u32 fa : 1; /* 24 failing storage address validity */
- __u32 : 1; /* 25 */
+ __u32 vr : 1; /* 25 vector register validity */
__u32 ec : 1; /* 26 external damage code validity */
__u32 fp : 1; /* 27 floating point register validity */
__u32 gr : 1; /* 28 general register validity */
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 114258e..7b2ac6e 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn)
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-#define __HAVE_ARCH_GATE_AREA 1
-
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h
index fa91e00..933355e 100644
--- a/arch/s390/include/asm/percpu.h
+++ b/arch/s390/include/asm/percpu.h
@@ -31,7 +31,7 @@
pcp_op_T__ old__, new__, prev__; \
pcp_op_T__ *ptr__; \
preempt_disable(); \
- ptr__ = __this_cpu_ptr(&(pcp)); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
prev__ = *ptr__; \
do { \
old__ = prev__; \
@@ -70,7 +70,7 @@
pcp_op_T__ val__ = (val); \
pcp_op_T__ old__, *ptr__; \
preempt_disable(); \
- ptr__ = __this_cpu_ptr(&(pcp)); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
if (__builtin_constant_p(val__) && \
((szcast)val__ > -129) && ((szcast)val__ < 128)) { \
asm volatile( \
@@ -97,7 +97,7 @@
pcp_op_T__ val__ = (val); \
pcp_op_T__ old__, *ptr__; \
preempt_disable(); \
- ptr__ = __this_cpu_ptr(&(pcp)); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
op " %[old__],%[val__],%[ptr__]\n" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
@@ -116,7 +116,7 @@
pcp_op_T__ val__ = (val); \
pcp_op_T__ old__, *ptr__; \
preempt_disable(); \
- ptr__ = __this_cpu_ptr(&(pcp)); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
asm volatile( \
op " %[old__],%[val__],%[ptr__]\n" \
: [old__] "=d" (old__), [ptr__] "+Q" (*ptr__) \
@@ -138,7 +138,7 @@
pcp_op_T__ ret__; \
pcp_op_T__ *ptr__; \
preempt_disable(); \
- ptr__ = __this_cpu_ptr(&(pcp)); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
ret__ = cmpxchg(ptr__, oval, nval); \
preempt_enable(); \
ret__; \
@@ -154,7 +154,7 @@
typeof(pcp) *ptr__; \
typeof(pcp) ret__; \
preempt_disable(); \
- ptr__ = __this_cpu_ptr(&(pcp)); \
+ ptr__ = raw_cpu_ptr(&(pcp)); \
ret__ = xchg(ptr__, nval); \
preempt_enable(); \
ret__; \
@@ -173,8 +173,8 @@
typeof(pcp2) *p2__; \
int ret__; \
preempt_disable(); \
- p1__ = __this_cpu_ptr(&(pcp1)); \
- p2__ = __this_cpu_ptr(&(pcp2)); \
+ p1__ = raw_cpu_ptr(&(pcp1)); \
+ p2__ = raw_cpu_ptr(&(pcp2)); \
ret__ = __cmpxchg_double(p1__, p2__, o1__, o2__, n1__, n2__); \
preempt_enable(); \
ret__; \
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 9e18a61..d39a31c 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -18,9 +18,9 @@
unsigned long *crst_table_alloc(struct mm_struct *);
void crst_table_free(struct mm_struct *, unsigned long *);
-unsigned long *page_table_alloc(struct mm_struct *, unsigned long);
+unsigned long *page_table_alloc(struct mm_struct *);
void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *);
+void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long,
bool init_skey);
@@ -145,8 +145,8 @@ static inline void pmd_populate(struct mm_struct *mm,
/*
* page table entry allocation/free routines.
*/
-#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
-#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm, vmaddr))
+#define pte_alloc_one_kernel(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
+#define pte_alloc_one(mm, vmaddr) ((pte_t *) page_table_alloc(mm))
#define pte_free_kernel(mm, pte) page_table_free(mm, (unsigned long *) pte)
#define pte_free(mm, pte) page_table_free(mm, (unsigned long *) pte)
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index fcba5e0..57c8827 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -30,6 +30,7 @@
#include <linux/sched.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
+#include <linux/radix-tree.h>
#include <asm/bug.h>
#include <asm/page.h>
@@ -216,7 +217,6 @@ extern unsigned long MODULES_END;
*/
/* Hardware bits in the page table entry */
-#define _PAGE_CO 0x100 /* HW Change-bit override */
#define _PAGE_PROTECT 0x200 /* HW read-only bit */
#define _PAGE_INVALID 0x400 /* HW invalid bit */
#define _PAGE_LARGE 0x800 /* Bit to mark a large pte */
@@ -233,8 +233,8 @@ extern unsigned long MODULES_END;
#define __HAVE_ARCH_PTE_SPECIAL
/* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_CO | \
- _PAGE_DIRTY | _PAGE_YOUNG)
+#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_SPECIAL | _PAGE_DIRTY | \
+ _PAGE_YOUNG)
/*
* handle_pte_fault uses pte_present, pte_none and pte_file to find out the
@@ -287,7 +287,14 @@ extern unsigned long MODULES_END;
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
#define _SEGMENT_ENTRY_COMMON 0x10 /* common segment bit */
#define _SEGMENT_ENTRY_PTL 0x0f /* page table length */
-#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_PROTECT
+
+#define _SEGMENT_ENTRY_DIRTY 0 /* No sw dirty bit for 31-bit */
+#define _SEGMENT_ENTRY_YOUNG 0 /* No sw young bit for 31-bit */
+#define _SEGMENT_ENTRY_READ 0 /* No sw read bit for 31-bit */
+#define _SEGMENT_ENTRY_WRITE 0 /* No sw write bit for 31-bit */
+#define _SEGMENT_ENTRY_LARGE 0 /* No large pages for 31-bit */
+#define _SEGMENT_ENTRY_BITS_LARGE 0
+#define _SEGMENT_ENTRY_ORIGIN_LARGE 0
#define _SEGMENT_ENTRY (_SEGMENT_ENTRY_PTL)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
@@ -346,11 +353,10 @@ extern unsigned long MODULES_END;
#define _REGION3_ENTRY_LARGE 0x400 /* RTTE-format control, large page */
#define _REGION3_ENTRY_RO 0x200 /* page protection bit */
-#define _REGION3_ENTRY_CO 0x100 /* change-recording override */
/* Bits in the segment table entry */
#define _SEGMENT_ENTRY_BITS 0xfffffffffffffe33UL
-#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff1ff33UL
+#define _SEGMENT_ENTRY_BITS_LARGE 0xfffffffffff0ff33UL
#define _SEGMENT_ENTRY_ORIGIN_LARGE ~0xfffffUL /* large page address */
#define _SEGMENT_ENTRY_ORIGIN ~0x7ffUL/* segment table origin */
#define _SEGMENT_ENTRY_PROTECT 0x200 /* page protection bit */
@@ -359,30 +365,33 @@ extern unsigned long MODULES_END;
#define _SEGMENT_ENTRY (0)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
-#define _SEGMENT_ENTRY_LARGE 0x400 /* STE-format control, large page */
-#define _SEGMENT_ENTRY_CO 0x100 /* change-recording override */
-#define _SEGMENT_ENTRY_SPLIT 0x001 /* THP splitting bit */
-#define _SEGMENT_ENTRY_YOUNG 0x002 /* SW segment young bit */
-#define _SEGMENT_ENTRY_NONE _SEGMENT_ENTRY_YOUNG
+#define _SEGMENT_ENTRY_DIRTY 0x2000 /* SW segment dirty bit */
+#define _SEGMENT_ENTRY_YOUNG 0x1000 /* SW segment young bit */
+#define _SEGMENT_ENTRY_SPLIT 0x0800 /* THP splitting bit */
+#define _SEGMENT_ENTRY_LARGE 0x0400 /* STE-format control, large page */
+#define _SEGMENT_ENTRY_READ 0x0002 /* SW segment read bit */
+#define _SEGMENT_ENTRY_WRITE 0x0001 /* SW segment write bit */
/*
* Segment table entry encoding (R = read-only, I = invalid, y = young bit):
- * ..R...I...y.
- * prot-none, old ..0...1...1.
- * prot-none, young ..1...1...1.
- * read-only, old ..1...1...0.
- * read-only, young ..1...0...1.
- * read-write, old ..0...1...0.
- * read-write, young ..0...0...1.
+ * dy..R...I...wr
+ * prot-none, clean, old 00..1...1...00
+ * prot-none, clean, young 01..1...1...00
+ * prot-none, dirty, old 10..1...1...00
+ * prot-none, dirty, young 11..1...1...00
+ * read-only, clean, old 00..1...1...01
+ * read-only, clean, young 01..1...0...01
+ * read-only, dirty, old 10..1...1...01
+ * read-only, dirty, young 11..1...0...01
+ * read-write, clean, old 00..1...1...11
+ * read-write, clean, young 01..1...0...11
+ * read-write, dirty, old 10..0...1...11
+ * read-write, dirty, young 11..0...0...11
* The segment table origin is used to distinguish empty (origin==0) from
* read-write, old segment table entries (origin!=0)
*/
-#define _SEGMENT_ENTRY_SPLIT_BIT 0 /* THP splitting bit number */
-
-/* Set of bits not changed in pmd_modify */
-#define _SEGMENT_CHG_MASK (_SEGMENT_ENTRY_ORIGIN | _SEGMENT_ENTRY_LARGE \
- | _SEGMENT_ENTRY_SPLIT | _SEGMENT_ENTRY_CO)
+#define _SEGMENT_ENTRY_SPLIT_BIT 11 /* THP splitting bit number */
/* Page status table bits for virtualization */
#define PGSTE_ACC_BITS 0xf000000000000000UL
@@ -455,10 +464,11 @@ extern unsigned long MODULES_END;
* Segment entry (large page) protection definitions.
*/
#define SEGMENT_NONE __pgprot(_SEGMENT_ENTRY_INVALID | \
- _SEGMENT_ENTRY_NONE)
-#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_INVALID | \
_SEGMENT_ENTRY_PROTECT)
-#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_INVALID)
+#define SEGMENT_READ __pgprot(_SEGMENT_ENTRY_PROTECT | \
+ _SEGMENT_ENTRY_READ)
+#define SEGMENT_WRITE __pgprot(_SEGMENT_ENTRY_READ | \
+ _SEGMENT_ENTRY_WRITE)
static inline int mm_has_pgste(struct mm_struct *mm)
{
@@ -569,25 +579,23 @@ static inline int pmd_none(pmd_t pmd)
static inline int pmd_large(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) != 0;
-#else
- return 0;
-#endif
}
-static inline int pmd_prot_none(pmd_t pmd)
+static inline int pmd_pfn(pmd_t pmd)
{
- return (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) &&
- (pmd_val(pmd) & _SEGMENT_ENTRY_NONE);
+ unsigned long origin_mask;
+
+ origin_mask = _SEGMENT_ENTRY_ORIGIN;
+ if (pmd_large(pmd))
+ origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
}
static inline int pmd_bad(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
if (pmd_large(pmd))
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
-#endif
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
}
@@ -607,20 +615,22 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
#define __HAVE_ARCH_PMD_WRITE
static inline int pmd_write(pmd_t pmd)
{
- if (pmd_prot_none(pmd))
- return 0;
- return (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) == 0;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) != 0;
+}
+
+static inline int pmd_dirty(pmd_t pmd)
+{
+ int dirty = 1;
+ if (pmd_large(pmd))
+ dirty = (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) != 0;
+ return dirty;
}
static inline int pmd_young(pmd_t pmd)
{
- int young = 0;
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd))
- young = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) != 0;
- else
+ int young = 1;
+ if (pmd_large(pmd))
young = (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG) != 0;
-#endif
return young;
}
@@ -777,82 +787,67 @@ static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry)
/**
* struct gmap_struct - guest address space
+ * @crst_list: list of all crst tables used in the guest address space
* @mm: pointer to the parent mm_struct
+ * @guest_to_host: radix tree with guest to host address translation
+ * @host_to_guest: radix tree with pointer to segment table entries
+ * @guest_table_lock: spinlock to protect all entries in the guest page table
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
- * @crst_list: list of all crst tables used in the guest address space
* @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
+ struct list_head crst_list;
struct mm_struct *mm;
+ struct radix_tree_root guest_to_host;
+ struct radix_tree_root host_to_guest;
+ spinlock_t guest_table_lock;
unsigned long *table;
unsigned long asce;
+ unsigned long asce_end;
void *private;
- struct list_head crst_list;
bool pfault_enabled;
};
/**
- * struct gmap_rmap - reverse mapping for segment table entries
- * @gmap: pointer to the gmap_struct
- * @entry: pointer to a segment table entry
- * @vmaddr: virtual address in the guest address space
- */
-struct gmap_rmap {
- struct list_head list;
- struct gmap *gmap;
- unsigned long *entry;
- unsigned long vmaddr;
-};
-
-/**
- * struct gmap_pgtable - gmap information attached to a page table
- * @vmaddr: address of the 1MB segment in the process virtual memory
- * @mapper: list of segment table entries mapping a page table
- */
-struct gmap_pgtable {
- unsigned long vmaddr;
- struct list_head mapper;
-};
-
-/**
* struct gmap_notifier - notify function block for page invalidation
* @notifier_call: address of callback function
*/
struct gmap_notifier {
struct list_head list;
- void (*notifier_call)(struct gmap *gmap, unsigned long address);
+ void (*notifier_call)(struct gmap *gmap, unsigned long gaddr);
};
-struct gmap *gmap_alloc(struct mm_struct *mm);
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit);
void gmap_free(struct gmap *gmap);
void gmap_enable(struct gmap *gmap);
void gmap_disable(struct gmap *gmap);
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len);
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
-unsigned long __gmap_translate(unsigned long address, struct gmap *);
-unsigned long gmap_translate(unsigned long address, struct gmap *);
-unsigned long __gmap_fault(unsigned long address, struct gmap *);
-unsigned long gmap_fault(unsigned long address, struct gmap *);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
-void __gmap_zap(unsigned long address, struct gmap *);
+unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
+unsigned long gmap_translate(struct gmap *, unsigned long gaddr);
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
+int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags);
+void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
+void __gmap_zap(struct gmap *, unsigned long gaddr);
bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *);
void gmap_register_ipte_notifier(struct gmap_notifier *);
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
+void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
+ unsigned long addr,
pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
if (pgste_val(pgste) & PGSTE_IN_BIT) {
pgste_val(pgste) &= ~PGSTE_IN_BIT;
- gmap_do_ipte_notify(mm, ptep);
+ gmap_do_ipte_notify(mm, addr, ptep);
}
#endif
return pgste;
@@ -875,8 +870,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else {
- if (!(pte_val(entry) & _PAGE_INVALID) && MACHINE_HAS_EDAT1)
- pte_val(entry) |= _PAGE_CO;
*ptep = entry;
}
}
@@ -1046,6 +1039,22 @@ static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
}
+static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
+{
+ unsigned long pto = (unsigned long) ptep;
+
+#ifndef CONFIG_64BIT
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
+#endif
+ /* Invalidate a range of ptes + global TLB flush of the ptes */
+ do {
+ asm volatile(
+ " .insn rrf,0xb2210000,%2,%0,%1,0"
+ : "+a" (address), "+a" (nr) : "a" (pto) : "memory");
+ } while (nr != 255);
+}
+
static inline void ptep_flush_direct(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
@@ -1098,7 +1107,7 @@ static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm,
pgste_val(pgste) &= ~PGSTE_UC_BIT;
pte = *ptep;
if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, addr, ptep, pgste);
__ptep_ipte(addr, ptep);
if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
pte_val(pte) |= _PAGE_PROTECT;
@@ -1115,20 +1124,21 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
pgste_t pgste;
- pte_t pte;
+ pte_t pte, oldpte;
int young;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
}
- pte = *ptep;
+ oldpte = pte = *ptep;
ptep_flush_direct(vma->vm_mm, addr, ptep);
young = pte_young(pte);
pte = pte_mkold(pte);
if (mm_has_pgste(vma->vm_mm)) {
+ pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm);
pgste = pgste_set_pte(ptep, pgste, pte);
pgste_set_unlock(ptep, pgste);
} else
@@ -1166,7 +1176,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1190,7 +1200,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm,
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste_ipte_notify(mm, ptep, pgste);
+ pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1227,7 +1237,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma,
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1261,7 +1271,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
if (!full && mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
pte = *ptep;
@@ -1286,7 +1296,7 @@ static inline pte_t ptep_set_wrprotect(struct mm_struct *mm,
if (pte_write(pte)) {
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, address, ptep, pgste);
}
ptep_flush_lazy(mm, address, ptep);
@@ -1312,12 +1322,13 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma,
return 0;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
}
ptep_flush_direct(vma->vm_mm, address, ptep);
if (mm_has_pgste(vma->vm_mm)) {
+ pgste_set_key(ptep, pgste, entry, vma->vm_mm);
pgste = pgste_set_pte(ptep, pgste, entry);
pgste_set_unlock(ptep, pgste);
} else
@@ -1391,7 +1402,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pte_page(x) pfn_to_page(pte_pfn(x))
-#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
/* Find an entry in the lowest level page table.. */
#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
@@ -1413,41 +1424,75 @@ static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
return pgprot_val(SEGMENT_WRITE);
}
-static inline pmd_t pmd_mkyoung(pmd_t pmd)
+static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkwrite(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_WRITE;
+ if (pmd_large(pmd) && !(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ return pmd;
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ return pmd;
+}
+
+static inline pmd_t pmd_mkclean(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_DIRTY;
pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- } else {
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkdirty(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) |= _SEGMENT_ENTRY_DIRTY;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
+ }
+ return pmd;
+}
+
+static inline pmd_t pmd_mkyoung(pmd_t pmd)
+{
+ if (pmd_large(pmd)) {
pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
+ if (pmd_val(pmd) & _SEGMENT_ENTRY_READ)
+ pmd_val(pmd) &= ~_SEGMENT_ENTRY_INVALID;
}
-#endif
return pmd;
}
static inline pmd_t pmd_mkold(pmd_t pmd)
{
-#ifdef CONFIG_64BIT
- if (pmd_prot_none(pmd)) {
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- } else {
+ if (pmd_large(pmd)) {
pmd_val(pmd) &= ~_SEGMENT_ENTRY_YOUNG;
pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
}
-#endif
return pmd;
}
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
- int young;
-
- young = pmd_young(pmd);
- pmd_val(pmd) &= _SEGMENT_CHG_MASK;
+ if (pmd_large(pmd)) {
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN_LARGE |
+ _SEGMENT_ENTRY_DIRTY | _SEGMENT_ENTRY_YOUNG |
+ _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_SPLIT;
+ pmd_val(pmd) |= massage_pgprot_pmd(newprot);
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ if (!(pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG))
+ pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
+ return pmd;
+ }
+ pmd_val(pmd) &= _SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= massage_pgprot_pmd(newprot);
- if (young)
- pmd = pmd_mkyoung(pmd);
return pmd;
}
@@ -1455,16 +1500,9 @@ static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
pmd_t __pmd;
pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return pmd_mkyoung(__pmd);
+ return __pmd;
}
-static inline pmd_t pmd_mkwrite(pmd_t pmd)
-{
- /* Do not clobber PROT_NONE segments! */
- if (!pmd_prot_none(pmd))
- pmd_val(pmd) &= ~_SEGMENT_ENTRY_PROTECT;
- return pmd;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
static inline void __pmdp_csp(pmd_t *pmdp)
@@ -1555,34 +1593,21 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
static inline int pmd_trans_splitting(pmd_t pmd)
{
- return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+ return (pmd_val(pmd) & _SEGMENT_ENTRY_LARGE) &&
+ (pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT);
}
static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t entry)
{
- if (!(pmd_val(entry) & _SEGMENT_ENTRY_INVALID) && MACHINE_HAS_EDAT1)
- pmd_val(entry) |= _SEGMENT_ENTRY_CO;
*pmdp = entry;
}
static inline pmd_t pmd_mkhuge(pmd_t pmd)
{
pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
- return pmd;
-}
-
-static inline pmd_t pmd_wrprotect(pmd_t pmd)
-{
- /* Do not clobber PROT_NONE segments! */
- if (!pmd_prot_none(pmd))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
- return pmd;
-}
-
-static inline pmd_t pmd_mkdirty(pmd_t pmd)
-{
- /* No dirty bit in the segment table entry. */
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
return pmd;
}
@@ -1647,11 +1672,6 @@ static inline int has_transparent_hugepage(void)
{
return MACHINE_HAS_HPAGE ? 1 : 0;
}
-
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return pmd_val(pmd) >> PAGE_SHIFT;
-}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
/*
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 6f02d45..d559bdb 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -13,9 +13,11 @@
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
+#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define _CIF_MCCK_PENDING (1<<CIF_MCCK_PENDING)
#define _CIF_ASCE (1<<CIF_ASCE)
+#define _CIF_NOHZ_DELAY (1<<CIF_NOHZ_DELAY)
#ifndef __ASSEMBLY__
@@ -43,6 +45,8 @@ static inline int test_cpu_flag(int flag)
return !!(S390_lowcore.cpu_flags & (1U << flag));
}
+#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
+
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -113,6 +117,7 @@ struct thread_struct {
int ri_signum;
#ifdef CONFIG_64BIT
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
+ __vector128 *vxrs; /* Vector register save area */
#endif
};
@@ -217,7 +222,7 @@ static inline void cpu_relax(void)
barrier();
}
-#define arch_mutex_cpu_relax() barrier()
+#define cpu_relax_lowlatency() barrier()
static inline void psw_set_key(unsigned int key)
{
@@ -285,7 +290,12 @@ static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
return (psw.addr - ilc) & mask;
#endif
}
-
+
+/*
+ * Function to stop a processor until the next interrupt occurs
+ */
+void enabled_wait(void);
+
/*
* Function to drop a processor into disabled wait state
*/
diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index 55d69dd..be317fe 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -161,6 +161,12 @@ static inline long regs_return_value(struct pt_regs *regs)
return regs->gprs[2];
}
+static inline void instruction_pointer_set(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->psw.addr = val | PSW_ADDR_AMODE;
+}
+
int regs_query_register_offset(const char *name);
const char *regs_query_register_name(unsigned int offset);
unsigned long regs_get_register(struct pt_regs *regs, unsigned int offset);
diff --git a/arch/s390/include/asm/qdio.h b/arch/s390/include/asm/qdio.h
index d786c63..06f3034 100644
--- a/arch/s390/include/asm/qdio.h
+++ b/arch/s390/include/asm/qdio.h
@@ -415,6 +415,10 @@ struct qdio_brinfo_entry_l2 {
#define QDIO_FLAG_SYNC_OUTPUT 0x02
#define QDIO_FLAG_PCI_OUT 0x10
+int qdio_alloc_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_free_buffers(struct qdio_buffer **buf, unsigned int count);
+void qdio_reset_buffers(struct qdio_buffer **buf, unsigned int count);
+
extern int qdio_allocate(struct qdio_initialize *);
extern int qdio_establish(struct qdio_initialize *);
extern int qdio_activate(struct ccw_device *);
diff --git a/arch/s390/include/asm/scatterlist.h b/arch/s390/include/asm/scatterlist.h
deleted file mode 100644
index 6d45ef6..0000000
--- a/arch/s390/include/asm/scatterlist.h
+++ /dev/null
@@ -1,3 +0,0 @@
-#include <asm-generic/scatterlist.h>
-
-#define ARCH_HAS_SG_CHAIN
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 089a498..7736fdd 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -55,8 +55,8 @@ extern void detect_memory_memblock(void);
#define MACHINE_FLAG_LPP (1UL << 13)
#define MACHINE_FLAG_TOPOLOGY (1UL << 14)
#define MACHINE_FLAG_TE (1UL << 15)
-#define MACHINE_FLAG_RRBM (1UL << 16)
#define MACHINE_FLAG_TLB_LC (1UL << 17)
+#define MACHINE_FLAG_VX (1UL << 18)
#define MACHINE_IS_VM (S390_lowcore.machine_flags & MACHINE_FLAG_VM)
#define MACHINE_IS_KVM (S390_lowcore.machine_flags & MACHINE_FLAG_KVM)
@@ -78,8 +78,8 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_LPP (0)
#define MACHINE_HAS_TOPOLOGY (0)
#define MACHINE_HAS_TE (0)
-#define MACHINE_HAS_RRBM (0)
#define MACHINE_HAS_TLB_LC (0)
+#define MACHINE_HAS_VX (0)
#else /* CONFIG_64BIT */
#define MACHINE_HAS_IEEE (1)
#define MACHINE_HAS_CSP (1)
@@ -91,8 +91,8 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_LPP (S390_lowcore.machine_flags & MACHINE_FLAG_LPP)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
#define MACHINE_HAS_TE (S390_lowcore.machine_flags & MACHINE_FLAG_TE)
-#define MACHINE_HAS_RRBM (S390_lowcore.machine_flags & MACHINE_FLAG_RRBM)
#define MACHINE_HAS_TLB_LC (S390_lowcore.machine_flags & MACHINE_FLAG_TLB_LC)
+#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
#endif /* CONFIG_64BIT */
/*
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index bf9c823..4957611 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -15,6 +15,7 @@
#define SIGP_SET_ARCHITECTURE 18
#define SIGP_COND_EMERGENCY_SIGNAL 19
#define SIGP_SENSE_RUNNING 21
+#define SIGP_STORE_ADDITIONAL_STATUS 23
/* SIGP condition codes */
#define SIGP_CC_ORDER_CODE_ACCEPTED 0
@@ -33,9 +34,10 @@
#ifndef __ASSEMBLY__
-static inline int __pcpu_sigp(u16 addr, u8 order, u32 parm, u32 *status)
+static inline int __pcpu_sigp(u16 addr, u8 order, unsigned long parm,
+ u32 *status)
{
- register unsigned int reg1 asm ("1") = parm;
+ register unsigned long reg1 asm ("1") = parm;
int cc;
asm volatile(
diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h
index 4f13079..762d4f8 100644
--- a/arch/s390/include/asm/smp.h
+++ b/arch/s390/include/asm/smp.h
@@ -29,7 +29,6 @@ extern int smp_find_processor_id(u16 address);
extern int smp_store_status(int cpu);
extern int smp_vcpu_scheduled(int cpu);
extern void smp_yield_cpu(int cpu);
-extern void smp_yield(void);
extern void smp_cpu_set_polarization(int cpu, int val);
extern int smp_cpu_get_polarization(int cpu);
extern void smp_fill_possible_mask(void);
@@ -50,7 +49,6 @@ static inline int smp_find_processor_id(u16 address) { return 0; }
static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
-static inline void smp_yield(void) { }
static inline void smp_fill_possible_mask(void) { }
#endif /* CONFIG_SMP */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 96879f7..d6bdf90 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -37,11 +37,17 @@ _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new)
* (the type definitions are in asm/spinlock_types.h)
*/
+void arch_lock_relax(unsigned int cpu);
+
void arch_spin_lock_wait(arch_spinlock_t *);
int arch_spin_trylock_retry(arch_spinlock_t *);
-void arch_spin_relax(arch_spinlock_t *);
void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
+static inline void arch_spin_relax(arch_spinlock_t *lock)
+{
+ arch_lock_relax(lock->lock);
+}
+
static inline u32 arch_spin_lockval(int cpu)
{
return ~cpu;
@@ -64,11 +70,6 @@ static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
_raw_compare_and_swap(&lp->lock, 0, SPINLOCK_LOCKVAL));
}
-static inline int arch_spin_tryrelease_once(arch_spinlock_t *lp)
-{
- return _raw_compare_and_swap(&lp->lock, SPINLOCK_LOCKVAL, 0);
-}
-
static inline void arch_spin_lock(arch_spinlock_t *lp)
{
if (!arch_spin_trylock_once(lp))
@@ -91,7 +92,13 @@ static inline int arch_spin_trylock(arch_spinlock_t *lp)
static inline void arch_spin_unlock(arch_spinlock_t *lp)
{
- arch_spin_tryrelease_once(lp);
+ typecheck(unsigned int, lp->lock);
+ asm volatile(
+ __ASM_BARRIER
+ "st %1,%0\n"
+ : "+Q" (lp->lock)
+ : "d" (0)
+ : "cc", "memory");
}
static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
@@ -123,13 +130,12 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
*/
#define arch_write_can_lock(x) ((x)->lock == 0)
-extern void _raw_read_lock_wait(arch_rwlock_t *lp);
-extern void _raw_read_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
extern int _raw_read_trylock_retry(arch_rwlock_t *lp);
-extern void _raw_write_lock_wait(arch_rwlock_t *lp);
-extern void _raw_write_lock_wait_flags(arch_rwlock_t *lp, unsigned long flags);
extern int _raw_write_trylock_retry(arch_rwlock_t *lp);
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
static inline int arch_read_trylock_once(arch_rwlock_t *rw)
{
unsigned int old = ACCESS_ONCE(rw->lock);
@@ -144,16 +150,82 @@ static inline int arch_write_trylock_once(arch_rwlock_t *rw)
_raw_compare_and_swap(&rw->lock, 0, 0x80000000));
}
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+#define __RAW_OP_OR "lao"
+#define __RAW_OP_AND "lan"
+#define __RAW_OP_ADD "laa"
+
+#define __RAW_LOCK(ptr, op_val, op_string) \
+({ \
+ unsigned int old_val; \
+ \
+ typecheck(unsigned int *, ptr); \
+ asm volatile( \
+ op_string " %0,%2,%1\n" \
+ "bcr 14,0\n" \
+ : "=d" (old_val), "+Q" (*ptr) \
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+#define __RAW_UNLOCK(ptr, op_val, op_string) \
+({ \
+ unsigned int old_val; \
+ \
+ typecheck(unsigned int *, ptr); \
+ asm volatile( \
+ "bcr 14,0\n" \
+ op_string " %0,%2,%1\n" \
+ : "=d" (old_val), "+Q" (*ptr) \
+ : "d" (op_val) \
+ : "cc", "memory"); \
+ old_val; \
+})
+
+extern void _raw_read_lock_wait(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp, unsigned int prev);
+
static inline void arch_read_lock(arch_rwlock_t *rw)
{
- if (!arch_read_trylock_once(rw))
+ unsigned int old;
+
+ old = __RAW_LOCK(&rw->lock, 1, __RAW_OP_ADD);
+ if ((int) old < 0)
_raw_read_lock_wait(rw);
}
-static inline void arch_read_lock_flags(arch_rwlock_t *rw, unsigned long flags)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+ __RAW_UNLOCK(&rw->lock, -1, __RAW_OP_ADD);
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+ unsigned int old;
+
+ old = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
+ if (old != 0)
+ _raw_write_lock_wait(rw, old);
+ rw->owner = SPINLOCK_LOCKVAL;
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+ rw->owner = 0;
+ __RAW_UNLOCK(&rw->lock, 0x7fffffff, __RAW_OP_AND);
+}
+
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+extern void _raw_read_lock_wait(arch_rwlock_t *lp);
+extern void _raw_write_lock_wait(arch_rwlock_t *lp);
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
- _raw_read_lock_wait_flags(rw, flags);
+ _raw_read_lock_wait(rw);
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
@@ -169,19 +241,24 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
{
if (!arch_write_trylock_once(rw))
_raw_write_lock_wait(rw);
-}
-
-static inline void arch_write_lock_flags(arch_rwlock_t *rw, unsigned long flags)
-{
- if (!arch_write_trylock_once(rw))
- _raw_write_lock_wait_flags(rw, flags);
+ rw->owner = SPINLOCK_LOCKVAL;
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
- _raw_compare_and_swap(&rw->lock, 0x80000000, 0);
+ typecheck(unsigned int, rw->lock);
+
+ rw->owner = 0;
+ asm volatile(
+ __ASM_BARRIER
+ "st %1,%0\n"
+ : "+Q" (rw->lock)
+ : "d" (0)
+ : "cc", "memory");
}
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
if (!arch_read_trylock_once(rw))
@@ -191,12 +268,20 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
- if (!arch_write_trylock_once(rw))
- return _raw_write_trylock_retry(rw);
+ if (!arch_write_trylock_once(rw) && !_raw_write_trylock_retry(rw))
+ return 0;
+ rw->owner = SPINLOCK_LOCKVAL;
return 1;
}
-#define arch_read_relax(lock) cpu_relax()
-#define arch_write_relax(lock) cpu_relax()
+static inline void arch_read_relax(arch_rwlock_t *rw)
+{
+ arch_lock_relax(rw->owner);
+}
+
+static inline void arch_write_relax(arch_rwlock_t *rw)
+{
+ arch_lock_relax(rw->owner);
+}
#endif /* __ASM_SPINLOCK_H */
diff --git a/arch/s390/include/asm/spinlock_types.h b/arch/s390/include/asm/spinlock_types.h
index b2cd6ff..d84b693 100644
--- a/arch/s390/include/asm/spinlock_types.h
+++ b/arch/s390/include/asm/spinlock_types.h
@@ -13,6 +13,7 @@ typedef struct {
typedef struct {
unsigned int lock;
+ unsigned int owner;
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 18ea9e3..2542a7e 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -103,6 +103,61 @@ static inline void restore_fp_regs(freg_t *fprs)
asm volatile("ld 15,%0" : : "Q" (fprs[15]));
}
+static inline void save_vx_regs(__vector128 *vxrs)
+{
+ typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x003e\n" /* vstm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c3e\n" /* vstm 16,31,256(1) */
+ : "=Q" (*(addrtype *) vxrs) : : "1");
+}
+
+static inline void save_vx_regs_safe(__vector128 *vxrs)
+{
+ unsigned long cr0, flags;
+
+ flags = arch_local_irq_save();
+ __ctl_store(cr0, 0, 0);
+ __ctl_set_bit(0, 17);
+ __ctl_set_bit(0, 18);
+ save_vx_regs(vxrs);
+ __ctl_load(cr0, 0, 0);
+ arch_local_irq_restore(flags);
+}
+
+static inline void restore_vx_regs(__vector128 *vxrs)
+{
+ typedef struct { __vector128 _[__NUM_VXRS]; } addrtype;
+
+ asm volatile(
+ " la 1,%0\n"
+ " .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
+ " .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
+ : : "Q" (*(addrtype *) vxrs) : "1");
+}
+
+static inline void save_fp_vx_regs(struct task_struct *task)
+{
+#ifdef CONFIG_64BIT
+ if (task->thread.vxrs)
+ save_vx_regs(task->thread.vxrs);
+ else
+#endif
+ save_fp_regs(task->thread.fp_regs.fprs);
+}
+
+static inline void restore_fp_vx_regs(struct task_struct *task)
+{
+#ifdef CONFIG_64BIT
+ if (task->thread.vxrs)
+ restore_vx_regs(task->thread.vxrs);
+ else
+#endif
+ restore_fp_regs(task->thread.fp_regs.fprs);
+}
+
static inline void save_access_regs(unsigned int *acrs)
{
typedef struct { int _[NUM_ACRS]; } acrstype;
@@ -120,16 +175,16 @@ static inline void restore_access_regs(unsigned int *acrs)
#define switch_to(prev,next,last) do { \
if (prev->mm) { \
save_fp_ctl(&prev->thread.fp_regs.fpc); \
- save_fp_regs(prev->thread.fp_regs.fprs); \
+ save_fp_vx_regs(prev); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
} \
if (next->mm) { \
+ update_cr_regs(next); \
restore_fp_ctl(&next->thread.fp_regs.fpc); \
- restore_fp_regs(next->thread.fp_regs.fprs); \
+ restore_fp_vx_regs(next); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
- update_cr_regs(next); \
} \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index abad78d..5bc1259 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -54,7 +54,7 @@ static inline void syscall_set_return_value(struct task_struct *task,
struct pt_regs *regs,
int error, long val)
{
- regs->gprs[2] = error ? -error : val;
+ regs->gprs[2] = error ? error : val;
}
static inline void syscall_get_arguments(struct task_struct *task,
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index b833e9c..4d62fd5 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -84,11 +84,13 @@ static inline struct thread_info *current_thread_info(void)
#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
#define TIF_SECCOMP 5 /* secure computing */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
+#define TIF_UPROBE 7 /* breakpointed or single-stepping */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
#define TIF_SINGLE_STEP 19 /* This task is single stepped */
#define TIF_BLOCK_STEP 20 /* This task is block stepped */
+#define TIF_UPROBE_SINGLESTEP 21 /* This task is uprobe single stepped */
#define _TIF_NOTIFY_RESUME (1<<TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1<<TIF_SIGPENDING)
@@ -97,6 +99,7 @@ static inline struct thread_info *current_thread_info(void)
#define _TIF_SYSCALL_AUDIT (1<<TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1<<TIF_SECCOMP)
#define _TIF_SYSCALL_TRACEPOINT (1<<TIF_SYSCALL_TRACEPOINT)
+#define _TIF_UPROBE (1<<TIF_UPROBE)
#define _TIF_31BIT (1<<TIF_31BIT)
#define _TIF_SINGLE_STEP (1<<TIF_SINGLE_STEP)
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index a25f09f..572c599 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -105,7 +105,7 @@ static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long address)
{
- page_table_free_rcu(tlb, (unsigned long *) pte);
+ page_table_free_rcu(tlb, (unsigned long *) pte, address);
}
/*
diff --git a/arch/s390/include/asm/uprobes.h b/arch/s390/include/asm/uprobes.h
new file mode 100644
index 0000000..1411dff
--- /dev/null
+++ b/arch/s390/include/asm/uprobes.h
@@ -0,0 +1,42 @@
+/*
+ * User-space Probes (UProbes) for s390
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Jan Willeke,
+ */
+
+#ifndef _ASM_UPROBES_H
+#define _ASM_UPROBES_H
+
+#include <linux/notifier.h>
+
+typedef u16 uprobe_opcode_t;
+
+#define UPROBE_XOL_SLOT_BYTES 256 /* cache aligned */
+
+#define UPROBE_SWBP_INSN 0x0002
+#define UPROBE_SWBP_INSN_SIZE 2
+
+struct arch_uprobe {
+ union{
+ uprobe_opcode_t insn[3];
+ uprobe_opcode_t ixol[3];
+ };
+ unsigned int saved_per : 1;
+ unsigned int saved_int_code;
+};
+
+struct arch_uprobe_task {
+};
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm,
+ unsigned long addr);
+int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+ void *data);
+void arch_uprobe_abort_xol(struct arch_uprobe *ap, struct pt_regs *regs);
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+ struct pt_regs *regs);
+#endif /* _ASM_UPROBES_H */
diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h
index bc9746a..a62526d 100644
--- a/arch/s390/include/asm/vdso.h
+++ b/arch/s390/include/asm/vdso.h
@@ -22,13 +22,17 @@ struct vdso_data {
__u64 xtime_tod_stamp; /* TOD clock for xtime 0x08 */
__u64 xtime_clock_sec; /* Kernel time 0x10 */
__u64 xtime_clock_nsec; /* 0x18 */
- __u64 wtom_clock_sec; /* Wall to monotonic clock 0x20 */
- __u64 wtom_clock_nsec; /* 0x28 */
- __u32 tz_minuteswest; /* Minutes west of Greenwich 0x30 */
- __u32 tz_dsttime; /* Type of dst correction 0x34 */
- __u32 ectg_available; /* ECTG instruction present 0x38 */
- __u32 tk_mult; /* Mult. used for xtime_nsec 0x3c */
- __u32 tk_shift; /* Shift used for xtime_nsec 0x40 */
+ __u64 xtime_coarse_sec; /* Coarse kernel time 0x20 */
+ __u64 xtime_coarse_nsec; /* 0x28 */
+ __u64 wtom_clock_sec; /* Wall to monotonic clock 0x30 */
+ __u64 wtom_clock_nsec; /* 0x38 */
+ __u64 wtom_coarse_sec; /* Coarse wall to monotonic 0x40 */
+ __u64 wtom_coarse_nsec; /* 0x48 */
+ __u32 tz_minuteswest; /* Minutes west of Greenwich 0x50 */
+ __u32 tz_dsttime; /* Type of dst correction 0x54 */
+ __u32 ectg_available; /* ECTG instruction present 0x58 */
+ __u32 tk_mult; /* Mult. used for xtime_nsec 0x5c */
+ __u32 tk_shift; /* Shift used for xtime_nsec 0x60 */
};
struct vdso_per_cpu_data {
diff --git a/arch/s390/include/asm/vtimer.h b/arch/s390/include/asm/vtimer.h
index bfe25d5..10a179a 100644
--- a/arch/s390/include/asm/vtimer.h
+++ b/arch/s390/include/asm/vtimer.h
@@ -28,6 +28,4 @@ extern int del_virt_timer(struct vtimer_list *timer);
extern void init_cpu_vtimer(void);
extern void vtime_init(void);
-extern void vtime_stop_cpu(void);
-
#endif /* _ASM_S390_TIMER_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 7366373..08fe6da 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -16,6 +16,7 @@ header-y += ioctls.h
header-y += ipcbuf.h
header-y += kvm.h
header-y += kvm_para.h
+header-y += kvm_perf.h
header-y += kvm_virtio.h
header-y += mman.h
header-y += monwriter.h
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 0fc2643..48eda3a 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -111,12 +111,22 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GPRS (1UL << 1)
#define KVM_SYNC_ACRS (1UL << 2)
#define KVM_SYNC_CRS (1UL << 3)
+#define KVM_SYNC_ARCH0 (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
__u64 gprs[16]; /* general purpose registers */
__u32 acrs[16]; /* access registers */
__u64 crs[16]; /* control registers */
+ __u64 todpr; /* tod programmable register [ARCH0] */
+ __u64 cputm; /* cpu timer [ARCH0] */
+ __u64 ckc; /* clock comparator [ARCH0] */
+ __u64 pp; /* program parameter [ARCH0] */
+ __u64 gbea; /* guest breaking-event address [ARCH0] */
+ __u64 pft; /* pfault token [PFAULT] */
+ __u64 pfs; /* pfault select [PFAULT] */
+ __u64 pfc; /* pfault compare [PFAULT] */
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/kvm_perf.h b/arch/s390/include/uapi/asm/kvm_perf.h
new file mode 100644
index 0000000..3972827
--- /dev/null
+++ b/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,25 @@
+/*
+ * Definitions for perf-kvm on s390
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_KVM_PERF_S390_H
+#define __LINUX_KVM_PERF_S390_H
+
+#include <asm/sie.h>
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
+#define KVM_EXIT_REASON "icptcode"
+
+#endif
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 5d9cc19..d4096fd 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -108,6 +108,7 @@
exit_code_ipa0(0xB2, 0x17, "STETR"), \
exit_code_ipa0(0xB2, 0x18, "PC"), \
exit_code_ipa0(0xB2, 0x20, "SERVC"), \
+ exit_code_ipa0(0xB2, 0x21, "IPTE"), \
exit_code_ipa0(0xB2, 0x28, "PT"), \
exit_code_ipa0(0xB2, 0x29, "ISKE"), \
exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
diff --git a/arch/s390/include/uapi/asm/sigcontext.h b/arch/s390/include/uapi/asm/sigcontext.h
index b30de9c..5f0b8d7 100644
--- a/arch/s390/include/uapi/asm/sigcontext.h
+++ b/arch/s390/include/uapi/asm/sigcontext.h
@@ -7,10 +7,14 @@
#define _ASM_S390_SIGCONTEXT_H
#include <linux/compiler.h>
+#include <linux/types.h>
-#define __NUM_GPRS 16
-#define __NUM_FPRS 16
-#define __NUM_ACRS 16
+#define __NUM_GPRS 16
+#define __NUM_FPRS 16
+#define __NUM_ACRS 16
+#define __NUM_VXRS 32
+#define __NUM_VXRS_LOW 16
+#define __NUM_VXRS_HIGH 16
#ifndef __s390x__
@@ -59,6 +63,16 @@ typedef struct
_s390_fp_regs fpregs;
} _sigregs;
+typedef struct
+{
+#ifndef __s390x__
+ unsigned long gprs_high[__NUM_GPRS];
+#endif
+ unsigned long long vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ unsigned char __reserved[128];
+} _sigregs_ext;
+
struct sigcontext
{
unsigned long oldmask[_SIGCONTEXT_NSIG_WORDS];
diff --git a/arch/s390/include/uapi/asm/types.h b/arch/s390/include/uapi/asm/types.h
index 038f2b9..3c3951e 100644
--- a/arch/s390/include/uapi/asm/types.h
+++ b/arch/s390/include/uapi/asm/types.h
@@ -17,6 +17,10 @@
typedef unsigned long addr_t;
typedef __signed__ long saddr_t;
+typedef struct {
+ __u32 u[4];
+} __vector128;
+
#endif /* __ASSEMBLY__ */
#endif /* _UAPI_S390_TYPES_H */
diff --git a/arch/s390/include/uapi/asm/ucontext.h b/arch/s390/include/uapi/asm/ucontext.h
index 3e077b2..64a69aa 100644
--- a/arch/s390/include/uapi/asm/ucontext.h
+++ b/arch/s390/include/uapi/asm/ucontext.h
@@ -7,10 +7,15 @@
#ifndef _ASM_S390_UCONTEXT_H
#define _ASM_S390_UCONTEXT_H
-#define UC_EXTENDED 0x00000001
-
-#ifndef __s390x__
+#define UC_GPRS_HIGH 1 /* uc_mcontext_ext has valid high gprs */
+#define UC_VXRS 2 /* uc_mcontext_ext has valid vector regs */
+/*
+ * The struct ucontext_extended describes how the registers are stored
+ * on a rt signal frame. Please note that the structure is not fixed,
+ * if new CPU registers are added to the user state the size of the
+ * struct ucontext_extended will increase.
+ */
struct ucontext_extended {
unsigned long uc_flags;
struct ucontext *uc_link;
@@ -19,11 +24,9 @@ struct ucontext_extended {
sigset_t uc_sigmask;
/* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
unsigned char __unused[128 - sizeof(sigset_t)];
- unsigned long uc_gprs_high[16];
+ _sigregs_ext uc_mcontext_ext;
};
-#endif
-
struct ucontext {
unsigned long uc_flags;
struct ucontext *uc_link;
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 3802d2d..940ac49 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -283,7 +283,10 @@
#define __NR_sched_setattr 345
#define __NR_sched_getattr 346
#define __NR_renameat2 347
-#define NR_syscalls 348
+#define __NR_seccomp 348
+#define __NR_getrandom 349
+#define __NR_memfd_create 350
+#define NR_syscalls 351
/*
* There are some system calls that are not present on 64 bit, some
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index a95c4ca..204c43a 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -28,7 +28,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
CFLAGS_sysinfo.o += -Iinclude/math-emu -Iarch/s390/math-emu -w
-obj-y := traps.o time.o process.o base.o early.o setup.o vtime.o
+obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o sclp.o vdso.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
@@ -52,11 +52,9 @@ obj-$(CONFIG_COMPAT) += compat_wrapper.o $(compat-obj-y)
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_FUNCTION_TRACER) += $(if $(CONFIG_64BIT),mcount64.o,mcount.o)
-obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
-obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o
+obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
+obj-$(CONFIG_UPROBES) += uprobes.o
ifdef CONFIG_64BIT
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf.o perf_cpum_sf.o \
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index afe1715..ef279a1 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -9,7 +9,7 @@
#include <linux/kbuild.h>
#include <linux/kvm_host.h>
#include <linux/sched.h>
-#include <asm/cputime.h>
+#include <asm/idle.h>
#include <asm/vdso.h>
#include <asm/pgtable.h>
@@ -62,8 +62,12 @@ int main(void)
DEFINE(__VDSO_XTIME_STAMP, offsetof(struct vdso_data, xtime_tod_stamp));
DEFINE(__VDSO_XTIME_SEC, offsetof(struct vdso_data, xtime_clock_sec));
DEFINE(__VDSO_XTIME_NSEC, offsetof(struct vdso_data, xtime_clock_nsec));
+ DEFINE(__VDSO_XTIME_CRS_SEC, offsetof(struct vdso_data, xtime_coarse_sec));
+ DEFINE(__VDSO_XTIME_CRS_NSEC, offsetof(struct vdso_data, xtime_coarse_nsec));
DEFINE(__VDSO_WTOM_SEC, offsetof(struct vdso_data, wtom_clock_sec));
DEFINE(__VDSO_WTOM_NSEC, offsetof(struct vdso_data, wtom_clock_nsec));
+ DEFINE(__VDSO_WTOM_CRS_SEC, offsetof(struct vdso_data, wtom_coarse_sec));
+ DEFINE(__VDSO_WTOM_CRS_NSEC, offsetof(struct vdso_data, wtom_coarse_nsec));
DEFINE(__VDSO_TIMEZONE, offsetof(struct vdso_data, tz_minuteswest));
DEFINE(__VDSO_ECTG_OK, offsetof(struct vdso_data, ectg_available));
DEFINE(__VDSO_TK_MULT, offsetof(struct vdso_data, tk_mult));
@@ -73,8 +77,11 @@ int main(void)
/* constants used by the vdso */
DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
+ DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
+ DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
DEFINE(__CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
BLANK();
/* idle data offsets */
DEFINE(__CLOCK_IDLE_ENTER, offsetof(struct s390_idle_data, clock_idle_enter));
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 70d4b7c..a0a886c 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -50,6 +50,14 @@ typedef struct
_s390_fp_regs32 fpregs;
} _sigregs32;
+typedef struct
+{
+ __u32 gprs_high[__NUM_GPRS];
+ __u64 vxrs_low[__NUM_VXRS_LOW];
+ __vector128 vxrs_high[__NUM_VXRS_HIGH];
+ __u8 __reserved[128];
+} _sigregs_ext32;
+
#define _SIGCONTEXT_NSIG32 64
#define _SIGCONTEXT_NSIG_BPW32 32
#define __SIGNAL_FRAMESIZE32 96
@@ -72,6 +80,7 @@ struct ucontext32 {
compat_sigset_t uc_sigmask;
/* Allow for uc_sigmask growth. Glibc uses a 1024-bit sigset_t. */
unsigned char __unused[128 - sizeof(compat_sigset_t)];
+ _sigregs_ext32 uc_mcontext_ext;
};
struct stat64_emu31;
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index f204d69..009f5eb 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -36,17 +36,16 @@ typedef struct
struct sigcontext32 sc;
_sigregs32 sregs;
int signo;
- __u32 gprs_high[NUM_GPRS];
- __u8 retcode[S390_SYSCALL_SIZE];
+ _sigregs_ext32 sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
} sigframe32;
typedef struct
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
- __u8 retcode[S390_SYSCALL_SIZE];
+ __u16 svc_insn;
compat_siginfo_t info;
struct ucontext32 uc;
- __u32 gprs_high[NUM_GPRS];
} rt_sigframe32;
int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from)
@@ -151,6 +150,38 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
return err ? -EFAULT : 0;
}
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ int i;
+
+ save_access_regs(current->thread.acrs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+ if (current->thread.vxrs) {
+ save_vx_regs(current->thread.vxrs);
+ for (i = 0; i < __NUM_FPRS; i++)
+ current->thread.fp_regs.fprs[i] =
+ *(freg_t *)(current->thread.vxrs + i);
+ } else
+ save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ int i;
+
+ restore_access_regs(current->thread.acrs);
+ /* restore_fp_ctl is done in restore_sigregs */
+ if (current->thread.vxrs) {
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(current->thread.vxrs + i) =
+ current->thread.fp_regs.fprs[i];
+ restore_vx_regs(current->thread.vxrs);
+ } else
+ restore_fp_regs(current->thread.fp_regs.fprs);
+}
+
static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
{
_sigregs32 user_sregs;
@@ -163,11 +194,8 @@ static int save_sigregs32(struct pt_regs *regs, _sigregs32 __user *sregs)
(__u32)(regs->psw.mask & PSW_MASK_BA);
for (i = 0; i < NUM_GPRS; i++)
user_sregs.regs.gprs[i] = (__u32) regs->gprs[i];
- save_access_regs(current->thread.acrs);
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- save_fp_ctl(&current->thread.fp_regs.fpc);
- save_fp_regs(current->thread.fp_regs.fprs);
memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
sizeof(user_sregs.fpregs));
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs32)))
@@ -207,37 +235,67 @@ static int restore_sigregs32(struct pt_regs *regs,_sigregs32 __user *sregs)
regs->gprs[i] = (__u64) user_sregs.regs.gprs[i];
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- restore_access_regs(current->thread.acrs);
memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
sizeof(current->thread.fp_regs));
- restore_fp_regs(current->thread.fp_regs.fprs);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
}
-static int save_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
+static int save_sigregs_ext32(struct pt_regs *regs,
+ _sigregs_ext32 __user *sregs_ext)
{
__u32 gprs_high[NUM_GPRS];
+ __u64 vxrs[__NUM_VXRS_LOW];
int i;
+ /* Save high gprs to signal stack */
for (i = 0; i < NUM_GPRS; i++)
gprs_high[i] = regs->gprs[i] >> 32;
- if (__copy_to_user(uregs, &gprs_high, sizeof(gprs_high)))
+ if (__copy_to_user(&sregs_ext->gprs_high, &gprs_high,
+ sizeof(sregs_ext->gprs_high)))
return -EFAULT;
+
+ /* Save vector registers to signal stack */
+ if (current->thread.vxrs) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
return 0;
}
-static int restore_sigregs_gprs_high(struct pt_regs *regs, __u32 __user *uregs)
+static int restore_sigregs_ext32(struct pt_regs *regs,
+ _sigregs_ext32 __user *sregs_ext)
{
__u32 gprs_high[NUM_GPRS];
+ __u64 vxrs[__NUM_VXRS_LOW];
int i;
- if (__copy_from_user(&gprs_high, uregs, sizeof(gprs_high)))
+ /* Restore high gprs from signal stack */
+ if (__copy_from_user(&gprs_high, &sregs_ext->gprs_high,
+ sizeof(&sregs_ext->gprs_high)))
return -EFAULT;
for (i = 0; i < NUM_GPRS; i++)
*(__u32 *)&regs->gprs[i] = gprs_high[i];
+
+ /* Restore vector registers from signal stack */
+ if (current->thread.vxrs) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ }
return 0;
}
@@ -252,8 +310,9 @@ COMPAT_SYSCALL_DEFINE0(sigreturn)
set_current_blocked(&set);
if (restore_sigregs32(regs, &frame->sregs))
goto badframe;
- if (restore_sigregs_gprs_high(regs, frame->gprs_high))
+ if (restore_sigregs_ext32(regs, &frame->sregs_ext))
goto badframe;
+ load_sigregs();
return regs->gprs[2];
badframe:
force_sig(SIGSEGV, current);
@@ -269,12 +328,13 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
set_current_blocked(&set);
+ if (compat_restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (restore_sigregs_gprs_high(regs, frame->gprs_high))
+ if (restore_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
goto badframe;
- if (compat_restore_altstack(&frame->uc.uc_stack))
- goto badframe;
+ load_sigregs();
return regs->gprs[2];
badframe:
force_sig(SIGSEGV, current);
@@ -320,46 +380,74 @@ static inline int map_signal(int sig)
return sig;
}
-static int setup_frame32(int sig, struct k_sigaction *ka,
- sigset_t *set, struct pt_regs * regs)
+static int setup_frame32(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
{
- sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(sigframe32));
-
+ int sig = ksig->sig;
+ sigframe32 __user *frame;
+ struct sigcontext32 sc;
+ unsigned long restorer;
+ size_t frame_size;
+
+ /*
+ * gprs_high are always present for 31-bit compat tasks.
+ * The space for vector registers is only allocated if
+ * the machine supports it
+ */
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext.__reserved);
+ if (!MACHINE_HAS_VX)
+ frame_size -= sizeof(frame->sregs_ext.vxrs_low) +
+ sizeof(frame->sregs_ext.vxrs_high);
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
- goto give_sigsegv;
+ return -EFAULT;
+
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext32 on the signal stack */
+ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32);
+ sc.sregs = (__u32)(unsigned long __force) &frame->sregs;
+ if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+ return -EFAULT;
- if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32))
- goto give_sigsegv;
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+ /* Create _sigregs32 on the signal stack */
if (save_sigregs32(regs, &frame->sregs))
- goto give_sigsegv;
- if (save_sigregs_gprs_high(regs, frame->gprs_high))
- goto give_sigsegv;
- if (__put_user((unsigned long) &frame->sregs, &frame->sc.sregs))
- goto give_sigsegv;
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext32 on the signal stack */
+ if (save_sigregs_ext32(regs, &frame->sregs_ext))
+ return -EFAULT;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long __force)
+ ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
- (u16 __force __user *)(frame->retcode)))
- goto give_sigsegv;
+ /* Signal frames without vectors registers are short ! */
+ __u16 __user *svc = (void *) frame + frame_size - 2;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
}
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __user *) frame))
- goto give_sigsegv;
-
/* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
regs->gprs[15] = (__force __u64) frame;
/* Force 31 bit amode and default user address space control. */
regs->psw.mask = PSW_MASK_BA |
(PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__force __u64) ka->sa.sa_handler;
+ regs->psw.addr = (__force __u64) ksig->ka.sa.sa_handler;
regs->gprs[2] = map_signal(sig);
regs->gprs[3] = (__force __u64) &frame->sc;
@@ -374,89 +462,98 @@ static int setup_frame32(int sig, struct k_sigaction *ka,
regs->gprs[6] = task_thread_info(current)->last_break;
}
- /* Place signal number on stack to allow backtrace from handler. */
- if (__put_user(regs->gprs[2], (int __force __user *) &frame->signo))
- goto give_sigsegv;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static int setup_rt_frame32(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
+static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
{
- int err = 0;
- rt_sigframe32 __user *frame = get_sigframe(ka, regs, sizeof(rt_sigframe32));
-
+ rt_sigframe32 __user *frame;
+ unsigned long restorer;
+ size_t frame_size;
+ u32 uc_flags;
+
+ frame_size = sizeof(*frame) -
+ sizeof(frame->uc.uc_mcontext_ext.__reserved);
+ /*
+ * gprs_high are always present for 31-bit compat tasks.
+ * The space for vector registers is only allocated if
+ * the machine supports it
+ */
+ uc_flags = UC_GPRS_HIGH;
+ if (MACHINE_HAS_VX) {
+ if (current->thread.vxrs)
+ uc_flags |= UC_VXRS;
+ } else
+ frame_size -= sizeof(frame->uc.uc_mcontext_ext.vxrs_low) +
+ sizeof(frame->uc.uc_mcontext_ext.vxrs_high);
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
- if (copy_siginfo_to_user32(&frame->info, info))
- goto give_sigsegv;
+ return -EFAULT;
- /* Create the ucontext. */
- err |= __put_user(UC_EXTENDED, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
- err |= save_sigregs32(regs, &frame->uc.uc_mcontext);
- err |= save_sigregs_gprs_high(regs, frame->gprs_high);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto give_sigsegv;
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
+ return -EFAULT;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (__u64 __force) ka->sa.sa_restorer | PSW32_ADDR_AMODE;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long __force)
+ ksig->ka.sa.sa_restorer | PSW32_ADDR_AMODE;
} else {
- regs->gprs[14] = (__u64 __force) frame->retcode | PSW32_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __force __user *)(frame->retcode)))
- goto give_sigsegv;
+ __u16 __user *svc = &frame->svc_insn;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long __force) svc | PSW32_ADDR_AMODE;
}
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (unsigned int __force __user *) frame))
- goto give_sigsegv;
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user32(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(0, &frame->uc.uc_link) ||
+ __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs32(regs, &frame->uc.uc_mcontext) ||
+ __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
/* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
regs->gprs[15] = (__force __u64) frame;
/* Force 31 bit amode and default user address space control. */
regs->psw.mask = PSW_MASK_BA |
(PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (__u64 __force) ka->sa.sa_handler;
+ regs->psw.addr = (__u64 __force) ksig->ka.sa.sa_handler;
- regs->gprs[2] = map_signal(sig);
+ regs->gprs[2] = map_signal(ksig->sig);
regs->gprs[3] = (__force __u64) &frame->info;
regs->gprs[4] = (__force __u64) &frame->uc;
regs->gprs[5] = task_thread_info(current)->last_break;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
/*
* OK, we're invoking a handler
*/
-void handle_signal32(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
{
int ret;
/* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame32(sig, ka, info, oldset, regs);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame32(ksig, oldset, regs);
else
- ret = setup_frame32(sig, ka, oldset, regs);
- if (ret)
- return;
- signal_delivered(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLE_STEP));
+ ret = setup_frame32(ksig, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
}
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index 45cdb37..faf6caa 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -214,3 +214,6 @@ COMPAT_SYSCALL_WRAP3(finit_module, int, fd, const char __user *, uargs, int, fla
COMPAT_SYSCALL_WRAP3(sched_setattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, flags);
COMPAT_SYSCALL_WRAP4(sched_getattr, pid_t, pid, struct sched_attr __user *, attr, unsigned int, size, unsigned int, flags);
COMPAT_SYSCALL_WRAP5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags);
+COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char __user *, uargs)
+COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags)
+COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags)
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index a3b9150..9f73c80 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -46,9 +46,9 @@ struct dump_save_areas dump_save_areas;
/*
* Allocate and add a save area for a CPU
*/
-struct save_area *dump_save_area_create(int cpu)
+struct save_area_ext *dump_save_area_create(int cpu)
{
- struct save_area **save_areas, *save_area;
+ struct save_area_ext **save_areas, *save_area;
save_area = kmalloc(sizeof(*save_area), GFP_KERNEL);
if (!save_area)
@@ -386,9 +386,45 @@ static void *nt_s390_prefix(void *ptr, struct save_area *sa)
}
/*
+ * Initialize vxrs high note (full 128 bit VX registers 16-31)
+ */
+static void *nt_s390_vx_high(void *ptr, __vector128 *vx_regs)
+{
+ return nt_init(ptr, NT_S390_VXRS_HIGH, &vx_regs[16],
+ 16 * sizeof(__vector128), KEXEC_CORE_NOTE_NAME);
+}
+
+/*
+ * Initialize vxrs low note (lower halves of VX registers 0-15)
+ */
+static void *nt_s390_vx_low(void *ptr, __vector128 *vx_regs)
+{
+ Elf64_Nhdr *note;
+ u64 len;
+ int i;
+
+ note = (Elf64_Nhdr *)ptr;
+ note->n_namesz = strlen(KEXEC_CORE_NOTE_NAME) + 1;
+ note->n_descsz = 16 * 8;
+ note->n_type = NT_S390_VXRS_LOW;
+ len = sizeof(Elf64_Nhdr);
+
+ memcpy(ptr + len, KEXEC_CORE_NOTE_NAME, note->n_namesz);
+ len = roundup(len + note->n_namesz, 4);
+
+ ptr += len;
+ /* Copy lower halves of SIMD registers 0-15 */
+ for (i = 0; i < 16; i++) {
+ memcpy(ptr, &vx_regs[i], 8);
+ ptr += 8;
+ }
+ return ptr;
+}
+
+/*
* Fill ELF notes for one CPU with save area registers
*/
-void *fill_cpu_elf_notes(void *ptr, struct save_area *sa)
+void *fill_cpu_elf_notes(void *ptr, struct save_area *sa, __vector128 *vx_regs)
{
ptr = nt_prstatus(ptr, sa);
ptr = nt_fpregset(ptr, sa);
@@ -397,6 +433,10 @@ void *fill_cpu_elf_notes(void *ptr, struct save_area *sa)
ptr = nt_s390_tod_preg(ptr, sa);
ptr = nt_s390_ctrs(ptr, sa);
ptr = nt_s390_prefix(ptr, sa);
+ if (MACHINE_HAS_VX && vx_regs) {
+ ptr = nt_s390_vx_low(ptr, vx_regs);
+ ptr = nt_s390_vx_high(ptr, vx_regs);
+ }
return ptr;
}
@@ -484,7 +524,7 @@ static int get_cpu_cnt(void)
int i, cpus = 0;
for (i = 0; i < dump_save_areas.count; i++) {
- if (dump_save_areas.areas[i]->pref_reg == 0)
+ if (dump_save_areas.areas[i]->sa.pref_reg == 0)
continue;
cpus++;
}
@@ -530,17 +570,17 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
*/
static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
{
- struct save_area *sa;
+ struct save_area_ext *sa_ext;
void *ptr_start = ptr;
int i;
ptr = nt_prpsinfo(ptr);
for (i = 0; i < dump_save_areas.count; i++) {
- sa = dump_save_areas.areas[i];
- if (sa->pref_reg == 0)
+ sa_ext = dump_save_areas.areas[i];
+ if (sa_ext->sa.pref_reg == 0)
continue;
- ptr = fill_cpu_elf_notes(ptr, sa);
+ ptr = fill_cpu_elf_notes(ptr, &sa_ext->sa, sa_ext->vx_regs);
}
ptr = nt_vmcoreinfo(ptr);
memset(phdr, 0, sizeof(*phdr));
@@ -581,7 +621,7 @@ int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
mem_chunk_cnt = get_mem_chunk_cnt();
- alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
+ alloc_size = 0x1000 + get_cpu_cnt() * 0x4a0 +
mem_chunk_cnt * sizeof(Elf64_Phdr);
hdr = kzalloc_panic(alloc_size);
/* Init elf header */
diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c
index 993efe6..f376293 100644
--- a/arch/s390/kernel/dis.c
+++ b/arch/s390/kernel/dis.c
@@ -60,6 +60,11 @@ enum {
A_28, /* Access reg. starting at position 28 */
C_8, /* Control reg. starting at position 8 */
C_12, /* Control reg. starting at position 12 */
+ V_8, /* Vector reg. starting at position 8, extension bit at 36 */
+ V_12, /* Vector reg. starting at position 12, extension bit at 37 */
+ V_16, /* Vector reg. starting at position 16, extension bit at 38 */
+ V_32, /* Vector reg. starting at position 32, extension bit at 39 */
+ W_12, /* Vector reg. at bit 12, extension at bit 37, used as index */
B_16, /* Base register starting at position 16 */
B_32, /* Base register starting at position 32 */
X_12, /* Index register starting at position 12 */
@@ -82,6 +87,8 @@ enum {
U8_24, /* 8 bit unsigned value starting at 24 */
U8_32, /* 8 bit unsigned value starting at 32 */
I8_8, /* 8 bit signed value starting at 8 */
+ I8_16, /* 8 bit signed value starting at 16 */
+ I8_24, /* 8 bit signed value starting at 24 */
I8_32, /* 8 bit signed value starting at 32 */
J12_12, /* PC relative offset at 12 */
I16_16, /* 16 bit signed value starting at 16 */
@@ -96,6 +103,9 @@ enum {
U32_16, /* 32 bit unsigned value starting at 16 */
M_16, /* 4 bit optional mask starting at 16 */
M_20, /* 4 bit optional mask starting at 20 */
+ M_24, /* 4 bit optional mask starting at 24 */
+ M_28, /* 4 bit optional mask starting at 28 */
+ M_32, /* 4 bit optional mask starting at 32 */
RO_28, /* optional GPR starting at position 28 */
};
@@ -130,7 +140,7 @@ enum {
INSTR_RSY_RDRM,
INSTR_RS_AARD, INSTR_RS_CCRD, INSTR_RS_R0RD, INSTR_RS_RRRD,
INSTR_RS_RURD,
- INSTR_RXE_FRRD, INSTR_RXE_RRRD,
+ INSTR_RXE_FRRD, INSTR_RXE_RRRD, INSTR_RXE_RRRDM,
INSTR_RXF_FRRDF,
INSTR_RXY_FRRD, INSTR_RXY_RRRD, INSTR_RXY_URRD,
INSTR_RX_FRRD, INSTR_RX_RRRD, INSTR_RX_URRD,
@@ -143,6 +153,17 @@ enum {
INSTR_SS_L0RDRD, INSTR_SS_LIRDRD, INSTR_SS_LLRDRD, INSTR_SS_RRRDRD,
INSTR_SS_RRRDRD2, INSTR_SS_RRRDRD3,
INSTR_S_00, INSTR_S_RD,
+ INSTR_VRI_V0IM, INSTR_VRI_V0I0, INSTR_VRI_V0IIM, INSTR_VRI_VVIM,
+ INSTR_VRI_VVV0IM, INSTR_VRI_VVV0I0, INSTR_VRI_VVIMM,
+ INSTR_VRR_VV00MMM, INSTR_VRR_VV000MM, INSTR_VRR_VV0000M,
+ INSTR_VRR_VV00000, INSTR_VRR_VVV0M0M, INSTR_VRR_VV00M0M,
+ INSTR_VRR_VVV000M, INSTR_VRR_VVV000V, INSTR_VRR_VVV0000,
+ INSTR_VRR_VVV0MMM, INSTR_VRR_VVV00MM, INSTR_VRR_VVVMM0V,
+ INSTR_VRR_VVVM0MV, INSTR_VRR_VVVM00V, INSTR_VRR_VRR0000,
+ INSTR_VRS_VVRDM, INSTR_VRS_VVRD0, INSTR_VRS_VRRDM, INSTR_VRS_VRRD0,
+ INSTR_VRS_RVRDM,
+ INSTR_VRV_VVRDM, INSTR_VRV_VWRDM,
+ INSTR_VRX_VRRDM, INSTR_VRX_VRRD0,
};
static const struct s390_operand operands[] =
@@ -168,6 +189,11 @@ static const struct s390_operand operands[] =
[A_28] = { 4, 28, OPERAND_AR },
[C_8] = { 4, 8, OPERAND_CR },
[C_12] = { 4, 12, OPERAND_CR },
+ [V_8] = { 4, 8, OPERAND_VR },
+ [V_12] = { 4, 12, OPERAND_VR },
+ [V_16] = { 4, 16, OPERAND_VR },
+ [V_32] = { 4, 32, OPERAND_VR },
+ [W_12] = { 4, 12, OPERAND_INDEX | OPERAND_VR },
[B_16] = { 4, 16, OPERAND_BASE | OPERAND_GPR },
[B_32] = { 4, 32, OPERAND_BASE | OPERAND_GPR },
[X_12] = { 4, 12, OPERAND_INDEX | OPERAND_GPR },
@@ -190,6 +216,11 @@ static const struct s390_operand operands[] =
[U8_24] = { 8, 24, 0 },
[U8_32] = { 8, 32, 0 },
[J12_12] = { 12, 12, OPERAND_PCREL },
+ [I8_8] = { 8, 8, OPERAND_SIGNED },
+ [I8_16] = { 8, 16, OPERAND_SIGNED },
+ [I8_24] = { 8, 24, OPERAND_SIGNED },
+ [I8_32] = { 8, 32, OPERAND_SIGNED },
+ [I16_32] = { 16, 32, OPERAND_SIGNED },
[I16_16] = { 16, 16, OPERAND_SIGNED },
[U16_16] = { 16, 16, 0 },
[U16_32] = { 16, 32, 0 },
@@ -202,6 +233,9 @@ static const struct s390_operand operands[] =
[U32_16] = { 32, 16, 0 },
[M_16] = { 4, 16, 0 },
[M_20] = { 4, 20, 0 },
+ [M_24] = { 4, 24, 0 },
+ [M_28] = { 4, 28, 0 },
+ [M_32] = { 4, 32, 0 },
[RO_28] = { 4, 28, OPERAND_GPR }
};
@@ -283,6 +317,7 @@ static const unsigned char formats[][7] = {
[INSTR_RS_RURD] = { 0xff, R_8,U4_12,D_20,B_16,0,0 },
[INSTR_RXE_FRRD] = { 0xff, F_8,D_20,X_12,B_16,0,0 },
[INSTR_RXE_RRRD] = { 0xff, R_8,D_20,X_12,B_16,0,0 },
+ [INSTR_RXE_RRRDM] = { 0xff, R_8,D_20,X_12,B_16,M_32,0 },
[INSTR_RXF_FRRDF] = { 0xff, F_32,F_8,D_20,X_12,B_16,0 },
[INSTR_RXY_FRRD] = { 0xff, F_8,D20_20,X_12,B_16,0,0 },
[INSTR_RXY_RRRD] = { 0xff, R_8,D20_20,X_12,B_16,0,0 },
@@ -307,6 +342,37 @@ static const unsigned char formats[][7] = {
[INSTR_SS_RRRDRD] = { 0xff, D_20,R_8,B_16,D_36,B_32,R_12 },
[INSTR_S_00] = { 0xff, 0,0,0,0,0,0 },
[INSTR_S_RD] = { 0xff, D_20,B_16,0,0,0,0 },
+ [INSTR_VRI_V0IM] = { 0xff, V_8,I16_16,M_32,0,0,0 },
+ [INSTR_VRI_V0I0] = { 0xff, V_8,I16_16,0,0,0,0 },
+ [INSTR_VRI_V0IIM] = { 0xff, V_8,I8_16,I8_24,M_32,0,0 },
+ [INSTR_VRI_VVIM] = { 0xff, V_8,I16_16,V_12,M_32,0,0 },
+ [INSTR_VRI_VVV0IM]= { 0xff, V_8,V_12,V_16,I8_24,M_32,0 },
+ [INSTR_VRI_VVV0I0]= { 0xff, V_8,V_12,V_16,I8_24,0,0 },
+ [INSTR_VRI_VVIMM] = { 0xff, V_8,V_12,I16_16,M_32,M_28,0 },
+ [INSTR_VRR_VV00MMM]={ 0xff, V_8,V_12,M_32,M_28,M_24,0 },
+ [INSTR_VRR_VV000MM]={ 0xff, V_8,V_12,M_32,M_28,0,0 },
+ [INSTR_VRR_VV0000M]={ 0xff, V_8,V_12,M_32,0,0,0 },
+ [INSTR_VRR_VV00000]={ 0xff, V_8,V_12,0,0,0,0 },
+ [INSTR_VRR_VVV0M0M]={ 0xff, V_8,V_12,V_16,M_32,M_24,0 },
+ [INSTR_VRR_VV00M0M]={ 0xff, V_8,V_12,M_32,M_24,0,0 },
+ [INSTR_VRR_VVV000M]={ 0xff, V_8,V_12,V_16,M_32,0,0 },
+ [INSTR_VRR_VVV000V]={ 0xff, V_8,V_12,V_16,V_32,0,0 },
+ [INSTR_VRR_VVV0000]={ 0xff, V_8,V_12,V_16,0,0,0 },
+ [INSTR_VRR_VVV0MMM]={ 0xff, V_8,V_12,V_16,M_32,M_28,M_24 },
+ [INSTR_VRR_VVV00MM]={ 0xff, V_8,V_12,V_16,M_32,M_28,0 },
+ [INSTR_VRR_VVVMM0V]={ 0xff, V_8,V_12,V_16,V_32,M_20,M_24 },
+ [INSTR_VRR_VVVM0MV]={ 0xff, V_8,V_12,V_16,V_32,M_28,M_20 },
+ [INSTR_VRR_VVVM00V]={ 0xff, V_8,V_12,V_16,V_32,M_20,0 },
+ [INSTR_VRR_VRR0000]={ 0xff, V_8,R_12,R_16,0,0,0 },
+ [INSTR_VRS_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 },
+ [INSTR_VRS_VVRD0] = { 0xff, V_8,V_12,D_20,B_16,0,0 },
+ [INSTR_VRS_VRRDM] = { 0xff, V_8,R_12,D_20,B_16,M_32,0 },
+ [INSTR_VRS_VRRD0] = { 0xff, V_8,R_12,D_20,B_16,0,0 },
+ [INSTR_VRS_RVRDM] = { 0xff, R_8,V_12,D_20,B_16,M_32,0 },
+ [INSTR_VRV_VVRDM] = { 0xff, V_8,V_12,D_20,B_16,M_32,0 },
+ [INSTR_VRV_VWRDM] = { 0xff, V_8,D_20,W_12,B_16,M_32,0 },
+ [INSTR_VRX_VRRDM] = { 0xff, V_8,D_20,X_12,B_16,M_32,0 },
+ [INSTR_VRX_VRRD0] = { 0xff, V_8,D_20,X_12,B_16,0,0 },
};
enum {
@@ -381,6 +447,11 @@ enum {
LONG_INSN_MPCIFC,
LONG_INSN_STPCIFC,
LONG_INSN_PCISTB,
+ LONG_INSN_VPOPCT,
+ LONG_INSN_VERLLV,
+ LONG_INSN_VESRAV,
+ LONG_INSN_VESRLV,
+ LONG_INSN_VSBCBI
};
static char *long_insn_name[] = {
@@ -455,6 +526,11 @@ static char *long_insn_name[] = {
[LONG_INSN_MPCIFC] = "mpcifc",
[LONG_INSN_STPCIFC] = "stpcifc",
[LONG_INSN_PCISTB] = "pcistb",
+ [LONG_INSN_VPOPCT] = "vpopct",
+ [LONG_INSN_VERLLV] = "verllv",
+ [LONG_INSN_VESRAV] = "vesrav",
+ [LONG_INSN_VESRLV] = "vesrlv",
+ [LONG_INSN_VSBCBI] = "vsbcbi",
};
static struct s390_insn opcode[] = {
@@ -1369,6 +1445,150 @@ static struct s390_insn opcode_e5[] = {
{ "", 0, INSTR_INVALID }
};
+static struct s390_insn opcode_e7[] = {
+#ifdef CONFIG_64BIT
+ { "lcbb", 0x27, INSTR_RXE_RRRDM },
+ { "vgef", 0x13, INSTR_VRV_VVRDM },
+ { "vgeg", 0x12, INSTR_VRV_VVRDM },
+ { "vgbm", 0x44, INSTR_VRI_V0I0 },
+ { "vgm", 0x46, INSTR_VRI_V0IIM },
+ { "vl", 0x06, INSTR_VRX_VRRD0 },
+ { "vlr", 0x56, INSTR_VRR_VV00000 },
+ { "vlrp", 0x05, INSTR_VRX_VRRDM },
+ { "vleb", 0x00, INSTR_VRX_VRRDM },
+ { "vleh", 0x01, INSTR_VRX_VRRDM },
+ { "vlef", 0x03, INSTR_VRX_VRRDM },
+ { "vleg", 0x02, INSTR_VRX_VRRDM },
+ { "vleib", 0x40, INSTR_VRI_V0IM },
+ { "vleih", 0x41, INSTR_VRI_V0IM },
+ { "vleif", 0x43, INSTR_VRI_V0IM },
+ { "vleig", 0x42, INSTR_VRI_V0IM },
+ { "vlgv", 0x21, INSTR_VRS_RVRDM },
+ { "vllez", 0x04, INSTR_VRX_VRRDM },
+ { "vlm", 0x36, INSTR_VRS_VVRD0 },
+ { "vlbb", 0x07, INSTR_VRX_VRRDM },
+ { "vlvg", 0x22, INSTR_VRS_VRRDM },
+ { "vlvgp", 0x62, INSTR_VRR_VRR0000 },
+ { "vll", 0x37, INSTR_VRS_VRRD0 },
+ { "vmrh", 0x61, INSTR_VRR_VVV000M },
+ { "vmrl", 0x60, INSTR_VRR_VVV000M },
+ { "vpk", 0x94, INSTR_VRR_VVV000M },
+ { "vpks", 0x97, INSTR_VRR_VVV0M0M },
+ { "vpkls", 0x95, INSTR_VRR_VVV0M0M },
+ { "vperm", 0x8c, INSTR_VRR_VVV000V },
+ { "vpdi", 0x84, INSTR_VRR_VVV000M },
+ { "vrep", 0x4d, INSTR_VRI_VVIM },
+ { "vrepi", 0x45, INSTR_VRI_V0IM },
+ { "vscef", 0x1b, INSTR_VRV_VWRDM },
+ { "vsceg", 0x1a, INSTR_VRV_VWRDM },
+ { "vsel", 0x8d, INSTR_VRR_VVV000V },
+ { "vseg", 0x5f, INSTR_VRR_VV0000M },
+ { "vst", 0x0e, INSTR_VRX_VRRD0 },
+ { "vsteb", 0x08, INSTR_VRX_VRRDM },
+ { "vsteh", 0x09, INSTR_VRX_VRRDM },
+ { "vstef", 0x0b, INSTR_VRX_VRRDM },
+ { "vsteg", 0x0a, INSTR_VRX_VRRDM },
+ { "vstm", 0x3e, INSTR_VRS_VVRD0 },
+ { "vstl", 0x3f, INSTR_VRS_VRRD0 },
+ { "vuph", 0xd7, INSTR_VRR_VV0000M },
+ { "vuplh", 0xd5, INSTR_VRR_VV0000M },
+ { "vupl", 0xd6, INSTR_VRR_VV0000M },
+ { "vupll", 0xd4, INSTR_VRR_VV0000M },
+ { "va", 0xf3, INSTR_VRR_VVV000M },
+ { "vacc", 0xf1, INSTR_VRR_VVV000M },
+ { "vac", 0xbb, INSTR_VRR_VVVM00V },
+ { "vaccc", 0xb9, INSTR_VRR_VVVM00V },
+ { "vn", 0x68, INSTR_VRR_VVV0000 },
+ { "vnc", 0x69, INSTR_VRR_VVV0000 },
+ { "vavg", 0xf2, INSTR_VRR_VVV000M },
+ { "vavgl", 0xf0, INSTR_VRR_VVV000M },
+ { "vcksm", 0x66, INSTR_VRR_VVV0000 },
+ { "vec", 0xdb, INSTR_VRR_VV0000M },
+ { "vecl", 0xd9, INSTR_VRR_VV0000M },
+ { "vceq", 0xf8, INSTR_VRR_VVV0M0M },
+ { "vch", 0xfb, INSTR_VRR_VVV0M0M },
+ { "vchl", 0xf9, INSTR_VRR_VVV0M0M },
+ { "vclz", 0x53, INSTR_VRR_VV0000M },
+ { "vctz", 0x52, INSTR_VRR_VV0000M },
+ { "vx", 0x6d, INSTR_VRR_VVV0000 },
+ { "vgfm", 0xb4, INSTR_VRR_VVV000M },
+ { "vgfma", 0xbc, INSTR_VRR_VVVM00V },
+ { "vlc", 0xde, INSTR_VRR_VV0000M },
+ { "vlp", 0xdf, INSTR_VRR_VV0000M },
+ { "vmx", 0xff, INSTR_VRR_VVV000M },
+ { "vmxl", 0xfd, INSTR_VRR_VVV000M },
+ { "vmn", 0xfe, INSTR_VRR_VVV000M },
+ { "vmnl", 0xfc, INSTR_VRR_VVV000M },
+ { "vmal", 0xaa, INSTR_VRR_VVVM00V },
+ { "vmae", 0xae, INSTR_VRR_VVVM00V },
+ { "vmale", 0xac, INSTR_VRR_VVVM00V },
+ { "vmah", 0xab, INSTR_VRR_VVVM00V },
+ { "vmalh", 0xa9, INSTR_VRR_VVVM00V },
+ { "vmao", 0xaf, INSTR_VRR_VVVM00V },
+ { "vmalo", 0xad, INSTR_VRR_VVVM00V },
+ { "vmh", 0xa3, INSTR_VRR_VVV000M },
+ { "vmlh", 0xa1, INSTR_VRR_VVV000M },
+ { "vml", 0xa2, INSTR_VRR_VVV000M },
+ { "vme", 0xa6, INSTR_VRR_VVV000M },
+ { "vmle", 0xa4, INSTR_VRR_VVV000M },
+ { "vmo", 0xa7, INSTR_VRR_VVV000M },
+ { "vmlo", 0xa5, INSTR_VRR_VVV000M },
+ { "vno", 0x6b, INSTR_VRR_VVV0000 },
+ { "vo", 0x6a, INSTR_VRR_VVV0000 },
+ { { 0, LONG_INSN_VPOPCT }, 0x50, INSTR_VRR_VV0000M },
+ { { 0, LONG_INSN_VERLLV }, 0x73, INSTR_VRR_VVV000M },
+ { "verll", 0x33, INSTR_VRS_VVRDM },
+ { "verim", 0x72, INSTR_VRI_VVV0IM },
+ { "veslv", 0x70, INSTR_VRR_VVV000M },
+ { "vesl", 0x30, INSTR_VRS_VVRDM },
+ { { 0, LONG_INSN_VESRAV }, 0x7a, INSTR_VRR_VVV000M },
+ { "vesra", 0x3a, INSTR_VRS_VVRDM },
+ { { 0, LONG_INSN_VESRLV }, 0x78, INSTR_VRR_VVV000M },
+ { "vesrl", 0x38, INSTR_VRS_VVRDM },
+ { "vsl", 0x74, INSTR_VRR_VVV0000 },
+ { "vslb", 0x75, INSTR_VRR_VVV0000 },
+ { "vsldb", 0x77, INSTR_VRI_VVV0I0 },
+ { "vsra", 0x7e, INSTR_VRR_VVV0000 },
+ { "vsrab", 0x7f, INSTR_VRR_VVV0000 },
+ { "vsrl", 0x7c, INSTR_VRR_VVV0000 },
+ { "vsrlb", 0x7d, INSTR_VRR_VVV0000 },
+ { "vs", 0xf7, INSTR_VRR_VVV000M },
+ { "vscb", 0xf5, INSTR_VRR_VVV000M },
+ { "vsb", 0xbf, INSTR_VRR_VVVM00V },
+ { { 0, LONG_INSN_VSBCBI }, 0xbd, INSTR_VRR_VVVM00V },
+ { "vsumg", 0x65, INSTR_VRR_VVV000M },
+ { "vsumq", 0x67, INSTR_VRR_VVV000M },
+ { "vsum", 0x64, INSTR_VRR_VVV000M },
+ { "vtm", 0xd8, INSTR_VRR_VV00000 },
+ { "vfae", 0x82, INSTR_VRR_VVV0M0M },
+ { "vfee", 0x80, INSTR_VRR_VVV0M0M },
+ { "vfene", 0x81, INSTR_VRR_VVV0M0M },
+ { "vistr", 0x5c, INSTR_VRR_VV00M0M },
+ { "vstrc", 0x8a, INSTR_VRR_VVVMM0V },
+ { "vfa", 0xe3, INSTR_VRR_VVV00MM },
+ { "wfc", 0xcb, INSTR_VRR_VV000MM },
+ { "wfk", 0xca, INSTR_VRR_VV000MM },
+ { "vfce", 0xe8, INSTR_VRR_VVV0MMM },
+ { "vfch", 0xeb, INSTR_VRR_VVV0MMM },
+ { "vfche", 0xea, INSTR_VRR_VVV0MMM },
+ { "vcdg", 0xc3, INSTR_VRR_VV00MMM },
+ { "vcdlg", 0xc1, INSTR_VRR_VV00MMM },
+ { "vcgd", 0xc2, INSTR_VRR_VV00MMM },
+ { "vclgd", 0xc0, INSTR_VRR_VV00MMM },
+ { "vfd", 0xe5, INSTR_VRR_VVV00MM },
+ { "vfi", 0xc7, INSTR_VRR_VV00MMM },
+ { "vlde", 0xc4, INSTR_VRR_VV000MM },
+ { "vled", 0xc5, INSTR_VRR_VV00MMM },
+ { "vfm", 0xe7, INSTR_VRR_VVV00MM },
+ { "vfma", 0x8f, INSTR_VRR_VVVM0MV },
+ { "vfms", 0x8e, INSTR_VRR_VVVM0MV },
+ { "vfpso", 0xcc, INSTR_VRR_VV00MMM },
+ { "vfsq", 0xce, INSTR_VRR_VV000MM },
+ { "vfs", 0xe2, INSTR_VRR_VVV00MM },
+ { "vftci", 0x4a, INSTR_VRI_VVIMM },
+#endif
+};
+
static struct s390_insn opcode_eb[] = {
#ifdef CONFIG_64BIT
{ "lmg", 0x04, INSTR_RSY_RRRD },
@@ -1552,16 +1772,17 @@ static struct s390_insn opcode_ed[] = {
static unsigned int extract_operand(unsigned char *code,
const struct s390_operand *operand)
{
+ unsigned char *cp;
unsigned int val;
int bits;
/* Extract fragments of the operand byte for byte. */
- code += operand->shift / 8;
+ cp = code + operand->shift / 8;
bits = (operand->shift & 7) + operand->bits;
val = 0;
do {
val <<= 8;
- val |= (unsigned int) *code++;
+ val |= (unsigned int) *cp++;
bits -= 8;
} while (bits > 0);
val >>= -bits;
@@ -1571,6 +1792,18 @@ static unsigned int extract_operand(unsigned char *code,
if (operand->bits == 20 && operand->shift == 20)
val = (val & 0xff) << 12 | (val & 0xfff00) >> 8;
+ /* Check for register extensions bits for vector registers. */
+ if (operand->flags & OPERAND_VR) {
+ if (operand->shift == 8)
+ val |= (code[4] & 8) << 1;
+ else if (operand->shift == 12)
+ val |= (code[4] & 4) << 2;
+ else if (operand->shift == 16)
+ val |= (code[4] & 2) << 3;
+ else if (operand->shift == 32)
+ val |= (code[4] & 1) << 4;
+ }
+
/* Sign extend value if the operand is signed or pc relative. */
if ((operand->flags & (OPERAND_SIGNED | OPERAND_PCREL)) &&
(val & (1U << (operand->bits - 1))))
@@ -1639,6 +1872,10 @@ struct s390_insn *find_insn(unsigned char *code)
case 0xe5:
table = opcode_e5;
break;
+ case 0xe7:
+ table = opcode_e7;
+ opfrag = code[5];
+ break;
case 0xeb:
table = opcode_eb;
opfrag = code[5];
@@ -1734,6 +1971,8 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr)
ptr += sprintf(ptr, "%%a%i", value);
else if (operand->flags & OPERAND_CR)
ptr += sprintf(ptr, "%%c%i", value);
+ else if (operand->flags & OPERAND_VR)
+ ptr += sprintf(ptr, "%%v%i", value);
else if (operand->flags & OPERAND_PCREL)
ptr += sprintf(ptr, "%lx", (signed int) value
+ addr);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 0dff972..cef2879e 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -390,10 +390,10 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_LPP;
if (test_facility(50) && test_facility(73))
S390_lowcore.machine_flags |= MACHINE_FLAG_TE;
- if (test_facility(66))
- S390_lowcore.machine_flags |= MACHINE_FLAG_RRBM;
if (test_facility(51))
S390_lowcore.machine_flags |= MACHINE_FLAG_TLB_LC;
+ if (test_facility(129))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_VX;
#endif
}
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 6ac7819..0554b97 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -4,7 +4,7 @@
#include <linux/types.h>
#include <linux/signal.h>
#include <asm/ptrace.h>
-#include <asm/cputime.h>
+#include <asm/idle.h>
extern void *restart_stack;
extern unsigned long suspend_zero_pages;
@@ -21,6 +21,8 @@ void psw_idle(struct s390_idle_data *, unsigned long);
asmlinkage long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_exit(struct pt_regs *regs);
+int alloc_vector_registers(struct task_struct *tsk);
+
void do_protection_exception(struct pt_regs *regs);
void do_dat_exception(struct pt_regs *regs);
@@ -43,13 +45,15 @@ void special_op_exception(struct pt_regs *regs);
void specification_exception(struct pt_regs *regs);
void transaction_exception(struct pt_regs *regs);
void translation_exception(struct pt_regs *regs);
+void vector_exception(struct pt_regs *regs);
void do_per_trap(struct pt_regs *regs);
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str);
void syscall_trace(struct pt_regs *regs, int entryexit);
void kernel_stack_overflow(struct pt_regs * regs);
void do_signal(struct pt_regs *regs);
-void handle_signal32(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset, struct pt_regs *regs);
+void handle_signal32(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs);
void do_notify_resume(struct pt_regs *regs);
void __init init_IRQ(void);
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index f2e674c..7b2e03a 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -42,7 +42,8 @@ STACK_SHIFT = PAGE_SHIFT + THREAD_ORDER
STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
-_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
+_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
+ _TIF_UPROBE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE)
@@ -265,6 +266,10 @@ sysc_work:
jo sysc_mcck_pending
tm __TI_flags+7(%r12),_TIF_NEED_RESCHED
jo sysc_reschedule
+#ifdef CONFIG_UPROBES
+ tm __TI_flags+7(%r12),_TIF_UPROBE
+ jo sysc_uprobe_notify
+#endif
tm __PT_FLAGS+7(%r11),_PIF_PER_TRAP
jo sysc_singlestep
tm __TI_flags+7(%r12),_TIF_SIGPENDING
@@ -323,6 +328,16 @@ sysc_notify_resume:
jg do_notify_resume
#
+# _TIF_UPROBE is set, call uprobe_notify_resume
+#
+#ifdef CONFIG_UPROBES
+sysc_uprobe_notify:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,sysc_return
+ jg uprobe_notify_resume
+#endif
+
+#
# _PIF_PER_TRAP is set, call do_per_trap
#
sysc_singlestep:
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 54d6493..51d14fe5 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -1,7 +1,7 @@
/*
* Dynamic function tracer architecture backend.
*
- * Copyright IBM Corp. 2009
+ * Copyright IBM Corp. 2009,2014
*
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
* Martin Schwidefsky <schwidefsky@de.ibm.com>
@@ -17,100 +17,76 @@
#include <asm/asm-offsets.h>
#include "entry.h"
-#ifdef CONFIG_DYNAMIC_FTRACE
-
+void mcount_replace_code(void);
void ftrace_disable_code(void);
void ftrace_enable_insn(void);
-#ifdef CONFIG_64BIT
/*
- * The 64-bit mcount code looks like this:
+ * The mcount code looks like this:
* stg %r14,8(%r15) # offset 0
- * > larl %r1,<&counter> # offset 6
- * > brasl %r14,_mcount # offset 12
+ * larl %r1,<&counter> # offset 6
+ * brasl %r14,_mcount # offset 12
* lg %r14,8(%r15) # offset 18
- * Total length is 24 bytes. The middle two instructions of the mcount
- * block get overwritten by ftrace_make_nop / ftrace_make_call.
- * The 64-bit enabled ftrace code block looks like this:
- * stg %r14,8(%r15) # offset 0
+ * Total length is 24 bytes. The complete mcount block initially gets replaced
+ * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop
+ * only patch the jg/lg instruction within the block.
+ * Note: we do not patch the first instruction to an unconditional branch,
+ * since that would break kprobes/jprobes. It is easier to leave the larl
+ * instruction in and only modify the second instruction.
+ * The enabled ftrace code block looks like this:
+ * larl %r0,.+24 # offset 0
* > lg %r1,__LC_FTRACE_FUNC # offset 6
- * > lgr %r0,%r0 # offset 12
- * > basr %r14,%r1 # offset 16
- * lg %r14,8(%15) # offset 18
- * The return points of the mcount/ftrace function have the same offset 18.
- * The 64-bit disable ftrace code block looks like this:
- * stg %r14,8(%r15) # offset 0
+ * br %r1 # offset 12
+ * brcl 0,0 # offset 14
+ * brc 0,0 # offset 20
+ * The ftrace function gets called with a non-standard C function call ABI
+ * where r0 contains the return address. It is also expected that the called
+ * function only clobbers r0 and r1, but restores r2-r15.
+ * The return point of the ftrace function has offset 24, so execution
+ * continues behind the mcount block.
+ * larl %r0,.+24 # offset 0
* > jg .+18 # offset 6
- * > lgr %r0,%r0 # offset 12
- * > basr %r14,%r1 # offset 16
- * lg %r14,8(%15) # offset 18
+ * br %r1 # offset 12
+ * brcl 0,0 # offset 14
+ * brc 0,0 # offset 20
* The jg instruction branches to offset 24 to skip as many instructions
* as possible.
*/
asm(
" .align 4\n"
+ "mcount_replace_code:\n"
+ " larl %r0,0f\n"
"ftrace_disable_code:\n"
" jg 0f\n"
- " lgr %r0,%r0\n"
- " basr %r14,%r1\n"
+ " br %r1\n"
+ " brcl 0,0\n"
+ " brc 0,0\n"
"0:\n"
" .align 4\n"
"ftrace_enable_insn:\n"
" lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n");
+#define MCOUNT_BLOCK_SIZE 24
+#define MCOUNT_INSN_OFFSET 6
#define FTRACE_INSN_SIZE 6
-#else /* CONFIG_64BIT */
-/*
- * The 31-bit mcount code looks like this:
- * st %r14,4(%r15) # offset 0
- * > bras %r1,0f # offset 4
- * > .long _mcount # offset 8
- * > .long <&counter> # offset 12
- * > 0: l %r14,0(%r1) # offset 16
- * > l %r1,4(%r1) # offset 20
- * basr %r14,%r14 # offset 24
- * l %r14,4(%r15) # offset 26
- * Total length is 30 bytes. The twenty bytes starting from offset 4
- * to offset 24 get overwritten by ftrace_make_nop / ftrace_make_call.
- * The 31-bit enabled ftrace code block looks like this:
- * st %r14,4(%r15) # offset 0
- * > l %r14,__LC_FTRACE_FUNC # offset 4
- * > j 0f # offset 8
- * > .fill 12,1,0x07 # offset 12
- * 0: basr %r14,%r14 # offset 24
- * l %r14,4(%r14) # offset 26
- * The return points of the mcount/ftrace function have the same offset 26.
- * The 31-bit disabled ftrace code block looks like this:
- * st %r14,4(%r15) # offset 0
- * > j .+26 # offset 4
- * > j 0f # offset 8
- * > .fill 12,1,0x07 # offset 12
- * 0: basr %r14,%r14 # offset 24
- * l %r14,4(%r14) # offset 26
- * The j instruction branches to offset 30 to skip as many instructions
- * as possible.
- */
-asm(
- " .align 4\n"
- "ftrace_disable_code:\n"
- " j 1f\n"
- " j 0f\n"
- " .fill 12,1,0x07\n"
- "0: basr %r14,%r14\n"
- "1:\n"
- " .align 4\n"
- "ftrace_enable_insn:\n"
- " l %r14,"__stringify(__LC_FTRACE_FUNC)"\n");
-
-#define FTRACE_INSN_SIZE 4
-
-#endif /* CONFIG_64BIT */
-
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+{
+ return 0;
+}
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
+ /* Initial replacement of the whole mcount block */
+ if (addr == MCOUNT_ADDR) {
+ if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET,
+ mcount_replace_code,
+ MCOUNT_BLOCK_SIZE))
+ return -EPERM;
+ return 0;
+ }
if (probe_kernel_write((void *) rec->ip, ftrace_disable_code,
MCOUNT_INSN_SIZE))
return -EPERM;
@@ -135,8 +111,6 @@ int __init ftrace_dyn_arch_init(void)
return 0;
}
-#endif /* CONFIG_DYNAMIC_FTRACE */
-
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
/*
* Hook the return address and push it in the stack of return addresses
@@ -162,31 +136,26 @@ out:
return parent;
}
-#ifdef CONFIG_DYNAMIC_FTRACE
/*
* Patch the kernel code at ftrace_graph_caller location. The instruction
- * there is branch relative and save to prepare_ftrace_return. To disable
- * the call to prepare_ftrace_return we patch the bras offset to point
- * directly after the instructions. To enable the call we calculate
- * the original offset to prepare_ftrace_return and put it back.
+ * there is branch relative on condition. To enable the ftrace graph code
+ * block, we simply patch the mask field of the instruction to zero and
+ * turn the instruction into a nop.
+ * To disable the ftrace graph code the mask field will be patched to
+ * all ones, which turns the instruction into an unconditional branch.
*/
int ftrace_enable_ftrace_graph_caller(void)
{
- unsigned short offset;
+ u8 op = 0x04; /* set mask field to zero */
- offset = ((void *) prepare_ftrace_return -
- (void *) ftrace_graph_caller) / 2;
- return probe_kernel_write((void *) ftrace_graph_caller + 2,
- &offset, sizeof(offset));
+ return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
}
int ftrace_disable_ftrace_graph_caller(void)
{
- static unsigned short offset = 0x0002;
+ u8 op = 0xf4; /* set mask field to all ones */
- return probe_kernel_write((void *) ftrace_graph_caller + 2,
- &offset, sizeof(offset));
+ return probe_kernel_write(__va(ftrace_graph_caller)+1, &op, sizeof(op));
}
-#endif /* CONFIG_DYNAMIC_FTRACE */
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index e88d35d..d62eee1 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -398,7 +398,7 @@ ENTRY(startup_kdump)
xc __LC_STFL_FAC_LIST(8),__LC_STFL_FAC_LIST
#ifndef CONFIG_MARCH_G5
# check capabilities against MARCH_{G5,Z900,Z990,Z9_109,Z10}
- .insn s,0xb2b10000,__LC_STFL_FAC_LIST # store facility list
+ .insn s,0xb2b10000,0 # store facilities @ __LC_STFL_FAC_LIST
tm __LC_STFL_FAC_LIST,0x01 # stfle available ?
jz 0f
la %r0,1
diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c
new file mode 100644
index 0000000..7559f1b
--- /dev/null
+++ b/arch/s390/kernel/idle.c
@@ -0,0 +1,124 @@
+/*
+ * Idle functions for s390.
+ *
+ * Copyright IBM Corp. 2014
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kernel_stat.h>
+#include <linux/kprobes.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <asm/cputime.h>
+#include <asm/nmi.h>
+#include <asm/smp.h>
+#include "entry.h"
+
+static DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
+
+void __kprobes enabled_wait(void)
+{
+ struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
+ unsigned long long idle_time;
+ unsigned long psw_mask;
+
+ trace_hardirqs_on();
+
+ /* Wait for external, I/O or machine check interrupt. */
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ clear_cpu_flag(CIF_NOHZ_DELAY);
+
+ /* Call the assembler magic in entry.S */
+ psw_idle(idle, psw_mask);
+
+ /* Account time spent with enabled wait psw loaded as idle time. */
+ idle->sequence++;
+ smp_wmb();
+ idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
+ idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
+ idle->idle_time += idle_time;
+ idle->idle_count++;
+ account_idle_time(idle_time);
+ smp_wmb();
+ idle->sequence++;
+}
+
+static ssize_t show_idle_count(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long idle_count;
+ unsigned int sequence;
+
+ do {
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_count = ACCESS_ONCE(idle->idle_count);
+ if (ACCESS_ONCE(idle->clock_idle_enter))
+ idle_count++;
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+ return sprintf(buf, "%llu\n", idle_count);
+}
+DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
+
+static ssize_t show_idle_time(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
+ unsigned long long now, idle_time, idle_enter, idle_exit;
+ unsigned int sequence;
+
+ do {
+ now = get_tod_clock();
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_time = ACCESS_ONCE(idle->idle_time);
+ idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
+ idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+ idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
+ return sprintf(buf, "%llu\n", idle_time >> 12);
+}
+DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
+
+cputime64_t arch_cpu_idle_time(int cpu)
+{
+ struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
+ unsigned long long now, idle_enter, idle_exit;
+ unsigned int sequence;
+
+ do {
+ now = get_tod_clock();
+ sequence = ACCESS_ONCE(idle->sequence);
+ idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
+ idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
+ } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
+ return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
+}
+
+void arch_cpu_idle_enter(void)
+{
+ local_mcck_disable();
+}
+
+void arch_cpu_idle(void)
+{
+ if (!test_cpu_flag(CIF_MCCK_PENDING))
+ /* Halt the cpu and keep track of cpu time accounting. */
+ enabled_wait();
+ local_irq_enable();
+}
+
+void arch_cpu_idle_exit(void)
+{
+ local_mcck_enable();
+ if (test_cpu_flag(CIF_MCCK_PENDING))
+ s390_handle_mcck();
+}
+
+void arch_cpu_idle_dead(void)
+{
+ cpu_die();
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 633ca75..39badb9 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -455,22 +455,6 @@ DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n", (unsigned long long)
DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n", (unsigned long long)
IPL_PARMBLOCK_START->ipl_info.fcp.br_lba);
-static struct attribute *ipl_fcp_attrs[] = {
- &sys_ipl_type_attr.attr,
- &sys_ipl_device_attr.attr,
- &sys_ipl_fcp_wwpn_attr.attr,
- &sys_ipl_fcp_lun_attr.attr,
- &sys_ipl_fcp_bootprog_attr.attr,
- &sys_ipl_fcp_br_lba_attr.attr,
- NULL,
-};
-
-static struct attribute_group ipl_fcp_attr_group = {
- .attrs = ipl_fcp_attrs,
-};
-
-/* CCW ipl device attributes */
-
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
@@ -487,6 +471,23 @@ static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
static struct kobj_attribute sys_ipl_ccw_loadparm_attr =
__ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL);
+static struct attribute *ipl_fcp_attrs[] = {
+ &sys_ipl_type_attr.attr,
+ &sys_ipl_device_attr.attr,
+ &sys_ipl_fcp_wwpn_attr.attr,
+ &sys_ipl_fcp_lun_attr.attr,
+ &sys_ipl_fcp_bootprog_attr.attr,
+ &sys_ipl_fcp_br_lba_attr.attr,
+ &sys_ipl_ccw_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ipl_fcp_attr_group = {
+ .attrs = ipl_fcp_attrs,
+};
+
+/* CCW ipl device attributes */
+
static struct attribute *ipl_ccw_attrs_vm[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
@@ -765,28 +766,10 @@ DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
reipl_block_fcp->ipl_info.fcp.devno);
-static struct attribute *reipl_fcp_attrs[] = {
- &sys_reipl_fcp_device_attr.attr,
- &sys_reipl_fcp_wwpn_attr.attr,
- &sys_reipl_fcp_lun_attr.attr,
- &sys_reipl_fcp_bootprog_attr.attr,
- &sys_reipl_fcp_br_lba_attr.attr,
- NULL,
-};
-
-static struct attribute_group reipl_fcp_attr_group = {
- .attrs = reipl_fcp_attrs,
-};
-
-/* CCW reipl device attributes */
-
-DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_ccw->ipl_info.ccw.devno);
-
static void reipl_get_ascii_loadparm(char *loadparm,
struct ipl_parameter_block *ibp)
{
- memcpy(loadparm, ibp->ipl_info.ccw.load_parm, LOADPARM_LEN);
+ memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
loadparm[LOADPARM_LEN] = 0;
strim(loadparm);
@@ -821,13 +804,50 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return -EINVAL;
}
/* initialize loadparm with blanks */
- memset(ipb->ipl_info.ccw.load_parm, ' ', LOADPARM_LEN);
+ memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN);
/* copy and convert to ebcdic */
- memcpy(ipb->ipl_info.ccw.load_parm, buf, lp_len);
- ASCEBC(ipb->ipl_info.ccw.load_parm, LOADPARM_LEN);
+ memcpy(ipb->hdr.loadparm, buf, lp_len);
+ ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
return len;
}
+/* FCP wrapper */
+static ssize_t reipl_fcp_loadparm_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return reipl_generic_loadparm_show(reipl_block_fcp, page);
+}
+
+static ssize_t reipl_fcp_loadparm_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t len)
+{
+ return reipl_generic_loadparm_store(reipl_block_fcp, buf, len);
+}
+
+static struct kobj_attribute sys_reipl_fcp_loadparm_attr =
+ __ATTR(loadparm, S_IRUGO | S_IWUSR, reipl_fcp_loadparm_show,
+ reipl_fcp_loadparm_store);
+
+static struct attribute *reipl_fcp_attrs[] = {
+ &sys_reipl_fcp_device_attr.attr,
+ &sys_reipl_fcp_wwpn_attr.attr,
+ &sys_reipl_fcp_lun_attr.attr,
+ &sys_reipl_fcp_bootprog_attr.attr,
+ &sys_reipl_fcp_br_lba_attr.attr,
+ &sys_reipl_fcp_loadparm_attr.attr,
+ NULL,
+};
+
+static struct attribute_group reipl_fcp_attr_group = {
+ .attrs = reipl_fcp_attrs,
+};
+
+/* CCW reipl device attributes */
+
+DEFINE_IPL_ATTR_RW(reipl_ccw, device, "0.0.%04llx\n", "0.0.%llx\n",
+ reipl_block_ccw->ipl_info.ccw.devno);
+
/* NSS wrapper */
static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -1125,11 +1145,10 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
/* LOADPARM */
/* check if read scp info worked and set loadparm */
if (sclp_ipl_info.is_valid)
- memcpy(ipb->ipl_info.ccw.load_parm,
- &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
else
/* read scp info failed: set empty loadparm (EBCDIC blanks) */
- memset(ipb->ipl_info.ccw.load_parm, 0x40, LOADPARM_LEN);
+ memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN);
ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
/* VM PARM */
@@ -1251,9 +1270,16 @@ static int __init reipl_fcp_init(void)
return rc;
}
- if (ipl_info.type == IPL_TYPE_FCP)
+ if (ipl_info.type == IPL_TYPE_FCP) {
memcpy(reipl_block_fcp, IPL_PARMBLOCK_START, PAGE_SIZE);
- else {
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * is invalid in the SCSI IPL parameter block, so take it
+ * always from sclp_ipl_info.
+ */
+ memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm,
+ LOADPARM_LEN);
+ } else {
reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
@@ -1864,7 +1890,23 @@ static void __init shutdown_actions_init(void)
static int __init s390_ipl_init(void)
{
+ char str[8] = {0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40};
+
sclp_get_ipl_info(&sclp_ipl_info);
+ /*
+ * Fix loadparm: There are systems where the (SCSI) LOADPARM
+ * returned by read SCP info is invalid (contains EBCDIC blanks)
+ * when the system has been booted via diag308. In that case we use
+ * the value from diag308, if available.
+ *
+ * There are also systems where diag308 store does not work in
+ * case the system is booted from HMC. Fortunately in this case
+ * READ SCP info provides the correct value.
+ */
+ if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 &&
+ diag308_set_works)
+ memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm,
+ LOADPARM_LEN);
shutdown_actions_init();
shutdown_triggers_init();
return 0;
@@ -2060,6 +2102,13 @@ void s390_reset_system(void (*func)(void *), void *data)
S390_lowcore.program_new_psw.addr =
PSW_ADDR_AMODE | (unsigned long) s390_base_pgm_handler;
+ /*
+ * Clear subchannel ID and number to signal new kernel that no CCW or
+ * SCSI IPL has been done (for kexec and kdump)
+ */
+ S390_lowcore.subchannel_id = 0;
+ S390_lowcore.subchannel_nr = 0;
+
/* Store status at absolute zero */
store_status();
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 99b0b09..1b8a38a 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -30,6 +30,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
EXPORT_PER_CPU_SYMBOL_GPL(irq_stat);
struct irq_class {
+ int irq;
char *name;
char *desc;
};
@@ -45,9 +46,9 @@ struct irq_class {
* up with having a sum which accounts each interrupt twice.
*/
static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
- [EXT_INTERRUPT] = {.name = "EXT"},
- [IO_INTERRUPT] = {.name = "I/O"},
- [THIN_INTERRUPT] = {.name = "AIO"},
+ {.irq = EXT_INTERRUPT, .name = "EXT"},
+ {.irq = IO_INTERRUPT, .name = "I/O"},
+ {.irq = THIN_INTERRUPT, .name = "AIO"},
};
/*
@@ -56,38 +57,39 @@ static const struct irq_class irqclass_main_desc[NR_IRQS_BASE] = {
* In addition this list contains non external / I/O events like NMIs.
*/
static const struct irq_class irqclass_sub_desc[NR_ARCH_IRQS] = {
- [IRQEXT_CLK] = {.name = "CLK", .desc = "[EXT] Clock Comparator"},
- [IRQEXT_EXC] = {.name = "EXC", .desc = "[EXT] External Call"},
- [IRQEXT_EMS] = {.name = "EMS", .desc = "[EXT] Emergency Signal"},
- [IRQEXT_TMR] = {.name = "TMR", .desc = "[EXT] CPU Timer"},
- [IRQEXT_TLA] = {.name = "TAL", .desc = "[EXT] Timing Alert"},
- [IRQEXT_PFL] = {.name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
- [IRQEXT_DSD] = {.name = "DSD", .desc = "[EXT] DASD Diag"},
- [IRQEXT_VRT] = {.name = "VRT", .desc = "[EXT] Virtio"},
- [IRQEXT_SCP] = {.name = "SCP", .desc = "[EXT] Service Call"},
- [IRQEXT_IUC] = {.name = "IUC", .desc = "[EXT] IUCV"},
- [IRQEXT_CMS] = {.name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
- [IRQEXT_CMC] = {.name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
- [IRQEXT_CMR] = {.name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
- [IRQIO_CIO] = {.name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
- [IRQIO_QAI] = {.name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
- [IRQIO_DAS] = {.name = "DAS", .desc = "[I/O] DASD"},
- [IRQIO_C15] = {.name = "C15", .desc = "[I/O] 3215"},
- [IRQIO_C70] = {.name = "C70", .desc = "[I/O] 3270"},
- [IRQIO_TAP] = {.name = "TAP", .desc = "[I/O] Tape"},
- [IRQIO_VMR] = {.name = "VMR", .desc = "[I/O] Unit Record Devices"},
- [IRQIO_LCS] = {.name = "LCS", .desc = "[I/O] LCS"},
- [IRQIO_CLW] = {.name = "CLW", .desc = "[I/O] CLAW"},
- [IRQIO_CTC] = {.name = "CTC", .desc = "[I/O] CTC"},
- [IRQIO_APB] = {.name = "APB", .desc = "[I/O] AP Bus"},
- [IRQIO_ADM] = {.name = "ADM", .desc = "[I/O] EADM Subchannel"},
- [IRQIO_CSC] = {.name = "CSC", .desc = "[I/O] CHSC Subchannel"},
- [IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
- [IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
- [IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
- [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
- [NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
- [CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
+ {.irq = IRQEXT_CLK, .name = "CLK", .desc = "[EXT] Clock Comparator"},
+ {.irq = IRQEXT_EXC, .name = "EXC", .desc = "[EXT] External Call"},
+ {.irq = IRQEXT_EMS, .name = "EMS", .desc = "[EXT] Emergency Signal"},
+ {.irq = IRQEXT_TMR, .name = "TMR", .desc = "[EXT] CPU Timer"},
+ {.irq = IRQEXT_TLA, .name = "TAL", .desc = "[EXT] Timing Alert"},
+ {.irq = IRQEXT_PFL, .name = "PFL", .desc = "[EXT] Pseudo Page Fault"},
+ {.irq = IRQEXT_DSD, .name = "DSD", .desc = "[EXT] DASD Diag"},
+ {.irq = IRQEXT_VRT, .name = "VRT", .desc = "[EXT] Virtio"},
+ {.irq = IRQEXT_SCP, .name = "SCP", .desc = "[EXT] Service Call"},
+ {.irq = IRQEXT_IUC, .name = "IUC", .desc = "[EXT] IUCV"},
+ {.irq = IRQEXT_CMS, .name = "CMS", .desc = "[EXT] CPU-Measurement: Sampling"},
+ {.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
+ {.irq = IRQEXT_CMR, .name = "CMR", .desc = "[EXT] CPU-Measurement: RI"},
+ {.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
+ {.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
+ {.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
+ {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
+ {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"},
+ {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
+ {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
+ {.irq = IRQIO_CLW, .name = "CLW", .desc = "[I/O] CLAW"},
+ {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"},
+ {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
+ {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
+ {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" },
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" },
+ {.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
+ {.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
+ {.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
void __init init_IRQ(void)
@@ -116,33 +118,37 @@ void do_IRQ(struct pt_regs *regs, int irq)
*/
int show_interrupts(struct seq_file *p, void *v)
{
- int irq = *(loff_t *) v;
- int cpu;
+ int index = *(loff_t *) v;
+ int cpu, irq;
get_online_cpus();
- if (irq == 0) {
+ if (index == 0) {
seq_puts(p, " ");
for_each_online_cpu(cpu)
seq_printf(p, "CPU%d ", cpu);
seq_putc(p, '\n');
goto out;
}
- if (irq < NR_IRQS) {
- if (irq >= NR_IRQS_BASE)
+ if (index < NR_IRQS) {
+ if (index >= NR_IRQS_BASE)
goto out;
- seq_printf(p, "%s: ", irqclass_main_desc[irq].name);
+ /* Adjust index to process irqclass_main_desc array entries */
+ index--;
+ seq_printf(p, "%s: ", irqclass_main_desc[index].name);
+ irq = irqclass_main_desc[index].irq;
for_each_online_cpu(cpu)
seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
seq_putc(p, '\n');
goto out;
}
- for (irq = 0; irq < NR_ARCH_IRQS; irq++) {
- seq_printf(p, "%s: ", irqclass_sub_desc[irq].name);
+ for (index = 0; index < NR_ARCH_IRQS; index++) {
+ seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
+ irq = irqclass_sub_desc[index].irq;
for_each_online_cpu(cpu)
seq_printf(p, "%10u ",
per_cpu(irq_stat, cpu).irqs[irq]);
- if (irqclass_sub_desc[irq].desc)
- seq_printf(p, " %s", irqclass_sub_desc[irq].desc);
+ if (irqclass_sub_desc[index].desc)
+ seq_printf(p, " %s", irqclass_sub_desc[index].desc);
seq_putc(p, '\n');
}
out:
@@ -253,7 +259,7 @@ static irqreturn_t do_ext_interrupt(int irq, void *dummy)
ext_code = *(struct ext_code *) &regs->int_code;
if (ext_code.code != EXT_IRQ_CLK_COMP)
- __get_cpu_var(s390_idle).nohz_delay = 1;
+ set_cpu_flag(CIF_NOHZ_DELAY);
index = ext_hash(ext_code.code);
rcu_read_lock();
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index bc71a7b..014d472 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -58,161 +58,13 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = {
.insn_size = MAX_INSN_SIZE,
};
-static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
-{
- if (!is_known_insn((unsigned char *)insn))
- return -EINVAL;
- switch (insn[0] >> 8) {
- case 0x0c: /* bassm */
- case 0x0b: /* bsm */
- case 0x83: /* diag */
- case 0x44: /* ex */
- case 0xac: /* stnsm */
- case 0xad: /* stosm */
- return -EINVAL;
- case 0xc6:
- switch (insn[0] & 0x0f) {
- case 0x00: /* exrl */
- return -EINVAL;
- }
- }
- switch (insn[0]) {
- case 0x0101: /* pr */
- case 0xb25a: /* bsa */
- case 0xb240: /* bakr */
- case 0xb258: /* bsg */
- case 0xb218: /* pc */
- case 0xb228: /* pt */
- case 0xb98d: /* epsw */
- return -EINVAL;
- }
- return 0;
-}
-
-static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
-{
- /* default fixup method */
- int fixup = FIXUP_PSW_NORMAL;
-
- switch (insn[0] >> 8) {
- case 0x05: /* balr */
- case 0x0d: /* basr */
- fixup = FIXUP_RETURN_REGISTER;
- /* if r2 = 0, no branch will be taken */
- if ((insn[0] & 0x0f) == 0)
- fixup |= FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0x06: /* bctr */
- case 0x07: /* bcr */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0x45: /* bal */
- case 0x4d: /* bas */
- fixup = FIXUP_RETURN_REGISTER;
- break;
- case 0x47: /* bc */
- case 0x46: /* bct */
- case 0x86: /* bxh */
- case 0x87: /* bxle */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0x82: /* lpsw */
- fixup = FIXUP_NOT_REQUIRED;
- break;
- case 0xb2: /* lpswe */
- if ((insn[0] & 0xff) == 0xb2)
- fixup = FIXUP_NOT_REQUIRED;
- break;
- case 0xa7: /* bras */
- if ((insn[0] & 0x0f) == 0x05)
- fixup |= FIXUP_RETURN_REGISTER;
- break;
- case 0xc0:
- if ((insn[0] & 0x0f) == 0x05) /* brasl */
- fixup |= FIXUP_RETURN_REGISTER;
- break;
- case 0xeb:
- switch (insn[2] & 0xff) {
- case 0x44: /* bxhg */
- case 0x45: /* bxleg */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- }
- break;
- case 0xe3: /* bctg */
- if ((insn[2] & 0xff) == 0x46)
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- case 0xec:
- switch (insn[2] & 0xff) {
- case 0xe5: /* clgrb */
- case 0xe6: /* cgrb */
- case 0xf6: /* crb */
- case 0xf7: /* clrb */
- case 0xfc: /* cgib */
- case 0xfd: /* cglib */
- case 0xfe: /* cib */
- case 0xff: /* clib */
- fixup = FIXUP_BRANCH_NOT_TAKEN;
- break;
- }
- break;
- }
- return fixup;
-}
-
-static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
-{
- /* Check if we have a RIL-b or RIL-c format instruction which
- * we need to modify in order to avoid instruction emulation. */
- switch (insn[0] >> 8) {
- case 0xc0:
- if ((insn[0] & 0x0f) == 0x00) /* larl */
- return true;
- break;
- case 0xc4:
- switch (insn[0] & 0x0f) {
- case 0x02: /* llhrl */
- case 0x04: /* lghrl */
- case 0x05: /* lhrl */
- case 0x06: /* llghrl */
- case 0x07: /* sthrl */
- case 0x08: /* lgrl */
- case 0x0b: /* stgrl */
- case 0x0c: /* lgfrl */
- case 0x0d: /* lrl */
- case 0x0e: /* llgfrl */
- case 0x0f: /* strl */
- return true;
- }
- break;
- case 0xc6:
- switch (insn[0] & 0x0f) {
- case 0x02: /* pfdrl */
- case 0x04: /* cghrl */
- case 0x05: /* chrl */
- case 0x06: /* clghrl */
- case 0x07: /* clhrl */
- case 0x08: /* cgrl */
- case 0x0a: /* clgrl */
- case 0x0c: /* cgfrl */
- case 0x0d: /* crl */
- case 0x0e: /* clgfrl */
- case 0x0f: /* clrl */
- return true;
- }
- break;
- }
- return false;
-}
-
static void __kprobes copy_instruction(struct kprobe *p)
{
s64 disp, new_disp;
u64 addr, new_addr;
memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8));
- if (!is_insn_relative_long(p->ainsn.insn))
+ if (!probe_is_insn_relative_long(p->ainsn.insn))
return;
/*
* For pc-relative instructions in RIL-b or RIL-c format patch the
@@ -276,7 +128,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
if ((unsigned long) p->addr & 0x01)
return -EINVAL;
/* Make sure the probe isn't going on a difficult instruction */
- if (is_prohibited_opcode(p->addr))
+ if (probe_is_prohibited_opcode(p->addr))
return -EINVAL;
if (s390_get_insn_slot(p))
return -ENOMEM;
@@ -366,9 +218,9 @@ static void __kprobes disable_singlestep(struct kprobe_ctlblk *kcb,
*/
static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
{
- kcb->prev_kprobe.kp = __get_cpu_var(current_kprobe);
+ kcb->prev_kprobe.kp = __this_cpu_read(current_kprobe);
kcb->prev_kprobe.status = kcb->kprobe_status;
- __get_cpu_var(current_kprobe) = p;
+ __this_cpu_write(current_kprobe, p);
}
/*
@@ -378,7 +230,7 @@ static void __kprobes push_kprobe(struct kprobe_ctlblk *kcb, struct kprobe *p)
*/
static void __kprobes pop_kprobe(struct kprobe_ctlblk *kcb)
{
- __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp;
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
kcb->kprobe_status = kcb->prev_kprobe.status;
}
@@ -459,7 +311,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
enable_singlestep(kcb, regs, (unsigned long) p->ainsn.insn);
return 1;
} else if (kprobe_running()) {
- p = __get_cpu_var(current_kprobe);
+ p = __this_cpu_read(current_kprobe);
if (p->break_handler && p->break_handler(p, regs)) {
/*
* Continuation after the jprobe completed and
@@ -605,7 +457,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
unsigned long ip = regs->psw.addr & PSW_ADDR_INSN;
- int fixup = get_fixup_type(p->ainsn.insn);
+ int fixup = probe_get_fixup_type(p->ainsn.insn);
if (fixup & FIXUP_PSW_NORMAL)
ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn;
@@ -789,11 +641,6 @@ void __kprobes jprobe_return(void)
asm volatile(".word 0x0002");
}
-static void __used __kprobes jprobe_return_end(void)
-{
- asm volatile("bcr 0,0");
-}
-
int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 719e27b..4685337 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -25,6 +25,7 @@
#include <asm/elf.h>
#include <asm/asm-offsets.h>
#include <asm/os_info.h>
+#include <asm/switch_to.h>
typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
@@ -43,7 +44,7 @@ static void add_elf_notes(int cpu)
memcpy((void *) (4608UL + sa->pref_reg), sa, sizeof(*sa));
ptr = (u64 *) per_cpu_ptr(crash_notes, cpu);
- ptr = fill_cpu_elf_notes(ptr, sa);
+ ptr = fill_cpu_elf_notes(ptr, sa, NULL);
memset(ptr, 0, sizeof(struct elf_note));
}
@@ -53,8 +54,11 @@ static void add_elf_notes(int cpu)
static void setup_regs(void)
{
unsigned long sa = S390_lowcore.prefixreg_save_area + SAVE_AREA_BASE;
+ struct _lowcore *lc;
int cpu, this_cpu;
+ /* Get lowcore pointer from store status of this CPU (absolute zero) */
+ lc = (struct _lowcore *)(unsigned long)S390_lowcore.prefixreg_save_area;
this_cpu = smp_find_processor_id(stap());
add_elf_notes(this_cpu);
for_each_online_cpu(cpu) {
@@ -64,6 +68,8 @@ static void setup_regs(void)
continue;
add_elf_notes(cpu);
}
+ if (MACHINE_HAS_VX)
+ save_vx_regs_safe((void *) lc->vector_save_area_addr);
/* Copy dump CPU store status info to absolute zero */
memcpy((void *) SAVE_AREA_BASE, (void *) sa, sizeof(struct save_area));
}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 08dcf21..4300ea3 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -8,66 +8,72 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
+#include <asm/ptrace.h>
.section .kprobes.text, "ax"
ENTRY(ftrace_stub)
br %r14
+#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
+#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
+#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS)
+#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
+
ENTRY(_mcount)
-#ifdef CONFIG_DYNAMIC_FTRACE
br %r14
ENTRY(ftrace_caller)
+ .globl ftrace_regs_caller
+ .set ftrace_regs_caller,ftrace_caller
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_SIZE
+ stg %r1,__SF_BACKCHAIN(%r15)
+ stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15)
+ stg %r0,(STACK_PTREGS_PSW+8)(%r15)
+ stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15)
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ aghik %r2,%r0,-MCOUNT_INSN_SIZE
+ lgrl %r4,function_trace_op
+ lgrl %r1,ftrace_trace_function
+#else
+ lgr %r2,%r0
+ aghi %r2,-MCOUNT_INSN_SIZE
+ larl %r4,function_trace_op
+ lg %r4,0(%r4)
+ larl %r1,ftrace_trace_function
+ lg %r1,0(%r1)
#endif
- stm %r2,%r5,16(%r15)
- bras %r1,2f
-0: .long ftrace_trace_function
-1: .long function_trace_stop
-2: l %r2,1b-0b(%r1)
- icm %r2,0xf,0(%r2)
- jnz 3f
- st %r14,56(%r15)
- lr %r0,%r15
- ahi %r15,-96
- l %r3,100(%r15)
- la %r2,0(%r14)
- st %r0,__SF_BACKCHAIN(%r15)
- la %r3,0(%r3)
- ahi %r2,-MCOUNT_INSN_SIZE
- l %r14,0b-0b(%r1)
- l %r14,0(%r14)
- basr %r14,%r14
+ lgr %r3,%r14
+ la %r5,STACK_PTREGS(%r15)
+ basr %r14,%r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- l %r2,100(%r15)
- l %r3,152(%r15)
+# The j instruction gets runtime patched to a nop instruction.
+# See ftrace_enable_ftrace_graph_caller.
ENTRY(ftrace_graph_caller)
-# The bras instruction gets runtime patched to call prepare_ftrace_return.
-# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
-# bras %r14,prepare_ftrace_return
- bras %r14,0f
-0: st %r2,100(%r15)
+ j ftrace_graph_caller_end
+ lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+ lg %r3,(STACK_PTREGS_PSW+8)(%r15)
+ brasl %r14,prepare_ftrace_return
+ stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
+ftrace_graph_caller_end:
+ .globl ftrace_graph_caller_end
#endif
- ahi %r15,96
- l %r14,56(%r15)
-3: lm %r2,%r5,16(%r15)
- br %r14
+ lg %r1,(STACK_PTREGS_PSW+8)(%r15)
+ lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
+ br %r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(return_to_handler)
- stm %r2,%r5,16(%r15)
- st %r14,56(%r15)
- lr %r0,%r15
- ahi %r15,-96
- st %r0,__SF_BACKCHAIN(%r15)
- bras %r1,0f
- .long ftrace_return_to_handler
-0: l %r2,0b-0b(%r1)
- basr %r14,%r2
- lr %r14,%r2
- ahi %r15,96
- lm %r2,%r5,16(%r15)
+ stmg %r2,%r5,32(%r15)
+ lgr %r1,%r15
+ aghi %r15,-STACK_FRAME_OVERHEAD
+ stg %r1,__SF_BACKCHAIN(%r15)
+ brasl %r14,ftrace_return_to_handler
+ aghi %r15,STACK_FRAME_OVERHEAD
+ lgr %r14,%r2
+ lmg %r2,%r5,32(%r15)
br %r14
#endif
diff --git a/arch/s390/kernel/mcount64.S b/arch/s390/kernel/mcount64.S
deleted file mode 100644
index 1c52eae..0000000
--- a/arch/s390/kernel/mcount64.S
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright IBM Corp. 2008, 2009
- *
- * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
- *
- */
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-#include <asm/ftrace.h>
-
- .section .kprobes.text, "ax"
-
-ENTRY(ftrace_stub)
- br %r14
-
-ENTRY(_mcount)
-#ifdef CONFIG_DYNAMIC_FTRACE
- br %r14
-
-ENTRY(ftrace_caller)
-#endif
- larl %r1,function_trace_stop
- icm %r1,0xf,0(%r1)
- bnzr %r14
- stmg %r2,%r5,32(%r15)
- stg %r14,112(%r15)
- lgr %r1,%r15
- aghi %r15,-160
- stg %r1,__SF_BACKCHAIN(%r15)
- lgr %r2,%r14
- lg %r3,168(%r15)
- aghi %r2,-MCOUNT_INSN_SIZE
- larl %r14,ftrace_trace_function
- lg %r14,0(%r14)
- basr %r14,%r14
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- lg %r2,168(%r15)
- lg %r3,272(%r15)
-ENTRY(ftrace_graph_caller)
-# The bras instruction gets runtime patched to call prepare_ftrace_return.
-# See ftrace_enable_ftrace_graph_caller. The patched instruction is:
-# bras %r14,prepare_ftrace_return
- bras %r14,0f
-0: stg %r2,168(%r15)
-#endif
- aghi %r15,160
- lmg %r2,%r5,32(%r15)
- lg %r14,112(%r15)
- br %r14
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-
-ENTRY(return_to_handler)
- stmg %r2,%r5,32(%r15)
- lgr %r1,%r15
- aghi %r15,-160
- stg %r1,__SF_BACKCHAIN(%r15)
- brasl %r14,ftrace_return_to_handler
- aghi %r15,160
- lgr %r14,%r2
- lmg %r2,%r5,32(%r15)
- br %r14
-
-#endif
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 210e128..dd1c24c 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -20,6 +20,7 @@
#include <asm/cputime.h>
#include <asm/nmi.h>
#include <asm/crw.h>
+#include <asm/switch_to.h>
struct mcck_struct {
int kill_task;
@@ -53,8 +54,12 @@ void s390_handle_mcck(void)
*/
local_irq_save(flags);
local_mcck_disable();
- mcck = __get_cpu_var(cpu_mcck);
- memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
+ /*
+ * Ummm... Does this make sense at all? Copying the percpu struct
+ * and then zapping it one statement later?
+ */
+ memcpy(&mcck, this_cpu_ptr(&cpu_mcck), sizeof(mcck));
+ memset(&mcck, 0, sizeof(struct mcck_struct));
clear_cpu_flag(CIF_MCCK_PENDING);
local_mcck_enable();
local_irq_restore(flags);
@@ -163,6 +168,21 @@ static int notrace s390_revalidate_registers(struct mci *mci)
" ld 15,120(%0)\n"
: : "a" (fpt_save_area));
}
+
+#ifdef CONFIG_64BIT
+ /* Revalidate vector registers */
+ if (MACHINE_HAS_VX && current->thread.vxrs) {
+ if (!mci->vr) {
+ /*
+ * Vector registers can't be restored and therefore
+ * the process needs to be terminated.
+ */
+ kill_task = 1;
+ }
+ restore_vx_regs((__vector128 *)
+ S390_lowcore.vector_save_area_addr);
+ }
+#endif
/* Revalidate access registers */
asm volatile(
" lam 0,15,0(%0)"
@@ -253,7 +273,7 @@ void notrace s390_do_machine_check(struct pt_regs *regs)
nmi_enter();
inc_irq_stat(NMI_NMI);
mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
- mcck = &__get_cpu_var(cpu_mcck);
+ mcck = this_cpu_ptr(&cpu_mcck);
umode = user_mode(regs);
if (mci->sd) {
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index ea75d01..56fdad4 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -173,7 +173,7 @@ static int validate_ctr_auth(const struct hw_perf_event *hwc)
*/
static void cpumf_pmu_enable(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
int err;
if (cpuhw->flags & PMU_F_ENABLED)
@@ -196,7 +196,7 @@ static void cpumf_pmu_enable(struct pmu *pmu)
*/
static void cpumf_pmu_disable(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
int err;
u64 inactive;
@@ -230,7 +230,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
return;
inc_irq_stat(IRQEXT_CMC);
- cpuhw = &__get_cpu_var(cpu_hw_events);
+ cpuhw = this_cpu_ptr(&cpu_hw_events);
/* Measurement alerts are shared and might happen when the PMU
* is not reserved. Ignore these alerts in this case. */
@@ -250,7 +250,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
#define PMC_RELEASE 1
static void setup_pmc_cpu(void *flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
switch (*((int *) flags)) {
case PMC_INIT:
@@ -411,12 +411,6 @@ static int cpumf_pmu_event_init(struct perf_event *event)
case PERF_TYPE_HARDWARE:
case PERF_TYPE_HW_CACHE:
case PERF_TYPE_RAW:
- /* The CPU measurement counter facility does not have overflow
- * interrupts to do sampling. Sampling must be provided by
- * external means, for example, by timers.
- */
- if (is_sampling_event(event))
- return -ENOENT;
err = __hw_perf_event_init(event);
break;
default:
@@ -481,7 +475,7 @@ static void cpumf_pmu_read(struct perf_event *event)
static void cpumf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
@@ -512,7 +506,7 @@ static void cpumf_pmu_start(struct perf_event *event, int flags)
static void cpumf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
if (!(hwc->state & PERF_HES_STOPPED)) {
@@ -533,7 +527,7 @@ static void cpumf_pmu_stop(struct perf_event *event, int flags)
static int cpumf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
/* Check authorization for the counter set to which this
* counter belongs.
@@ -557,7 +551,7 @@ static int cpumf_pmu_add(struct perf_event *event, int flags)
static void cpumf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
cpumf_pmu_stop(event, PERF_EF_UPDATE);
@@ -581,7 +575,7 @@ static void cpumf_pmu_del(struct perf_event *event, int flags)
*/
static void cpumf_pmu_start_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
perf_pmu_disable(pmu);
cpuhw->flags |= PERF_EVENT_TXN;
@@ -595,7 +589,7 @@ static void cpumf_pmu_start_txn(struct pmu *pmu)
*/
static void cpumf_pmu_cancel_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
WARN_ON(cpuhw->tx_state != cpuhw->state);
@@ -610,7 +604,7 @@ static void cpumf_pmu_cancel_txn(struct pmu *pmu)
*/
static int cpumf_pmu_commit_txn(struct pmu *pmu)
{
- struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
+ struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
u64 state;
/* check if the updated state can be scheduled */
@@ -681,6 +675,12 @@ static int __init cpumf_pmu_init(void)
goto out;
}
+ /* The CPU measurement counter facility does not have overflow
+ * interrupts to do sampling. Sampling must be provided by
+ * external means, for example, by timers.
+ */
+ cpumf_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
+
cpumf_pmu.attr_groups = cpumf_cf_event_group();
rc = perf_pmu_register(&cpumf_pmu, "cpum_cf", PERF_TYPE_RAW);
if (rc) {
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index ea0c7b2..08e7613 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -562,7 +562,7 @@ static DEFINE_MUTEX(pmc_reserve_mutex);
static void setup_pmc_cpu(void *flags)
{
int err;
- struct cpu_hw_sf *cpusf = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpusf = this_cpu_ptr(&cpu_hw_sf);
err = 0;
switch (*((int *) flags)) {
@@ -849,7 +849,7 @@ static int cpumsf_pmu_event_init(struct perf_event *event)
static void cpumsf_pmu_enable(struct pmu *pmu)
{
- struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct hw_perf_event *hwc;
int err;
@@ -898,7 +898,7 @@ static void cpumsf_pmu_enable(struct pmu *pmu)
static void cpumsf_pmu_disable(struct pmu *pmu)
{
- struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
struct hws_lsctl_request_block inactive;
struct hws_qsi_info_block si;
int err;
@@ -1306,7 +1306,7 @@ static void cpumsf_pmu_read(struct perf_event *event)
*/
static void cpumsf_pmu_start(struct perf_event *event, int flags)
{
- struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
return;
@@ -1327,7 +1327,7 @@ static void cpumsf_pmu_start(struct perf_event *event, int flags)
*/
static void cpumsf_pmu_stop(struct perf_event *event, int flags)
{
- struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
if (event->hw.state & PERF_HES_STOPPED)
return;
@@ -1346,7 +1346,7 @@ static void cpumsf_pmu_stop(struct perf_event *event, int flags)
static int cpumsf_pmu_add(struct perf_event *event, int flags)
{
- struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
int err;
if (cpuhw->flags & PMU_F_IN_USE)
@@ -1397,7 +1397,7 @@ out:
static void cpumsf_pmu_del(struct perf_event *event, int flags)
{
- struct cpu_hw_sf *cpuhw = &__get_cpu_var(cpu_hw_sf);
+ struct cpu_hw_sf *cpuhw = this_cpu_ptr(&cpu_hw_sf);
perf_pmu_disable(event->pmu);
cpumsf_pmu_stop(event, PERF_EF_UPDATE);
@@ -1470,7 +1470,7 @@ static void cpumf_measurement_alert(struct ext_code ext_code,
if (!(alert & CPU_MF_INT_SF_MASK))
return;
inc_irq_stat(IRQEXT_CMS);
- cpuhw = &__get_cpu_var(cpu_hw_sf);
+ cpuhw = this_cpu_ptr(&cpu_hw_sf);
/* Measurement alerts are shared and might happen when the PMU
* is not reserved. Ignore these alerts in this case. */
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 813ec72..f6f8886 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -49,7 +49,7 @@ PGM_CHECK_DEFAULT /* 17 */
PGM_CHECK_64BIT(transaction_exception) /* 18 */
PGM_CHECK_DEFAULT /* 19 */
PGM_CHECK_DEFAULT /* 1a */
-PGM_CHECK_DEFAULT /* 1b */
+PGM_CHECK_64BIT(vector_exception) /* 1b */
PGM_CHECK(space_switch_exception) /* 1c */
PGM_CHECK(hfp_sqrt_exception) /* 1d */
PGM_CHECK_DEFAULT /* 1e */
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 93b9ca4..ed84cc2 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -61,30 +61,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
return sf->gprs[8];
}
-void arch_cpu_idle(void)
-{
- local_mcck_disable();
- if (test_cpu_flag(CIF_MCCK_PENDING)) {
- local_mcck_enable();
- local_irq_enable();
- return;
- }
- /* Halt the cpu and keep track of cpu time accounting. */
- vtime_stop_cpu();
- local_irq_enable();
-}
-
-void arch_cpu_idle_exit(void)
-{
- if (test_cpu_flag(CIF_MCCK_PENDING))
- s390_handle_mcck();
-}
-
-void arch_cpu_idle_dead(void)
-{
- cpu_die();
-}
-
extern void __kprobes kernel_thread_starter(void);
/*
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 2461202..dbdd33e 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -23,15 +23,13 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id);
*/
void cpu_init(void)
{
- struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
- struct cpuid *id = &__get_cpu_var(cpu_id);
+ struct cpuid *id = this_cpu_ptr(&cpu_id);
get_cpu_id(id);
atomic_inc(&init_mm.mm_count);
current->active_mm = &init_mm;
BUG_ON(current->mm);
enter_lazy_tlb(&init_mm, current);
- memset(idle, 0, sizeof(*idle));
}
/*
@@ -41,7 +39,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te"
+ "edat", "etf3eh", "highgprs", "te", "vx"
};
unsigned long n = (unsigned long) v - 1;
int i;
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 910f253..99a567b 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -38,15 +38,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
-enum s390_regset {
- REGSET_GENERAL,
- REGSET_FP,
- REGSET_LAST_BREAK,
- REGSET_TDB,
- REGSET_SYSTEM_CALL,
- REGSET_GENERAL_EXTENDED,
-};
-
void update_cr_regs(struct task_struct *task)
{
struct pt_regs *regs = task_pt_regs(task);
@@ -55,27 +46,39 @@ void update_cr_regs(struct task_struct *task)
#ifdef CONFIG_64BIT
/* Take care of the enable/disable of transactional execution. */
- if (MACHINE_HAS_TE) {
+ if (MACHINE_HAS_TE || MACHINE_HAS_VX) {
unsigned long cr, cr_new;
__ctl_store(cr, 0, 0);
- /* Set or clear transaction execution TXC bit 8. */
- cr_new = cr | (1UL << 55);
- if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new &= ~(1UL << 55);
+ cr_new = cr;
+ if (MACHINE_HAS_TE) {
+ /* Set or clear transaction execution TXC bit 8. */
+ cr_new |= (1UL << 55);
+ if (task->thread.per_flags & PER_FLAG_NO_TE)
+ cr_new &= ~(1UL << 55);
+ }
+ if (MACHINE_HAS_VX) {
+ /* Enable/disable of vector extension */
+ cr_new &= ~(1UL << 17);
+ if (task->thread.vxrs)
+ cr_new |= (1UL << 17);
+ }
if (cr_new != cr)
__ctl_load(cr_new, 0, 0);
- /* Set or clear transaction execution TDC bits 62 and 63. */
- __ctl_store(cr, 2, 2);
- cr_new = cr & ~3UL;
- if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
- if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new |= 1UL;
- else
- cr_new |= 2UL;
+ if (MACHINE_HAS_TE) {
+ /* Set/clear transaction execution TDC bits 62/63. */
+ __ctl_store(cr, 2, 2);
+ cr_new = cr & ~3UL;
+ if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
+ if (task->thread.per_flags &
+ PER_FLAG_TE_ABORT_RAND_TEND)
+ cr_new |= 1UL;
+ else
+ cr_new |= 2UL;
+ }
+ if (cr_new != cr)
+ __ctl_load(cr_new, 2, 2);
}
- if (cr_new != cr)
- __ctl_load(cr_new, 2, 2);
}
#endif
/* Copy user specified PER registers */
@@ -84,7 +87,8 @@ void update_cr_regs(struct task_struct *task)
new.end = thread->per_user.end;
/* merge TIF_SINGLE_STEP into user specified PER registers. */
- if (test_tsk_thread_flag(task, TIF_SINGLE_STEP)) {
+ if (test_tsk_thread_flag(task, TIF_SINGLE_STEP) ||
+ test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP)) {
if (test_tsk_thread_flag(task, TIF_BLOCK_STEP))
new.control |= PER_EVENT_BRANCH;
else
@@ -93,6 +97,8 @@ void update_cr_regs(struct task_struct *task)
new.control |= PER_CONTROL_SUSPENSION;
new.control |= PER_EVENT_TRANSACTION_END;
#endif
+ if (test_tsk_thread_flag(task, TIF_UPROBE_SINGLESTEP))
+ new.control |= PER_EVENT_IFETCH;
new.start = 0;
new.end = PSW_ADDR_INSN;
}
@@ -803,7 +809,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs)
long ret = 0;
/* Do the secure computing check first. */
- if (secure_computing(regs->gprs[2])) {
+ if (secure_computing()) {
/* seccomp failures shouldn't expose any additional code. */
ret = -1;
goto out;
@@ -921,7 +927,15 @@ static int s390_fpregs_get(struct task_struct *target,
save_fp_ctl(&target->thread.fp_regs.fpc);
save_fp_regs(target->thread.fp_regs.fprs);
}
+#ifdef CONFIG_64BIT
+ else if (target->thread.vxrs) {
+ int i;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ target->thread.fp_regs.fprs[i] =
+ *(freg_t *)(target->thread.vxrs + i);
+ }
+#endif
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fp_regs, 0, -1);
}
@@ -955,9 +969,20 @@ static int s390_fpregs_set(struct task_struct *target,
target->thread.fp_regs.fprs,
offsetof(s390_fp_regs, fprs), -1);
- if (rc == 0 && target == current) {
- restore_fp_ctl(&target->thread.fp_regs.fpc);
- restore_fp_regs(target->thread.fp_regs.fprs);
+ if (rc == 0) {
+ if (target == current) {
+ restore_fp_ctl(&target->thread.fp_regs.fpc);
+ restore_fp_regs(target->thread.fp_regs.fprs);
+ }
+#ifdef CONFIG_64BIT
+ else if (target->thread.vxrs) {
+ int i;
+
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *(freg_t *)(target->thread.vxrs + i) =
+ target->thread.fp_regs.fprs[i];
+ }
+#endif
}
return rc;
@@ -1013,6 +1038,95 @@ static int s390_tdb_set(struct task_struct *target,
return 0;
}
+static int s390_vxrs_active(struct task_struct *target,
+ const struct user_regset *regset)
+{
+ return !!target->thread.vxrs;
+}
+
+static int s390_vxrs_low_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ if (target->thread.vxrs) {
+ if (target == current)
+ save_vx_regs(target->thread.vxrs);
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(target->thread.vxrs + i) + 1);
+ } else
+ memset(vxrs, 0, sizeof(vxrs));
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_low_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i, rc;
+
+ if (!target->thread.vxrs) {
+ rc = alloc_vector_registers(target);
+ if (rc)
+ return rc;
+ } else if (target == current)
+ save_vx_regs(target->thread.vxrs);
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+ if (rc == 0) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(target->thread.vxrs + i) + 1) = vxrs[i];
+ if (target == current)
+ restore_vx_regs(target->thread.vxrs);
+ }
+
+ return rc;
+}
+
+static int s390_vxrs_high_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ __vector128 vxrs[__NUM_VXRS_HIGH];
+
+ if (target->thread.vxrs) {
+ if (target == current)
+ save_vx_regs(target->thread.vxrs);
+ memcpy(vxrs, target->thread.vxrs + __NUM_VXRS_LOW,
+ sizeof(vxrs));
+ } else
+ memset(vxrs, 0, sizeof(vxrs));
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf, vxrs, 0, -1);
+}
+
+static int s390_vxrs_high_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ int rc;
+
+ if (!target->thread.vxrs) {
+ rc = alloc_vector_registers(target);
+ if (rc)
+ return rc;
+ } else if (target == current)
+ save_vx_regs(target->thread.vxrs);
+
+ rc = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ target->thread.vxrs + __NUM_VXRS_LOW, 0, -1);
+ if (rc == 0 && target == current)
+ restore_vx_regs(target->thread.vxrs);
+
+ return rc;
+}
+
#endif
static int s390_system_call_get(struct task_struct *target,
@@ -1036,7 +1150,7 @@ static int s390_system_call_set(struct task_struct *target,
}
static const struct user_regset s390_regsets[] = {
- [REGSET_GENERAL] = {
+ {
.core_note_type = NT_PRSTATUS,
.n = sizeof(s390_regs) / sizeof(long),
.size = sizeof(long),
@@ -1044,7 +1158,7 @@ static const struct user_regset s390_regsets[] = {
.get = s390_regs_get,
.set = s390_regs_set,
},
- [REGSET_FP] = {
+ {
.core_note_type = NT_PRFPREG,
.n = sizeof(s390_fp_regs) / sizeof(long),
.size = sizeof(long),
@@ -1052,8 +1166,16 @@ static const struct user_regset s390_regsets[] = {
.get = s390_fpregs_get,
.set = s390_fpregs_set,
},
+ {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(unsigned int),
+ .align = sizeof(unsigned int),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
#ifdef CONFIG_64BIT
- [REGSET_LAST_BREAK] = {
+ {
.core_note_type = NT_S390_LAST_BREAK,
.n = 1,
.size = sizeof(long),
@@ -1061,7 +1183,7 @@ static const struct user_regset s390_regsets[] = {
.get = s390_last_break_get,
.set = s390_last_break_set,
},
- [REGSET_TDB] = {
+ {
.core_note_type = NT_S390_TDB,
.n = 1,
.size = 256,
@@ -1069,15 +1191,25 @@ static const struct user_regset s390_regsets[] = {
.get = s390_tdb_get,
.set = s390_tdb_set,
},
-#endif
- [REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_S390_SYSTEM_CALL,
- .n = 1,
- .size = sizeof(unsigned int),
- .align = sizeof(unsigned int),
- .get = s390_system_call_get,
- .set = s390_system_call_set,
+ {
+ .core_note_type = NT_S390_VXRS_LOW,
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .active = s390_vxrs_active,
+ .get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
},
+ {
+ .core_note_type = NT_S390_VXRS_HIGH,
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .active = s390_vxrs_active,
+ .get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
+ },
+#endif
};
static const struct user_regset_view user_s390_view = {
@@ -1242,7 +1374,7 @@ static int s390_compat_last_break_set(struct task_struct *target,
}
static const struct user_regset s390_compat_regsets[] = {
- [REGSET_GENERAL] = {
+ {
.core_note_type = NT_PRSTATUS,
.n = sizeof(s390_compat_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
@@ -1250,7 +1382,7 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_compat_regs_get,
.set = s390_compat_regs_set,
},
- [REGSET_FP] = {
+ {
.core_note_type = NT_PRFPREG,
.n = sizeof(s390_fp_regs) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
@@ -1258,7 +1390,15 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_fpregs_get,
.set = s390_fpregs_set,
},
- [REGSET_LAST_BREAK] = {
+ {
+ .core_note_type = NT_S390_SYSTEM_CALL,
+ .n = 1,
+ .size = sizeof(compat_uint_t),
+ .align = sizeof(compat_uint_t),
+ .get = s390_system_call_get,
+ .set = s390_system_call_set,
+ },
+ {
.core_note_type = NT_S390_LAST_BREAK,
.n = 1,
.size = sizeof(long),
@@ -1266,7 +1406,7 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_compat_last_break_get,
.set = s390_compat_last_break_set,
},
- [REGSET_TDB] = {
+ {
.core_note_type = NT_S390_TDB,
.n = 1,
.size = 256,
@@ -1274,15 +1414,25 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_tdb_get,
.set = s390_tdb_set,
},
- [REGSET_SYSTEM_CALL] = {
- .core_note_type = NT_S390_SYSTEM_CALL,
- .n = 1,
- .size = sizeof(compat_uint_t),
- .align = sizeof(compat_uint_t),
- .get = s390_system_call_get,
- .set = s390_system_call_set,
+ {
+ .core_note_type = NT_S390_VXRS_LOW,
+ .n = __NUM_VXRS_LOW,
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .active = s390_vxrs_active,
+ .get = s390_vxrs_low_get,
+ .set = s390_vxrs_low_set,
+ },
+ {
+ .core_note_type = NT_S390_VXRS_HIGH,
+ .n = __NUM_VXRS_HIGH,
+ .size = sizeof(__vector128),
+ .align = sizeof(__vector128),
+ .active = s390_vxrs_active,
+ .get = s390_vxrs_high_get,
+ .set = s390_vxrs_high_set,
},
- [REGSET_GENERAL_EXTENDED] = {
+ {
.core_note_type = NT_S390_HIGH_GPRS,
.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
.size = sizeof(compat_long_t),
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 1e2264b..e80d9ff 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -24,6 +24,7 @@
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
+#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
@@ -61,6 +62,7 @@
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
+#include <asm/sysinfo.h>
#include "entry.h"
/*
@@ -341,6 +343,9 @@ static void __init setup_lowcore(void)
__ctl_set_bit(14, 29);
}
#else
+ if (MACHINE_HAS_VX)
+ lc->vector_save_area_addr =
+ (unsigned long) &lc->vector_save_area;
lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
#endif
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
@@ -450,8 +455,8 @@ static void __init setup_memory_end(void)
#ifdef CONFIG_64BIT
vmalloc_size = VMALLOC_END ?: (128UL << 30) - MODULES_LEN;
tmp = (memory_end ?: max_physmem_end) / PAGE_SIZE;
- tmp = tmp * (sizeof(struct page) + PAGE_SIZE) + vmalloc_size;
- if (tmp <= (1UL << 42))
+ tmp = tmp * (sizeof(struct page) + PAGE_SIZE);
+ if (tmp + vmalloc_size + MODULES_LEN <= (1UL << 42))
vmax = 1UL << 42; /* 3-level kernel page table */
else
vmax = 1UL << 53; /* 4-level kernel page table */
@@ -501,6 +506,8 @@ static int kdump_mem_notifier(struct notifier_block *nb,
{
struct memory_notify *arg = data;
+ if (action != MEM_GOING_OFFLINE)
+ return NOTIFY_OK;
if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
return NOTIFY_BAD;
if (arg->start_pfn > PFN_DOWN(crashk_res.end))
@@ -761,9 +768,16 @@ static void __init setup_hwcaps(void)
*/
if (test_facility(50) && test_facility(73))
elf_hwcap |= HWCAP_S390_TE;
+
+ /*
+ * Vector extension HWCAP_S390_VXRS is bit 11.
+ */
+ if (test_facility(129))
+ elf_hwcap |= HWCAP_S390_VXRS;
#endif
get_cpu_id(&cpu_id);
+ add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
case 0x9672:
#if !defined(CONFIG_64BIT)
@@ -802,6 +816,19 @@ static void __init setup_hwcaps(void)
}
/*
+ * Add system information as device randomness
+ */
+static void __init setup_randomness(void)
+{
+ struct sysinfo_3_2_2 *vmms;
+
+ vmms = (struct sysinfo_3_2_2 *) alloc_page(GFP_KERNEL);
+ if (vmms && stsi(vmms, 3, 2, 2) == 0 && vmms->count)
+ add_device_randomness(&vmms, vmms->count);
+ free_page((unsigned long) vmms);
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -899,6 +926,9 @@ void __init setup_arch(char **cmdline_p)
/* Setup zfcpdump support */
setup_zfcpdump();
+
+ /* Add system specific data to the random pool */
+ setup_randomness();
}
#ifdef CONFIG_32BIT
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 42b49f9..0c1a0ff 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -31,30 +31,117 @@
#include <asm/switch_to.h>
#include "entry.h"
-typedef struct
+/*
+ * Layout of an old-style signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | struct sigcontext |
+ * | oldmask |
+ * | _sigregs * |
+ * -----------------------------------------
+ * | _sigregs with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * -----------------------------------------
+ * | int signo |
+ * -----------------------------------------
+ * | _sigregs_ext with |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt) |
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * | __u16 svc_insn |
+ * -----------------------------------------
+ * The svc_insn entry with the sigreturn system call opcode does not
+ * have a fixed position and moves if gprs_high or vxrs exist.
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct sigframe
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
struct sigcontext sc;
_sigregs sregs;
int signo;
- __u8 retcode[S390_SYSCALL_SIZE];
-} sigframe;
+ _sigregs_ext sregs_ext;
+ __u16 svc_insn; /* Offset of svc_insn is NOT fixed! */
+};
-typedef struct
+/*
+ * Layout of an rt signal-frame:
+ * -----------------------------------------
+ * | save area (_SIGNAL_FRAMESIZE) |
+ * -----------------------------------------
+ * | svc __NR_rt_sigreturn 2 byte |
+ * -----------------------------------------
+ * | struct siginfo |
+ * -----------------------------------------
+ * | struct ucontext_extended with |
+ * | unsigned long uc_flags |
+ * | struct ucontext *uc_link |
+ * | stack_t uc_stack |
+ * | _sigregs uc_mcontext with |
+ * | _s390_regs_common |
+ * | _s390_fp_regs |
+ * | sigset_t uc_sigmask |
+ * | _sigregs_ext uc_mcontext_ext |
+ * | gprs_high 64 byte (opt) |
+ * | vxrs_low 128 byte (opt) |
+ * | vxrs_high 256 byte (opt)|
+ * | reserved 128 byte (opt) |
+ * -----------------------------------------
+ * Future extensions will be added to _sigregs_ext.
+ */
+struct rt_sigframe
{
__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
- __u8 retcode[S390_SYSCALL_SIZE];
+ __u16 svc_insn;
struct siginfo info;
- struct ucontext uc;
-} rt_sigframe;
+ struct ucontext_extended uc;
+};
+
+/* Store registers needed to create the signal frame */
+static void store_sigregs(void)
+{
+ save_access_regs(current->thread.acrs);
+ save_fp_ctl(&current->thread.fp_regs.fpc);
+#ifdef CONFIG_64BIT
+ if (current->thread.vxrs) {
+ int i;
+
+ save_vx_regs(current->thread.vxrs);
+ for (i = 0; i < __NUM_FPRS; i++)
+ current->thread.fp_regs.fprs[i] =
+ *(freg_t *)(current->thread.vxrs + i);
+ } else
+#endif
+ save_fp_regs(current->thread.fp_regs.fprs);
+}
+
+/* Load registers after signal return */
+static void load_sigregs(void)
+{
+ restore_access_regs(current->thread.acrs);
+ /* restore_fp_ctl is done in restore_sigregs */
+#ifdef CONFIG_64BIT
+ if (current->thread.vxrs) {
+ int i;
+
+ for (i = 0; i < __NUM_FPRS; i++)
+ *(freg_t *)(current->thread.vxrs + i) =
+ current->thread.fp_regs.fprs[i];
+ restore_vx_regs(current->thread.vxrs);
+ } else
+#endif
+ restore_fp_regs(current->thread.fp_regs.fprs);
+}
/* Returns non-zero on fault. */
static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
{
_sigregs user_sregs;
- save_access_regs(current->thread.acrs);
-
/* Copy a 'clean' PSW mask to the user to avoid leaking
information about whether PER is currently on. */
user_sregs.regs.psw.mask = PSW_USER_BITS |
@@ -63,12 +150,6 @@ static int save_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&user_sregs.regs.gprs, &regs->gprs, sizeof(sregs->regs.gprs));
memcpy(&user_sregs.regs.acrs, current->thread.acrs,
sizeof(user_sregs.regs.acrs));
- /*
- * We have to store the fp registers to current->thread.fp_regs
- * to merge them with the emulated registers.
- */
- save_fp_ctl(&current->thread.fp_regs.fpc);
- save_fp_regs(current->thread.fp_regs.fprs);
memcpy(&user_sregs.fpregs, &current->thread.fp_regs,
sizeof(user_sregs.fpregs));
if (__copy_to_user(sregs, &user_sregs, sizeof(_sigregs)))
@@ -107,20 +188,64 @@ static int restore_sigregs(struct pt_regs *regs, _sigregs __user *sregs)
memcpy(&regs->gprs, &user_sregs.regs.gprs, sizeof(sregs->regs.gprs));
memcpy(&current->thread.acrs, &user_sregs.regs.acrs,
sizeof(current->thread.acrs));
- restore_access_regs(current->thread.acrs);
memcpy(&current->thread.fp_regs, &user_sregs.fpregs,
sizeof(current->thread.fp_regs));
- restore_fp_regs(current->thread.fp_regs.fprs);
clear_pt_regs_flag(regs, PIF_SYSCALL); /* No longer in a system call */
return 0;
}
+/* Returns non-zero on fault. */
+static int save_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+#ifdef CONFIG_64BIT
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Save vector registers to signal stack */
+ if (current->thread.vxrs) {
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ vxrs[i] = *((__u64 *)(current->thread.vxrs + i) + 1);
+ if (__copy_to_user(&sregs_ext->vxrs_low, vxrs,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_to_user(&sregs_ext->vxrs_high,
+ current->thread.vxrs + __NUM_VXRS_LOW,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ }
+#endif
+ return 0;
+}
+
+static int restore_sigregs_ext(struct pt_regs *regs,
+ _sigregs_ext __user *sregs_ext)
+{
+#ifdef CONFIG_64BIT
+ __u64 vxrs[__NUM_VXRS_LOW];
+ int i;
+
+ /* Restore vector registers from signal stack */
+ if (current->thread.vxrs) {
+ if (__copy_from_user(vxrs, &sregs_ext->vxrs_low,
+ sizeof(sregs_ext->vxrs_low)) ||
+ __copy_from_user(current->thread.vxrs + __NUM_VXRS_LOW,
+ &sregs_ext->vxrs_high,
+ sizeof(sregs_ext->vxrs_high)))
+ return -EFAULT;
+ for (i = 0; i < __NUM_VXRS_LOW; i++)
+ *((__u64 *)(current->thread.vxrs + i) + 1) = vxrs[i];
+ }
+#endif
+ return 0;
+}
+
SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
- sigframe __user *frame = (sigframe __user *)regs->gprs[15];
+ struct sigframe __user *frame =
+ (struct sigframe __user *) regs->gprs[15];
sigset_t set;
if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
@@ -128,6 +253,9 @@ SYSCALL_DEFINE0(sigreturn)
set_current_blocked(&set);
if (restore_sigregs(regs, &frame->sregs))
goto badframe;
+ if (restore_sigregs_ext(regs, &frame->sregs_ext))
+ goto badframe;
+ load_sigregs();
return regs->gprs[2];
badframe:
force_sig(SIGSEGV, current);
@@ -137,16 +265,20 @@ badframe:
SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = task_pt_regs(current);
- rt_sigframe __user *frame = (rt_sigframe __user *)regs->gprs[15];
+ struct rt_sigframe __user *frame =
+ (struct rt_sigframe __user *)regs->gprs[15];
sigset_t set;
if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
set_current_blocked(&set);
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
if (restore_sigregs(regs, &frame->uc.uc_mcontext))
goto badframe;
- if (restore_altstack(&frame->uc.uc_stack))
+ if (restore_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
goto badframe;
+ load_sigregs();
return regs->gprs[2];
badframe:
force_sig(SIGSEGV, current);
@@ -154,11 +286,6 @@ badframe:
}
/*
- * Set up a signal frame.
- */
-
-
-/*
* Determine which stack to use..
*/
static inline void __user *
@@ -195,39 +322,63 @@ static inline int map_signal(int sig)
static int setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs * regs)
{
- sigframe __user *frame;
-
- frame = get_sigframe(ka, regs, sizeof(sigframe));
+ struct sigframe __user *frame;
+ struct sigcontext sc;
+ unsigned long restorer;
+ size_t frame_size;
+ /*
+ * gprs_high are only present for a 31-bit task running on
+ * a 64-bit kernel (see compat_signal.c) but the space for
+ * gprs_high need to be allocated if vector registers are
+ * included in the signal frame on a 31-bit system.
+ */
+ frame_size = sizeof(*frame) - sizeof(frame->sregs_ext);
+ if (MACHINE_HAS_VX)
+ frame_size += sizeof(frame->sregs_ext);
+ frame = get_sigframe(ka, regs, frame_size);
if (frame == (void __user *) -1UL)
- goto give_sigsegv;
+ return -EFAULT;
- if (__copy_to_user(&frame->sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE))
- goto give_sigsegv;
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
+
+ /* Create struct sigcontext on the signal stack */
+ memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE);
+ sc.sregs = (_sigregs __user __force *) &frame->sregs;
+ if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc)))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+ /* Create _sigregs on the signal stack */
if (save_sigregs(regs, &frame->sregs))
- goto give_sigsegv;
- if (__put_user(&frame->sregs, &frame->sc.sregs))
- goto give_sigsegv;
+ return -EFAULT;
+
+ /* Place signal number on stack to allow backtrace from handler. */
+ if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
+ return -EFAULT;
+
+ /* Create _sigregs_ext on the signal stack */
+ if (save_sigregs_ext(regs, &frame->sregs_ext))
+ return -EFAULT;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (unsigned long)
- ka->sa.sa_restorer | PSW_ADDR_AMODE;
+ restorer = (unsigned long) ka->sa.sa_restorer | PSW_ADDR_AMODE;
} else {
- regs->gprs[14] = (unsigned long)
- frame->retcode | PSW_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn,
- (u16 __user *)(frame->retcode)))
- goto give_sigsegv;
+ /* Signal frame without vector registers are short ! */
+ __u16 __user *svc = (void *) frame + frame_size - 2;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long) svc | PSW_ADDR_AMODE;
}
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (addr_t __user *) frame))
- goto give_sigsegv;
-
/* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
regs->gprs[15] = (unsigned long) frame;
/* Force default amode and default user address space control. */
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
@@ -247,91 +398,95 @@ static int setup_frame(int sig, struct k_sigaction *ka,
regs->gprs[5] = regs->int_parm_long;
regs->gprs[6] = task_thread_info(current)->last_break;
}
-
- /* Place signal number on stack to allow backtrace from handler. */
- if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
- goto give_sigsegv;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
- sigset_t *set, struct pt_regs * regs)
+static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
+ struct pt_regs *regs)
{
- int err = 0;
- rt_sigframe __user *frame;
-
- frame = get_sigframe(ka, regs, sizeof(rt_sigframe));
+ struct rt_sigframe __user *frame;
+ unsigned long uc_flags, restorer;
+ size_t frame_size;
+ frame_size = sizeof(struct rt_sigframe) - sizeof(_sigregs_ext);
+ /*
+ * gprs_high are only present for a 31-bit task running on
+ * a 64-bit kernel (see compat_signal.c) but the space for
+ * gprs_high need to be allocated if vector registers are
+ * included in the signal frame on a 31-bit system.
+ */
+ uc_flags = 0;
+#ifdef CONFIG_64BIT
+ if (MACHINE_HAS_VX) {
+ frame_size += sizeof(_sigregs_ext);
+ if (current->thread.vxrs)
+ uc_flags |= UC_VXRS;
+ }
+#endif
+ frame = get_sigframe(&ksig->ka, regs, frame_size);
if (frame == (void __user *) -1UL)
- goto give_sigsegv;
-
- if (copy_siginfo_to_user(&frame->info, info))
- goto give_sigsegv;
+ return -EFAULT;
- /* Create the ucontext. */
- err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(NULL, &frame->uc.uc_link);
- err |= __save_altstack(&frame->uc.uc_stack, regs->gprs[15]);
- err |= save_sigregs(regs, &frame->uc.uc_mcontext);
- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
- if (err)
- goto give_sigsegv;
+ /* Set up backchain. */
+ if (__put_user(regs->gprs[15], (addr_t __user *) frame))
+ return -EFAULT;
/* Set up to return from userspace. If provided, use a stub
already in userspace. */
- if (ka->sa.sa_flags & SA_RESTORER) {
- regs->gprs[14] = (unsigned long)
- ka->sa.sa_restorer | PSW_ADDR_AMODE;
+ if (ksig->ka.sa.sa_flags & SA_RESTORER) {
+ restorer = (unsigned long)
+ ksig->ka.sa.sa_restorer | PSW_ADDR_AMODE;
} else {
- regs->gprs[14] = (unsigned long)
- frame->retcode | PSW_ADDR_AMODE;
- if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn,
- (u16 __user *)(frame->retcode)))
- goto give_sigsegv;
+ __u16 __user *svc = &frame->svc_insn;
+ if (__put_user(S390_SYSCALL_OPCODE | __NR_rt_sigreturn, svc))
+ return -EFAULT;
+ restorer = (unsigned long) svc | PSW_ADDR_AMODE;
}
- /* Set up backchain. */
- if (__put_user(regs->gprs[15], (addr_t __user *) frame))
- goto give_sigsegv;
+ /* Create siginfo on the signal stack */
+ if (copy_siginfo_to_user(&frame->info, &ksig->info))
+ return -EFAULT;
+
+ /* Store registers needed to create the signal frame */
+ store_sigregs();
+
+ /* Create ucontext on the signal stack. */
+ if (__put_user(uc_flags, &frame->uc.uc_flags) ||
+ __put_user(NULL, &frame->uc.uc_link) ||
+ __save_altstack(&frame->uc.uc_stack, regs->gprs[15]) ||
+ save_sigregs(regs, &frame->uc.uc_mcontext) ||
+ __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) ||
+ save_sigregs_ext(regs, &frame->uc.uc_mcontext_ext))
+ return -EFAULT;
/* Set up registers for signal handler */
+ regs->gprs[14] = restorer;
regs->gprs[15] = (unsigned long) frame;
/* Force default amode and default user address space control. */
regs->psw.mask = PSW_MASK_EA | PSW_MASK_BA |
(PSW_USER_BITS & PSW_MASK_ASC) |
(regs->psw.mask & ~PSW_MASK_ASC);
- regs->psw.addr = (unsigned long) ka->sa.sa_handler | PSW_ADDR_AMODE;
+ regs->psw.addr = (unsigned long) ksig->ka.sa.sa_handler | PSW_ADDR_AMODE;
- regs->gprs[2] = map_signal(sig);
+ regs->gprs[2] = map_signal(ksig->sig);
regs->gprs[3] = (unsigned long) &frame->info;
regs->gprs[4] = (unsigned long) &frame->uc;
regs->gprs[5] = task_thread_info(current)->last_break;
return 0;
-
-give_sigsegv:
- force_sigsegv(sig, current);
- return -EFAULT;
}
-static void handle_signal(unsigned long sig, struct k_sigaction *ka,
- siginfo_t *info, sigset_t *oldset,
- struct pt_regs *regs)
+static void handle_signal(struct ksignal *ksig, sigset_t *oldset,
+ struct pt_regs *regs)
{
int ret;
/* Set up the stack frame */
- if (ka->sa.sa_flags & SA_SIGINFO)
- ret = setup_rt_frame(sig, ka, info, oldset, regs);
+ if (ksig->ka.sa.sa_flags & SA_SIGINFO)
+ ret = setup_rt_frame(ksig, oldset, regs);
else
- ret = setup_frame(sig, ka, oldset, regs);
- if (ret)
- return;
- signal_delivered(sig, info, ka, regs,
- test_thread_flag(TIF_SINGLE_STEP));
+ ret = setup_frame(ksig->sig, &ksig->ka, oldset, regs);
+
+ signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLE_STEP));
}
/*
@@ -345,9 +500,7 @@ static void handle_signal(unsigned long sig, struct k_sigaction *ka,
*/
void do_signal(struct pt_regs *regs)
{
- siginfo_t info;
- int signr;
- struct k_sigaction ka;
+ struct ksignal ksig;
sigset_t *oldset = sigmask_to_save();
/*
@@ -357,9 +510,8 @@ void do_signal(struct pt_regs *regs)
*/
current_thread_info()->system_call =
test_pt_regs_flag(regs, PIF_SYSCALL) ? regs->int_code : 0;
- signr = get_signal_to_deliver(&info, &ka, regs, NULL);
- if (signr > 0) {
+ if (get_signal(&ksig)) {
/* Whee! Actually deliver the signal. */
if (current_thread_info()->system_call) {
regs->int_code = current_thread_info()->system_call;
@@ -370,7 +522,7 @@ void do_signal(struct pt_regs *regs)
regs->gprs[2] = -EINTR;
break;
case -ERESTARTSYS:
- if (!(ka.sa.sa_flags & SA_RESTART)) {
+ if (!(ksig.ka.sa.sa_flags & SA_RESTART)) {
regs->gprs[2] = -EINTR;
break;
}
@@ -387,9 +539,9 @@ void do_signal(struct pt_regs *regs)
clear_pt_regs_flag(regs, PIF_SYSCALL);
if (is_compat_task())
- handle_signal32(signr, &ka, &info, oldset, regs);
+ handle_signal32(&ksig, oldset, regs);
else
- handle_signal(signr, &ka, &info, oldset, regs);
+ handle_signal(&ksig, oldset, regs);
return;
}
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 243c7e5..6fd9e60 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -45,6 +45,7 @@
#include <asm/debug.h>
#include <asm/os_info.h>
#include <asm/sigp.h>
+#include <asm/idle.h>
#include "entry.h"
enum {
@@ -82,7 +83,8 @@ DEFINE_MUTEX(smp_cpu_state_mutex);
/*
* Signal processor helper functions.
*/
-static inline int __pcpu_sigp_relax(u16 addr, u8 order, u32 parm, u32 *status)
+static inline int __pcpu_sigp_relax(u16 addr, u8 order, unsigned long parm,
+ u32 *status)
{
int cc;
@@ -178,6 +180,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
goto out;
}
#else
+ if (MACHINE_HAS_VX)
+ lc->vector_save_area_addr =
+ (unsigned long) &lc->vector_save_area;
if (vdso_alloc_per_cpu(lc))
goto out;
#endif
@@ -333,12 +338,6 @@ int smp_vcpu_scheduled(int cpu)
return pcpu_running(pcpu_devices + cpu);
}
-void smp_yield(void)
-{
- if (MACHINE_HAS_DIAG44)
- asm volatile("diag 0,0,0x44");
-}
-
void smp_yield_cpu(int cpu)
{
if (MACHINE_HAS_DIAG9C)
@@ -517,35 +516,53 @@ EXPORT_SYMBOL(smp_ctl_clear_bit);
static void __init smp_get_save_area(int cpu, u16 address)
{
void *lc = pcpu_devices[0].lowcore;
- struct save_area *save_area;
+ struct save_area_ext *sa_ext;
+ unsigned long vx_sa;
if (is_kdump_kernel())
return;
if (!OLDMEM_BASE && (address == boot_cpu_address ||
ipl_info.type != IPL_TYPE_FCP_DUMP))
return;
- save_area = dump_save_area_create(cpu);
- if (!save_area)
+ sa_ext = dump_save_area_create(cpu);
+ if (!sa_ext)
panic("could not allocate memory for save area\n");
if (address == boot_cpu_address) {
/* Copy the registers of the boot cpu. */
- copy_oldmem_page(1, (void *) save_area, sizeof(*save_area),
+ copy_oldmem_page(1, (void *) &sa_ext->sa, sizeof(sa_ext->sa),
SAVE_AREA_BASE - PAGE_SIZE, 0);
+ if (MACHINE_HAS_VX)
+ save_vx_regs_safe(sa_ext->vx_regs);
return;
}
/* Get the registers of a non-boot cpu. */
__pcpu_sigp_relax(address, SIGP_STOP_AND_STORE_STATUS, 0, NULL);
- memcpy_real(save_area, lc + SAVE_AREA_BASE, sizeof(*save_area));
+ memcpy_real(&sa_ext->sa, lc + SAVE_AREA_BASE, sizeof(sa_ext->sa));
+ if (!MACHINE_HAS_VX)
+ return;
+ /* Get the VX registers */
+ vx_sa = __get_free_page(GFP_KERNEL);
+ if (!vx_sa)
+ panic("could not allocate memory for VX save area\n");
+ __pcpu_sigp_relax(address, SIGP_STORE_ADDITIONAL_STATUS, vx_sa, NULL);
+ memcpy(sa_ext->vx_regs, (void *) vx_sa, sizeof(sa_ext->vx_regs));
+ free_page(vx_sa);
}
int smp_store_status(int cpu)
{
+ unsigned long vx_sa;
struct pcpu *pcpu;
pcpu = pcpu_devices + cpu;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STOP_AND_STORE_STATUS,
0, NULL) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
+ if (!MACHINE_HAS_VX)
+ return 0;
+ vx_sa = __pa(pcpu->lowcore->vector_save_area_addr);
+ __pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
+ vx_sa, NULL);
return 0;
}
@@ -667,7 +684,7 @@ static void smp_start_secondary(void *cpuvoid)
cpu_init();
preempt_disable();
init_cpu_timer();
- init_cpu_vtimer();
+ vtime_init();
pfault_init();
notify_cpu_starting(smp_processor_id());
set_cpu_online(smp_processor_id(), true);
@@ -726,6 +743,7 @@ int __cpu_disable(void)
cregs[6] &= ~0xff000000UL; /* disable all I/O interrupts */
cregs[14] &= ~0x1f000000UL; /* disable most machine checks */
__ctl_load(cregs, 0, 15);
+ clear_cpu_flag(CIF_NOHZ_DELAY);
return 0;
}
@@ -898,42 +916,6 @@ static struct attribute_group cpu_common_attr_group = {
.attrs = cpu_common_attrs,
};
-static ssize_t show_idle_count(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long idle_count;
- unsigned int sequence;
-
- do {
- sequence = ACCESS_ONCE(idle->sequence);
- idle_count = ACCESS_ONCE(idle->idle_count);
- if (ACCESS_ONCE(idle->clock_idle_enter))
- idle_count++;
- } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
- return sprintf(buf, "%llu\n", idle_count);
-}
-static DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
-
-static ssize_t show_idle_time(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
- unsigned long long now, idle_time, idle_enter, idle_exit;
- unsigned int sequence;
-
- do {
- now = get_tod_clock();
- sequence = ACCESS_ONCE(idle->sequence);
- idle_time = ACCESS_ONCE(idle->idle_time);
- idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
- idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
- } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
- idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0;
- return sprintf(buf, "%llu\n", idle_time >> 12);
-}
-static DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
-
static struct attribute *cpu_online_attrs[] = {
&dev_attr_idle_count.attr,
&dev_attr_idle_time_us.attr,
diff --git a/arch/s390/kernel/suspend.c b/arch/s390/kernel/suspend.c
index a7a7537..1c4c5ac 100644
--- a/arch/s390/kernel/suspend.c
+++ b/arch/s390/kernel/suspend.c
@@ -13,14 +13,10 @@
#include <asm/ipl.h>
#include <asm/cio.h>
#include <asm/pci.h>
+#include <asm/sections.h>
#include "entry.h"
/*
- * References to section boundaries
- */
-extern const void __nosave_begin, __nosave_end;
-
-/*
* The restore of the saved pages in an hibernation image will set
* the change and referenced bits in the storage key for each page.
* Overindication of the referenced bits after an hibernation cycle
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index fe5cdf2..6fe886a 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -356,3 +356,6 @@ SYSCALL(sys_finit_module,sys_finit_module,compat_sys_finit_module)
SYSCALL(sys_sched_setattr,sys_sched_setattr,compat_sys_sched_setattr) /* 345 */
SYSCALL(sys_sched_getattr,sys_sched_getattr,compat_sys_sched_getattr)
SYSCALL(sys_renameat2,sys_renameat2,compat_sys_renameat2)
+SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp)
+SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom)
+SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 0931b11..005d665 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -92,7 +92,7 @@ void clock_comparator_work(void)
struct clock_event_device *cd;
S390_lowcore.clock_comparator = -1ULL;
- cd = &__get_cpu_var(comparators);
+ cd = this_cpu_ptr(&comparators);
cd->event_handler(cd);
}
@@ -214,26 +214,39 @@ void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
- if (tk->clock != &clocksource_tod)
+ if (tk->tkr.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
- vdso_data->xtime_tod_stamp = tk->clock->cycle_last;
+ vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->xtime_nsec;
+ vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->xtime_nsec +
- + ((u64) tk->wall_to_monotonic.tv_nsec << tk->shift);
- nsecps = (u64) NSEC_PER_SEC << tk->shift;
+ vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
}
- vdso_data->tk_mult = tk->mult;
- vdso_data->tk_shift = tk->shift;
+
+ vdso_data->xtime_coarse_sec = tk->xtime_sec;
+ vdso_data->xtime_coarse_nsec =
+ (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+ vdso_data->wtom_coarse_sec =
+ vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
+ vdso_data->wtom_coarse_nsec =
+ vdso_data->xtime_coarse_nsec + tk->wall_to_monotonic.tv_nsec;
+ while (vdso_data->wtom_coarse_nsec >= NSEC_PER_SEC) {
+ vdso_data->wtom_coarse_nsec -= NSEC_PER_SEC;
+ vdso_data->wtom_coarse_sec++;
+ }
+
+ vdso_data->tk_mult = tk->tkr.mult;
+ vdso_data->tk_shift = tk->tkr.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
@@ -360,7 +373,7 @@ EXPORT_SYMBOL(get_sync_clock);
*/
static void disable_sync_clock(void *dummy)
{
- atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
/*
* Clear the in-sync bit 2^31. All get_sync_clock calls will
* fail until the sync bit is turned back on. In addition
@@ -377,7 +390,7 @@ static void disable_sync_clock(void *dummy)
*/
static void enable_sync_clock(void)
{
- atomic_t *sw_ptr = &__get_cpu_var(clock_sync_word);
+ atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
atomic_set_mask(0x80000000, sw_ptr);
}
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 355a16c..b93bed7 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -464,15 +464,17 @@ static struct sched_domain_topology_level s390_topology[] = {
static int __init topology_init(void)
{
- if (!MACHINE_HAS_TOPOLOGY) {
+ if (MACHINE_HAS_TOPOLOGY)
+ set_topology_timer();
+ else
topology_update_polarization_simple();
- goto out;
- }
- set_topology_timer();
-out:
-
- set_sched_topology(s390_topology);
-
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);
+
+static int __init early_topology_init(void)
+{
+ set_sched_topology(s390_topology);
+ return 0;
+}
+early_initcall(early_topology_init);
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index c576232..9ff5ecb 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -18,6 +18,8 @@
#include <linux/ptrace.h>
#include <linux/sched.h>
#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/switch_to.h>
#include "entry.h"
int show_unhandled_signals = 1;
@@ -58,15 +60,10 @@ int is_valid_bugaddr(unsigned long addr)
return 1;
}
-static void __kprobes do_trap(struct pt_regs *regs,
- int si_signo, int si_code, char *str)
+void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
{
siginfo_t info;
- if (notify_die(DIE_TRAP, str, regs, 0,
- regs->int_code, si_signo) == NOTIFY_STOP)
- return;
-
if (user_mode(regs)) {
info.si_signo = si_signo;
info.si_errno = 0;
@@ -90,6 +87,15 @@ static void __kprobes do_trap(struct pt_regs *regs,
}
}
+static void __kprobes do_trap(struct pt_regs *regs, int si_signo, int si_code,
+ char *str)
+{
+ if (notify_die(DIE_TRAP, str, regs, 0,
+ regs->int_code, si_signo) == NOTIFY_STOP)
+ return;
+ do_report_trap(regs, si_signo, si_code, str);
+}
+
void __kprobes do_per_trap(struct pt_regs *regs)
{
siginfo_t info;
@@ -178,6 +184,7 @@ void __kprobes illegal_op(struct pt_regs *regs)
siginfo_t info;
__u8 opcode[6];
__u16 __user *location;
+ int is_uprobe_insn = 0;
int signal = 0;
location = get_trap_ip(regs);
@@ -194,6 +201,10 @@ void __kprobes illegal_op(struct pt_regs *regs)
force_sig_info(SIGTRAP, &info, current);
} else
signal = SIGILL;
+#ifdef CONFIG_UPROBES
+ } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) {
+ is_uprobe_insn = 1;
+#endif
#ifdef CONFIG_MATHEMU
} else if (opcode[0] == 0xb3) {
if (get_user(*((__u16 *) (opcode+2)), location+1))
@@ -219,11 +230,13 @@ void __kprobes illegal_op(struct pt_regs *regs)
#endif
} else
signal = SIGILL;
- } else {
- /*
- * If we get an illegal op in kernel mode, send it through the
- * kprobes notifier. If kprobes doesn't pick it up, SIGILL
- */
+ }
+ /*
+ * We got either an illegal op in kernel mode, or user space trapped
+ * on a uprobes illegal instruction. See if kprobes or uprobes picks
+ * it up. If not, SIGILL.
+ */
+ if (is_uprobe_insn || !user_mode(regs)) {
if (notify_die(DIE_BPT, "bpt", regs, 0,
3, SIGTRAP) != NOTIFY_STOP)
signal = SIGILL;
@@ -292,6 +305,74 @@ DO_ERROR_INFO(specification_exception, SIGILL, ILL_ILLOPN,
"specification exception");
#endif
+#ifdef CONFIG_64BIT
+int alloc_vector_registers(struct task_struct *tsk)
+{
+ __vector128 *vxrs;
+ int i;
+
+ /* Allocate vector register save area. */
+ vxrs = kzalloc(sizeof(__vector128) * __NUM_VXRS,
+ GFP_KERNEL|__GFP_REPEAT);
+ if (!vxrs)
+ return -ENOMEM;
+ preempt_disable();
+ if (tsk == current)
+ save_fp_regs(tsk->thread.fp_regs.fprs);
+ /* Copy the 16 floating point registers */
+ for (i = 0; i < 16; i++)
+ *(freg_t *) &vxrs[i] = tsk->thread.fp_regs.fprs[i];
+ tsk->thread.vxrs = vxrs;
+ if (tsk == current) {
+ __ctl_set_bit(0, 17);
+ restore_vx_regs(vxrs);
+ }
+ preempt_enable();
+ return 0;
+}
+
+void vector_exception(struct pt_regs *regs)
+{
+ int si_code, vic;
+
+ if (!MACHINE_HAS_VX) {
+ do_trap(regs, SIGILL, ILL_ILLOPN, "illegal operation");
+ return;
+ }
+
+ /* get vector interrupt code from fpc */
+ asm volatile("stfpc %0" : "=m" (current->thread.fp_regs.fpc));
+ vic = (current->thread.fp_regs.fpc & 0xf00) >> 8;
+ switch (vic) {
+ case 1: /* invalid vector operation */
+ si_code = FPE_FLTINV;
+ break;
+ case 2: /* division by zero */
+ si_code = FPE_FLTDIV;
+ break;
+ case 3: /* overflow */
+ si_code = FPE_FLTOVF;
+ break;
+ case 4: /* underflow */
+ si_code = FPE_FLTUND;
+ break;
+ case 5: /* inexact */
+ si_code = FPE_FLTRES;
+ break;
+ default: /* unknown cause */
+ si_code = 0;
+ }
+ do_trap(regs, SIGFPE, si_code, "vector exception");
+}
+
+static int __init disable_vector_extension(char *str)
+{
+ S390_lowcore.machine_flags &= ~MACHINE_FLAG_VX;
+ return 1;
+}
+__setup("novx", disable_vector_extension);
+#endif
+
void data_exception(struct pt_regs *regs)
{
__u16 __user *location;
@@ -357,6 +438,18 @@ void data_exception(struct pt_regs *regs)
}
}
#endif
+#ifdef CONFIG_64BIT
+ /* Check for vector register enablement */
+ if (MACHINE_HAS_VX && !current->thread.vxrs &&
+ (current->thread.fp_regs.fpc & FPC_DXC_MASK) == 0xfe00) {
+ alloc_vector_registers(current);
+ /* Vector data exception is suppressing, rewind psw. */
+ regs->psw.addr = __rewind_psw(regs->psw, regs->int_code >> 16);
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ return;
+ }
+#endif
+
if (current->thread.fp_regs.fpc & FPC_DXC_MASK)
signal = SIGFPE;
else
diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c
new file mode 100644
index 0000000..956f4f7
--- /dev/null
+++ b/arch/s390/kernel/uprobes.c
@@ -0,0 +1,332 @@
+/*
+ * User-space Probes (UProbes) for s390
+ *
+ * Copyright IBM Corp. 2014
+ * Author(s): Jan Willeke,
+ */
+
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+#include <linux/uprobes.h>
+#include <linux/compat.h>
+#include <linux/kdebug.h>
+#include <asm/switch_to.h>
+#include <asm/facility.h>
+#include <asm/dis.h>
+#include "entry.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ return probe_is_prohibited_opcode(auprobe->insn);
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if (psw_bits(regs->psw).eaba == PSW_AMODE_24BIT)
+ return -EINVAL;
+ if (!is_compat_task() && psw_bits(regs->psw).eaba == PSW_AMODE_31BIT)
+ return -EINVAL;
+ clear_pt_regs_flag(regs, PIF_PER_TRAP);
+ auprobe->saved_per = psw_bits(regs->psw).r;
+ auprobe->saved_int_code = regs->int_code;
+ regs->int_code = UPROBE_TRAP_NR;
+ regs->psw.addr = current->utask->xol_vaddr;
+ set_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *tsk)
+{
+ struct pt_regs *regs = task_pt_regs(tsk);
+
+ if (regs->int_code != UPROBE_TRAP_NR)
+ return true;
+ return false;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ int fixup = probe_get_fixup_type(auprobe->insn);
+ struct uprobe_task *utask = current->utask;
+
+ clear_tsk_thread_flag(current, TIF_UPROBE_SINGLESTEP);
+ update_cr_regs(current);
+ psw_bits(regs->psw).r = auprobe->saved_per;
+ regs->int_code = auprobe->saved_int_code;
+
+ if (fixup & FIXUP_PSW_NORMAL)
+ regs->psw.addr += utask->vaddr - utask->xol_vaddr;
+ if (fixup & FIXUP_RETURN_REGISTER) {
+ int reg = (auprobe->insn[0] & 0xf0) >> 4;
+
+ regs->gprs[reg] += utask->vaddr - utask->xol_vaddr;
+ }
+ if (fixup & FIXUP_BRANCH_NOT_TAKEN) {
+ int ilen = insn_length(auprobe->insn[0] >> 8);
+
+ if (regs->psw.addr - utask->xol_vaddr == ilen)
+ regs->psw.addr = utask->vaddr + ilen;
+ }
+ /* If per tracing was active generate trap */
+ if (regs->psw.mask & PSW_MASK_PER)
+ do_per_trap(regs);
+ return 0;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ struct die_args *args = data;
+ struct pt_regs *regs = args->regs;
+
+ if (!user_mode(regs))
+ return NOTIFY_DONE;
+ if (regs->int_code & 0x200) /* Trap during transaction */
+ return NOTIFY_DONE;
+ switch (val) {
+ case DIE_BPT:
+ if (uprobe_pre_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ break;
+ case DIE_SSTEP:
+ if (uprobe_post_sstep_notifier(regs))
+ return NOTIFY_STOP;
+ default:
+ break;
+ }
+ return NOTIFY_DONE;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ clear_thread_flag(TIF_UPROBE_SINGLESTEP);
+ regs->int_code = auprobe->saved_int_code;
+ regs->psw.addr = current->utask->vaddr;
+}
+
+unsigned long arch_uretprobe_hijack_return_addr(unsigned long trampoline,
+ struct pt_regs *regs)
+{
+ unsigned long orig;
+
+ orig = regs->gprs[14];
+ regs->gprs[14] = trampoline;
+ return orig;
+}
+
+/* Instruction Emulation */
+
+static void adjust_psw_addr(psw_t *psw, unsigned long len)
+{
+ psw->addr = __rewind_psw(*psw, -len);
+}
+
+#define EMU_ILLEGAL_OP 1
+#define EMU_SPECIFICATION 2
+#define EMU_ADDRESSING 3
+
+#define emu_load_ril(ptr, output) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else \
+ *(output) = input; \
+ __rc; \
+})
+
+#define emu_store_ril(ptr, input) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (put_user(*(input), ptr)) \
+ __rc = EMU_ADDRESSING; \
+ __rc; \
+})
+
+#define emu_cmp_ril(regs, ptr, cmp) \
+({ \
+ unsigned int mask = sizeof(*(ptr)) - 1; \
+ __typeof__(*(ptr)) input; \
+ int __rc = 0; \
+ \
+ if (!test_facility(34)) \
+ __rc = EMU_ILLEGAL_OP; \
+ else if ((u64 __force)ptr & mask) \
+ __rc = EMU_SPECIFICATION; \
+ else if (get_user(input, ptr)) \
+ __rc = EMU_ADDRESSING; \
+ else if (input > *(cmp)) \
+ psw_bits((regs)->psw).cc = 1; \
+ else if (input < *(cmp)) \
+ psw_bits((regs)->psw).cc = 2; \
+ else \
+ psw_bits((regs)->psw).cc = 0; \
+ __rc; \
+})
+
+struct insn_ril {
+ u8 opc0;
+ u8 reg : 4;
+ u8 opc1 : 4;
+ s32 disp;
+} __packed;
+
+union split_register {
+ u64 u64;
+ u32 u32[2];
+ u16 u16[4];
+ s64 s64;
+ s32 s32[2];
+ s16 s16[4];
+};
+
+/*
+ * pc relative instructions are emulated, since parameters may not be
+ * accessible from the xol area due to range limitations.
+ */
+static void handle_insn_ril(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ union split_register *rx;
+ struct insn_ril *insn;
+ unsigned int ilen;
+ void *uptr;
+ int rc = 0;
+
+ insn = (struct insn_ril *) &auprobe->insn;
+ rx = (union split_register *) &regs->gprs[insn->reg];
+ uptr = (void *)(regs->psw.addr + (insn->disp * 2));
+ ilen = insn_length(insn->opc0);
+
+ switch (insn->opc0) {
+ case 0xc0:
+ switch (insn->opc1) {
+ case 0x00: /* larl */
+ rx->u64 = (unsigned long)uptr;
+ break;
+ }
+ break;
+ case 0xc4:
+ switch (insn->opc1) {
+ case 0x02: /* llhrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x04: /* lghrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u64);
+ break;
+ case 0x05: /* lhrl */
+ rc = emu_load_ril((s16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x06: /* llghrl */
+ rc = emu_load_ril((u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x08: /* lgrl */
+ rc = emu_load_ril((u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* lgfrl */
+ rc = emu_load_ril((s32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0d: /* lrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x0e: /* llgfrl */
+ rc = emu_load_ril((u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* sthrl */
+ rc = emu_store_ril((u16 __user *)uptr, &rx->u16[3]);
+ break;
+ case 0x0b: /* stgrl */
+ rc = emu_store_ril((u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* strl */
+ rc = emu_store_ril((u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ case 0xc6:
+ switch (insn->opc1) {
+ case 0x02: /* pfdrl */
+ if (!test_facility(34))
+ rc = EMU_ILLEGAL_OP;
+ break;
+ case 0x04: /* cghrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s64);
+ break;
+ case 0x05: /* chrl */
+ rc = emu_cmp_ril(regs, (s16 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x06: /* clghrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u64);
+ break;
+ case 0x07: /* clhrl */
+ rc = emu_cmp_ril(regs, (u16 __user *)uptr, &rx->u32[1]);
+ break;
+ case 0x08: /* cgrl */
+ rc = emu_cmp_ril(regs, (s64 __user *)uptr, &rx->s64);
+ break;
+ case 0x0a: /* clgrl */
+ rc = emu_cmp_ril(regs, (u64 __user *)uptr, &rx->u64);
+ break;
+ case 0x0c: /* cgfrl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s64);
+ break;
+ case 0x0d: /* crl */
+ rc = emu_cmp_ril(regs, (s32 __user *)uptr, &rx->s32[1]);
+ break;
+ case 0x0e: /* clgfrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u64);
+ break;
+ case 0x0f: /* clrl */
+ rc = emu_cmp_ril(regs, (u32 __user *)uptr, &rx->u32[1]);
+ break;
+ }
+ break;
+ }
+ adjust_psw_addr(&regs->psw, ilen);
+ switch (rc) {
+ case EMU_ILLEGAL_OP:
+ regs->int_code = ilen << 16 | 0x0001;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC, NULL);
+ break;
+ case EMU_SPECIFICATION:
+ regs->int_code = ilen << 16 | 0x0006;
+ do_report_trap(regs, SIGILL, ILL_ILLOPC , NULL);
+ break;
+ case EMU_ADDRESSING:
+ regs->int_code = ilen << 16 | 0x0005;
+ do_report_trap(regs, SIGSEGV, SEGV_MAPERR, NULL);
+ break;
+ }
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ if ((psw_bits(regs->psw).eaba == PSW_AMODE_24BIT) ||
+ ((psw_bits(regs->psw).eaba == PSW_AMODE_31BIT) &&
+ !is_compat_task())) {
+ regs->psw.addr = __rewind_psw(regs->psw, UPROBE_SWBP_INSN_SIZE);
+ do_report_trap(regs, SIGILL, ILL_ILLADR, NULL);
+ return true;
+ }
+ if (probe_is_insn_relative_long(auprobe->insn)) {
+ handle_insn_ril(auprobe, regs);
+ return true;
+ }
+ return false;
+}
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 6136490..0bbb7e0 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -316,18 +316,3 @@ static int __init vdso_init(void)
return 0;
}
early_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
diff --git a/arch/s390/kernel/vdso32/clock_getres.S b/arch/s390/kernel/vdso32/clock_getres.S
index 36aaa25..eca3f00 100644
--- a/arch/s390/kernel/vdso32/clock_getres.S
+++ b/arch/s390/kernel/vdso32/clock_getres.S
@@ -19,14 +19,20 @@
.type __kernel_clock_getres,@function
__kernel_clock_getres:
.cfi_startproc
+ basr %r1,0
+ la %r1,4f-.(%r1)
chi %r2,__CLOCK_REALTIME
je 0f
chi %r2,__CLOCK_MONOTONIC
+ je 0f
+ la %r1,5f-4f(%r1)
+ chi %r2,__CLOCK_REALTIME_COARSE
+ je 0f
+ chi %r2,__CLOCK_MONOTONIC_COARSE
jne 3f
0: ltr %r3,%r3
jz 2f /* res == NULL */
- basr %r1,0
-1: l %r0,4f-1b(%r1)
+1: l %r0,0(%r1)
xc 0(4,%r3),0(%r3) /* set tp->tv_sec to zero */
st %r0,4(%r3) /* store tp->tv_usec */
2: lhi %r2,0
@@ -35,5 +41,6 @@ __kernel_clock_getres:
svc 0
br %r14
4: .long __CLOCK_REALTIME_RES
+5: .long __CLOCK_COARSE_RES
.cfi_endproc
.size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso32/clock_gettime.S b/arch/s390/kernel/vdso32/clock_gettime.S
index 65fc397..48c2206 100644
--- a/arch/s390/kernel/vdso32/clock_gettime.S
+++ b/arch/s390/kernel/vdso32/clock_gettime.S
@@ -21,19 +21,21 @@ __kernel_clock_gettime:
.cfi_startproc
basr %r5,0
0: al %r5,21f-0b(%r5) /* get &_vdso_data */
- chi %r2,__CLOCK_REALTIME
+ chi %r2,__CLOCK_REALTIME_COARSE
je 10f
+ chi %r2,__CLOCK_REALTIME
+ je 11f
+ chi %r2,__CLOCK_MONOTONIC_COARSE
+ je 9f
chi %r2,__CLOCK_MONOTONIC
jne 19f
/* CLOCK_MONOTONIC */
- ltr %r3,%r3
- jz 9f /* tp == NULL */
1: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
tml %r4,0x0001 /* pending update ? loop */
jnz 1b
- stck 24(%r15) /* Store TOD clock */
- lm %r0,%r1,24(%r15)
+ stcke 24(%r15) /* Store TOD clock */
+ lm %r0,%r1,25(%r15)
s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,2f
@@ -67,17 +69,35 @@ __kernel_clock_gettime:
j 6b
8: st %r2,0(%r3) /* store tp->tv_sec */
st %r1,4(%r3) /* store tp->tv_nsec */
-9: lhi %r2,0
+ lhi %r2,0
br %r14
+ /* CLOCK_MONOTONIC_COARSE */
+9: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 9b
+ l %r2,__VDSO_WTOM_CRS_SEC+4(%r5)
+ l %r1,__VDSO_WTOM_CRS_NSEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 9b
+ j 8b
+
+ /* CLOCK_REALTIME_COARSE */
+10: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
+ tml %r4,0x0001 /* pending update ? loop */
+ jnz 10b
+ l %r2,__VDSO_XTIME_CRS_SEC+4(%r5)
+ l %r1,__VDSO_XTIME_CRS_NSEC+4(%r5)
+ cl %r4,__VDSO_UPD_COUNT+4(%r5) /* check update counter */
+ jne 10b
+ j 17f
+
/* CLOCK_REALTIME */
-10: ltr %r3,%r3 /* tp == NULL */
- jz 18f
11: l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
tml %r4,0x0001 /* pending update ? loop */
jnz 11b
- stck 24(%r15) /* Store TOD clock */
- lm %r0,%r1,24(%r15)
+ stcke 24(%r15) /* Store TOD clock */
+ lm %r0,%r1,25(%r15)
s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,12f
@@ -111,7 +131,7 @@ __kernel_clock_gettime:
j 15b
17: st %r2,0(%r3) /* store tp->tv_sec */
st %r1,4(%r3) /* store tp->tv_nsec */
-18: lhi %r2,0
+ lhi %r2,0
br %r14
/* Fallback to system call */
diff --git a/arch/s390/kernel/vdso32/gettimeofday.S b/arch/s390/kernel/vdso32/gettimeofday.S
index fd621a9..60def5f 100644
--- a/arch/s390/kernel/vdso32/gettimeofday.S
+++ b/arch/s390/kernel/vdso32/gettimeofday.S
@@ -29,8 +29,8 @@ __kernel_gettimeofday:
l %r4,__VDSO_UPD_COUNT+4(%r5) /* load update counter */
tml %r4,0x0001 /* pending update ? loop */
jnz 1b
- stck 24(%r15) /* Store TOD clock */
- lm %r0,%r1,24(%r15)
+ stcke 24(%r15) /* Store TOD clock */
+ lm %r0,%r1,25(%r15)
s %r0,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
sl %r1,__VDSO_XTIME_STAMP+4(%r5)
brc 3,3f
diff --git a/arch/s390/kernel/vdso64/clock_getres.S b/arch/s390/kernel/vdso64/clock_getres.S
index 34deba7..c8513de 100644
--- a/arch/s390/kernel/vdso64/clock_getres.S
+++ b/arch/s390/kernel/vdso64/clock_getres.S
@@ -19,6 +19,12 @@
.type __kernel_clock_getres,@function
__kernel_clock_getres:
.cfi_startproc
+ larl %r1,4f
+ cghi %r2,__CLOCK_REALTIME_COARSE
+ je 0f
+ cghi %r2,__CLOCK_MONOTONIC_COARSE
+ je 0f
+ larl %r1,3f
cghi %r2,__CLOCK_REALTIME
je 0f
cghi %r2,__CLOCK_MONOTONIC
@@ -32,7 +38,6 @@ __kernel_clock_getres:
jz 2f
0: ltgr %r3,%r3
jz 1f /* res == NULL */
- larl %r1,3f
lg %r0,0(%r1)
xc 0(8,%r3),0(%r3) /* set tp->tv_sec to zero */
stg %r0,8(%r3) /* store tp->tv_usec */
@@ -42,5 +47,6 @@ __kernel_clock_getres:
svc 0
br %r14
3: .quad __CLOCK_REALTIME_RES
+4: .quad __CLOCK_COARSE_RES
.cfi_endproc
.size __kernel_clock_getres,.-__kernel_clock_getres
diff --git a/arch/s390/kernel/vdso64/clock_gettime.S b/arch/s390/kernel/vdso64/clock_gettime.S
index 91940ed..9d9761f 100644
--- a/arch/s390/kernel/vdso64/clock_gettime.S
+++ b/arch/s390/kernel/vdso64/clock_gettime.S
@@ -20,25 +20,27 @@
__kernel_clock_gettime:
.cfi_startproc
larl %r5,_vdso_data
- cghi %r2,__CLOCK_REALTIME
+ cghi %r2,__CLOCK_REALTIME_COARSE
je 4f
+ cghi %r2,__CLOCK_REALTIME
+ je 5f
cghi %r2,__CLOCK_THREAD_CPUTIME_ID
je 9f
cghi %r2,-2 /* Per-thread CPUCLOCK with PID=0, VIRT=1 */
je 9f
+ cghi %r2,__CLOCK_MONOTONIC_COARSE
+ je 3f
cghi %r2,__CLOCK_MONOTONIC
jne 12f
/* CLOCK_MONOTONIC */
- ltgr %r3,%r3
- jz 3f /* tp == NULL */
0: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
tmll %r4,0x0001 /* pending update ? loop */
jnz 0b
- stck 48(%r15) /* Store TOD clock */
+ stcke 48(%r15) /* Store TOD clock */
lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
lg %r0,__VDSO_WTOM_SEC(%r5)
- lg %r1,48(%r15)
+ lg %r1,49(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
alg %r1,__VDSO_WTOM_NSEC(%r5)
@@ -53,18 +55,36 @@ __kernel_clock_gettime:
j 1b
2: stg %r0,0(%r3) /* store tp->tv_sec */
stg %r1,8(%r3) /* store tp->tv_nsec */
-3: lghi %r2,0
+ lghi %r2,0
br %r14
+ /* CLOCK_MONOTONIC_COARSE */
+3: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 3b
+ lg %r0,__VDSO_WTOM_CRS_SEC(%r5)
+ lg %r1,__VDSO_WTOM_CRS_NSEC(%r5)
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 3b
+ j 2b
+
+ /* CLOCK_REALTIME_COARSE */
+4: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
+ tmll %r4,0x0001 /* pending update ? loop */
+ jnz 4b
+ lg %r0,__VDSO_XTIME_CRS_SEC(%r5)
+ lg %r1,__VDSO_XTIME_CRS_NSEC(%r5)
+ clg %r4,__VDSO_UPD_COUNT(%r5) /* check update counter */
+ jne 4b
+ j 7f
+
/* CLOCK_REALTIME */
-4: ltr %r3,%r3 /* tp == NULL */
- jz 8f
5: lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
tmll %r4,0x0001 /* pending update ? loop */
jnz 5b
- stck 48(%r15) /* Store TOD clock */
+ stcke 48(%r15) /* Store TOD clock */
lgf %r2,__VDSO_TK_SHIFT(%r5) /* Timekeeper shift */
- lg %r1,48(%r15)
+ lg %r1,49(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
@@ -80,7 +100,7 @@ __kernel_clock_gettime:
j 6b
7: stg %r0,0(%r3) /* store tp->tv_sec */
stg %r1,8(%r3) /* store tp->tv_nsec */
-8: lghi %r2,0
+ lghi %r2,0
br %r14
/* CLOCK_THREAD_CPUTIME_ID for this thread */
diff --git a/arch/s390/kernel/vdso64/gettimeofday.S b/arch/s390/kernel/vdso64/gettimeofday.S
index d0860d1..7a34499 100644
--- a/arch/s390/kernel/vdso64/gettimeofday.S
+++ b/arch/s390/kernel/vdso64/gettimeofday.S
@@ -28,8 +28,8 @@ __kernel_gettimeofday:
lg %r4,__VDSO_UPD_COUNT(%r5) /* load update counter */
tmll %r4,0x0001 /* pending update ? loop */
jnz 0b
- stck 48(%r15) /* Store TOD clock */
- lg %r1,48(%r15)
+ stcke 48(%r15) /* Store TOD clock */
+ lg %r1,49(%r15)
sg %r1,__VDSO_XTIME_STAMP(%r5) /* TOD - cycle_last */
msgf %r1,__VDSO_TK_MULT(%r5) /* * tk->mult */
alg %r1,__VDSO_XTIME_NSEC(%r5) /* + tk->xtime_nsec */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 8c34363..416f2a3 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -6,27 +6,18 @@
*/
#include <linux/kernel_stat.h>
-#include <linux/notifier.h>
-#include <linux/kprobes.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/timex.h>
#include <linux/types.h>
#include <linux/time.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/vtimer.h>
#include <asm/vtime.h>
-#include <asm/irq.h>
-#include "entry.h"
static void virt_timer_expire(void);
-DEFINE_PER_CPU(struct s390_idle_data, s390_idle);
-
static LIST_HEAD(virt_timer_list);
static DEFINE_SPINLOCK(virt_timer_lock);
static atomic64_t virt_timer_current;
@@ -152,49 +143,6 @@ void vtime_account_system(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
EXPORT_SYMBOL_GPL(vtime_account_system);
-void __kprobes vtime_stop_cpu(void)
-{
- struct s390_idle_data *idle = &__get_cpu_var(s390_idle);
- unsigned long long idle_time;
- unsigned long psw_mask;
-
- trace_hardirqs_on();
-
- /* Wait for external, I/O or machine check interrupt. */
- psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
- idle->nohz_delay = 0;
-
- /* Call the assembler magic in entry.S */
- psw_idle(idle, psw_mask);
-
- /* Account time spent with enabled wait psw loaded as idle time. */
- idle->sequence++;
- smp_wmb();
- idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
- idle->clock_idle_enter = idle->clock_idle_exit = 0ULL;
- idle->idle_time += idle_time;
- idle->idle_count++;
- account_idle_time(idle_time);
- smp_wmb();
- idle->sequence++;
-}
-
-cputime64_t s390_get_idle_time(int cpu)
-{
- struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
- unsigned long long now, idle_enter, idle_exit;
- unsigned int sequence;
-
- do {
- now = get_tod_clock();
- sequence = ACCESS_ONCE(idle->sequence);
- idle_enter = ACCESS_ONCE(idle->clock_idle_enter);
- idle_exit = ACCESS_ONCE(idle->clock_idle_exit);
- } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence));
- return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0;
-}
-
/*
* Sorted add to a list. List is linear searched until first bigger
* element is found.
@@ -372,31 +320,8 @@ EXPORT_SYMBOL(del_virt_timer);
/*
* Start the virtual CPU timer on the current CPU.
*/
-void init_cpu_vtimer(void)
+void vtime_init(void)
{
/* set initial cpu timer */
set_vtimer(VTIMER_MAX_SLICE);
}
-
-static int s390_nohz_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
-{
- struct s390_idle_data *idle;
- long cpu = (long) hcpu;
-
- idle = &per_cpu(s390_idle, cpu);
- switch (action & ~CPU_TASKS_FROZEN) {
- case CPU_DYING:
- idle->nohz_delay = 0;
- default:
- break;
- }
- return NOTIFY_OK;
-}
-
-void __init vtime_init(void)
-{
- /* Enable cpu timer interrupts on the boot cpu. */
- init_cpu_vtimer();
- cpu_notifier(s390_nohz_notify, 0);
-}
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 10d529a..646db9c 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -26,6 +26,7 @@ config KVM
select KVM_ASYNC_PF
select KVM_ASYNC_PF_SYNC
select HAVE_KVM_IRQCHIP
+ select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
---help---
Support hosting paravirtualized guest machines using the SIE
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 0161675..9254aff 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -28,22 +28,32 @@ static int diag_release_pages(struct kvm_vcpu *vcpu)
start = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
end = vcpu->run->s.regs.gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
- if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
+ if (start & ~PAGE_MASK || end & ~PAGE_MASK || start >= end
|| start < 2 * PAGE_SIZE)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
vcpu->stat.diagnose_10++;
- /* we checked for start > end above */
- if (end < prefix || start >= prefix + 2 * PAGE_SIZE) {
- gmap_discard(start, end, vcpu->arch.gmap);
+ /*
+ * We checked for start >= end above, so lets check for the
+ * fast path (no prefix swap page involved)
+ */
+ if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
+ gmap_discard(vcpu->arch.gmap, start, end);
} else {
- if (start < prefix)
- gmap_discard(start, prefix, vcpu->arch.gmap);
- if (end >= prefix)
- gmap_discard(prefix + 2 * PAGE_SIZE,
- end, vcpu->arch.gmap);
+ /*
+ * This is slow path. gmap_discard will check for start
+ * so lets split this into before prefix, prefix, after
+ * prefix and let gmap_discard make some of these calls
+ * NOPs.
+ */
+ gmap_discard(vcpu->arch.gmap, start, prefix);
+ if (start <= prefix)
+ gmap_discard(vcpu->arch.gmap, 0, 4096);
+ if (end > prefix + 4096)
+ gmap_discard(vcpu->arch.gmap, 4096, 8192);
+ gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
}
return 0;
}
@@ -176,7 +186,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
- kvm_s390_vcpu_stop(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 4653ac6..0f961a1 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -254,8 +254,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
new = old = ACCESS_ONCE(*ic);
new.k = 0;
} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
- if (!ipte_lock_count)
- wake_up(&vcpu->kvm->arch.ipte_wq);
+ wake_up(&vcpu->kvm->arch.ipte_wq);
out:
mutex_unlock(&ipte_mutex);
}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index a0b586c..eaf4629 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -56,32 +56,26 @@ static int handle_noop(struct kvm_vcpu *vcpu)
static int handle_stop(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ unsigned int action_bits;
vcpu->stat.exit_stop_request++;
- spin_lock_bh(&vcpu->arch.local_int.lock);
-
trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
- if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
- kvm_s390_vcpu_stop(vcpu);
- vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
- VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
- rc = -EOPNOTSUPP;
- }
+ action_bits = vcpu->arch.local_int.action_bits;
- if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
- vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
- /* store status must be called unlocked. Since local_int.lock
- * only protects local_int.* and not guest memory we can give
- * up the lock here */
- spin_unlock_bh(&vcpu->arch.local_int.lock);
+ if (!(action_bits & ACTION_STOP_ON_STOP))
+ return 0;
+
+ if (action_bits & ACTION_STORE_ON_STOP) {
rc = kvm_s390_vcpu_store_status(vcpu,
KVM_S390_STORE_STATUS_NOADDR);
- if (rc >= 0)
- rc = -EOPNOTSUPP;
- } else
- spin_unlock_bh(&vcpu->arch.local_int.lock);
- return rc;
+ if (rc)
+ return rc;
+ }
+
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
+ return -EOPNOTSUPP;
}
static int handle_validity(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 90c8de2..a398384 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -26,8 +26,9 @@
#define IOINT_SSID_MASK 0x00030000
#define IOINT_CSSID_MASK 0x03fc0000
#define IOINT_AI_MASK 0x04000000
+#define PFAULT_INIT 0x0600
-static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
static int is_ioint(u64 type)
{
@@ -76,7 +77,7 @@ static u64 int_word_to_isc_bits(u32 int_word)
return (0x80 >> isc) << 24;
}
-static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
+static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
switch (inti->type) {
@@ -85,6 +86,7 @@ static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
return 0;
if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
return 1;
+ return 0;
case KVM_S390_INT_EMERGENCY:
if (psw_extint_disabled(vcpu))
return 0;
@@ -158,6 +160,9 @@ static void __reset_intercept_indicators(struct kvm_vcpu *vcpu)
LCTL_CR10 | LCTL_CR11);
vcpu->arch.sie_block->ictl |= (ICTL_STCTL | ICTL_PINT);
}
+
+ if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP)
+ atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
}
static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
@@ -202,11 +207,30 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
}
}
-static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
- struct kvm_s390_pgm_info *pgm_info)
+static u16 get_ilc(struct kvm_vcpu *vcpu)
{
const unsigned short table[] = { 2, 4, 4, 6 };
+
+ switch (vcpu->arch.sie_block->icptcode) {
+ case ICPT_INST:
+ case ICPT_INSTPROGI:
+ case ICPT_OPEREXC:
+ case ICPT_PARTEXEC:
+ case ICPT_IOINST:
+ /* last instruction only stored for these icptcodes */
+ return table[vcpu->arch.sie_block->ipa >> 14];
+ case ICPT_PROGI:
+ return vcpu->arch.sie_block->pgmilc;
+ default:
+ return 0;
+ }
+}
+
+static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
+ struct kvm_s390_pgm_info *pgm_info)
+{
int rc = 0;
+ u16 ilc = get_ilc(vcpu);
switch (pgm_info->code & ~PGM_PER) {
case PGM_AFX_TRANSLATION:
@@ -273,25 +297,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
(u8 *) __LC_PER_ACCESS_ID);
}
- switch (vcpu->arch.sie_block->icptcode) {
- case ICPT_INST:
- case ICPT_INSTPROGI:
- case ICPT_OPEREXC:
- case ICPT_PARTEXEC:
- case ICPT_IOINST:
- /* last instruction only stored for these icptcodes */
- rc |= put_guest_lc(vcpu, table[vcpu->arch.sie_block->ipa >> 14],
- (u16 *) __LC_PGM_ILC);
- break;
- case ICPT_PROGI:
- rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->pgmilc,
- (u16 *) __LC_PGM_ILC);
- break;
- default:
- rc |= put_guest_lc(vcpu, 0,
- (u16 *) __LC_PGM_ILC);
- }
-
+ rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
rc |= put_guest_lc(vcpu, pgm_info->code,
(u16 *)__LC_PGM_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -302,7 +308,7 @@ static int __deliver_prog_irq(struct kvm_vcpu *vcpu,
return rc;
}
-static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
+static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info *inti)
{
const unsigned short table[] = { 2, 4, 4, 6 };
@@ -340,7 +346,7 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_INT_CLOCK_COMP:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
inti->ext.ext_params, 0);
- deliver_ckc_interrupt(vcpu);
+ rc = deliver_ckc_interrupt(vcpu);
break;
case KVM_S390_INT_CPU_TIMER:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
@@ -373,8 +379,9 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
case KVM_S390_INT_PFAULT_INIT:
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
inti->ext.ext_params2);
- rc = put_guest_lc(vcpu, 0x2603, (u16 *) __LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, 0x0600, (u16 *) __LC_EXT_CPU_ADDR);
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+ (u16 *) __LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
@@ -498,14 +505,11 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu,
default:
BUG();
}
- if (rc) {
- printk("kvm: The guest lowcore is not mapped during interrupt "
- "delivery, killing userspace\n");
- do_exit(SIGKILL);
- }
+
+ return rc;
}
-static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
+static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
{
int rc;
@@ -515,11 +519,7 @@ static void deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw,
sizeof(psw_t));
- if (rc) {
- printk("kvm: The guest lowcore is not mapped during interrupt "
- "delivery, killing userspace\n");
- do_exit(SIGKILL);
- }
+ return rc;
}
/* Check whether SIGP interpretation facility has an external call pending */
@@ -544,13 +544,13 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
int rc = 0;
if (atomic_read(&li->active)) {
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry(inti, &li->list, list)
if (__interrupt_is_deliverable(vcpu, inti)) {
rc = 1;
break;
}
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
}
if ((!rc) && atomic_read(&fi->active)) {
@@ -585,88 +585,57 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
{
u64 now, sltime;
- DECLARE_WAITQUEUE(wait, current);
vcpu->stat.exit_wait_state++;
- if (kvm_cpu_has_interrupt(vcpu))
- return 0;
- __set_cpu_idle(vcpu);
- spin_lock_bh(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.timer_due = 0;
- spin_unlock_bh(&vcpu->arch.local_int.lock);
+ /* fast path */
+ if (kvm_cpu_has_pending_timer(vcpu) || kvm_arch_vcpu_runnable(vcpu))
+ return 0;
if (psw_interrupts_disabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "disabled wait");
- __unset_cpu_idle(vcpu);
return -EOPNOTSUPP; /* disabled wait */
}
+ __set_cpu_idle(vcpu);
if (!ckc_interrupts_enabled(vcpu)) {
VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer");
goto no_timer;
}
now = get_tod_clock_fast() + vcpu->arch.sie_block->epoch;
- if (vcpu->arch.sie_block->ckc < now) {
- __unset_cpu_idle(vcpu);
- return 0;
- }
-
sltime = tod_to_ns(vcpu->arch.sie_block->ckc - now);
-
hrtimer_start(&vcpu->arch.ckc_timer, ktime_set (0, sltime) , HRTIMER_MODE_REL);
VCPU_EVENT(vcpu, 5, "enabled wait via clock comparator: %llx ns", sltime);
no_timer:
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
- spin_lock(&vcpu->arch.local_int.float_int->lock);
- spin_lock_bh(&vcpu->arch.local_int.lock);
- add_wait_queue(&vcpu->wq, &wait);
- while (list_empty(&vcpu->arch.local_int.list) &&
- list_empty(&vcpu->arch.local_int.float_int->list) &&
- (!vcpu->arch.local_int.timer_due) &&
- !signal_pending(current) &&
- !kvm_s390_si_ext_call_pending(vcpu)) {
- set_current_state(TASK_INTERRUPTIBLE);
- spin_unlock_bh(&vcpu->arch.local_int.lock);
- spin_unlock(&vcpu->arch.local_int.float_int->lock);
- schedule();
- spin_lock(&vcpu->arch.local_int.float_int->lock);
- spin_lock_bh(&vcpu->arch.local_int.lock);
- }
+ kvm_vcpu_block(vcpu);
__unset_cpu_idle(vcpu);
- __set_current_state(TASK_RUNNING);
- remove_wait_queue(&vcpu->wq, &wait);
- spin_unlock_bh(&vcpu->arch.local_int.lock);
- spin_unlock(&vcpu->arch.local_int.float_int->lock);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
hrtimer_try_to_cancel(&vcpu->arch.ckc_timer);
return 0;
}
-void kvm_s390_tasklet(unsigned long parm)
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
{
- struct kvm_vcpu *vcpu = (struct kvm_vcpu *) parm;
-
- spin_lock(&vcpu->arch.local_int.lock);
- vcpu->arch.local_int.timer_due = 1;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq)) {
+ /*
+ * The vcpu gave up the cpu voluntarily, mark it as a good
+ * yield-candidate.
+ */
+ vcpu->preempted = true;
wake_up_interruptible(&vcpu->wq);
- spin_unlock(&vcpu->arch.local_int.lock);
+ vcpu->stat.halt_wakeup++;
+ }
}
-/*
- * low level hrtimer wake routine. Because this runs in hardirq context
- * we schedule a tasklet to do the real work.
- */
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer)
{
struct kvm_vcpu *vcpu;
vcpu = container_of(timer, struct kvm_vcpu, arch.ckc_timer);
- vcpu->preempted = true;
- tasklet_schedule(&vcpu->arch.tasklet);
+ kvm_s390_vcpu_wakeup(vcpu);
return HRTIMER_NORESTART;
}
@@ -676,13 +645,13 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_interrupt_info *n, *inti = NULL;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry_safe(inti, n, &li->list, list) {
list_del(&inti->list);
kfree(inti);
}
atomic_set(&li->active, 0);
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
/* clear pending external calls set by sigp interpretation facility */
atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
@@ -690,18 +659,19 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
&vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
}
-void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
struct kvm_s390_interrupt_info *n, *inti = NULL;
int deliver;
+ int rc = 0;
__reset_intercept_indicators(vcpu);
if (atomic_read(&li->active)) {
do {
deliver = 0;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_for_each_entry_safe(inti, n, &li->list, list) {
if (__interrupt_is_deliverable(vcpu, inti)) {
list_del(&inti->list);
@@ -712,18 +682,18 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
}
if (list_empty(&li->list))
atomic_set(&li->active, 0);
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
+ rc = __do_deliver_interrupt(vcpu, inti);
kfree(inti);
}
- } while (deliver);
+ } while (!rc && deliver);
}
- if (kvm_cpu_has_pending_timer(vcpu))
- deliver_ckc_interrupt(vcpu);
+ if (!rc && kvm_cpu_has_pending_timer(vcpu))
+ rc = deliver_ckc_interrupt(vcpu);
- if (atomic_read(&fi->active)) {
+ if (!rc && atomic_read(&fi->active)) {
do {
deliver = 0;
spin_lock(&fi->lock);
@@ -740,67 +710,13 @@ void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
atomic_set(&fi->active, 0);
spin_unlock(&fi->lock);
if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
+ rc = __do_deliver_interrupt(vcpu, inti);
kfree(inti);
}
- } while (deliver);
+ } while (!rc && deliver);
}
-}
-void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu)
-{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
- struct kvm_s390_interrupt_info *n, *inti = NULL;
- int deliver;
-
- __reset_intercept_indicators(vcpu);
- if (atomic_read(&li->active)) {
- do {
- deliver = 0;
- spin_lock_bh(&li->lock);
- list_for_each_entry_safe(inti, n, &li->list, list) {
- if ((inti->type == KVM_S390_MCHK) &&
- __interrupt_is_deliverable(vcpu, inti)) {
- list_del(&inti->list);
- deliver = 1;
- break;
- }
- __set_intercept_indicator(vcpu, inti);
- }
- if (list_empty(&li->list))
- atomic_set(&li->active, 0);
- spin_unlock_bh(&li->lock);
- if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
- kfree(inti);
- }
- } while (deliver);
- }
-
- if (atomic_read(&fi->active)) {
- do {
- deliver = 0;
- spin_lock(&fi->lock);
- list_for_each_entry_safe(inti, n, &fi->list, list) {
- if ((inti->type == KVM_S390_MCHK) &&
- __interrupt_is_deliverable(vcpu, inti)) {
- list_del(&inti->list);
- fi->irq_count--;
- deliver = 1;
- break;
- }
- __set_intercept_indicator(vcpu, inti);
- }
- if (list_empty(&fi->list))
- atomic_set(&fi->active, 0);
- spin_unlock(&fi->lock);
- if (deliver) {
- __do_deliver_interrupt(vcpu, inti);
- kfree(inti);
- }
- } while (deliver);
- }
+ return rc;
}
int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
@@ -817,11 +733,11 @@ int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
BUG_ON(waitqueue_active(li->wq));
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return 0;
}
@@ -842,11 +758,11 @@ int kvm_s390_inject_prog_irq(struct kvm_vcpu *vcpu,
inti->type = KVM_S390_PROGRAM_INT;
memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
list_add(&inti->list, &li->list);
atomic_set(&li->active, 1);
BUG_ON(waitqueue_active(li->wq));
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return 0;
}
@@ -934,12 +850,10 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
li = &dst_vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
- kvm_get_vcpu(kvm, sigcpu)->preempted = true;
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
+ kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
unlock_fi:
spin_unlock(&fi->lock);
mutex_unlock(&kvm->lock);
@@ -1079,9 +993,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
s390int->parm64, 2);
- mutex_lock(&vcpu->kvm->lock);
li = &vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
if (inti->type == KVM_S390_PROGRAM_INT)
list_add(&inti->list, &li->list);
else
@@ -1090,11 +1003,8 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
if (inti->type == KVM_S390_SIGP_STOP)
li->action_bits |= ACTION_STOP_ON_STOP;
atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
- if (waitqueue_active(&vcpu->wq))
- wake_up_interruptible(&vcpu->wq);
- vcpu->preempted = true;
- spin_unlock_bh(&li->lock);
- mutex_unlock(&vcpu->kvm->lock);
+ spin_unlock(&li->lock);
+ kvm_s390_vcpu_wakeup(vcpu);
return 0;
}
@@ -1333,7 +1243,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
}
INIT_LIST_HEAD(&map->list);
map->guest_addr = addr;
- map->addr = gmap_translate(addr, kvm->arch.gmap);
+ map->addr = gmap_translate(kvm->arch.gmap, addr);
if (map->addr == -EFAULT) {
ret = -EFAULT;
goto out;
@@ -1443,7 +1353,6 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
r = enqueue_floating_irq(dev, attr);
break;
case KVM_DEV_FLIC_CLEAR_IRQS:
- r = 0;
kvm_s390_clear_float_irqs(dev->kvm);
break;
case KVM_DEV_FLIC_APF_ENABLE:
@@ -1589,8 +1498,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
return ret;
}
-int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
- struct kvm_kernel_irq_routing_entry *e,
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
int ret;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2f3e14f..55aade4 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "halt_wakeup", VCPU_STAT(halt_wakeup) },
{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
@@ -100,16 +101,12 @@ int test_vfacility(unsigned long nr)
}
/* Section: not file related */
-int kvm_arch_hardware_enable(void *garbage)
+int kvm_arch_hardware_enable(void)
{
/* every s390 is virtualization enabled ;-) */
return 0;
}
-void kvm_arch_hardware_disable(void *garbage)
-{
-}
-
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);
int kvm_arch_hardware_setup(void)
@@ -124,17 +121,10 @@ void kvm_arch_hardware_unsetup(void)
gmap_unregister_ipte_notifier(&gmap_notifier);
}
-void kvm_arch_check_processor_compat(void *rtn)
-{
-}
-
int kvm_arch_init(void *opaque)
{
- return 0;
-}
-
-void kvm_arch_exit(void)
-{
+ /* Register floating interrupt controller interface. */
+ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
/* Section: device related */
@@ -146,7 +136,7 @@ long kvm_arch_dev_ioctl(struct file *filp,
return -EINVAL;
}
-int kvm_dev_ioctl_check_extension(long ext)
+int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
@@ -166,7 +156,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
+ case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
+ case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
@@ -402,6 +394,22 @@ long kvm_arch_vm_ioctl(struct file *filp,
return r;
}
+static int kvm_s390_crypto_init(struct kvm *kvm)
+{
+ if (!test_vfacility(76))
+ return 0;
+
+ kvm->arch.crypto.crycb = kzalloc(sizeof(*kvm->arch.crypto.crycb),
+ GFP_KERNEL | GFP_DMA);
+ if (!kvm->arch.crypto.crycb)
+ return -ENOMEM;
+
+ kvm->arch.crypto.crycbd = (__u32) (unsigned long) kvm->arch.crypto.crycb |
+ CRYCB_FORMAT1;
+
+ return 0;
+}
+
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
int rc;
@@ -439,6 +447,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (!kvm->arch.dbf)
goto out_nodbf;
+ if (kvm_s390_crypto_init(kvm) < 0)
+ goto out_crypto;
+
spin_lock_init(&kvm->arch.float_int.lock);
INIT_LIST_HEAD(&kvm->arch.float_int.list);
init_waitqueue_head(&kvm->arch.ipte_wq);
@@ -449,7 +460,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (type & KVM_VM_S390_UCONTROL) {
kvm->arch.gmap = NULL;
} else {
- kvm->arch.gmap = gmap_alloc(current->mm);
+ kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (!kvm->arch.gmap)
goto out_nogmap;
kvm->arch.gmap->private = kvm;
@@ -463,6 +474,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
return 0;
out_nogmap:
+ kfree(kvm->arch.crypto.crycb);
+out_crypto:
debug_unregister(kvm->arch.dbf);
out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
@@ -512,15 +525,12 @@ static void kvm_free_vcpus(struct kvm *kvm)
mutex_unlock(&kvm->lock);
}
-void kvm_arch_sync_events(struct kvm *kvm)
-{
-}
-
void kvm_arch_destroy_vm(struct kvm *kvm)
{
kvm_free_vcpus(kvm);
free_page((unsigned long)(kvm->arch.sca));
debug_unregister(kvm->arch.dbf);
+ kfree(kvm->arch.crypto.crycb);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
kvm_s390_destroy_adapters(kvm);
@@ -533,7 +543,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
- vcpu->arch.gmap = gmap_alloc(current->mm);
+ vcpu->arch.gmap = gmap_alloc(current->mm, -1UL);
if (!vcpu->arch.gmap)
return -ENOMEM;
vcpu->arch.gmap->private = vcpu->kvm;
@@ -544,15 +554,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
KVM_SYNC_GPRS |
KVM_SYNC_ACRS |
- KVM_SYNC_CRS;
+ KVM_SYNC_CRS |
+ KVM_SYNC_ARCH0 |
+ KVM_SYNC_PFAULT;
return 0;
}
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
- /* Nothing todo */
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
@@ -595,7 +602,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
- kvm_s390_vcpu_stop(vcpu);
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
+ kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);
}
@@ -604,6 +612,14 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
return 0;
}
+static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
+{
+ if (!test_vfacility(76))
+ return;
+
+ vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
+}
+
void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
{
free_page(vcpu->arch.sie_block->cbrlo);
@@ -647,11 +663,12 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
return rc;
}
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
- tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
- (unsigned long) vcpu);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
get_cpu_id(&vcpu->arch.cpu_id);
vcpu->arch.cpu_id.version = 0xff;
+
+ kvm_s390_vcpu_crypto_setup(vcpu);
+
return rc;
}
@@ -926,7 +943,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
int rc = 0;
- if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
+ if (!is_vcpu_stopped(vcpu))
rc = -EBUSY;
else {
vcpu->run->psw_mask = psw.mask;
@@ -980,13 +997,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL; /* not implemented yet */
+ /* CHECK_STOP and LOAD are not supported yet */
+ return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
+ KVM_MP_STATE_OPERATING;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL; /* not implemented yet */
+ int rc = 0;
+
+ /* user space knows about this interface - let it control the state */
+ vcpu->kvm->arch.user_cpu_state_ctrl = 1;
+
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_STOPPED:
+ kvm_s390_vcpu_stop(vcpu);
+ break;
+ case KVM_MP_STATE_OPERATING:
+ kvm_s390_vcpu_start(vcpu);
+ break;
+ case KVM_MP_STATE_LOAD:
+ case KVM_MP_STATE_CHECK_STOP:
+ /* fall through - CHECK_STOP and LOAD are not supported yet */
+ default:
+ rc = -ENXIO;
+ }
+
+ return rc;
}
bool kvm_s390_cmma_enabled(struct kvm *kvm)
@@ -1027,6 +1065,11 @@ retry:
goto retry;
}
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+ vcpu->arch.sie_block->ihcpu = 0xffff;
+ goto retry;
+ }
+
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
@@ -1045,6 +1088,9 @@ retry:
goto retry;
}
+ /* nothing to do, just clear the request */
+ clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+
return 0;
}
@@ -1060,18 +1106,8 @@ retry:
*/
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
{
- struct mm_struct *mm = current->mm;
- hva_t hva;
- long rc;
-
- hva = gmap_fault(gpa, vcpu->arch.gmap);
- if (IS_ERR_VALUE(hva))
- return (long)hva;
- down_read(&mm->mmap_sem);
- rc = get_user_pages(current, mm, hva, 1, writable, 0, NULL, NULL);
- up_read(&mm->mmap_sem);
-
- return rc < 0 ? rc : 0;
+ return gmap_fault(vcpu->arch.gmap, gpa,
+ writable ? FAULT_FLAG_WRITE : 0);
}
static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
@@ -1166,8 +1202,11 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
if (test_cpu_flag(CIF_MCCK_PENDING))
s390_handle_mcck();
- if (!kvm_is_ucontrol(vcpu->kvm))
- kvm_s390_deliver_pending_interrupts(vcpu);
+ if (!kvm_is_ucontrol(vcpu->kvm)) {
+ rc = kvm_s390_deliver_pending_interrupts(vcpu);
+ if (rc)
+ return rc;
+ }
rc = kvm_s390_handle_requests(vcpu);
if (rc)
@@ -1271,6 +1310,48 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
return rc;
}
+static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
+ vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
+ kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
+ memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
+ /* some control register changes require a tlb flush */
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
+ vcpu->arch.sie_block->cputm = kvm_run->s.regs.cputm;
+ vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
+ vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
+ vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
+ vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
+ }
+ if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
+ vcpu->arch.pfault_token = kvm_run->s.regs.pft;
+ vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
+ vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
+ }
+ kvm_run->kvm_dirty_regs = 0;
+}
+
+static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+ kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
+ kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
+ kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
+ memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+ kvm_run->s.regs.cputm = vcpu->arch.sie_block->cputm;
+ kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
+ kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
+ kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
+ kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
+ kvm_run->s.regs.pft = vcpu->arch.pfault_token;
+ kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
+ kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
+}
+
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
int rc;
@@ -1284,32 +1365,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
- kvm_s390_vcpu_start(vcpu);
-
- switch (kvm_run->exit_reason) {
- case KVM_EXIT_S390_SIEIC:
- case KVM_EXIT_UNKNOWN:
- case KVM_EXIT_INTR:
- case KVM_EXIT_S390_RESET:
- case KVM_EXIT_S390_UCONTROL:
- case KVM_EXIT_S390_TSCH:
- case KVM_EXIT_DEBUG:
- break;
- default:
- BUG();
+ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
+ kvm_s390_vcpu_start(vcpu);
+ } else if (is_vcpu_stopped(vcpu)) {
+ pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
+ vcpu->vcpu_id);
+ return -EINVAL;
}
- vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
- vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
- kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- }
- if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
- kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
- memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
- kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
- }
+ sync_regs(vcpu, kvm_run);
might_fault();
rc = __vcpu_run(vcpu);
@@ -1339,10 +1403,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = 0;
}
- kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
- kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
- kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
- memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
+ store_regs(vcpu, kvm_run);
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1413,11 +1474,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
return kvm_s390_store_status_unloaded(vcpu, addr);
}
-static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
-{
- return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
-}
-
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -1451,7 +1507,7 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
/* Only one cpu at a time may enter/leave the STOPPED state. */
- spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
for (i = 0; i < online_vcpus; i++) {
@@ -1476,8 +1532,8 @@ void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
* Another VCPU might have used IBS while we were offline.
* Let's play safe and flush the VCPU at startup.
*/
- vcpu->arch.sie_block->ihcpu = 0xffff;
- spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
@@ -1491,10 +1547,18 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
/* Only one cpu at a time may enter/leave the STOPPED state. */
- spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
+ /* Need to lock access to action_bits to avoid a SIGP race condition */
+ spin_lock(&vcpu->arch.local_int.lock);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+
+ /* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
+ vcpu->arch.local_int.action_bits &=
+ ~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP);
+ spin_unlock(&vcpu->arch.local_int.lock);
+
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {
@@ -1512,7 +1576,7 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
__enable_ibs_on_vcpu(started_vcpu);
}
- spin_unlock_bh(&vcpu->kvm->arch.start_stop_lock);
+ spin_unlock(&vcpu->kvm->arch.start_stop_lock);
return;
}
@@ -1623,9 +1687,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
}
#endif
case KVM_S390_VCPU_FAULT: {
- r = gmap_fault(arg, vcpu->arch.gmap);
- if (!IS_ERR_VALUE(r))
- r = 0;
+ r = gmap_fault(vcpu->arch.gmap, arg, 0);
break;
}
case KVM_ENABLE_CAP:
@@ -1656,21 +1718,12 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
return VM_FAULT_SIGBUS;
}
-void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
- struct kvm_memory_slot *dont)
-{
-}
-
int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
unsigned long npages)
{
return 0;
}
-void kvm_arch_memslots_updated(struct kvm *kvm)
-{
-}
-
/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
struct kvm_memory_slot *memslot,
@@ -1716,15 +1769,6 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
return;
}
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
-}
-
static int __init kvm_s390_init(void)
{
int ret;
@@ -1743,7 +1787,7 @@ static int __init kvm_s390_init(void)
return -ENOMEM;
}
memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
- vfacilities[0] &= 0xff82fff3f4fc2000UL;
+ vfacilities[0] &= 0xff82fffbf47c2000UL;
vfacilities[1] &= 0x005c000000000000UL;
return 0;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index a8655ed..244d023 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -45,9 +45,9 @@ do { \
d_args); \
} while (0)
-static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
+static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
- return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
+ return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
@@ -70,7 +70,7 @@ static inline u32 kvm_s390_get_prefix(struct kvm_vcpu *vcpu)
static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
{
vcpu->arch.sie_block->prefix = prefix >> GUEST_PREFIX_SHIFT;
- vcpu->arch.sie_block->ihcpu = 0xffff;
+ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
}
@@ -129,11 +129,16 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
vcpu->arch.sie_block->gpsw.mask |= cc << 44;
}
+/* are cpu states controlled by user space */
+static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
+{
+ return kvm->arch.user_cpu_state_ctrl != 0;
+}
+
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
+void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
-void kvm_s390_tasklet(unsigned long parm);
-void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
-void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
void kvm_s390_clear_float_irqs(struct kvm *kvm);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
@@ -222,6 +227,7 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int psw_extint_disabled(struct kvm_vcpu *vcpu);
void kvm_s390_destroy_adapters(struct kvm *kvm);
int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu);
+extern struct kvm_device_ops kvm_flic_ops;
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f89c1cd..72bb2dd 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -352,13 +352,6 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
return 0;
}
-static void handle_new_psw(struct kvm_vcpu *vcpu)
-{
- /* Check whether the new psw is enabled for machine checks. */
- if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_MCHECK)
- kvm_s390_deliver_pending_machine_checks(vcpu);
-}
-
#define PSW_MASK_ADDR_MODE (PSW_MASK_EA | PSW_MASK_BA)
#define PSW_MASK_UNASSIGNED 0xb80800fe7fffffffUL
#define PSW_ADDR_24 0x0000000000ffffffUL
@@ -405,7 +398,6 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
gpsw->addr = new_psw.addr & ~PSW32_ADDR_AMODE;
if (!is_valid_psw(gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- handle_new_psw(vcpu);
return 0;
}
@@ -427,7 +419,6 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->gpsw = new_psw;
if (!is_valid_psw(&vcpu->arch.sie_block->gpsw))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- handle_new_psw(vcpu);
return 0;
}
@@ -738,7 +729,7 @@ static int handle_essa(struct kvm_vcpu *vcpu)
/* invalid entry */
break;
/* try to free backing */
- __gmap_zap(cbrle, gmap);
+ __gmap_zap(gmap, cbrle);
}
up_read(&gmap->mm->mmap_sem);
if (i < entries)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 43079a4..cf243ba 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -125,8 +125,9 @@ static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
}
-static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
+static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
{
+ struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
struct kvm_s390_interrupt_info *inti;
int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
@@ -135,7 +136,13 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
return -ENOMEM;
inti->type = KVM_S390_SIGP_STOP;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
+ if (li->action_bits & ACTION_STOP_ON_STOP) {
+ /* another SIGP STOP is pending */
+ kfree(inti);
+ rc = SIGP_CC_BUSY;
+ goto out;
+ }
if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
kfree(inti);
if ((action & ACTION_STORE_ON_STOP) != 0)
@@ -144,19 +151,17 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
}
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
out:
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return rc;
}
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
{
- struct kvm_s390_local_interrupt *li;
struct kvm_vcpu *dst_vcpu = NULL;
int rc;
@@ -166,9 +171,8 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- li = &dst_vcpu->arch.local_int;
- rc = __inject_sigp_stop(li, action);
+ rc = __inject_sigp_stop(dst_vcpu, action);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
@@ -238,7 +242,7 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
if (!inti)
return SIGP_CC_BUSY;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
/* cpu must be in stopped state */
if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
@@ -253,13 +257,12 @@ static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
- if (waitqueue_active(li->wq))
- wake_up_interruptible(li->wq);
+ kvm_s390_vcpu_wakeup(dst_vcpu);
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
out_li:
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return rc;
}
@@ -275,9 +278,9 @@ static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
- spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+ spin_lock(&dst_vcpu->arch.local_int.lock);
flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
- spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+ spin_unlock(&dst_vcpu->arch.local_int.lock);
if (!(flags & CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -338,10 +341,10 @@ static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
if (!dst_vcpu)
return SIGP_CC_NOT_OPERATIONAL;
li = &dst_vcpu->arch.local_int;
- spin_lock_bh(&li->lock);
+ spin_lock(&li->lock);
if (li->action_bits & ACTION_STOP_ON_STOP)
rc = SIGP_CC_BUSY;
- spin_unlock_bh(&li->lock);
+ spin_unlock(&li->lock);
return rc;
}
@@ -461,12 +464,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
BUG_ON(dest_vcpu == NULL);
- spin_lock_bh(&dest_vcpu->arch.local_int.lock);
- if (waitqueue_active(&dest_vcpu->wq))
- wake_up_interruptible(&dest_vcpu->wq);
- dest_vcpu->preempted = true;
- spin_unlock_bh(&dest_vcpu->arch.local_int.lock);
-
+ kvm_s390_vcpu_wakeup(dest_vcpu);
kvm_s390_set_psw_cc(vcpu, SIGP_CC_ORDER_CODE_ACCEPTED);
return 0;
}
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index c6d752e..a01df23 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -6,3 +6,5 @@ lib-y += delay.o string.o uaccess.o find.o
obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
obj-$(CONFIG_64BIT) += mem64.o
lib-$(CONFIG_SMP) += spinlock.o
+lib-$(CONFIG_KPROBES) += probes.o
+lib-$(CONFIG_UPROBES) += probes.o
diff --git a/arch/s390/lib/delay.c b/arch/s390/lib/delay.c
index a9f3d00..16dc42d 100644
--- a/arch/s390/lib/delay.c
+++ b/arch/s390/lib/delay.c
@@ -43,7 +43,7 @@ static void __udelay_disabled(unsigned long long usecs)
lockdep_off();
do {
set_clock_comparator(end);
- vtime_stop_cpu();
+ enabled_wait();
} while (get_tod_clock_fast() < end);
lockdep_on();
__ctl_load(cr0, 0, 0);
@@ -62,7 +62,7 @@ static void __udelay_enabled(unsigned long long usecs)
clock_saved = local_tick_disable();
set_clock_comparator(end);
}
- vtime_stop_cpu();
+ enabled_wait();
if (clock_saved)
local_tick_enable(clock_saved);
} while (get_tod_clock_fast() < end);
diff --git a/arch/s390/lib/probes.c b/arch/s390/lib/probes.c
new file mode 100644
index 0000000..c5d64a0
--- /dev/null
+++ b/arch/s390/lib/probes.c
@@ -0,0 +1,159 @@
+/*
+ * Common helper functions for kprobes and uprobes
+ *
+ * Copyright IBM Corp. 2014
+ */
+
+#include <linux/kprobes.h>
+#include <asm/dis.h>
+
+int probe_is_prohibited_opcode(u16 *insn)
+{
+ if (!is_known_insn((unsigned char *)insn))
+ return -EINVAL;
+ switch (insn[0] >> 8) {
+ case 0x0c: /* bassm */
+ case 0x0b: /* bsm */
+ case 0x83: /* diag */
+ case 0x44: /* ex */
+ case 0xac: /* stnsm */
+ case 0xad: /* stosm */
+ return -EINVAL;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x00: /* exrl */
+ return -EINVAL;
+ }
+ }
+ switch (insn[0]) {
+ case 0x0101: /* pr */
+ case 0xb25a: /* bsa */
+ case 0xb240: /* bakr */
+ case 0xb258: /* bsg */
+ case 0xb218: /* pc */
+ case 0xb228: /* pt */
+ case 0xb98d: /* epsw */
+ case 0xe560: /* tbegin */
+ case 0xe561: /* tbeginc */
+ case 0xb2f8: /* tend */
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int probe_get_fixup_type(u16 *insn)
+{
+ /* default fixup method */
+ int fixup = FIXUP_PSW_NORMAL;
+
+ switch (insn[0] >> 8) {
+ case 0x05: /* balr */
+ case 0x0d: /* basr */
+ fixup = FIXUP_RETURN_REGISTER;
+ /* if r2 = 0, no branch will be taken */
+ if ((insn[0] & 0x0f) == 0)
+ fixup |= FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0x06: /* bctr */
+ case 0x07: /* bcr */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0x45: /* bal */
+ case 0x4d: /* bas */
+ fixup = FIXUP_RETURN_REGISTER;
+ break;
+ case 0x47: /* bc */
+ case 0x46: /* bct */
+ case 0x86: /* bxh */
+ case 0x87: /* bxle */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0x82: /* lpsw */
+ fixup = FIXUP_NOT_REQUIRED;
+ break;
+ case 0xb2: /* lpswe */
+ if ((insn[0] & 0xff) == 0xb2)
+ fixup = FIXUP_NOT_REQUIRED;
+ break;
+ case 0xa7: /* bras */
+ if ((insn[0] & 0x0f) == 0x05)
+ fixup |= FIXUP_RETURN_REGISTER;
+ break;
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x05) /* brasl */
+ fixup |= FIXUP_RETURN_REGISTER;
+ break;
+ case 0xeb:
+ switch (insn[2] & 0xff) {
+ case 0x44: /* bxhg */
+ case 0x45: /* bxleg */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
+ case 0xe3: /* bctg */
+ if ((insn[2] & 0xff) == 0x46)
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ case 0xec:
+ switch (insn[2] & 0xff) {
+ case 0xe5: /* clgrb */
+ case 0xe6: /* cgrb */
+ case 0xf6: /* crb */
+ case 0xf7: /* clrb */
+ case 0xfc: /* cgib */
+ case 0xfd: /* cglib */
+ case 0xfe: /* cib */
+ case 0xff: /* clib */
+ fixup = FIXUP_BRANCH_NOT_TAKEN;
+ break;
+ }
+ break;
+ }
+ return fixup;
+}
+
+int probe_is_insn_relative_long(u16 *insn)
+{
+ /* Check if we have a RIL-b or RIL-c format instruction which
+ * we need to modify in order to avoid instruction emulation. */
+ switch (insn[0] >> 8) {
+ case 0xc0:
+ if ((insn[0] & 0x0f) == 0x00) /* larl */
+ return true;
+ break;
+ case 0xc4:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* llhrl */
+ case 0x04: /* lghrl */
+ case 0x05: /* lhrl */
+ case 0x06: /* llghrl */
+ case 0x07: /* sthrl */
+ case 0x08: /* lgrl */
+ case 0x0b: /* stgrl */
+ case 0x0c: /* lgfrl */
+ case 0x0d: /* lrl */
+ case 0x0e: /* llgfrl */
+ case 0x0f: /* strl */
+ return true;
+ }
+ break;
+ case 0xc6:
+ switch (insn[0] & 0x0f) {
+ case 0x02: /* pfdrl */
+ case 0x04: /* cghrl */
+ case 0x05: /* chrl */
+ case 0x06: /* clghrl */
+ case 0x07: /* clhrl */
+ case 0x08: /* cgrl */
+ case 0x0a: /* clgrl */
+ case 0x0c: /* cgfrl */
+ case 0x0d: /* crl */
+ case 0x0e: /* clgfrl */
+ case 0x0f: /* clrl */
+ return true;
+ }
+ break;
+ }
+ return false;
+}
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 5b0e445..034a35a 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -98,17 +98,6 @@ void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
}
EXPORT_SYMBOL(arch_spin_lock_wait_flags);
-void arch_spin_relax(arch_spinlock_t *lp)
-{
- unsigned int cpu = lp->lock;
- if (cpu != 0) {
- if (MACHINE_IS_VM || MACHINE_IS_KVM ||
- !smp_vcpu_scheduled(~cpu))
- smp_yield_cpu(~cpu);
- }
-}
-EXPORT_SYMBOL(arch_spin_relax);
-
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
int count;
@@ -122,15 +111,21 @@ EXPORT_SYMBOL(arch_spin_trylock_retry);
void _raw_read_lock_wait(arch_rwlock_t *rw)
{
- unsigned int old;
+ unsigned int owner, old;
int count = spin_retry;
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+ __RAW_LOCK(&rw->lock, -1, __RAW_OP_ADD);
+#endif
+ owner = 0;
while (1) {
if (count-- <= 0) {
- smp_yield();
+ if (owner && !smp_vcpu_scheduled(~owner))
+ smp_yield_cpu(~owner);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
+ owner = ACCESS_ONCE(rw->owner);
if ((int) old < 0)
continue;
if (_raw_compare_and_swap(&rw->lock, old, old + 1))
@@ -139,28 +134,6 @@ void _raw_read_lock_wait(arch_rwlock_t *rw)
}
EXPORT_SYMBOL(_raw_read_lock_wait);
-void _raw_read_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
-{
- unsigned int old;
- int count = spin_retry;
-
- local_irq_restore(flags);
- while (1) {
- if (count-- <= 0) {
- smp_yield();
- count = spin_retry;
- }
- old = ACCESS_ONCE(rw->lock);
- if ((int) old < 0)
- continue;
- local_irq_disable();
- if (_raw_compare_and_swap(&rw->lock, old, old + 1))
- return;
- local_irq_restore(flags);
- }
-}
-EXPORT_SYMBOL(_raw_read_lock_wait_flags);
-
int _raw_read_trylock_retry(arch_rwlock_t *rw)
{
unsigned int old;
@@ -177,46 +150,62 @@ int _raw_read_trylock_retry(arch_rwlock_t *rw)
}
EXPORT_SYMBOL(_raw_read_trylock_retry);
-void _raw_write_lock_wait(arch_rwlock_t *rw)
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+
+void _raw_write_lock_wait(arch_rwlock_t *rw, unsigned int prev)
{
- unsigned int old;
+ unsigned int owner, old;
int count = spin_retry;
+ owner = 0;
while (1) {
if (count-- <= 0) {
- smp_yield();
+ if (owner && !smp_vcpu_scheduled(~owner))
+ smp_yield_cpu(~owner);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
- if (old)
- continue;
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
- return;
+ owner = ACCESS_ONCE(rw->owner);
+ smp_rmb();
+ if ((int) old >= 0) {
+ prev = __RAW_LOCK(&rw->lock, 0x80000000, __RAW_OP_OR);
+ old = prev;
+ }
+ if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+ break;
}
}
EXPORT_SYMBOL(_raw_write_lock_wait);
-void _raw_write_lock_wait_flags(arch_rwlock_t *rw, unsigned long flags)
+#else /* CONFIG_HAVE_MARCH_Z196_FEATURES */
+
+void _raw_write_lock_wait(arch_rwlock_t *rw)
{
- unsigned int old;
+ unsigned int owner, old, prev;
int count = spin_retry;
- local_irq_restore(flags);
+ prev = 0x80000000;
+ owner = 0;
while (1) {
if (count-- <= 0) {
- smp_yield();
+ if (owner && !smp_vcpu_scheduled(~owner))
+ smp_yield_cpu(~owner);
count = spin_retry;
}
old = ACCESS_ONCE(rw->lock);
- if (old)
- continue;
- local_irq_disable();
- if (_raw_compare_and_swap(&rw->lock, 0, 0x80000000))
- return;
- local_irq_restore(flags);
+ owner = ACCESS_ONCE(rw->owner);
+ if ((int) old >= 0 &&
+ _raw_compare_and_swap(&rw->lock, old, old | 0x80000000))
+ prev = old;
+ else
+ smp_rmb();
+ if ((old & 0x7fffffff) == 0 && (int) prev >= 0)
+ break;
}
}
-EXPORT_SYMBOL(_raw_write_lock_wait_flags);
+EXPORT_SYMBOL(_raw_write_lock_wait);
+
+#endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */
int _raw_write_trylock_retry(arch_rwlock_t *rw)
{
@@ -233,3 +222,13 @@ int _raw_write_trylock_retry(arch_rwlock_t *rw)
return 0;
}
EXPORT_SYMBOL(_raw_write_trylock_retry);
+
+void arch_lock_relax(unsigned int cpu)
+{
+ if (!cpu)
+ return;
+ if (MACHINE_IS_LPAR && smp_vcpu_scheduled(~cpu))
+ return;
+ smp_yield_cpu(~cpu);
+}
+EXPORT_SYMBOL(arch_lock_relax);
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 46d517c..d46cade 100644
--- a/arch/s390/mm/dump_pagetables.c
+++ b/arch/s390/mm/dump_pagetables.c
@@ -54,7 +54,6 @@ static void print_prot(struct seq_file *m, unsigned int pr, int level)
return;
}
seq_printf(m, "%s", pr & _PAGE_PROTECT ? "RO " : "RW ");
- seq_printf(m, "%s", pr & _PAGE_CO ? "CO " : " ");
seq_putc(m, '\n');
}
@@ -129,7 +128,7 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st,
}
#ifdef CONFIG_64BIT
-#define _PMD_PROT_MASK (_SEGMENT_ENTRY_PROTECT | _SEGMENT_ENTRY_CO)
+#define _PMD_PROT_MASK _SEGMENT_ENTRY_PROTECT
#else
#define _PMD_PROT_MASK 0
#endif
@@ -157,7 +156,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st,
}
#ifdef CONFIG_64BIT
-#define _PUD_PROT_MASK (_REGION3_ENTRY_RO | _REGION3_ENTRY_CO)
+#define _PUD_PROT_MASK _REGION3_ENTRY_RO
#else
#define _PUD_PROT_MASK 0
#endif
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 3f3b354..a2b81d6 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -442,18 +442,15 @@ static inline int do_exception(struct pt_regs *regs, int access)
down_read(&mm->mmap_sem);
#ifdef CONFIG_PGSTE
- gmap = (struct gmap *)
- ((current->flags & PF_VCPU) ? S390_lowcore.gmap : 0);
+ gmap = (current->flags & PF_VCPU) ?
+ (struct gmap *) S390_lowcore.gmap : NULL;
if (gmap) {
- address = __gmap_fault(address, gmap);
+ current->thread.gmap_addr = address;
+ address = __gmap_translate(gmap, address);
if (address == -EFAULT) {
fault = VM_FAULT_BADMAP;
goto out_up;
}
- if (address == -ENOMEM) {
- fault = VM_FAULT_OOM;
- goto out_up;
- }
if (gmap->pfault_enabled)
flags |= FAULT_FLAG_RETRY_NOWAIT;
}
@@ -530,6 +527,20 @@ retry:
goto retry;
}
}
+#ifdef CONFIG_PGSTE
+ if (gmap) {
+ address = __gmap_link(gmap, current->thread.gmap_addr,
+ address);
+ if (address == -EFAULT) {
+ fault = VM_FAULT_BADMAP;
+ goto out_up;
+ }
+ if (address == -ENOMEM) {
+ fault = VM_FAULT_OOM;
+ goto out_up;
+ }
+ }
+#endif
fault = 0;
out_up:
up_read(&mm->mmap_sem);
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 0ff66a7..3c80d2e 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -10,42 +10,33 @@
static inline pmd_t __pte_to_pmd(pte_t pte)
{
- int none, young, prot;
pmd_t pmd;
/*
- * Convert encoding pte bits pmd bits
- * .IR...wrdytp ..R...I...y.
- * empty .10...000000 -> ..0...1...0.
- * prot-none, clean, old .11...000001 -> ..0...1...1.
- * prot-none, clean, young .11...000101 -> ..1...1...1.
- * prot-none, dirty, old .10...001001 -> ..0...1...1.
- * prot-none, dirty, young .10...001101 -> ..1...1...1.
- * read-only, clean, old .11...010001 -> ..1...1...0.
- * read-only, clean, young .01...010101 -> ..1...0...1.
- * read-only, dirty, old .11...011001 -> ..1...1...0.
- * read-only, dirty, young .01...011101 -> ..1...0...1.
- * read-write, clean, old .11...110001 -> ..0...1...0.
- * read-write, clean, young .01...110101 -> ..0...0...1.
- * read-write, dirty, old .10...111001 -> ..0...1...0.
- * read-write, dirty, young .00...111101 -> ..0...0...1.
- * Huge ptes are dirty by definition, a clean pte is made dirty
- * by the conversion.
+ * Convert encoding pte bits pmd bits
+ * .IR...wrdytp dy..R...I...wr
+ * empty .10...000000 -> 00..0...1...00
+ * prot-none, clean, old .11...000001 -> 00..1...1...00
+ * prot-none, clean, young .11...000101 -> 01..1...1...00
+ * prot-none, dirty, old .10...001001 -> 10..1...1...00
+ * prot-none, dirty, young .10...001101 -> 11..1...1...00
+ * read-only, clean, old .11...010001 -> 00..1...1...01
+ * read-only, clean, young .01...010101 -> 01..1...0...01
+ * read-only, dirty, old .11...011001 -> 10..1...1...01
+ * read-only, dirty, young .01...011101 -> 11..1...0...01
+ * read-write, clean, old .11...110001 -> 00..0...1...11
+ * read-write, clean, young .01...110101 -> 01..0...0...11
+ * read-write, dirty, old .10...111001 -> 10..0...1...11
+ * read-write, dirty, young .00...111101 -> 11..0...0...11
*/
if (pte_present(pte)) {
pmd_val(pmd) = pte_val(pte) & PAGE_MASK;
- if (pte_val(pte) & _PAGE_INVALID)
- pmd_val(pmd) |= _SEGMENT_ENTRY_INVALID;
- none = (pte_val(pte) & _PAGE_PRESENT) &&
- !(pte_val(pte) & _PAGE_READ) &&
- !(pte_val(pte) & _PAGE_WRITE);
- prot = (pte_val(pte) & _PAGE_PROTECT) &&
- !(pte_val(pte) & _PAGE_WRITE);
- young = pte_val(pte) & _PAGE_YOUNG;
- if (none || young)
- pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG;
- if (prot || (none && young))
- pmd_val(pmd) |= _SEGMENT_ENTRY_PROTECT;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_READ) >> 4;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_WRITE) >> 4;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_INVALID) >> 5;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_PROTECT);
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
} else
pmd_val(pmd) = _SEGMENT_ENTRY_INVALID;
return pmd;
@@ -56,34 +47,31 @@ static inline pte_t __pmd_to_pte(pmd_t pmd)
pte_t pte;
/*
- * Convert encoding pmd bits pte bits
- * ..R...I...y. .IR...wrdytp
- * empty ..0...1...0. -> .10...000000
- * prot-none, old ..0...1...1. -> .10...001001
- * prot-none, young ..1...1...1. -> .10...001101
- * read-only, old ..1...1...0. -> .11...011001
- * read-only, young ..1...0...1. -> .01...011101
- * read-write, old ..0...1...0. -> .10...111001
- * read-write, young ..0...0...1. -> .00...111101
- * Huge ptes are dirty by definition
+ * Convert encoding pmd bits pte bits
+ * dy..R...I...wr .IR...wrdytp
+ * empty 00..0...1...00 -> .10...001100
+ * prot-none, clean, old 00..0...1...00 -> .10...000001
+ * prot-none, clean, young 01..0...1...00 -> .10...000101
+ * prot-none, dirty, old 10..0...1...00 -> .10...001001
+ * prot-none, dirty, young 11..0...1...00 -> .10...001101
+ * read-only, clean, old 00..1...1...01 -> .11...010001
+ * read-only, clean, young 01..1...1...01 -> .11...010101
+ * read-only, dirty, old 10..1...1...01 -> .11...011001
+ * read-only, dirty, young 11..1...1...01 -> .11...011101
+ * read-write, clean, old 00..0...1...11 -> .10...110001
+ * read-write, clean, young 01..0...1...11 -> .10...110101
+ * read-write, dirty, old 10..0...1...11 -> .10...111001
+ * read-write, dirty, young 11..0...1...11 -> .10...111101
*/
if (pmd_present(pmd)) {
- pte_val(pte) = _PAGE_PRESENT | _PAGE_LARGE | _PAGE_DIRTY |
- (pmd_val(pmd) & PAGE_MASK);
- if (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID)
- pte_val(pte) |= _PAGE_INVALID;
- if (pmd_prot_none(pmd)) {
- if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
- pte_val(pte) |= _PAGE_YOUNG;
- } else {
- pte_val(pte) |= _PAGE_READ;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT)
- pte_val(pte) |= _PAGE_PROTECT;
- else
- pte_val(pte) |= _PAGE_WRITE;
- if (pmd_val(pmd) & _SEGMENT_ENTRY_YOUNG)
- pte_val(pte) |= _PAGE_YOUNG;
- }
+ pte_val(pte) = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN_LARGE;
+ pte_val(pte) |= _PAGE_LARGE | _PAGE_PRESENT;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_READ) << 4;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_WRITE) << 4;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_INVALID) << 5;
+ pte_val(pte) |= (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT);
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_DIRTY) << 10;
+ pmd_val(pmd) |= (pte_val(pte) & _PAGE_YOUNG) << 10;
} else
pte_val(pte) = _PAGE_INVALID;
return pte;
@@ -96,10 +84,11 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pmd = __pte_to_pmd(pte);
if (!MACHINE_HAS_HPAGE) {
+ /* Emulated huge ptes loose the dirty and young bit */
pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= pte_page(pte)[1].index;
} else
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO;
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
*(pmd_t *) ptep = pmd;
}
@@ -113,6 +102,8 @@ pte_t huge_ptep_get(pte_t *ptep)
origin = pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) &= ~_SEGMENT_ENTRY_ORIGIN;
pmd_val(pmd) |= *(unsigned long *) origin;
+ /* Emulated huge ptes are young and dirty by definition */
+ pmd_val(pmd) |= _SEGMENT_ENTRY_YOUNG | _SEGMENT_ENTRY_DIRTY;
}
return __pmd_to_pte(pmd);
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 0c1073e..c7235e0 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -43,6 +43,7 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
+EXPORT_SYMBOL(zero_page_mask);
static void __init setup_zero_pages(void)
{
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index 8400f49..3fef3b2 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -6,6 +6,7 @@
#include <linux/module.h>
#include <linux/mm.h>
#include <asm/cacheflush.h>
+#include <asm/facility.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -103,27 +104,50 @@ int set_memory_x(unsigned long addr, int numpages)
}
#ifdef CONFIG_DEBUG_PAGEALLOC
+
+static void ipte_range(pte_t *pte, unsigned long address, int nr)
+{
+ int i;
+
+ if (test_facility(13) && IS_ENABLED(CONFIG_64BIT)) {
+ __ptep_ipte_range(address, nr - 1, pte);
+ return;
+ }
+ for (i = 0; i < nr; i++) {
+ __ptep_ipte(address, pte);
+ address += PAGE_SIZE;
+ pte++;
+ }
+}
+
void kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long address;
+ int nr, i, j;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- int i;
- for (i = 0; i < numpages; i++) {
+ for (i = 0; i < numpages;) {
address = page_to_phys(page + i);
pgd = pgd_offset_k(address);
pud = pud_offset(pgd, address);
pmd = pmd_offset(pud, address);
pte = pte_offset_kernel(pmd, address);
- if (!enable) {
- __ptep_ipte(address, pte);
- pte_val(*pte) = _PAGE_INVALID;
- continue;
+ nr = (unsigned long)pte >> ilog2(sizeof(long));
+ nr = PTRS_PER_PTE - (nr & (PTRS_PER_PTE - 1));
+ nr = min(numpages - i, nr);
+ if (enable) {
+ for (j = 0; j < nr; j++) {
+ pte_val(*pte) = __pa(address);
+ address += PAGE_SIZE;
+ pte++;
+ }
+ } else {
+ ipte_range(pte, address, nr);
}
- pte_val(*pte) = __pa(address);
+ i += nr;
}
}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 37b8241..296b61a 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -145,30 +145,56 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
/**
* gmap_alloc - allocate a guest address space
* @mm: pointer to the parent mm_struct
+ * @limit: maximum size of the gmap address space
*
* Returns a guest address space structure.
*/
-struct gmap *gmap_alloc(struct mm_struct *mm)
+struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit)
{
struct gmap *gmap;
struct page *page;
unsigned long *table;
-
+ unsigned long etype, atype;
+
+ if (limit < (1UL << 31)) {
+ limit = (1UL << 31) - 1;
+ atype = _ASCE_TYPE_SEGMENT;
+ etype = _SEGMENT_ENTRY_EMPTY;
+ } else if (limit < (1UL << 42)) {
+ limit = (1UL << 42) - 1;
+ atype = _ASCE_TYPE_REGION3;
+ etype = _REGION3_ENTRY_EMPTY;
+ } else if (limit < (1UL << 53)) {
+ limit = (1UL << 53) - 1;
+ atype = _ASCE_TYPE_REGION2;
+ etype = _REGION2_ENTRY_EMPTY;
+ } else {
+ limit = -1UL;
+ atype = _ASCE_TYPE_REGION1;
+ etype = _REGION1_ENTRY_EMPTY;
+ }
gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL);
if (!gmap)
goto out;
INIT_LIST_HEAD(&gmap->crst_list);
+ INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL);
+ INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC);
+ spin_lock_init(&gmap->guest_table_lock);
gmap->mm = mm;
page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
if (!page)
goto out_free;
+ page->index = 0;
list_add(&page->lru, &gmap->crst_list);
table = (unsigned long *) page_to_phys(page);
- crst_table_init(table, _REGION1_ENTRY_EMPTY);
+ crst_table_init(table, etype);
gmap->table = table;
- gmap->asce = _ASCE_TYPE_REGION1 | _ASCE_TABLE_LENGTH |
- _ASCE_USER_BITS | __pa(table);
+ gmap->asce = atype | _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS | __pa(table);
+ gmap->asce_end = limit;
+ down_write(&mm->mmap_sem);
list_add(&gmap->list, &mm->context.gmap_list);
+ up_write(&mm->mmap_sem);
return gmap;
out_free:
@@ -178,36 +204,38 @@ out:
}
EXPORT_SYMBOL_GPL(gmap_alloc);
-static int gmap_unlink_segment(struct gmap *gmap, unsigned long *table)
-{
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct page *page;
-
- if (*table & _SEGMENT_ENTRY_INVALID)
- return 0;
- page = pfn_to_page(*table >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry(rmap, &mp->mapper, list) {
- if (rmap->entry != table)
- continue;
- list_del(&rmap->list);
- kfree(rmap);
- break;
- }
- *table = mp->vmaddr | _SEGMENT_ENTRY_INVALID | _SEGMENT_ENTRY_PROTECT;
- return 1;
-}
-
static void gmap_flush_tlb(struct gmap *gmap)
{
if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
- _ASCE_TYPE_REGION1);
+ __tlb_flush_asce(gmap->mm, gmap->asce);
else
__tlb_flush_global();
}
+static void gmap_radix_tree_free(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ unsigned long indices[16];
+ unsigned long index;
+ void **slot;
+ int i, nr;
+
+ /* A radix tree is freed by deleting all of its entries */
+ index = 0;
+ do {
+ nr = 0;
+ radix_tree_for_each_slot(slot, root, &iter, index) {
+ indices[nr] = iter.index;
+ if (++nr == 16)
+ break;
+ }
+ for (i = 0; i < nr; i++) {
+ index = indices[i];
+ radix_tree_delete(root, index);
+ }
+ } while (nr > 0);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
@@ -215,31 +243,21 @@ static void gmap_flush_tlb(struct gmap *gmap)
void gmap_free(struct gmap *gmap)
{
struct page *page, *next;
- unsigned long *table;
- int i;
-
/* Flush tlb. */
if (MACHINE_HAS_IDTE)
- __tlb_flush_asce(gmap->mm, (unsigned long) gmap->table |
- _ASCE_TYPE_REGION1);
+ __tlb_flush_asce(gmap->mm, gmap->asce);
else
__tlb_flush_global();
/* Free all segment & region tables. */
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru) {
- table = (unsigned long *) page_to_phys(page);
- if ((*table & _REGION_ENTRY_TYPE_MASK) == 0)
- /* Remove gmap rmap structures for segment table. */
- for (i = 0; i < PTRS_PER_PMD; i++, table++)
- gmap_unlink_segment(gmap, table);
+ list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
__free_pages(page, ALLOC_ORDER);
- }
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ gmap_radix_tree_free(&gmap->guest_to_host);
+ gmap_radix_tree_free(&gmap->host_to_guest);
+ down_write(&gmap->mm->mmap_sem);
list_del(&gmap->list);
+ up_write(&gmap->mm->mmap_sem);
kfree(gmap);
}
EXPORT_SYMBOL_GPL(gmap_free);
@@ -267,42 +285,97 @@ EXPORT_SYMBOL_GPL(gmap_disable);
/*
* gmap_alloc_table is assumed to be called with mmap_sem held
*/
-static int gmap_alloc_table(struct gmap *gmap,
- unsigned long *table, unsigned long init)
- __releases(&gmap->mm->page_table_lock)
- __acquires(&gmap->mm->page_table_lock)
+static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
+ unsigned long init, unsigned long gaddr)
{
struct page *page;
unsigned long *new;
/* since we dont free the gmap table until gmap_free we can unlock */
- spin_unlock(&gmap->mm->page_table_lock);
page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);
- spin_lock(&gmap->mm->page_table_lock);
if (!page)
return -ENOMEM;
new = (unsigned long *) page_to_phys(page);
crst_table_init(new, init);
+ spin_lock(&gmap->mm->page_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
list_add(&page->lru, &gmap->crst_list);
*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
- } else
+ page->index = gaddr;
+ page = NULL;
+ }
+ spin_unlock(&gmap->mm->page_table_lock);
+ if (page)
__free_pages(page, ALLOC_ORDER);
return 0;
}
/**
+ * __gmap_segment_gaddr - find virtual address from segment pointer
+ * @entry: pointer to a segment table entry in the guest address space
+ *
+ * Returns the virtual address in the guest address space for the segment
+ */
+static unsigned long __gmap_segment_gaddr(unsigned long *entry)
+{
+ struct page *page;
+ unsigned long offset;
+
+ offset = (unsigned long) entry / sizeof(unsigned long);
+ offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
+ page = pmd_to_page((pmd_t *) entry);
+ return page->index + offset;
+}
+
+/**
+ * __gmap_unlink_by_vmaddr - unlink a single segment via a host address
+ * @gmap: pointer to the guest address space structure
+ * @vmaddr: address in the host process address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr)
+{
+ unsigned long *entry;
+ int flush = 0;
+
+ spin_lock(&gmap->guest_table_lock);
+ entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT);
+ if (entry) {
+ flush = (*entry != _SEGMENT_ENTRY_INVALID);
+ *entry = _SEGMENT_ENTRY_INVALID;
+ }
+ spin_unlock(&gmap->guest_table_lock);
+ return flush;
+}
+
+/**
+ * __gmap_unmap_by_gaddr - unmap a single segment via a guest address
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: address in the guest address space
+ *
+ * Returns 1 if a TLB flush is required
+ */
+static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr)
+{
+ unsigned long vmaddr;
+
+ vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0;
+}
+
+/**
* gmap_unmap_segment - unmap segment from the guest address space
* @gmap: pointer to the guest address space structure
- * @addr: address in the guest address space
+ * @to: address in the guest address space
* @len: length of the memory area to unmap
*
* Returns 0 if the unmap succeeded, -EINVAL if not.
*/
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
{
- unsigned long *table;
unsigned long off;
int flush;
@@ -312,31 +385,10 @@ int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len)
return -EINVAL;
flush = 0;
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
- for (off = 0; off < len; off += PMD_SIZE) {
- /* Walk the guest addr space page table */
- table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 42) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 31) & 0x7ff);
- if (*table & _REGION_ENTRY_INVALID)
- goto out;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 20) & 0x7ff);
-
- /* Clear segment table entry in guest address space. */
- flush |= gmap_unlink_segment(gmap, table);
- *table = _SEGMENT_ENTRY_INVALID;
- }
-out:
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ down_write(&gmap->mm->mmap_sem);
+ for (off = 0; off < len; off += PMD_SIZE)
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ up_write(&gmap->mm->mmap_sem);
if (flush)
gmap_flush_tlb(gmap);
return 0;
@@ -348,87 +400,47 @@ EXPORT_SYMBOL_GPL(gmap_unmap_segment);
* @gmap: pointer to the guest address space structure
* @from: source address in the parent address space
* @to: target address in the guest address space
+ * @len: length of the memory area to map
*
* Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not.
*/
int gmap_map_segment(struct gmap *gmap, unsigned long from,
unsigned long to, unsigned long len)
{
- unsigned long *table;
unsigned long off;
int flush;
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > TASK_MAX_SIZE ||
- from + len < from || to + len < to)
+ if (len == 0 || from + len < from || to + len < to ||
+ from + len > TASK_MAX_SIZE || to + len > gmap->asce_end)
return -EINVAL;
flush = 0;
- down_read(&gmap->mm->mmap_sem);
- spin_lock(&gmap->mm->page_table_lock);
+ down_write(&gmap->mm->mmap_sem);
for (off = 0; off < len; off += PMD_SIZE) {
- /* Walk the gmap address space page table */
- table = gmap->table + (((to + off) >> 53) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 42) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 31) & 0x7ff);
- if ((*table & _REGION_ENTRY_INVALID) &&
- gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY))
- goto out_unmap;
- table = (unsigned long *) (*table & _REGION_ENTRY_ORIGIN);
- table = table + (((to + off) >> 20) & 0x7ff);
-
- /* Store 'from' address in an invalid segment table entry. */
- flush |= gmap_unlink_segment(gmap, table);
- *table = (from + off) | (_SEGMENT_ENTRY_INVALID |
- _SEGMENT_ENTRY_PROTECT);
+ /* Remove old translation */
+ flush |= __gmap_unmap_by_gaddr(gmap, to + off);
+ /* Store new translation */
+ if (radix_tree_insert(&gmap->guest_to_host,
+ (to + off) >> PMD_SHIFT,
+ (void *) from + off))
+ break;
}
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ up_write(&gmap->mm->mmap_sem);
if (flush)
gmap_flush_tlb(gmap);
- return 0;
-
-out_unmap:
- spin_unlock(&gmap->mm->page_table_lock);
- up_read(&gmap->mm->mmap_sem);
+ if (off >= len)
+ return 0;
gmap_unmap_segment(gmap, to, len);
return -ENOMEM;
}
EXPORT_SYMBOL_GPL(gmap_map_segment);
-static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
-{
- unsigned long *table;
-
- table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID))
- return ERR_PTR(-EFAULT);
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 20) & 0x7ff);
- return table;
-}
-
/**
* __gmap_translate - translate a guest address to a user space address
- * @address: guest address
* @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
*
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
@@ -436,168 +448,161 @@ static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
* The mmap_sem of the mm that belongs to the address space must be held
* when this function gets called.
*/
-unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
+unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
- unsigned long *segment_ptr, vmaddr, segment;
- struct gmap_pgtable *mp;
- struct page *page;
+ unsigned long vmaddr;
- current->thread.gmap_addr = address;
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return PTR_ERR(segment_ptr);
- /* Convert the gmap address to an mm address. */
- segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INVALID)) {
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- return mp->vmaddr | (address & ~PMD_MASK);
- } else if (segment & _SEGMENT_ENTRY_PROTECT) {
- vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- return vmaddr | (address & ~PMD_MASK);
- }
- return -EFAULT;
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT);
+ return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT;
}
EXPORT_SYMBOL_GPL(__gmap_translate);
/**
* gmap_translate - translate a guest address to a user space address
- * @address: guest address
* @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
*
* Returns user space address which corresponds to the guest address or
* -EFAULT if no such mapping exists.
* This function does not establish potentially missing page table entries.
*/
-unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
+unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
unsigned long rc;
down_read(&gmap->mm->mmap_sem);
- rc = __gmap_translate(address, gmap);
+ rc = __gmap_translate(gmap, gaddr);
up_read(&gmap->mm->mmap_sem);
return rc;
}
EXPORT_SYMBOL_GPL(gmap_translate);
-static int gmap_connect_pgtable(unsigned long address, unsigned long segment,
- unsigned long *segment_ptr, struct gmap *gmap)
+/**
+ * gmap_unlink - disconnect a page table from the gmap shadow tables
+ * @gmap: pointer to guest mapping meta data structure
+ * @table: pointer to the host page table
+ * @vmaddr: vm address associated with the host page table
+ */
+static void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
+{
+ struct gmap *gmap;
+ int flush;
+
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ flush = __gmap_unlink_by_vmaddr(gmap, vmaddr);
+ if (flush)
+ gmap_flush_tlb(gmap);
+ }
+}
+
+/**
+ * gmap_link - set up shadow page tables to connect a host to a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @vmaddr: vm address
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
{
- unsigned long vmaddr;
- struct vm_area_struct *vma;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
struct mm_struct *mm;
- struct page *page;
+ unsigned long *table;
+ spinlock_t *ptl;
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
+ int rc;
- mm = gmap->mm;
- vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
- vma = find_vma(mm, vmaddr);
- if (!vma || vma->vm_start > vmaddr)
- return -EFAULT;
+ /* Create higher level tables in the gmap page table */
+ table = gmap->table;
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) {
+ table += (gaddr >> 53) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
+ gaddr & 0xffe0000000000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
+ table += (gaddr >> 42) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
+ gaddr & 0xfffffc0000000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
+ table += (gaddr >> 31) & 0x7ff;
+ if ((*table & _REGION_ENTRY_INVALID) &&
+ gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
+ gaddr & 0xffffffff80000000))
+ return -ENOMEM;
+ table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+ }
+ table += (gaddr >> 20) & 0x7ff;
/* Walk the parent mm page table */
+ mm = gmap->mm;
pgd = pgd_offset(mm, vmaddr);
- pud = pud_alloc(mm, pgd, vmaddr);
- if (!pud)
- return -ENOMEM;
- pmd = pmd_alloc(mm, pud, vmaddr);
- if (!pmd)
- return -ENOMEM;
- if (!pmd_present(*pmd) &&
- __pte_alloc(mm, vma, pmd, vmaddr))
- return -ENOMEM;
+ VM_BUG_ON(pgd_none(*pgd));
+ pud = pud_offset(pgd, vmaddr);
+ VM_BUG_ON(pud_none(*pud));
+ pmd = pmd_offset(pud, vmaddr);
+ VM_BUG_ON(pmd_none(*pmd));
/* large pmds cannot yet be handled */
if (pmd_large(*pmd))
return -EFAULT;
- /* pmd now points to a valid segment table entry. */
- rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
- if (!rmap)
- return -ENOMEM;
/* Link gmap segment table entry location to page table. */
- page = pmd_page(*pmd);
- mp = (struct gmap_pgtable *) page->index;
- rmap->gmap = gmap;
- rmap->entry = segment_ptr;
- rmap->vmaddr = address & PMD_MASK;
- spin_lock(&mm->page_table_lock);
- if (*segment_ptr == segment) {
- list_add(&rmap->list, &mp->mapper);
- /* Set gmap segment table entry to page table. */
- *segment_ptr = pmd_val(*pmd) & PAGE_MASK;
- rmap = NULL;
- }
- spin_unlock(&mm->page_table_lock);
- kfree(rmap);
- return 0;
-}
-
-static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
-{
- struct gmap_rmap *rmap, *next;
- struct gmap_pgtable *mp;
- struct page *page;
- int flush;
-
- flush = 0;
- spin_lock(&mm->page_table_lock);
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
- *rmap->entry = mp->vmaddr | (_SEGMENT_ENTRY_INVALID |
- _SEGMENT_ENTRY_PROTECT);
- list_del(&rmap->list);
- kfree(rmap);
- flush = 1;
- }
- spin_unlock(&mm->page_table_lock);
- if (flush)
- __tlb_flush_global();
+ rc = radix_tree_preload(GFP_KERNEL);
+ if (rc)
+ return rc;
+ ptl = pmd_lock(mm, pmd);
+ spin_lock(&gmap->guest_table_lock);
+ if (*table == _SEGMENT_ENTRY_INVALID) {
+ rc = radix_tree_insert(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT, table);
+ if (!rc)
+ *table = pmd_val(*pmd);
+ } else
+ rc = 0;
+ spin_unlock(&gmap->guest_table_lock);
+ spin_unlock(ptl);
+ radix_tree_preload_end();
+ return rc;
}
-/*
- * this function is assumed to be called with mmap_sem held
+/**
+ * gmap_fault - resolve a fault on a guest address
+ * @gmap: pointer to guest mapping meta data structure
+ * @gaddr: guest address
+ * @fault_flags: flags to pass down to handle_mm_fault()
+ *
+ * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT
+ * if the vm address is already mapped to a different guest segment.
*/
-unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+int gmap_fault(struct gmap *gmap, unsigned long gaddr,
+ unsigned int fault_flags)
{
- unsigned long *segment_ptr, segment;
- struct gmap_pgtable *mp;
- struct page *page;
+ unsigned long vmaddr;
int rc;
- current->thread.gmap_addr = address;
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return -EFAULT;
- /* Convert the gmap address to an mm address. */
- while (1) {
- segment = *segment_ptr;
- if (!(segment & _SEGMENT_ENTRY_INVALID)) {
- /* Page table is present */
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- return mp->vmaddr | (address & ~PMD_MASK);
- }
- if (!(segment & _SEGMENT_ENTRY_PROTECT))
- /* Nothing mapped in the gmap address space. */
- break;
- rc = gmap_connect_pgtable(address, segment, segment_ptr, gmap);
- if (rc)
- return rc;
- }
- return -EFAULT;
-}
-
-unsigned long gmap_fault(unsigned long address, struct gmap *gmap)
-{
- unsigned long rc;
-
down_read(&gmap->mm->mmap_sem);
- rc = __gmap_fault(address, gmap);
+ vmaddr = __gmap_translate(gmap, gaddr);
+ if (IS_ERR_VALUE(vmaddr)) {
+ rc = vmaddr;
+ goto out_up;
+ }
+ if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags)) {
+ rc = -EFAULT;
+ goto out_up;
+ }
+ rc = __gmap_link(gmap, gaddr, vmaddr);
+out_up:
up_read(&gmap->mm->mmap_sem);
-
return rc;
}
EXPORT_SYMBOL_GPL(gmap_fault);
@@ -617,17 +622,24 @@ static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm)
free_swap_and_cache(entry);
}
-/**
- * The mm->mmap_sem lock must be held
+/*
+ * this function is assumed to be called with mmap_sem held
*/
-static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
+void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- unsigned long ptev, pgstev;
+ unsigned long vmaddr, ptev, pgstev;
+ pte_t *ptep, pte;
spinlock_t *ptl;
pgste_t pgste;
- pte_t *ptep, pte;
- ptep = get_locked_pte(mm, address, &ptl);
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
+ return;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
if (unlikely(!ptep))
return;
pte = *ptep;
@@ -639,87 +651,34 @@ static void gmap_zap_unused(struct mm_struct *mm, unsigned long address)
ptev = pte_val(pte);
if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) {
- gmap_zap_swap_entry(pte_to_swp_entry(pte), mm);
- pte_clear(mm, address, ptep);
+ gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm);
+ pte_clear(gmap->mm, vmaddr, ptep);
}
pgste_set_unlock(ptep, pgste);
out_pte:
pte_unmap_unlock(*ptep, ptl);
}
-
-/*
- * this function is assumed to be called with mmap_sem held
- */
-void __gmap_zap(unsigned long address, struct gmap *gmap)
-{
- unsigned long *table, *segment_ptr;
- unsigned long segment, pgstev, ptev;
- struct gmap_pgtable *mp;
- struct page *page;
-
- segment_ptr = gmap_table_walk(address, gmap);
- if (IS_ERR(segment_ptr))
- return;
- segment = *segment_ptr;
- if (segment & _SEGMENT_ENTRY_INVALID)
- return;
- page = pfn_to_page(segment >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- address = mp->vmaddr | (address & ~PMD_MASK);
- /* Page table is present */
- table = (unsigned long *)(segment & _SEGMENT_ENTRY_ORIGIN);
- table = table + ((address >> 12) & 0xff);
- pgstev = table[PTRS_PER_PTE];
- ptev = table[0];
- /* quick check, checked again with locks held */
- if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) ||
- ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID)))
- gmap_zap_unused(gmap->mm, address);
-}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
{
-
- unsigned long *table, address, size;
+ unsigned long gaddr, vmaddr, size;
struct vm_area_struct *vma;
- struct gmap_pgtable *mp;
- struct page *page;
down_read(&gmap->mm->mmap_sem);
- address = from;
- while (address < to) {
- /* Walk the gmap address space page table */
- table = gmap->table + ((address >> 53) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 42) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 31) & 0x7ff);
- if (unlikely(*table & _REGION_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
+ for (gaddr = from; gaddr < to;
+ gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
+ /* Find the vm address for the guest address */
+ vmaddr = (unsigned long)
+ radix_tree_lookup(&gmap->guest_to_host,
+ gaddr >> PMD_SHIFT);
+ if (!vmaddr)
continue;
- }
- table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
- table = table + ((address >> 20) & 0x7ff);
- if (unlikely(*table & _SEGMENT_ENTRY_INVALID)) {
- address = (address + PMD_SIZE) & PMD_MASK;
- continue;
- }
- page = pfn_to_page(*table >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- vma = find_vma(gmap->mm, mp->vmaddr);
- size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
- zap_page_range(vma, mp->vmaddr | (address & ~PMD_MASK),
- size, NULL);
- address = (address + PMD_SIZE) & PMD_MASK;
+ vmaddr |= gaddr & ~PMD_MASK;
+ /* Find vma in the parent mm */
+ vma = find_vma(gmap->mm, vmaddr);
+ size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
+ zap_page_range(vma, vmaddr, size, NULL);
}
up_read(&gmap->mm->mmap_sem);
}
@@ -755,7 +714,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
/**
* gmap_ipte_notify - mark a range of ptes for invalidation notification
* @gmap: pointer to guest mapping meta data structure
- * @start: virtual address in the guest address space
+ * @gaddr: virtual address in the guest address space
* @len: size of area
*
* Returns 0 if for each page in the given range a gmap mapping exists and
@@ -763,7 +722,7 @@ EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier);
* for one or more pages -EFAULT is returned. If no memory could be allocated
* -ENOMEM is returned. This function establishes missing page table entries.
*/
-int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
+int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len)
{
unsigned long addr;
spinlock_t *ptl;
@@ -771,12 +730,12 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
pgste_t pgste;
int rc = 0;
- if ((start & ~PAGE_MASK) || (len & ~PAGE_MASK))
+ if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK))
return -EINVAL;
down_read(&gmap->mm->mmap_sem);
while (len) {
/* Convert gmap address and connect the page tables */
- addr = __gmap_fault(start, gmap);
+ addr = __gmap_translate(gmap, gaddr);
if (IS_ERR_VALUE(addr)) {
rc = addr;
break;
@@ -786,6 +745,9 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
rc = -EFAULT;
break;
}
+ rc = __gmap_link(gmap, gaddr, addr);
+ if (rc)
+ break;
/* Walk the process page table, lock and get pte pointer */
ptep = get_locked_pte(gmap->mm, addr, &ptl);
if (unlikely(!ptep))
@@ -796,7 +758,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long start, unsigned long len)
pgste = pgste_get_lock(ptep);
pgste_val(pgste) |= PGSTE_IN_BIT;
pgste_set_unlock(ptep, pgste);
- start += PAGE_SIZE;
+ gaddr += PAGE_SIZE;
len -= PAGE_SIZE;
}
spin_unlock(ptl);
@@ -809,28 +771,30 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify);
/**
* gmap_do_ipte_notify - call all invalidation callbacks for a specific pte.
* @mm: pointer to the process mm_struct
+ * @addr: virtual address in the process address space
* @pte: pointer to the page table entry
*
* This function is assumed to be called with the page table lock held
* for the pte to notify.
*/
-void gmap_do_ipte_notify(struct mm_struct *mm, pte_t *pte)
+void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte)
{
- unsigned long segment_offset;
+ unsigned long offset, gaddr;
+ unsigned long *table;
struct gmap_notifier *nb;
- struct gmap_pgtable *mp;
- struct gmap_rmap *rmap;
- struct page *page;
+ struct gmap *gmap;
- segment_offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
- segment_offset = segment_offset * (4096 / sizeof(pte_t));
- page = pfn_to_page(__pa(pte) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
+ offset = ((unsigned long) pte) & (255 * sizeof(pte_t));
+ offset = offset * (4096 / sizeof(pte_t));
spin_lock(&gmap_notifier_lock);
- list_for_each_entry(rmap, &mp->mapper, list) {
+ list_for_each_entry(gmap, &mm->context.gmap_list, list) {
+ table = radix_tree_lookup(&gmap->host_to_guest,
+ vmaddr >> PMD_SHIFT);
+ if (!table)
+ continue;
+ gaddr = __gmap_segment_gaddr(table) + offset;
list_for_each_entry(nb, &gmap_notifier_list, list)
- nb->notifier_call(rmap->gmap,
- rmap->vmaddr + segment_offset);
+ nb->notifier_call(gmap, gaddr);
}
spin_unlock(&gmap_notifier_lock);
}
@@ -841,29 +805,18 @@ static inline int page_table_with_pgste(struct page *page)
return atomic_read(&page->_mapcount) == 0;
}
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
- unsigned long vmaddr)
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
struct page *page;
unsigned long *table;
- struct gmap_pgtable *mp;
page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
if (!page)
return NULL;
- mp = kmalloc(sizeof(*mp), GFP_KERNEL|__GFP_REPEAT);
- if (!mp) {
- __free_page(page);
- return NULL;
- }
if (!pgtable_page_ctor(page)) {
- kfree(mp);
__free_page(page);
return NULL;
}
- mp->vmaddr = vmaddr & PMD_MASK;
- INIT_LIST_HEAD(&mp->mapper);
- page->index = (unsigned long) mp;
atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
@@ -874,14 +827,10 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
static inline void page_table_free_pgste(unsigned long *table)
{
struct page *page;
- struct gmap_pgtable *mp;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- mp = (struct gmap_pgtable *) page->index;
- BUG_ON(!list_empty(&mp->mapper));
pgtable_page_dtor(page);
atomic_set(&page->_mapcount, -1);
- kfree(mp);
__free_page(page);
}
@@ -986,11 +935,21 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pte_t *ptep;
down_read(&mm->mmap_sem);
+retry:
ptep = get_locked_pte(current->mm, addr, &ptl);
if (unlikely(!ptep)) {
up_read(&mm->mmap_sem);
return -EFAULT;
}
+ if (!(pte_val(*ptep) & _PAGE_INVALID) &&
+ (pte_val(*ptep) & _PAGE_PROTECT)) {
+ pte_unmap_unlock(*ptep, ptl);
+ if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE)) {
+ up_read(&mm->mmap_sem);
+ return -EFAULT;
+ }
+ goto retry;
+ }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -1028,8 +987,7 @@ static inline int page_table_with_pgste(struct page *page)
return 0;
}
-static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
- unsigned long vmaddr)
+static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
return NULL;
}
@@ -1043,8 +1001,8 @@ static inline void page_table_free_pgste(unsigned long *table)
{
}
-static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
- unsigned long *table)
+static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table,
+ unsigned long vmaddr)
{
}
@@ -1064,14 +1022,14 @@ static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
/*
* page table entry allocation/free routines.
*/
-unsigned long *page_table_alloc(struct mm_struct *mm, unsigned long vmaddr)
+unsigned long *page_table_alloc(struct mm_struct *mm)
{
unsigned long *uninitialized_var(table);
struct page *uninitialized_var(page);
unsigned int mask, bit;
if (mm_has_pgste(mm))
- return page_table_alloc_pgste(mm, vmaddr);
+ return page_table_alloc_pgste(mm);
/* Allocate fragments of a 4K page as 1K/2K page table */
spin_lock_bh(&mm->context.list_lock);
mask = FRAG_MASK;
@@ -1113,10 +1071,8 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
unsigned int bit, mask;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
- if (page_table_with_pgste(page)) {
- gmap_disconnect_pgtable(mm, table);
+ if (page_table_with_pgste(page))
return page_table_free_pgste(table);
- }
/* Free 1K/2K page table fragment of a 4K page */
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
@@ -1148,7 +1104,8 @@ static void __page_table_free_rcu(void *table, unsigned bit)
}
}
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
+void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
+ unsigned long vmaddr)
{
struct mm_struct *mm;
struct page *page;
@@ -1157,7 +1114,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
mm = tlb->mm;
page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
if (page_table_with_pgste(page)) {
- gmap_disconnect_pgtable(mm, table);
+ gmap_unlink(mm, table, vmaddr);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
@@ -1279,6 +1236,7 @@ static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
{
unsigned long next, *table, *new;
struct page *page;
+ spinlock_t *ptl;
pmd_t *pmd;
pmd = pmd_offset(pud, addr);
@@ -1292,11 +1250,11 @@ again:
if (page_table_with_pgste(page))
continue;
/* Allocate new page table with pgstes */
- new = page_table_alloc_pgste(mm, addr);
+ new = page_table_alloc_pgste(mm);
if (!new)
return -ENOMEM;
- spin_lock(&mm->page_table_lock);
+ ptl = pmd_lock(mm, pmd);
if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
/* Nuke pmd entry pointing to the "short" page table */
pmdp_flush_lazy(mm, addr, pmd);
@@ -1307,10 +1265,10 @@ again:
/* Establish new table */
pmd_populate(mm, pmd, (pte_t *) new);
/* Free old table with rcu, there might be a walker! */
- page_table_free_rcu(tlb, table);
+ page_table_free_rcu(tlb, table, addr);
new = NULL;
}
- spin_unlock(&mm->page_table_lock);
+ spin_unlock(ptl);
if (new) {
page_table_free_pgste(new);
goto again;
@@ -1432,6 +1390,9 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
{
VM_BUG_ON(address & ~HPAGE_PMD_MASK);
+ entry = pmd_mkyoung(entry);
+ if (dirty)
+ entry = pmd_mkdirty(entry);
if (pmd_same(*pmdp, entry))
return 0;
pmdp_invalidate(vma, address, pmdp);
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index fe9012a..b1593c2 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -65,7 +65,7 @@ static pte_t __ref *vmem_pte_alloc(unsigned long address)
pte_t *pte;
if (slab_is_available())
- pte = (pte_t *) page_table_alloc(&init_mm, address);
+ pte = (pte_t *) page_table_alloc(&init_mm);
else
pte = alloc_bootmem_align(PTRS_PER_PTE * sizeof(pte_t),
PTRS_PER_PTE * sizeof(pte_t));
@@ -236,8 +236,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
if (!new_page)
goto out;
pmd_val(*pm_dir) = __pa(new_page) |
- _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE |
- _SEGMENT_ENTRY_CO;
+ _SEGMENT_ENTRY | _SEGMENT_ENTRY_LARGE;
address = (address + PMD_SIZE) & PMD_MASK;
continue;
}
@@ -253,9 +252,9 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pt_dir = pte_offset_kernel(pm_dir, address);
if (pte_none(*pt_dir)) {
- unsigned long new_page;
+ void *new_page;
- new_page =__pa(vmem_alloc_pages(0));
+ new_page = vmemmap_alloc_block(PAGE_SIZE, node);
if (!new_page)
goto out;
pte_val(*pt_dir) =
@@ -263,7 +262,6 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
}
address += PAGE_SIZE;
}
- memset((void *)start, 0, end - start);
ret = 0;
out:
return ret;
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index a2cbd87..c52ac77 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -5,11 +5,9 @@
*
* Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
*/
-#include <linux/moduleloader.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
#include <linux/filter.h>
-#include <linux/random.h>
#include <linux/init.h>
#include <asm/cacheflush.h>
#include <asm/facility.h>
@@ -148,6 +146,12 @@ struct bpf_jit {
ret; \
})
+static void bpf_jit_fill_hole(void *area, unsigned int size)
+{
+ /* Fill whole space with illegal instructions */
+ memset(area, 0, size);
+}
+
static void bpf_jit_prologue(struct bpf_jit *jit)
{
/* Save registers and create stack frame if necessary */
@@ -223,37 +227,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
EMIT2(0x07fe);
}
-/* Helper to find the offset of pkt_type in sk_buff
- * Make sure its still a 3bit field starting at the MSBs within a byte.
- */
-#define PKT_TYPE_MAX 0xe0
-static int pkt_type_offset;
-
-static int __init bpf_pkt_type_offset_init(void)
-{
- struct sk_buff skb_probe = {
- .pkt_type = ~0,
- };
- char *ct = (char *)&skb_probe;
- int off;
-
- pkt_type_offset = -1;
- for (off = 0; off < sizeof(struct sk_buff); off++) {
- if (!ct[off])
- continue;
- if (ct[off] == PKT_TYPE_MAX)
- pkt_type_offset = off;
- else {
- /* Found non matching bit pattern, fix needed. */
- WARN_ON_ONCE(1);
- pkt_type_offset = -1;
- return -1;
- }
- }
- return 0;
-}
-device_initcall(bpf_pkt_type_offset_init);
-
/*
* make sure we dont leak kernel information to user
*/
@@ -753,12 +726,10 @@ call_fn: /* lg %r1,<d(function)>(%r13) */
}
break;
case BPF_ANC | SKF_AD_PKTTYPE:
- if (pkt_type_offset < 0)
- goto out;
/* lhi %r5,0 */
EMIT4(0xa7580000);
/* ic %r5,<d(pkt_type_offset)>(%r2) */
- EMIT4_DISP(0x43502000, pkt_type_offset);
+ EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET());
/* srl %r5,5 */
EMIT4_DISP(0x88500000, 5);
break;
@@ -780,39 +751,7 @@ out:
return -1;
}
-/*
- * Note: for security reasons, bpf code will follow a randomly
- * sized amount of illegal instructions.
- */
-struct bpf_binary_header {
- unsigned int pages;
- u8 image[];
-};
-
-static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize,
- u8 **image_ptr)
-{
- struct bpf_binary_header *header;
- unsigned int sz, hole;
-
- /* Most BPF filters are really small, but if some of them fill a page,
- * allow at least 128 extra bytes for illegal instructions.
- */
- sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE);
- header = module_alloc(sz);
- if (!header)
- return NULL;
- memset(header, 0, sz);
- header->pages = sz / PAGE_SIZE;
- hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header));
- /* Insert random number of illegal instructions before BPF code
- * and make sure the first instruction starts at an even address.
- */
- *image_ptr = &header->image[(prandom_u32() % hole) & -2];
- return header;
-}
-
-void bpf_jit_compile(struct sk_filter *fp)
+void bpf_jit_compile(struct bpf_prog *fp)
{
struct bpf_binary_header *header = NULL;
unsigned long size, prg_len, lit_len;
@@ -850,7 +789,8 @@ void bpf_jit_compile(struct sk_filter *fp)
size = prg_len + lit_len;
if (size >= BPF_SIZE_MAX)
goto out;
- header = bpf_alloc_binary(size, &jit.start);
+ header = bpf_jit_binary_alloc(size, &jit.start,
+ 2, bpf_jit_fill_hole);
if (!header)
goto out;
jit.prg = jit.mid = jit.start + prg_len;
@@ -869,13 +809,13 @@ void bpf_jit_compile(struct sk_filter *fp)
if (jit.start) {
set_memory_ro((unsigned long)header, header->pages);
fp->bpf_func = (void *) jit.start;
- fp->jited = 1;
+ fp->jited = true;
}
out:
kfree(addrs);
}
-void bpf_jit_free(struct sk_filter *fp)
+void bpf_jit_free(struct bpf_prog *fp)
{
unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK;
struct bpf_binary_header *header = (void *)addr;
@@ -884,8 +824,8 @@ void bpf_jit_free(struct sk_filter *fp)
goto free_filter;
set_memory_rw(addr, header->pages);
- module_free(NULL, header);
+ bpf_jit_binary_free(header);
free_filter:
- kfree(fp);
+ bpf_prog_unlock_free(fp);
}
diff --git a/arch/s390/oprofile/hwsampler.c b/arch/s390/oprofile/hwsampler.c
index e53c6f2..ff9b4eb 100644
--- a/arch/s390/oprofile/hwsampler.c
+++ b/arch/s390/oprofile/hwsampler.c
@@ -178,7 +178,7 @@ static int smp_ctl_qsi(int cpu)
static void hws_ext_handler(struct ext_code ext_code,
unsigned int param32, unsigned long param64)
{
- struct hws_cpu_buffer *cb = &__get_cpu_var(sampler_cpu_buffer);
+ struct hws_cpu_buffer *cb = this_cpu_ptr(&sampler_cpu_buffer);
if (!(param32 & CPU_MF_INT_SF_MASK))
return;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 30de427..2fa7b14 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -15,8 +15,8 @@
* Thomas Klein
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index 96545d7..6e22a24 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/slab.h>
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index c5c6684..eec598c 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/seq_file.h>
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index f91c031..4cbb29a 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -16,6 +16,13 @@
static struct kmem_cache *dma_region_table_cache;
static struct kmem_cache *dma_page_table_cache;
+static int s390_iommu_strict;
+
+static int zpci_refresh_global(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
+ zdev->iommu_pages * PAGE_SIZE);
+}
static unsigned long *dma_alloc_cpu_table(void)
{
@@ -155,18 +162,15 @@ static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
}
/*
- * rpcit is not required to establish new translations when previously
- * invalid translation-table entries are validated, however it is
- * required when altering previously valid entries.
+ * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+ * translations when previously invalid translation-table entries are
+ * validated. With lazy unmap, it also is skipped for previously valid
+ * entries, but a global rpcit is then required before any address can
+ * be re-used, i.e. after each iommu bitmap wrap-around.
*/
if (!zdev->tlb_refresh &&
- ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- /*
- * TODO: also need to check that the old entry is indeed INVALID
- * and not only for one page but for the whole range...
- * -> now we WARN_ON in that case but with lazy unmap that
- * needs to be redone!
- */
+ (!s390_iommu_strict ||
+ ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
goto no_refresh;
rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
@@ -220,16 +224,21 @@ static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev,
static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size)
{
unsigned long offset, flags;
+ int wrap = 0;
spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
offset = __dma_alloc_iommu(zdev, zdev->next_bit, size);
- if (offset == -1)
+ if (offset == -1) {
+ /* wrap-around */
offset = __dma_alloc_iommu(zdev, 0, size);
+ wrap = 1;
+ }
if (offset != -1) {
zdev->next_bit = offset + size;
- if (zdev->next_bit >= zdev->iommu_pages)
- zdev->next_bit = 0;
+ if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
+ /* global flush after wrap-around with lazy unmap */
+ zpci_refresh_global(zdev);
}
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
return offset;
@@ -243,7 +252,11 @@ static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size
if (!zdev->iommu_bitmap)
goto out;
bitmap_clear(zdev->iommu_bitmap, offset, size);
- if (offset >= zdev->next_bit)
+ /*
+ * Lazy flush for unmap: need to move next_bit to avoid address re-use
+ * until wrap-around.
+ */
+ if (!s390_iommu_strict && offset >= zdev->next_bit)
zdev->next_bit = offset + size;
out:
spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
@@ -504,3 +517,12 @@ struct dma_map_ops s390_dma_ops = {
/* dma_supported is unconditionally true without a callback */
};
EXPORT_SYMBOL_GPL(s390_dma_ops);
+
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strncmp(str, "strict", 6))
+ s390_iommu_strict = 1;
+ return 0;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index 6d7f5a3..460fdb2 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/pci.h>
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index 9190214..fa3ce89 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -5,8 +5,8 @@
* Jan Glauber <jang@linux.vnet.ibm.com>
*/
-#define COMPONENT "zPCI"
-#define pr_fmt(fmt) COMPONENT ": " fmt
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/kernel.h>
#include <linux/stat.h>