Diffstat (limited to 'arch')
-rw-r--r--  arch/s390/hypfs/hypfs_diag.c      | 375
-rw-r--r--  arch/s390/include/asm/cpacf.h     |  10
-rw-r--r--  arch/s390/include/asm/diag.h      | 149
-rw-r--r--  arch/s390/include/asm/kvm_host.h  |  10
-rw-r--r--  arch/s390/include/asm/page.h      |   7
-rw-r--r--  arch/s390/include/asm/pgtable.h   |   7
-rw-r--r--  arch/s390/include/asm/sclp.h      |  23
-rw-r--r--  arch/s390/include/uapi/asm/kvm.h  |  29
-rw-r--r--  arch/s390/include/uapi/asm/sie.h  |   1
-rw-r--r--  arch/s390/kernel/diag.c           |  37
-rw-r--r--  arch/s390/kvm/Makefile            |   2
-rw-r--r--  arch/s390/kvm/diag.c              |   5
-rw-r--r--  arch/s390/kvm/gaccess.c           | 152
-rw-r--r--  arch/s390/kvm/intercept.c         |  15
-rw-r--r--  arch/s390/kvm/interrupt.c         |  26
-rw-r--r--  arch/s390/kvm/kvm-s390.c          | 255
-rw-r--r--  arch/s390/kvm/kvm-s390.h          |   9
-rw-r--r--  arch/s390/kvm/priv.c              | 217
-rw-r--r--  arch/s390/kvm/sthyi.c             | 471
-rw-r--r--  arch/s390/kvm/trace.h             |  49
-rw-r--r--  arch/s390/mm/pgtable.c            | 106
21 files changed, 1506 insertions, 449 deletions
diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c
index 0450357..28f03ca 100644
--- a/arch/s390/hypfs/hypfs_diag.c
+++ b/arch/s390/hypfs/hypfs_diag.c
@@ -19,29 +19,10 @@
#include <asm/ebcdic.h>
#include "hypfs.h"
-#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
-#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */
#define TMP_SIZE 64 /* size of temporary buffers */
#define DBFS_D204_HDR_VERSION 0
-/* diag 204 subcodes */
-enum diag204_sc {
- SUBC_STIB4 = 4,
- SUBC_RSI = 5,
- SUBC_STIB6 = 6,
- SUBC_STIB7 = 7
-};
-
-/* The two available diag 204 data formats */
-enum diag204_format {
- INFO_SIMPLE = 0,
- INFO_EXT = 0x00010000
-};
-
-/* bit is set in flags, when physical cpu info is included in diag 204 data */
-#define LPAR_PHYS_FLG 0x80
-
static char *diag224_cpu_names; /* diag 224 name table */
static enum diag204_sc diag204_store_sc; /* used subcode for store */
static enum diag204_format diag204_info_type; /* used diag 204 data format */
@@ -53,7 +34,7 @@ static int diag204_buf_pages; /* number of pages for diag204 data */
static struct dentry *dbfs_d204_file;
/*
- * DIAG 204 data structures and member access functions.
+ * DIAG 204 member access functions.
*
* Since we have two different diag 204 data formats for old and new s390
* machines, we do not access the structs directly, but use getter functions for
@@ -62,302 +43,173 @@ static struct dentry *dbfs_d204_file;
/* Time information block */
-struct info_blk_hdr {
- __u8 npar;
- __u8 flags;
- __u16 tslice;
- __u16 phys_cpus;
- __u16 this_part;
- __u64 curtod;
-} __attribute__ ((packed));
-
-struct x_info_blk_hdr {
- __u8 npar;
- __u8 flags;
- __u16 tslice;
- __u16 phys_cpus;
- __u16 this_part;
- __u64 curtod1;
- __u64 curtod2;
- char reserved[40];
-} __attribute__ ((packed));
-
static inline int info_blk_hdr__size(enum diag204_format type)
{
- if (type == INFO_SIMPLE)
- return sizeof(struct info_blk_hdr);
- else /* INFO_EXT */
- return sizeof(struct x_info_blk_hdr);
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_info_blk_hdr);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_info_blk_hdr);
}
static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct info_blk_hdr *)hdr)->npar;
- else /* INFO_EXT */
- return ((struct x_info_blk_hdr *)hdr)->npar;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_info_blk_hdr *)hdr)->npar;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_info_blk_hdr *)hdr)->npar;
}
static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct info_blk_hdr *)hdr)->flags;
- else /* INFO_EXT */
- return ((struct x_info_blk_hdr *)hdr)->flags;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_info_blk_hdr *)hdr)->flags;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_info_blk_hdr *)hdr)->flags;
}
static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct info_blk_hdr *)hdr)->phys_cpus;
- else /* INFO_EXT */
- return ((struct x_info_blk_hdr *)hdr)->phys_cpus;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus;
}
/* Partition header */
-struct part_hdr {
- __u8 pn;
- __u8 cpus;
- char reserved[6];
- char part_name[LPAR_NAME_LEN];
-} __attribute__ ((packed));
-
-struct x_part_hdr {
- __u8 pn;
- __u8 cpus;
- __u8 rcpus;
- __u8 pflag;
- __u32 mlu;
- char part_name[LPAR_NAME_LEN];
- char lpc_name[8];
- char os_name[8];
- __u64 online_cs;
- __u64 online_es;
- __u8 upid;
- char reserved1[3];
- __u32 group_mlu;
- char group_name[8];
- char reserved2[32];
-} __attribute__ ((packed));
-
static inline int part_hdr__size(enum diag204_format type)
{
- if (type == INFO_SIMPLE)
- return sizeof(struct part_hdr);
- else /* INFO_EXT */
- return sizeof(struct x_part_hdr);
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_part_hdr);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_part_hdr);
}
static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct part_hdr *)hdr)->cpus;
- else /* INFO_EXT */
- return ((struct x_part_hdr *)hdr)->rcpus;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_part_hdr *)hdr)->cpus;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_part_hdr *)hdr)->rcpus;
}
static inline void part_hdr__part_name(enum diag204_format type, void *hdr,
char *name)
{
- if (type == INFO_SIMPLE)
- memcpy(name, ((struct part_hdr *)hdr)->part_name,
- LPAR_NAME_LEN);
- else /* INFO_EXT */
- memcpy(name, ((struct x_part_hdr *)hdr)->part_name,
- LPAR_NAME_LEN);
- EBCASC(name, LPAR_NAME_LEN);
- name[LPAR_NAME_LEN] = 0;
+ if (type == DIAG204_INFO_SIMPLE)
+ memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name,
+ DIAG204_LPAR_NAME_LEN);
+ else /* DIAG204_INFO_EXT */
+ memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name,
+ DIAG204_LPAR_NAME_LEN);
+ EBCASC(name, DIAG204_LPAR_NAME_LEN);
+ name[DIAG204_LPAR_NAME_LEN] = 0;
strim(name);
}
-struct cpu_info {
- __u16 cpu_addr;
- char reserved1[2];
- __u8 ctidx;
- __u8 cflag;
- __u16 weight;
- __u64 acc_time;
- __u64 lp_time;
-} __attribute__ ((packed));
-
-struct x_cpu_info {
- __u16 cpu_addr;
- char reserved1[2];
- __u8 ctidx;
- __u8 cflag;
- __u16 weight;
- __u64 acc_time;
- __u64 lp_time;
- __u16 min_weight;
- __u16 cur_weight;
- __u16 max_weight;
- char reseved2[2];
- __u64 online_time;
- __u64 wait_time;
- __u32 pma_weight;
- __u32 polar_weight;
- char reserved3[40];
-} __attribute__ ((packed));
-
/* CPU info block */
static inline int cpu_info__size(enum diag204_format type)
{
- if (type == INFO_SIMPLE)
- return sizeof(struct cpu_info);
- else /* INFO_EXT */
- return sizeof(struct x_cpu_info);
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_cpu_info);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_cpu_info);
}
static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct cpu_info *)hdr)->ctidx;
- else /* INFO_EXT */
- return ((struct x_cpu_info *)hdr)->ctidx;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->ctidx;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->ctidx;
}
static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct cpu_info *)hdr)->cpu_addr;
- else /* INFO_EXT */
- return ((struct x_cpu_info *)hdr)->cpu_addr;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->cpu_addr;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->cpu_addr;
}
static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct cpu_info *)hdr)->acc_time;
- else /* INFO_EXT */
- return ((struct x_cpu_info *)hdr)->acc_time;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->acc_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->acc_time;
}
static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct cpu_info *)hdr)->lp_time;
- else /* INFO_EXT */
- return ((struct x_cpu_info *)hdr)->lp_time;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_cpu_info *)hdr)->lp_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->lp_time;
}
static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
+ if (type == DIAG204_INFO_SIMPLE)
return 0; /* online_time not available in simple info */
- else /* INFO_EXT */
- return ((struct x_cpu_info *)hdr)->online_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_cpu_info *)hdr)->online_time;
}
/* Physical header */
-struct phys_hdr {
- char reserved1[1];
- __u8 cpus;
- char reserved2[6];
- char mgm_name[8];
-} __attribute__ ((packed));
-
-struct x_phys_hdr {
- char reserved1[1];
- __u8 cpus;
- char reserved2[6];
- char mgm_name[8];
- char reserved3[80];
-} __attribute__ ((packed));
-
static inline int phys_hdr__size(enum diag204_format type)
{
- if (type == INFO_SIMPLE)
- return sizeof(struct phys_hdr);
- else /* INFO_EXT */
- return sizeof(struct x_phys_hdr);
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_phys_hdr);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_phys_hdr);
}
static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct phys_hdr *)hdr)->cpus;
- else /* INFO_EXT */
- return ((struct x_phys_hdr *)hdr)->cpus;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_hdr *)hdr)->cpus;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_hdr *)hdr)->cpus;
}
/* Physical CPU info block */
-struct phys_cpu {
- __u16 cpu_addr;
- char reserved1[2];
- __u8 ctidx;
- char reserved2[3];
- __u64 mgm_time;
- char reserved3[8];
-} __attribute__ ((packed));
-
-struct x_phys_cpu {
- __u16 cpu_addr;
- char reserved1[2];
- __u8 ctidx;
- char reserved2[3];
- __u64 mgm_time;
- char reserved3[80];
-} __attribute__ ((packed));
-
static inline int phys_cpu__size(enum diag204_format type)
{
- if (type == INFO_SIMPLE)
- return sizeof(struct phys_cpu);
- else /* INFO_EXT */
- return sizeof(struct x_phys_cpu);
+ if (type == DIAG204_INFO_SIMPLE)
+ return sizeof(struct diag204_phys_cpu);
+ else /* DIAG204_INFO_EXT */
+ return sizeof(struct diag204_x_phys_cpu);
}
static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct phys_cpu *)hdr)->cpu_addr;
- else /* INFO_EXT */
- return ((struct x_phys_cpu *)hdr)->cpu_addr;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_cpu *)hdr)->cpu_addr;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr;
}
static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct phys_cpu *)hdr)->mgm_time;
- else /* INFO_EXT */
- return ((struct x_phys_cpu *)hdr)->mgm_time;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_cpu *)hdr)->mgm_time;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_cpu *)hdr)->mgm_time;
}
static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr)
{
- if (type == INFO_SIMPLE)
- return ((struct phys_cpu *)hdr)->ctidx;
- else /* INFO_EXT */
- return ((struct x_phys_cpu *)hdr)->ctidx;
+ if (type == DIAG204_INFO_SIMPLE)
+ return ((struct diag204_phys_cpu *)hdr)->ctidx;
+ else /* DIAG204_INFO_EXT */
+ return ((struct diag204_x_phys_cpu *)hdr)->ctidx;
}
/* Diagnose 204 functions */
-
-static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
-{
- register unsigned long _subcode asm("0") = subcode;
- register unsigned long _size asm("1") = size;
-
- asm volatile(
- " diag %2,%0,0x204\n"
- "0:\n"
- EX_TABLE(0b,0b)
- : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
- if (_subcode)
- return -1;
- return _size;
-}
-
-static int diag204(unsigned long subcode, unsigned long size, void *addr)
-{
- diag_stat_inc(DIAG_STAT_X204);
- return __diag204(subcode, size, addr);
-}
-
/*
* For the old diag subcode 4 with simple data format we have to use real
* memory. If we use subcode 6 or 7 with extended data format, we can (and
@@ -409,12 +261,12 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages)
*pages = diag204_buf_pages;
return diag204_buf;
}
- if (fmt == INFO_SIMPLE) {
+ if (fmt == DIAG204_INFO_SIMPLE) {
*pages = 1;
return diag204_alloc_rbuf();
- } else {/* INFO_EXT */
- *pages = diag204((unsigned long)SUBC_RSI |
- (unsigned long)INFO_EXT, 0, NULL);
+ } else {/* DIAG204_INFO_EXT */
+ *pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+ (unsigned long)DIAG204_INFO_EXT, 0, NULL);
if (*pages <= 0)
return ERR_PTR(-ENOSYS);
else
@@ -441,18 +293,18 @@ static int diag204_probe(void)
void *buf;
int pages, rc;
- buf = diag204_get_buffer(INFO_EXT, &pages);
+ buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages);
if (!IS_ERR(buf)) {
- if (diag204((unsigned long)SUBC_STIB7 |
- (unsigned long)INFO_EXT, pages, buf) >= 0) {
- diag204_store_sc = SUBC_STIB7;
- diag204_info_type = INFO_EXT;
+ if (diag204((unsigned long)DIAG204_SUBC_STIB7 |
+ (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+ diag204_store_sc = DIAG204_SUBC_STIB7;
+ diag204_info_type = DIAG204_INFO_EXT;
goto out;
}
- if (diag204((unsigned long)SUBC_STIB6 |
- (unsigned long)INFO_EXT, pages, buf) >= 0) {
- diag204_store_sc = SUBC_STIB6;
- diag204_info_type = INFO_EXT;
+ if (diag204((unsigned long)DIAG204_SUBC_STIB6 |
+ (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) {
+ diag204_store_sc = DIAG204_SUBC_STIB6;
+ diag204_info_type = DIAG204_INFO_EXT;
goto out;
}
diag204_free_buffer();
@@ -460,15 +312,15 @@ static int diag204_probe(void)
/* subcodes 6 and 7 failed, now try subcode 4 */
- buf = diag204_get_buffer(INFO_SIMPLE, &pages);
+ buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages);
if (IS_ERR(buf)) {
rc = PTR_ERR(buf);
goto fail_alloc;
}
- if (diag204((unsigned long)SUBC_STIB4 |
- (unsigned long)INFO_SIMPLE, pages, buf) >= 0) {
- diag204_store_sc = SUBC_STIB4;
- diag204_info_type = INFO_SIMPLE;
+ if (diag204((unsigned long)DIAG204_SUBC_STIB4 |
+ (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) {
+ diag204_store_sc = DIAG204_SUBC_STIB4;
+ diag204_info_type = DIAG204_INFO_SIMPLE;
goto out;
} else {
rc = -ENOSYS;
@@ -508,20 +360,6 @@ out:
/* Diagnose 224 functions */
-static int diag224(void *ptr)
-{
- int rc = -EOPNOTSUPP;
-
- diag_stat_inc(DIAG_STAT_X224);
- asm volatile(
- " diag %1,%2,0x224\n"
- "0: lhi %0,0x0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "+d" (rc) :"d" (0), "d" (ptr) : "memory");
- return rc;
-}
-
static int diag224_get_name_table(void)
{
/* memory must be below 2GB */
@@ -543,9 +381,9 @@ static void diag224_delete_name_table(void)
static int diag224_idx2name(int index, char *name)
{
- memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN),
- CPU_NAME_LEN);
- name[CPU_NAME_LEN] = 0;
+ memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN),
+ DIAG204_CPU_NAME_LEN);
+ name[DIAG204_CPU_NAME_LEN] = 0;
strim(name);
return 0;
}
@@ -601,7 +439,7 @@ __init int hypfs_diag_init(void)
pr_err("The hardware system does not support hypfs\n");
return -ENODATA;
}
- if (diag204_info_type == INFO_EXT) {
+ if (diag204_info_type == DIAG204_INFO_EXT) {
rc = hypfs_dbfs_create_file(&dbfs_file_d204);
if (rc)
return rc;
@@ -649,7 +487,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info)
cpu_info__lp_time(diag204_info_type, cpu_info));
if (IS_ERR(rc))
return PTR_ERR(rc);
- if (diag204_info_type == INFO_EXT) {
+ if (diag204_info_type == DIAG204_INFO_EXT) {
rc = hypfs_create_u64(cpu_dir, "onlinetime",
cpu_info__online_time(diag204_info_type,
cpu_info));
@@ -665,12 +503,12 @@ static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr)
{
struct dentry *cpus_dir;
struct dentry *lpar_dir;
- char lpar_name[LPAR_NAME_LEN + 1];
+ char lpar_name[DIAG204_LPAR_NAME_LEN + 1];
void *cpu_info;
int i;
part_hdr__part_name(diag204_info_type, part_hdr, lpar_name);
- lpar_name[LPAR_NAME_LEN] = 0;
+ lpar_name[DIAG204_LPAR_NAME_LEN] = 0;
lpar_dir = hypfs_mkdir(systems_dir, lpar_name);
if (IS_ERR(lpar_dir))
return lpar_dir;
@@ -753,7 +591,8 @@ int hypfs_diag_create_files(struct dentry *root)
goto err_out;
}
}
- if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) {
+ if (info_blk_hdr__flags(diag204_info_type, time_hdr) &
+ DIAG204_LPAR_PHYS_FLG) {
ptr = hypfs_create_phys_files(root, part_hdr);
if (IS_ERR(ptr)) {
rc = PTR_ERR(ptr);
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index 1a82cf2..d28621d 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -20,6 +20,9 @@
#define CPACF_KMC 0xb92f /* MSA */
#define CPACF_KIMD 0xb93e /* MSA */
#define CPACF_KLMD 0xb93f /* MSA */
+#define CPACF_PCKMO 0xb928 /* MSA3 */
+#define CPACF_KMF 0xb92a /* MSA4 */
+#define CPACF_KMO 0xb92b /* MSA4 */
#define CPACF_PCC 0xb92c /* MSA4 */
#define CPACF_KMCTR 0xb92d /* MSA4 */
#define CPACF_PPNO 0xb93c /* MSA5 */
@@ -136,6 +139,7 @@ static inline void __cpacf_query(unsigned int opcode, unsigned char *status)
register unsigned long r1 asm("1") = (unsigned long) status;
asm volatile(
+ " spm 0\n" /* pckmo doesn't change the cc */
/* Parameter registers are ignored, but may not be 0 */
"0: .insn rrf,%[opc] << 16,2,2,2,0\n"
" brc 1,0b\n" /* handle partial completion */
@@ -157,6 +161,12 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func)
if (!test_facility(17)) /* check for MSA */
return 0;
break;
+ case CPACF_PCKMO:
+ if (!test_facility(76)) /* check for MSA3 */
+ return 0;
+ break;
+ case CPACF_KMF:
+ case CPACF_KMO:
case CPACF_PCC:
case CPACF_KMCTR:
if (!test_facility(77)) /* check for MSA4 */
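
A quick probe sketch, not part of this patch: with the opcodes above, cpacf_query() can test one specific function before it is used. The function code 0x14 (taken here to be the PCKMO AES-256 key function) and the helper name are assumptions for illustration only.

	/* hypothetical caller: check one PCKMO function before relying on it */
	if (cpacf_query(CPACF_PCKMO, 0x14))	/* 0x14: assumed AES-256 key fc */
		enable_protected_key_support();	/* hypothetical helper */
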
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 5fac921..8221199 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -78,4 +78,153 @@ struct diag210 {
extern int diag210(struct diag210 *addr);
+/* bit is set in flags, when physical cpu info is included in diag 204 data */
+#define DIAG204_LPAR_PHYS_FLG 0x80
+#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */
+#define DIAG204_CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */
+
+/* diag 204 subcodes */
+enum diag204_sc {
+ DIAG204_SUBC_STIB4 = 4,
+ DIAG204_SUBC_RSI = 5,
+ DIAG204_SUBC_STIB6 = 6,
+ DIAG204_SUBC_STIB7 = 7
+};
+
+/* The two available diag 204 data formats */
+enum diag204_format {
+ DIAG204_INFO_SIMPLE = 0,
+ DIAG204_INFO_EXT = 0x00010000
+};
+
+enum diag204_cpu_flags {
+ DIAG204_CPU_ONLINE = 0x20,
+ DIAG204_CPU_CAPPED = 0x40,
+};
+
+struct diag204_info_blk_hdr {
+ __u8 npar;
+ __u8 flags;
+ __u16 tslice;
+ __u16 phys_cpus;
+ __u16 this_part;
+ __u64 curtod;
+} __packed;
+
+struct diag204_x_info_blk_hdr {
+ __u8 npar;
+ __u8 flags;
+ __u16 tslice;
+ __u16 phys_cpus;
+ __u16 this_part;
+ __u64 curtod1;
+ __u64 curtod2;
+ char reserved[40];
+} __packed;
+
+struct diag204_part_hdr {
+ __u8 pn;
+ __u8 cpus;
+ char reserved[6];
+ char part_name[DIAG204_LPAR_NAME_LEN];
+} __packed;
+
+struct diag204_x_part_hdr {
+ __u8 pn;
+ __u8 cpus;
+ __u8 rcpus;
+ __u8 pflag;
+ __u32 mlu;
+ char part_name[DIAG204_LPAR_NAME_LEN];
+ char lpc_name[8];
+ char os_name[8];
+ __u64 online_cs;
+ __u64 online_es;
+ __u8 upid;
+ __u8 reserved:3;
+ __u8 mtid:5;
+ char reserved1[2];
+ __u32 group_mlu;
+ char group_name[8];
+ char hardware_group_name[8];
+ char reserved2[24];
+} __packed;
+
+struct diag204_cpu_info {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ __u8 cflag;
+ __u16 weight;
+ __u64 acc_time;
+ __u64 lp_time;
+} __packed;
+
+struct diag204_x_cpu_info {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ __u8 cflag;
+ __u16 weight;
+ __u64 acc_time;
+ __u64 lp_time;
+ __u16 min_weight;
+ __u16 cur_weight;
+ __u16 max_weight;
+ char reseved2[2];
+ __u64 online_time;
+ __u64 wait_time;
+ __u32 pma_weight;
+ __u32 polar_weight;
+ __u32 cpu_type_cap;
+ __u32 group_cpu_type_cap;
+ char reserved3[32];
+} __packed;
+
+struct diag204_phys_hdr {
+ char reserved1[1];
+ __u8 cpus;
+ char reserved2[6];
+ char mgm_name[8];
+} __packed;
+
+struct diag204_x_phys_hdr {
+ char reserved1[1];
+ __u8 cpus;
+ char reserved2[6];
+ char mgm_name[8];
+ char reserved3[80];
+} __packed;
+
+struct diag204_phys_cpu {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ char reserved2[3];
+ __u64 mgm_time;
+ char reserved3[8];
+} __packed;
+
+struct diag204_x_phys_cpu {
+ __u16 cpu_addr;
+ char reserved1[2];
+ __u8 ctidx;
+ char reserved2[1];
+ __u16 weight;
+ __u64 mgm_time;
+ char reserved3[80];
+} __packed;
+
+struct diag204_x_part_block {
+ struct diag204_x_part_hdr hdr;
+ struct diag204_x_cpu_info cpus[];
+} __packed;
+
+struct diag204_x_phys_block {
+ struct diag204_x_phys_hdr hdr;
+ struct diag204_x_phys_cpu cpus[];
+} __packed;
+
+int diag204(unsigned long subcode, unsigned long size, void *addr);
+int diag224(void *ptr);
#endif /* _ASM_S390_DIAG_H */
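
A minimal sketch, not part of this patch, of how a consumer might walk the extended diag 204 data using the new flexible-array blocks; "buf" is assumed to hold the output of a successful diag204(DIAG204_SUBC_STIB7 | DIAG204_INFO_EXT, pages, buf) call.

	struct diag204_x_info_blk_hdr *hdr = buf;
	struct diag204_x_part_block *block = (void *)(hdr + 1);
	int i;

	for (i = 0; i < hdr->npar; i++) {
		/* a partition block ends after its rcpus cpu entries */
		block = (void *)(block->cpus + block->hdr.rcpus);
	}
	/* with DIAG204_LPAR_PHYS_FLG set, a diag204_x_phys_block follows */
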
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 37b9017..9eed5c1 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -154,6 +154,7 @@ struct kvm_s390_sie_block {
#define LCTL_CR14 0x0002
__u16 lctl; /* 0x0044 */
__s16 icpua; /* 0x0046 */
+#define ICTL_OPEREXC 0x80000000
#define ICTL_PINT 0x20000000
#define ICTL_LPSW 0x00400000
#define ICTL_STCTL 0x00040000
@@ -185,7 +186,9 @@ struct kvm_s390_sie_block {
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
__u32 todpr; /* 0x006c */
- __u8 reserved70[32]; /* 0x0070 */
+ __u8 reserved70[16]; /* 0x0070 */
+ __u64 mso; /* 0x0080 */
+ __u64 msl; /* 0x0088 */
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
@@ -255,6 +258,7 @@ struct kvm_vcpu_stat {
u32 instruction_stctg;
u32 exit_program_interruption;
u32 exit_instr_and_program;
+ u32 exit_operation_exception;
u32 deliver_external_call;
u32 deliver_emergency_signal;
u32 deliver_service_signal;
@@ -278,6 +282,7 @@ struct kvm_vcpu_stat {
u32 instruction_stfl;
u32 instruction_tprot;
u32 instruction_essa;
+ u32 instruction_sthyi;
u32 instruction_sigp_sense;
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
@@ -649,11 +654,14 @@ struct kvm_arch{
wait_queue_head_t ipte_wq;
int ipte_lock_count;
struct mutex ipte_mutex;
+ struct ratelimit_state sthyi_limit;
spinlock_t start_stop_lock;
struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;
+ /* subset of available cpu features enabled by user space */
+ DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
};
#define KVM_HVA_ERR_BAD (-1UL)
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 53eacbd..f874e7d5 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -109,13 +109,14 @@ static inline unsigned char page_get_storage_key(unsigned long addr)
static inline int page_reset_referenced(unsigned long addr)
{
- unsigned int ipm;
+ int cc;
asm volatile(
" rrbe 0,%1\n"
" ipm %0\n"
- : "=d" (ipm) : "a" (addr) : "cc");
- return !!(ipm & 0x20000000);
+ " srl %0,28\n"
+ : "=d" (cc) : "a" (addr) : "cc");
+ return cc;
}
/* Bits int the storage key */
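
For reference: "ipm" inserts the condition code into the high byte of the register and "srl %0,28" shifts it down, so the helper now returns the full rrbe condition code instead of collapsing it to a boolean.

	/*
	 * rrbe condition codes:
	 *   0 - reference bit 0, change bit 0
	 *   1 - reference bit 0, change bit 1
	 *   2 - reference bit 1, change bit 0
	 *   3 - reference bit 1, change bit 1
	 * The old !!(ipm & 0x20000000) recovered only the reference bit.
	 */
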
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 18d2beb..9951e7e 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -893,7 +893,12 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address);
int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
unsigned char key, bool nq);
-unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, unsigned char *oldkey,
+ bool nq, bool mr, bool mc);
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr);
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char *key);
/*
* Certain architectures need to do special things when PTEs
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index e4f6f73..2ad9c20 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -32,12 +32,19 @@ struct sclp_core_entry {
u8 reserved0;
u8 : 4;
u8 sief2 : 1;
- u8 : 3;
- u8 : 3;
+ u8 skey : 1;
+ u8 : 2;
+ u8 : 2;
+ u8 gpere : 1;
u8 siif : 1;
u8 sigpif : 1;
u8 : 3;
- u8 reserved2[10];
+ u8 reserved2[3];
+ u8 : 2;
+ u8 ib : 1;
+ u8 cei : 1;
+ u8 : 4;
+ u8 reserved3[6];
u8 type;
u8 reserved1;
} __attribute__((packed));
@@ -59,6 +66,15 @@ struct sclp_info {
unsigned char has_hvs : 1;
unsigned char has_esca : 1;
unsigned char has_sief2 : 1;
+ unsigned char has_64bscao : 1;
+ unsigned char has_gpere : 1;
+ unsigned char has_cmma : 1;
+ unsigned char has_gsls : 1;
+ unsigned char has_ib : 1;
+ unsigned char has_cei : 1;
+ unsigned char has_pfmfi : 1;
+ unsigned char has_ibs : 1;
+ unsigned char has_skey : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
@@ -101,5 +117,6 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count);
int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count);
void sclp_early_detect(void);
void _sclp_print_early(const char *);
+void sclp_ocf_cpc_name_copy(char *dst);
#endif /* _ASM_S390_SCLP_H */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 3b8e99e..f0818d7 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -93,6 +93,35 @@ struct kvm_s390_vm_cpu_machine {
__u64 fac_list[256];
};
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2
+#define KVM_S390_VM_CPU_MACHINE_FEAT 3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024
+#define KVM_S390_VM_CPU_FEAT_ESOP 0
+struct kvm_s390_vm_cpu_feat {
+ __u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+ __u8 plo[32]; /* always */
+ __u8 ptff[16]; /* with TOD-clock steering */
+ __u8 kmac[16]; /* with MSA */
+ __u8 kmc[16]; /* with MSA */
+ __u8 km[16]; /* with MSA */
+ __u8 kimd[16]; /* with MSA */
+ __u8 klmd[16]; /* with MSA */
+ __u8 pckmo[16]; /* with MSA3 */
+ __u8 kmctr[16]; /* with MSA4 */
+ __u8 kmf[16]; /* with MSA4 */
+ __u8 kmo[16]; /* with MSA4 */
+ __u8 pcc[16]; /* with MSA4 */
+ __u8 ppno[16]; /* with MSA5 */
+ __u8 reserved[1824];
+};
+
/* kvm attributes for crypto */
#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
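
A sketch (an assumption, not part of the uapi) of testing a feature bit under the MSB-0 ordering noted above: within each 64-bit word, bit 0 is the most significant bit.

	static inline int cpu_feat_test(const struct kvm_s390_vm_cpu_feat *f,
					unsigned int nr)
	{
		return (f->feat[nr / 64] >> (63 - (nr % 64))) & 1;
	}

	/* e.g. cpu_feat_test(&data, KVM_S390_VM_CPU_FEAT_ESOP) */
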
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index 8fb5d4a..3ac6343 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -140,6 +140,7 @@
exit_code_ipa0(0xB2, 0x4c, "TAR"), \
exit_code_ipa0(0xB2, 0x50, "CSP"), \
exit_code_ipa0(0xB2, 0x54, "MVPG"), \
+ exit_code_ipa0(0xB2, 0x56, "STHYI"), \
exit_code_ipa0(0xB2, 0x58, "BSG"), \
exit_code_ipa0(0xB2, 0x5a, "BSA"), \
exit_code_ipa0(0xB2, 0x5f, "CHSC"), \
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 48b37b8..a44faf4 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -162,6 +162,28 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
}
EXPORT_SYMBOL(diag14);
+static inline int __diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+ register unsigned long _subcode asm("0") = subcode;
+ register unsigned long _size asm("1") = size;
+
+ asm volatile(
+ " diag %2,%0,0x204\n"
+ "0:\n"
+ EX_TABLE(0b,0b)
+ : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory");
+ if (_subcode)
+ return -1;
+ return _size;
+}
+
+int diag204(unsigned long subcode, unsigned long size, void *addr)
+{
+ diag_stat_inc(DIAG_STAT_X204);
+ return __diag204(subcode, size, addr);
+}
+EXPORT_SYMBOL(diag204);
+
/*
* Diagnose 210: Get information about a virtual device
*/
@@ -196,3 +218,18 @@ int diag210(struct diag210 *addr)
return ccode;
}
EXPORT_SYMBOL(diag210);
+
+int diag224(void *ptr)
+{
+ int rc = -EOPNOTSUPP;
+
+ diag_stat_inc(DIAG_STAT_X224);
+ asm volatile(
+ " diag %1,%2,0x224\n"
+ "0: lhi %0,0x0\n"
+ "1:\n"
+ EX_TABLE(0b,1b)
+ : "+d" (rc) :"d" (0), "d" (ptr) : "memory");
+ return rc;
+}
+EXPORT_SYMBOL(diag224);
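
Callers of the now-exported diag224() inherit the constraint documented in the hypfs code above: the name table must reside below 2GB, hence a GFP_DMA allocation. A condensed sketch of that existing pattern:

	char *names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA);

	if (names && diag224(names)) {	/* non-zero rc: diag 224 unsupported */
		free_page((unsigned long)names);
		names = NULL;
	}
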
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d42fa38..82e73e2 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o
+kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1ea4095..ce865bd 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
return -EOPNOTSUPP;
+ VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx",
+ (u32) vcpu->run->s.regs.gprs[2],
+ (u32) vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
/*
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
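
For context, a condensed sketch of the guest side of this hypercall (cf. the virtio-ccw driver), which loads the registers exactly as the layout comment describes; the return value comes back in gpr 2:

	register unsigned long nr asm("1") = KVM_S390_VIRTIO_CCW_NOTIFY;
	register unsigned long schid asm("2") = subchannel_id; /* in: schid, out: rc */
	register unsigned long index asm("3") = queue;
	register long cookie asm("4") = old_cookie;

	asm volatile("diag 2,4,0x500\n"
		     : "+d" (schid)
		     : "d" (nr), "d" (index), "d" (cookie)
		     : "memory", "cc");
	/* schid now holds the return value (new cookie or error) */
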
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 66938d2..8e245e7 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -476,18 +476,73 @@ enum {
FSI_FETCH = 2 /* Exception was due to fetch operation */
};
-static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
- ar_t ar, enum gacc_mode mode)
+enum prot_type {
+ PROT_TYPE_LA = 0,
+ PROT_TYPE_KEYC = 1,
+ PROT_TYPE_ALC = 2,
+ PROT_TYPE_DAT = 3,
+};
+
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
+ ar_t ar, enum gacc_mode mode, enum prot_type prot)
{
- int rc;
- struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- struct trans_exc_code_bits *tec_bits;
+ struct trans_exc_code_bits *tec;
memset(pgm, 0, sizeof(*pgm));
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw.as;
+ pgm->code = code;
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+
+ switch (code) {
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ /*
+ * op_access_id only applies to MOVE_PAGE -> set bit 61
+ * exc_access_id has to be set to 0 for some instructions. Both
+ * cases have to be handled by the caller. We can always store
+ * exc_access_id, as it is undefined for non-ar cases.
+ */
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* FALL THROUGH */
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_EXTENDED_AUTHORITY:
+ pgm->exc_access_id = ar;
+ break;
+ case PGM_PROTECTION:
+ switch (prot) {
+ case PROT_TYPE_ALC:
+ tec->b60 = 1;
+ /* FALL THROUGH */
+ case PROT_TYPE_DAT:
+ tec->b61 = 1;
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* exc_access_id is undefined for most cases */
+ pgm->exc_access_id = ar;
+ break;
+ default: /* LA and KEYC set b61 to 0, other params undefined */
+ break;
+ }
+ break;
+ }
+ return code;
+}
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+ unsigned long ga, ar_t ar, enum gacc_mode mode)
+{
+ int rc;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
if (!psw.t) {
asce->val = 0;
@@ -510,21 +565,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
return 0;
case PSW_AS_ACCREG:
rc = ar_translation(vcpu, asce, ar, mode);
- switch (rc) {
- case PGM_ALEN_TRANSLATION:
- case PGM_ALE_SEQUENCE:
- case PGM_ASTE_VALIDITY:
- case PGM_ASTE_SEQUENCE:
- case PGM_EXTENDED_AUTHORITY:
- vcpu->arch.pgm.exc_access_id = ar;
- break;
- case PGM_PROTECTION:
- tec_bits->b60 = 1;
- tec_bits->b61 = 1;
- break;
- }
if (rc > 0)
- pgm->code = rc;
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
return rc;
}
return 0;
@@ -729,40 +771,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar,
unsigned long *pages, unsigned long nr_pages,
const union asce asce, enum gacc_mode mode)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec_bits;
- int lap_enabled, rc;
+ int lap_enabled, rc = 0;
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
- tec_bits->addr = ga >> PAGE_SHIFT;
- if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
- pgm->code = PGM_PROTECTION;
- return pgm->code;
- }
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
+ return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
+ PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
- if (rc == PGM_PROTECTION)
- tec_bits->b61 = 1;
- if (rc)
- pgm->code = rc;
} else {
*pages = kvm_s390_real_to_abs(vcpu, ga);
if (kvm_is_error_gpa(vcpu->kvm, *pages))
- pgm->code = PGM_ADDRESSING;
+ rc = PGM_ADDRESSING;
}
- if (pgm->code)
- return pgm->code;
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
ga += PAGE_SIZE;
pages++;
nr_pages--;
@@ -783,7 +816,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
- rc = get_vcpu_asce(vcpu, &asce, ar, mode);
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -795,7 +829,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
+ rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -846,37 +880,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long *gpa, enum gacc_mode mode)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec;
union asce asce;
int rc;
gva = kvm_s390_logical_to_effective(vcpu, gva);
- tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- rc = get_vcpu_asce(vcpu, &asce, ar, mode);
- tec->addr = gva >> PAGE_SHIFT;
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE) {
- rc = pgm->code = PGM_PROTECTION;
- return rc;
- }
+ if (mode == GACC_STORE)
+ return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
+ mode, PROT_TYPE_LA);
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
rc = guest_translate(vcpu, gva, gpa, asce, mode);
- if (rc > 0) {
- if (rc == PGM_PROTECTION)
- tec->b61 = 1;
- pgm->code = rc;
- }
+ if (rc > 0)
+ return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
} else {
- rc = 0;
*gpa = kvm_s390_real_to_abs(vcpu, gva);
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- rc = pgm->code = PGM_ADDRESSING;
+			return trans_exc(vcpu, PGM_ADDRESSING, gva, 0, mode, 0);
}
return rc;
@@ -915,20 +940,9 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
*/
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec_bits;
union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
if (!ctlreg0.lap || !is_low_address(gra))
return 0;
-
- memset(pgm, 0, sizeof(*pgm));
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = FSI_STORE;
- tec_bits->as = psw_bits(*psw).as;
- tec_bits->addr = gra >> PAGE_SHIFT;
- pgm->code = PGM_PROTECTION;
-
- return pgm->code;
+ return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 2e6b54e..9359f65 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -349,6 +349,19 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+static int handle_operexc(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_operation_exception++;
+ trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
+
+ if (vcpu->arch.sie_block->ipa == 0xb256 &&
+ test_kvm_facility(vcpu->kvm, 74))
+ return handle_sthyi(vcpu);
+
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
if (kvm_is_ucontrol(vcpu->kvm))
@@ -370,6 +383,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return handle_validity(vcpu);
case 0x28:
return handle_stop(vcpu);
+ case 0x2c:
+ return handle_operexc(vcpu);
case 0x38:
return handle_partial_execution(vcpu);
default:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5a80af7..d72c4a8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,9 +28,6 @@
#include "gaccess.h"
#include "trace-s390.h"
-#define IOINT_SCHID_MASK 0x0000ffff
-#define IOINT_SSID_MASK 0x00030000
-#define IOINT_CSSID_MASK 0x03fc0000
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
@@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info,
list);
if (inti) {
- VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)");
+ else
+ VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
+
vcpu->stat.deliver_io_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
inti->type,
@@ -1415,6 +1419,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
fi->counters[FIRQ_CNTR_IO] += 1;
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)");
+ else
+ VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
isc = int_word_to_isc(inti->io.io_int_word);
list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
list_add_tail(&inti->list, list);
@@ -1531,13 +1542,6 @@ int kvm_s390_inject_vm(struct kvm *kvm,
inti->mchk.mcic = s390int->parm64;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (inti->type & KVM_S390_INT_IO_AI_MASK)
- VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
- else
- VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
- s390int->type & IOINT_CSSID_MASK,
- s390int->type & IOINT_SSID_MASK,
- s390int->type & IOINT_SCHID_MASK);
inti->io.subchannel_id = s390int->parm >> 16;
inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
inti->io.io_int_parm = s390int->parm64 >> 32;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6d8ec3a..49c6039 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
@@ -35,6 +36,8 @@
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
+#include <asm/cpacf.h>
+#include <asm/etr.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -63,6 +66,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
@@ -93,6 +97,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -130,6 +135,11 @@ unsigned long kvm_s390_fac_list_mask_size(void)
return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
+/* available cpu features supported by kvm */
+static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+/* available subfunctions indicated via query / "test bit" */
+static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
+
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;
@@ -187,6 +197,61 @@ void kvm_arch_hardware_unsetup(void)
&kvm_clock_notifier);
}
+static void allow_cpu_feat(unsigned long nr)
+{
+ set_bit_inv(nr, kvm_s390_available_cpu_feat);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+ register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+ int cc = 3; /* subfunction not available */
+
+ asm volatile(
+ /* Parameter registers are ignored for "test bit" */
+ " plo 0,0,0,0(0)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : "d" (r0)
+ : "cc");
+ return cc == 0;
+}
+
+static void kvm_s390_cpu_feat_init(void)
+{
+ int i;
+
+ for (i = 0; i < 256; ++i) {
+ if (plo_test_bit(i))
+ kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+ }
+
+ if (test_facility(28)) /* TOD-clock steering */
+ etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
+
+ if (test_facility(17)) { /* MSA */
+ __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
+ __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
+ __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
+ __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
+ __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+ }
+ if (test_facility(76)) /* MSA3 */
+ __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+ if (test_facility(77)) { /* MSA4 */
+ __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
+ __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
+ __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
+ __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+ }
+ if (test_facility(57)) /* MSA5 */
+ __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+
+ if (MACHINE_HAS_ESOP)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+}
+
int kvm_arch_init(void *opaque)
{
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
@@ -198,6 +263,8 @@ int kvm_arch_init(void *opaque)
return -ENOMEM;
}
+ kvm_s390_cpu_feat_init();
+
/* Register floating interrupt controller interface. */
return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
@@ -250,8 +317,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
- r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
- : KVM_S390_BSCA_CPU_SLOTS;
+ r = KVM_S390_BSCA_CPU_SLOTS;
+ if (sclp.has_esca && sclp.has_64bscao)
+ r = KVM_S390_ESCA_CPU_SLOTS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
@@ -417,9 +485,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
unsigned int idx;
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
- /* enable CMMA only for z10 and later (EDAT_1) */
- ret = -EINVAL;
- if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+ ret = -ENXIO;
+ if (!sclp.has_cmma)
break;
ret = -EBUSY;
@@ -432,6 +499,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
mutex_unlock(&kvm->lock);
break;
case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = -ENXIO;
+ if (!sclp.has_cmma)
+ break;
ret = -EINVAL;
if (!kvm->arch.use_cmma)
break;
@@ -675,6 +745,39 @@ out:
return ret;
}
+static int kvm_s390_set_processor_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+ int ret = -EBUSY;
+
+ if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
+ return -EFAULT;
+ if (!bitmap_subset((unsigned long *) data.feat,
+ kvm_s390_available_cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ if (!atomic_read(&kvm->online_vcpus)) {
+ bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ ret = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ /*
+ * Once supported by kernel + hw, we have to store the subfunctions
+ * in kvm->arch and remember that user space configured them.
+ */
+ return -ENXIO;
+}
+
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@@ -683,6 +786,12 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_PROCESSOR:
ret = kvm_s390_set_processor(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ ret = kvm_s390_set_processor_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ ret = kvm_s390_set_processor_subfunc(kvm, attr);
+ break;
}
return ret;
}
@@ -731,6 +840,50 @@ out:
return ret;
}
+static int kvm_s390_get_processor_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+ return -EFAULT;
+ return 0;
+}
+
+static int kvm_s390_get_machine_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ bitmap_copy((unsigned long *) data.feat,
+ kvm_s390_available_cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+ return -EFAULT;
+ return 0;
+}
+
+static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ /*
+ * Once we can actually configure subfunctions (kernel + hw support),
+ * we have to check if they were already set by user space, if so copy
+ * them from kvm->arch.
+ */
+ return -ENXIO;
+}
+
+static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
+ sizeof(struct kvm_s390_vm_cpu_subfunc)))
+ return -EFAULT;
+ return 0;
+}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@@ -742,6 +895,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MACHINE:
ret = kvm_s390_get_machine(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ ret = kvm_s390_get_processor_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_FEAT:
+ ret = kvm_s390_get_machine_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ ret = kvm_s390_get_processor_subfunc(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+ ret = kvm_s390_get_machine_subfunc(kvm, attr);
+ break;
}
return ret;
}
@@ -802,6 +967,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = sclp.has_cmma ? 0 : -ENXIO;
+ break;
case KVM_S390_VM_MEM_LIMIT_SIZE:
ret = 0;
break;
@@ -825,8 +992,13 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
case KVM_S390_VM_CPU_MACHINE:
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ case KVM_S390_VM_CPU_MACHINE_FEAT:
+ case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
ret = 0;
break;
+ /* configuring subfunctions is not supported yet */
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
default:
ret = -ENXIO;
break;
@@ -857,7 +1029,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
- unsigned long curkey;
int i, r = 0;
if (args->flags != 0)
@@ -878,26 +1049,27 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (!keys)
return -ENOMEM;
+ down_read(&current->mm->mmap_sem);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
- goto out;
+ break;
}
- curkey = get_guest_storage_key(current->mm, hva);
- if (IS_ERR_VALUE(curkey)) {
- r = curkey;
- goto out;
- }
- keys[i] = curkey;
+ r = get_guest_storage_key(current->mm, hva, &keys[i]);
+ if (r)
+ break;
+ }
+ up_read(&current->mm->mmap_sem);
+
+ if (!r) {
+ r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+ sizeof(uint8_t) * args->count);
+ if (r)
+ r = -EFAULT;
}
- r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
- sizeof(uint8_t) * args->count);
- if (r)
- r = -EFAULT;
-out:
kvfree(keys);
return r;
}
@@ -934,24 +1106,25 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
goto out;
+ down_read(&current->mm->mmap_sem);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
- goto out;
+ break;
}
/* Lowest order bit is reserved */
if (keys[i] & 0x01) {
r = -EINVAL;
- goto out;
+ break;
}
- r = set_guest_storage_key(current->mm, hva,
- (unsigned long)keys[i], 0);
+ r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r)
- goto out;
+ break;
}
+ up_read(&current->mm->mmap_sem);
out:
kvfree(keys);
return r;
@@ -1128,6 +1301,7 @@ static void sca_dispose(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ gfp_t alloc_flags = GFP_KERNEL;
int i, rc;
char debug_name[16];
static unsigned long sca_offset;
@@ -1149,9 +1323,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
rc = -ENOMEM;
+ ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
+
kvm->arch.use_esca = 0; /* start with basic SCA */
+ if (!sclp.has_64bscao)
+ alloc_flags |= GFP_DMA;
rwlock_init(&kvm->arch.sca_lock);
- kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
+ kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
@@ -1188,6 +1366,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
+ set_kvm_facility(kvm->arch.model.fac_mask, 74);
+ set_kvm_facility(kvm->arch.model.fac_list, 74);
+
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
@@ -1395,7 +1576,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
if (id < KVM_S390_BSCA_CPU_SLOTS)
return true;
- if (!sclp.has_esca)
+ if (!sclp.has_esca || !sclp.has_64bscao)
return false;
mutex_lock(&kvm->lock);
@@ -1657,15 +1838,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- vcpu->arch.sie_block->ecb = 0x02;
+ /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+ if (MACHINE_HAS_ESOP)
+ vcpu->arch.sie_block->ecb |= 0x02;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= 0x04;
- if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
+ if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;
- if (test_kvm_facility(vcpu->kvm, 8))
+ if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
vcpu->arch.sie_block->ecb2 |= 0x08;
- vcpu->arch.sie_block->eca = 0xC1002000U;
+ vcpu->arch.sie_block->eca = 0x1002000U;
+ if (sclp.has_cei)
+ vcpu->arch.sie_block->eca |= 0x80000000U;
+ if (sclp.has_ib)
+ vcpu->arch.sie_block->eca |= 0x40000000U;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
@@ -1678,6 +1865,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
}
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ if (test_kvm_facility(vcpu->kvm, 74))
+ vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1715,6 +1904,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+ /* the real guest size will always be smaller than msl */
+ vcpu->arch.sie_block->mso = 0;
+ vcpu->arch.sie_block->msl = sclp.hamax;
+
vcpu->arch.sie_block->icpua = id;
spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -2001,6 +2194,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (dbg->control & ~VALID_GUESTDBG_FLAGS)
return -EINVAL;
+ if (!sclp.has_gpere)
+ return -EINVAL;
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
@@ -2597,6 +2792,8 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
+ if (!sclp.has_ibs)
+ return;
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 8621ab0..52aa47e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -175,6 +175,12 @@ static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
return 0;
}
+static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr)
+{
+ WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS);
+ return test_bit_inv(nr, kvm->arch.cpu_feat);
+}
+
/* are cpu states controlled by user space */
static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
{
@@ -250,6 +256,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
+/* implemented in sthyi.c */
+int handle_sthyi(struct kvm_vcpu *vcpu);
+
/* implemented in kvm-s390.c */
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 95916fa..3db3be1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -27,6 +27,7 @@
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
+#include <asm/sclp.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
@@ -152,30 +153,166 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
static int __skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc = 0;
+
+ trace_kvm_s390_skey_related_inst(vcpu);
if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
return rc;
rc = s390_enable_skey();
- VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
- trace_kvm_s390_skey_related_inst(vcpu);
- vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
+ if (!rc)
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
return rc;
}
-
-static int handle_skey(struct kvm_vcpu *vcpu)
+static int try_handle_skey(struct kvm_vcpu *vcpu)
{
- int rc = __skey_check_enable(vcpu);
+ int rc;
+ vcpu->stat.instruction_storage_key++;
+ rc = __skey_check_enable(vcpu);
if (rc)
return rc;
- vcpu->stat.instruction_storage_key++;
-
+ if (sclp.has_skey) {
+ /* with storage-key facility, SIE interprets it for us */
+ kvm_s390_retry_instr(vcpu);
+ VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+ return -EAGAIN;
+ }
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ return 0;
+}
- kvm_s390_retry_instr(vcpu);
- VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+static int handle_iske(struct kvm_vcpu *vcpu)
+{
+ unsigned long addr;
+ unsigned char key;
+ int reg1, reg2;
+ int rc;
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
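+ /* translate the guest logical address into a host virtual address */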
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+ addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = get_guest_storage_key(current->mm, addr, &key);
+ up_read(&current->mm->mmap_sem);
+ if (rc)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ vcpu->run->s.regs.gprs[reg1] &= ~0xff;
+ vcpu->run->s.regs.gprs[reg1] |= key;
+ return 0;
+}
+
+static int handle_rrbe(struct kvm_vcpu *vcpu)
+{
+ unsigned long addr;
+ int reg1, reg2;
+ int rc;
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+ addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = reset_guest_reference_bit(current->mm, addr);
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
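+ /* a non-negative rc is the condition code to be reported to the guest */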
+ kvm_s390_set_psw_cc(vcpu, rc);
+ return 0;
+}
+
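+/*
+ * Bits of the SSKE m3 field: NQ (nonquiescing), MR (reference bit
+ * update mask), MC (change bit update mask) and MB (multiple block).
+ */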
+#define SSKE_NQ 0x8
+#define SSKE_MR 0x4
+#define SSKE_MC 0x2
+#define SSKE_MB 0x1
+static int handle_sske(struct kvm_vcpu *vcpu)
+{
+ unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
+ unsigned long start, end;
+ unsigned char key, oldkey;
+ int reg1, reg2;
+ int rc;
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ if (!test_kvm_facility(vcpu->kvm, 8))
+ m3 &= ~SSKE_MB;
+ if (!test_kvm_facility(vcpu->kvm, 10))
+ m3 &= ~(SSKE_MC | SSKE_MR);
+ if (!test_kvm_facility(vcpu->kvm, 14))
+ m3 &= ~SSKE_NQ;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ start = kvm_s390_logical_to_effective(vcpu, start);
+ if (m3 & SSKE_MB) {
+ /* start already designates an absolute address */
+ end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ } else {
+ start = kvm_s390_real_to_abs(vcpu, start);
+ end = start + PAGE_SIZE;
+ }
+
+ while (start != end) {
+ unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+
+ if (kvm_is_error_hva(addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
+ m3 & SSKE_NQ, m3 & SSKE_MR,
+ m3 & SSKE_MC);
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ start += PAGE_SIZE;
+ }
+
+ if (m3 & (SSKE_MC | SSKE_MR)) {
+ if (m3 & SSKE_MB) {
+ /* skey in reg1 is unpredictable */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ } else {
+ kvm_s390_set_psw_cc(vcpu, rc);
+ vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
+ vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
+ }
+ }
+ if (m3 & SSKE_MB) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
+ vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
+ else
+ vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
+ end = kvm_s390_logical_to_effective(vcpu, end);
+ vcpu->run->s.regs.gprs[reg2] |= end;
+ }
return 0;
}
@@ -583,9 +720,9 @@ static const intercept_handler_t b2_handlers[256] = {
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
[0x21] = handle_ipte_interlock,
- [0x29] = handle_skey,
- [0x2a] = handle_skey,
- [0x2b] = handle_skey,
+ [0x29] = handle_iske,
+ [0x2a] = handle_rrbe,
+ [0x2b] = handle_sske,
[0x2c] = handle_test_block,
[0x30] = handle_io_inst,
[0x31] = handle_io_inst,
@@ -654,8 +791,10 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
static int handle_pfmf(struct kvm_vcpu *vcpu)
{
+ bool mr = false, mc = false, nq;
int reg1, reg2;
unsigned long start, end;
+ unsigned char key;
vcpu->stat.instruction_pfmf++;
@@ -675,15 +814,27 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
!test_kvm_facility(vcpu->kvm, 14))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* No support for conditional-SSKE */
- if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ /* Only provide conditional-SSKE support if enabled for the guest */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK &&
+ test_kvm_facility(vcpu->kvm, 10)) {
+ mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR;
+ mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC;
+ }
+ nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
+ key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
start = kvm_s390_logical_to_effective(vcpu, start);
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (kvm_s390_check_low_addr_prot_real(vcpu, start))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ }
+
switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
case 0x00000000:
+ /* only 4k frames specify a real address */
+ start = kvm_s390_real_to_abs(vcpu, start);
end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
break;
case 0x00001000:
@@ -701,20 +852,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
}
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
- if (kvm_s390_check_low_addr_prot_real(vcpu, start))
- return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
- }
-
- while (start < end) {
- unsigned long useraddr, abs_addr;
+ while (start != end) {
+ unsigned long useraddr;
/* Translate guest address to host address */
- if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
- abs_addr = kvm_s390_real_to_abs(vcpu, start);
- else
- abs_addr = start;
- useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+ useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(useraddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
@@ -728,16 +870,25 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
- if (set_guest_storage_key(current->mm, useraddr,
- vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
- vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+ down_read(&current->mm->mmap_sem);
+ rc = cond_set_guest_storage_key(current->mm, useraddr,
+ key, NULL, nq, mr, mc);
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
start += PAGE_SIZE;
}
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
- vcpu->run->s.regs.gprs[reg2] = end;
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
+ vcpu->run->s.regs.gprs[reg2] = end;
+ } else {
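+ /* in 24/31-bit mode only bits 32-63 of the register are replaced */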
+ vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
+ end = kvm_s390_logical_to_effective(vcpu, end);
+ vcpu->run->s.regs.gprs[reg2] |= end;
+ }
+ }
return 0;
}
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
new file mode 100644
index 0000000..bd98b7d
--- /dev/null
+++ b/arch/s390/kvm/sthyi.c
@@ -0,0 +1,471 @@
+/*
+ * store hypervisor information instruction emulation functions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/ratelimit.h>
+
+#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace.h"
+
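+/* a cpu with this weight in the diag204 data is dedicated to its partition */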
+#define DED_WEIGHT 0xffff
+/*
+ * CP and IFL as EBCDIC strings; the names are left-justified and padded
+ * with spaces, so SP (0x40) marks the end of a string.
+ */
+#define CP 0xc3d7404040404040UL
+#define IFL 0xc9c6d34040404040UL
+
+enum hdr_flags {
+ HDR_NOT_LPAR = 0x10,
+ HDR_STACK_INCM = 0x20,
+ HDR_STSI_UNAV = 0x40,
+ HDR_PERF_UNAV = 0x80,
+};
+
+enum mac_validity {
+ MAC_NAME_VLD = 0x20,
+ MAC_ID_VLD = 0x40,
+ MAC_CNT_VLD = 0x80,
+};
+
+enum par_flag {
+ PAR_MT_EN = 0x80,
+};
+
+enum par_validity {
+ PAR_GRP_VLD = 0x08,
+ PAR_ID_VLD = 0x10,
+ PAR_ABS_VLD = 0x20,
+ PAR_WGHT_VLD = 0x40,
+ PAR_PCNT_VLD = 0x80,
+};
+
+struct hdr_sctn {
+ u8 infhflg1;
+ u8 infhflg2; /* reserved */
+ u8 infhval1; /* reserved */
+ u8 infhval2; /* reserved */
+ u8 reserved[3];
+ u8 infhygct;
+ u16 infhtotl;
+ u16 infhdln;
+ u16 infmoff;
+ u16 infmlen;
+ u16 infpoff;
+ u16 infplen;
+ u16 infhoff1;
+ u16 infhlen1;
+ u16 infgoff1;
+ u16 infglen1;
+ u16 infhoff2;
+ u16 infhlen2;
+ u16 infgoff2;
+ u16 infglen2;
+ u16 infhoff3;
+ u16 infhlen3;
+ u16 infgoff3;
+ u16 infglen3;
+ u8 reserved2[4];
+} __packed;
+
+struct mac_sctn {
+ u8 infmflg1; /* reserved */
+ u8 infmflg2; /* reserved */
+ u8 infmval1;
+ u8 infmval2; /* reserved */
+ u16 infmscps;
+ u16 infmdcps;
+ u16 infmsifl;
+ u16 infmdifl;
+ char infmname[8];
+ char infmtype[4];
+ char infmmanu[16];
+ char infmseq[16];
+ char infmpman[4];
+ u8 reserved[4];
+} __packed;
+
+struct par_sctn {
+ u8 infpflg1;
+ u8 infpflg2; /* reserved */
+ u8 infpval1;
+ u8 infpval2; /* reserved */
+ u16 infppnum;
+ u16 infpscps;
+ u16 infpdcps;
+ u16 infpsifl;
+ u16 infpdifl;
+ u16 reserved;
+ char infppnam[8];
+ u32 infpwbcp;
+ u32 infpabcp;
+ u32 infpwbif;
+ u32 infpabif;
+ char infplgnm[8];
+ u32 infplgcp;
+ u32 infplgif;
+} __packed;
+
+struct sthyi_sctns {
+ struct hdr_sctn hdr;
+ struct mac_sctn mac;
+ struct par_sctn par;
+} __packed;
+
+struct cpu_inf {
+ u64 lpar_cap;
+ u64 lpar_grp_cap;
+ u64 lpar_weight;
+ u64 all_weight;
+ int cpu_num_ded;
+ int cpu_num_shd;
+};
+
+struct lpar_cpu_inf {
+ struct cpu_inf cp;
+ struct cpu_inf ifl;
+};
+
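+/*
+ * Looks up the cpu type name for a ctidx in the diag224 name table;
+ * the names start DIAG204_CPU_NAME_LEN bytes into the table, hence
+ * the ctidx + 1.
+ */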
+static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
+{
+ return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
+}
+
+/*
+ * Scales the cpu capping from the lpar range to the one expected in
+ * sthyi data.
+ *
+ * diag204 reports a cap in hundredths of processor units.
+ * z/VM's range for one core is 0 - 0x10000.
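+ * For example, a diag204 cap of 150 (1.5 cores) scales to 0x18000.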
+ */
+static u32 scale_cap(u32 in)
+{
+ return (0x10000 * in) / 100;
+}
+
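+/*
+ * The header stores the offsets and lengths of the machine and
+ * partition sections that follow it, as well as the total length.
+ */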
+static void fill_hdr(struct sthyi_sctns *sctns)
+{
+ sctns->hdr.infhdln = sizeof(sctns->hdr);
+ sctns->hdr.infmoff = sizeof(sctns->hdr);
+ sctns->hdr.infmlen = sizeof(sctns->mac);
+ sctns->hdr.infplen = sizeof(sctns->par);
+ sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
+ sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
+}
+
+static void fill_stsi_mac(struct sthyi_sctns *sctns,
+ struct sysinfo_1_1_1 *sysinfo)
+{
+ if (stsi(sysinfo, 1, 1, 1))
+ return;
+
+ sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+
+ memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
+ memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
+ memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
+ memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
+
+ sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD;
+}
+
+static void fill_stsi_par(struct sthyi_sctns *sctns,
+ struct sysinfo_2_2_2 *sysinfo)
+{
+ if (stsi(sysinfo, 2, 2, 2))
+ return;
+
+ sctns->par.infppnum = sysinfo->lpar_number;
+ memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
+
+ sctns->par.infpval1 |= PAR_ID_VLD;
+}
+
+static void fill_stsi(struct sthyi_sctns *sctns)
+{
+ void *sysinfo;
+
+ /* Errors are handled through the validity bits in the response. */
+ sysinfo = (void *)__get_free_page(GFP_KERNEL);
+ if (!sysinfo)
+ return;
+
+ fill_stsi_mac(sctns, sysinfo);
+ fill_stsi_par(sctns, sysinfo);
+
+ free_pages((unsigned long)sysinfo, 0);
+}
+
+static void fill_diag_mac(struct sthyi_sctns *sctns,
+ struct diag204_x_phys_block *block,
+ void *diag224_buf)
+{
+ int i;
+
+ for (i = 0; i < block->hdr.cpus; i++) {
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdcps++;
+ else
+ sctns->mac.infmscps++;
+ break;
+ case IFL:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdifl++;
+ else
+ sctns->mac.infmsifl++;
+ break;
+ }
+ }
+ sctns->mac.infmval1 |= MAC_CNT_VLD;
+}
+
+/* Returns a pointer to the next partition block. */
+static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
+ bool this_lpar,
+ void *diag224_buf,
+ struct diag204_x_part_block *block)
+{
+ int i, capped = 0, weight_cp = 0, weight_ifl = 0;
+ struct cpu_inf *cpu_inf;
+
+ for (i = 0; i < block->hdr.rcpus; i++) {
+ if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
+ continue;
+
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ cpu_inf = &part_inf->cp;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_cp |= block->cpus[i].cur_weight;
+ break;
+ case IFL:
+ cpu_inf = &part_inf->ifl;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_ifl |= block->cpus[i].cur_weight;
+ break;
+ default:
+ continue;
+ }
+
+ if (!this_lpar)
+ continue;
+
+ capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
+ cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
+ cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;
+
+ if (block->cpus[i].weight == DED_WEIGHT)
+ cpu_inf->cpu_num_ded += 1;
+ else
+ cpu_inf->cpu_num_shd += 1;
+ }
+
+ if (this_lpar && capped) {
+ part_inf->cp.lpar_weight = weight_cp;
+ part_inf->ifl.lpar_weight = weight_ifl;
+ }
+ part_inf->cp.all_weight += weight_cp;
+ part_inf->ifl.all_weight += weight_ifl;
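+ /*
+ * The cpu array is the last member of a partition block, so the
+ * next block starts right behind the last cpu entry.
+ */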
+ return (struct diag204_x_part_block *)&block->cpus[i];
+}
+
+static void fill_diag(struct sthyi_sctns *sctns)
+{
+ int i, r, pages;
+ bool this_lpar;
+ void *diag204_buf;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
+
+ /* Errors are handled through the validity bits in the response. */
+ pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+ (unsigned long)DIAG204_INFO_EXT, 0, NULL);
+ if (pages <= 0)
+ return;
+
+ diag204_buf = vmalloc(PAGE_SIZE * pages);
+ if (!diag204_buf)
+ return;
+
+ r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
+ (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
+ if (r < 0)
+ goto out;
+
+ diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+ if (!diag224_buf || diag224(diag224_buf))
+ goto out;
+
+ ti_hdr = diag204_buf;
+ part_block = diag204_buf + sizeof(*ti_hdr);
+
+ for (i = 0; i < ti_hdr->npar; i++) {
+ /*
+ * For the calling lpar we also need to get the cpu
+ * caps and weights. The time information block header
+ * specifies the offset to the partition block of the
+ * caller lpar, so we can tell when we are processing its data.
+ */
+ this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
+ part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
+ part_block);
+ }
+
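+ /* behind the last partition block comes the physical block */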
+ phys_block = (struct diag204_x_phys_block *)part_block;
+ part_block = diag204_buf + ti_hdr->this_part;
+ if (part_block->hdr.mtid)
+ sctns->par.infpflg1 = PAR_MT_EN;
+
+ sctns->par.infpval1 |= PAR_GRP_VLD;
+ sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
+ sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
+ memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
+ sizeof(sctns->par.infplgnm));
+
+ sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
+ sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
+ sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
+ sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
+ sctns->par.infpval1 |= PAR_PCNT_VLD;
+
+ sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
+ sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
+ sctns->par.infpval1 |= PAR_ABS_VLD;
+
+ /*
+ * Everything below needs global performance data to be
+ * meaningful.
+ */
+ if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
+ sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
+ goto out;
+ }
+
+ fill_diag_mac(sctns, phys_block, diag224_buf);
+
+ if (lpar_inf.cp.lpar_weight) {
+ sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
+ lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
+ }
+
+ if (lpar_inf.ifl.lpar_weight) {
+ sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
+ lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
+ }
+ sctns->par.infpval1 |= PAR_WGHT_VLD;
+
+out:
+ kfree(diag224_buf);
+ vfree(diag204_buf);
+}
+
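+/*
+ * Execute the real STHYI instruction (encoded via .insn). The function
+ * code is passed in r0, the response buffer address in r2, and the
+ * condition code is extracted from the PSW via ipm.
+ */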
+static int sthyi(u64 vaddr)
+{
+ register u64 code asm("0") = 0;
+ register u64 addr asm("2") = vaddr;
+ int cc;
+
+ asm volatile(
+ ".insn rre,0xB2560000,%[code],%[addr]\n"
+ "ipm %[cc]\n"
+ "srl %[cc],28\n"
+ : [cc] "=d" (cc)
+ : [code] "d" (code), [addr] "a" (addr)
+ : "memory", "cc");
+ return cc;
+}
+
+int handle_sthyi(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2, r = 0;
+ u64 code, addr, cc = 0;
+ struct sthyi_sctns *sctns = NULL;
+
+ /*
+ * STHYI requires extensive locking in the higher hypervisors
+ * and is computationally and memory intensive. Therefore we
+ * rate-limit its execution per VM.
+ */
+ if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) {
+ kvm_s390_retry_instr(vcpu);
+ return 0;
+ }
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+ code = vcpu->run->s.regs.gprs[reg1];
+ addr = vcpu->run->s.regs.gprs[reg2];
+
+ vcpu->stat.instruction_sthyi++;
+ VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
+ trace_kvm_s390_handle_sthyi(vcpu, code, addr);
+
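+ /* the operands must be two distinct even registers; the buffer must be page aligned */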
+ if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (code & 0xffff) {
+ cc = 3;
+ goto out;
+ }
+
+ /*
+ * If the page has not yet been faulted in, we want to do that
+ * now and not after all the expensive calculations.
+ */
+ r = write_guest(vcpu, addr, reg2, &cc, 1);
+ if (r)
+ return kvm_s390_inject_prog_cond(vcpu, r);
+
+ sctns = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!sctns)
+ return -ENOMEM;
+
+ /*
+ * If we are a guest, we don't want to emulate an emulated
+ * instruction. We ask the hypervisor to provide the data.
+ */
+ if (test_facility(74)) {
+ cc = sthyi((u64)sctns);
+ goto out;
+ }
+
+ fill_hdr(sctns);
+ fill_stsi(sctns);
+ fill_diag(sctns);
+
+out:
+ if (!cc) {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
+ }
+
+ free_page((unsigned long)sctns);
+ vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0;
+ kvm_s390_set_psw_cc(vcpu, cc);
+ return r;
+}
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 916834d..4fc9d4e 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -41,7 +41,7 @@ TRACE_EVENT(kvm_s390_skey_related_inst,
TP_fast_assign(
VCPU_ASSIGN_COMMON
),
- VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+ VCPU_TP_PRINTK("%s", "storage key related instruction")
);
TRACE_EVENT(kvm_s390_major_guest_pfault,
@@ -185,8 +185,10 @@ TRACE_EVENT(kvm_s390_intercept_prog,
__entry->code = code;
),
- VCPU_TP_PRINTK("intercepted program interruption %04x",
- __entry->code)
+ VCPU_TP_PRINTK("intercepted program interruption %04x (%s)",
+ __entry->code,
+ __print_symbolic(__entry->code,
+ icpt_prog_codes))
);
/*
@@ -412,6 +414,47 @@ TRACE_EVENT(kvm_s390_handle_stsi,
__entry->addr)
);
+TRACE_EVENT(kvm_s390_handle_operexc,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+ TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u64, instruction)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->instruction = ((__u64)ipa << 48) |
+ ((__u64)ipb << 16);
+ ),
+
+ VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)",
+ __entry->instruction,
+ __print_symbolic(icpt_insn_decoder(__entry->instruction),
+ icpt_insn_codes))
+ );
+
+TRACE_EVENT(kvm_s390_handle_sthyi,
+ TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, code, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u64, code)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx",
+ __entry->code, __entry->addr)
+ );
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 4324b87..fa286d0 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -506,12 +506,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_t old, new;
pte_t *ptep;
- down_read(&mm->mmap_sem);
ptep = get_locked_pte(mm, addr, &ptl);
- if (unlikely(!ptep)) {
- up_read(&mm->mmap_sem);
+ if (unlikely(!ptep))
return -EFAULT;
- }
new = old = pgste_get_lock(ptep);
pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT |
@@ -538,45 +535,100 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
pgste_set_unlock(ptep, new);
pte_unmap_unlock(ptep, ptl);
- up_read(&mm->mmap_sem);
return 0;
}
EXPORT_SYMBOL(set_guest_storage_key);
-unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+/**
+ * Conditionally set a guest storage key (handling csske).
+ * oldkey is updated when a pointer is given and either mr or mc is set.
+ *
+ * Returns 0 if no guest storage key update was necessary, 1 if the guest
+ * storage key was updated, and -EFAULT on access errors.
+ */
+int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char key, unsigned char *oldkey,
+ bool nq, bool mr, bool mc)
+{
+ unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT;
+ int rc;
+
+ /* we can drop the pgste lock between getting and setting the key */
+ if (mr | mc) {
+ rc = get_guest_storage_key(current->mm, addr, &tmp);
+ if (rc)
+ return rc;
+ if (oldkey)
+ *oldkey = tmp;
+ if (!mr)
+ mask |= _PAGE_REFERENCED;
+ if (!mc)
+ mask |= _PAGE_CHANGED;
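+ /* nothing to do if all key bits covered by the mask already match */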
+ if (!((tmp ^ key) & mask))
+ return 0;
+ }
+ rc = set_guest_storage_key(current->mm, addr, key, nq);
+ return rc < 0 ? rc : 1;
+}
+EXPORT_SYMBOL(cond_set_guest_storage_key);
+
+/**
+ * Reset a guest reference bit (rrbe), reporting the previous reference and change bit state.
+ *
+ * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
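+ * The returned cc encodes the previous reference (2) and change (1) bit state.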
+ */
+int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr)
{
- unsigned char key;
spinlock_t *ptl;
- pgste_t pgste;
+ pgste_t old, new;
pte_t *ptep;
+ int cc = 0;
- down_read(&mm->mmap_sem);
ptep = get_locked_pte(mm, addr, &ptl);
- if (unlikely(!ptep)) {
- up_read(&mm->mmap_sem);
+ if (unlikely(!ptep))
return -EFAULT;
- }
- pgste = pgste_get_lock(ptep);
- if (pte_val(*ptep) & _PAGE_INVALID) {
- key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
- key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
- key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
- key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
- } else {
- key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+ new = old = pgste_get_lock(ptep);
+ /* Reset guest reference bit only */
+ pgste_val(new) &= ~PGSTE_GR_BIT;
- /* Reflect guest's logical view, not physical */
- if (pgste_val(pgste) & PGSTE_GR_BIT)
- key |= _PAGE_REFERENCED;
- if (pgste_val(pgste) & PGSTE_GC_BIT)
- key |= _PAGE_CHANGED;
+ if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK);
+ /* Merge real referenced bit into host-set */
+ pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT;
}
+ /* Reflect guest's logical view, not physical */
+ cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49;
+ /* Changing the guest storage key is considered a change of the page */
+ if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT)
+ pgste_val(new) |= PGSTE_UC_BIT;
+ pgste_set_unlock(ptep, new);
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+EXPORT_SYMBOL(reset_guest_reference_bit);
+
+int get_guest_storage_key(struct mm_struct *mm, unsigned long addr,
+ unsigned char *key)
+{
+ spinlock_t *ptl;
+ pgste_t pgste;
+ pte_t *ptep;
+
+ ptep = get_locked_pte(mm, addr, &ptl);
+ if (unlikely(!ptep))
+ return -EFAULT;
+
+ pgste = pgste_get_lock(ptep);
+ *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56;
+ if (!(pte_val(*ptep) & _PAGE_INVALID))
+ *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK);
+ /* Reflect guest's logical view, not physical */
+ *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48;
pgste_set_unlock(ptep, pgste);
pte_unmap_unlock(ptep, ptl);
- up_read(&mm->mmap_sem);
- return key;
+ return 0;
}
EXPORT_SYMBOL(get_guest_storage_key);
#endif