From e65f30e0cb29694c4241bd9c96ea9413938fcec5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 4 Feb 2016 10:24:52 +0100 Subject: s390: hypfs: Move diag implementation and data definitions Diag 204 data and function definitions currently live in the hypfs files. As KVM will be a consumer of this data, we need to make it publicly available and move it to the appropriate diag.{c,h} files. __attribute__ ((packed)) occurences were replaced with __packed for all moved structs. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Acked-by: Michael Holzheu Signed-off-by: Christian Borntraeger diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 0450357..1e28414 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -19,29 +19,10 @@ #include #include "hypfs.h" -#define LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ -#define CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ #define TMP_SIZE 64 /* size of temporary buffers */ #define DBFS_D204_HDR_VERSION 0 -/* diag 204 subcodes */ -enum diag204_sc { - SUBC_STIB4 = 4, - SUBC_RSI = 5, - SUBC_STIB6 = 6, - SUBC_STIB7 = 7 -}; - -/* The two available diag 204 data formats */ -enum diag204_format { - INFO_SIMPLE = 0, - INFO_EXT = 0x00010000 -}; - -/* bit is set in flags, when physical cpu info is included in diag 204 data */ -#define LPAR_PHYS_FLG 0x80 - static char *diag224_cpu_names; /* diag 224 name table */ static enum diag204_sc diag204_store_sc; /* used subcode for store */ static enum diag204_format diag204_info_type; /* used diag 204 data format */ @@ -53,7 +34,7 @@ static int diag204_buf_pages; /* number of pages for diag204 data */ static struct dentry *dbfs_d204_file; /* - * DIAG 204 data structures and member access functions. + * DIAG 204 member access functions. * * Since we have two different diag 204 data formats for old and new s390 * machines, we do not access the structs directly, but use getter functions for @@ -62,302 +43,173 @@ static struct dentry *dbfs_d204_file; /* Time information block */ -struct info_blk_hdr { - __u8 npar; - __u8 flags; - __u16 tslice; - __u16 phys_cpus; - __u16 this_part; - __u64 curtod; -} __attribute__ ((packed)); - -struct x_info_blk_hdr { - __u8 npar; - __u8 flags; - __u16 tslice; - __u16 phys_cpus; - __u16 this_part; - __u64 curtod1; - __u64 curtod2; - char reserved[40]; -} __attribute__ ((packed)); - static inline int info_blk_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct info_blk_hdr); - else /* INFO_EXT */ - return sizeof(struct x_info_blk_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_info_blk_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_info_blk_hdr); } static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->npar; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->npar; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->npar; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->npar; } static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->flags; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->flags; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->flags; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->flags; } static inline __u16 info_blk_hdr__pcpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct info_blk_hdr *)hdr)->phys_cpus; - else /* INFO_EXT */ - return ((struct x_info_blk_hdr *)hdr)->phys_cpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->phys_cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->phys_cpus; } /* Partition header */ -struct part_hdr { - __u8 pn; - __u8 cpus; - char reserved[6]; - char part_name[LPAR_NAME_LEN]; -} __attribute__ ((packed)); - -struct x_part_hdr { - __u8 pn; - __u8 cpus; - __u8 rcpus; - __u8 pflag; - __u32 mlu; - char part_name[LPAR_NAME_LEN]; - char lpc_name[8]; - char os_name[8]; - __u64 online_cs; - __u64 online_es; - __u8 upid; - char reserved1[3]; - __u32 group_mlu; - char group_name[8]; - char reserved2[32]; -} __attribute__ ((packed)); - static inline int part_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct part_hdr); - else /* INFO_EXT */ - return sizeof(struct x_part_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_part_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_part_hdr); } static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct part_hdr *)hdr)->cpus; - else /* INFO_EXT */ - return ((struct x_part_hdr *)hdr)->rcpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_part_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_part_hdr *)hdr)->rcpus; } static inline void part_hdr__part_name(enum diag204_format type, void *hdr, char *name) { - if (type == INFO_SIMPLE) - memcpy(name, ((struct part_hdr *)hdr)->part_name, - LPAR_NAME_LEN); - else /* INFO_EXT */ - memcpy(name, ((struct x_part_hdr *)hdr)->part_name, - LPAR_NAME_LEN); - EBCASC(name, LPAR_NAME_LEN); - name[LPAR_NAME_LEN] = 0; + if (type == DIAG204_INFO_SIMPLE) + memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + else /* DIAG204_INFO_EXT */ + memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + EBCASC(name, DIAG204_LPAR_NAME_LEN); + name[DIAG204_LPAR_NAME_LEN] = 0; strim(name); } -struct cpu_info { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - __u8 cflag; - __u16 weight; - __u64 acc_time; - __u64 lp_time; -} __attribute__ ((packed)); - -struct x_cpu_info { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - __u8 cflag; - __u16 weight; - __u64 acc_time; - __u64 lp_time; - __u16 min_weight; - __u16 cur_weight; - __u16 max_weight; - char reseved2[2]; - __u64 online_time; - __u64 wait_time; - __u32 pma_weight; - __u32 polar_weight; - char reserved3[40]; -} __attribute__ ((packed)); - /* CPU info block */ static inline int cpu_info__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct cpu_info); - else /* INFO_EXT */ - return sizeof(struct x_cpu_info); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_cpu_info); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_cpu_info); } static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->ctidx; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->ctidx; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->ctidx; } static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->cpu_addr; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->cpu_addr; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; } static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->acc_time; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->acc_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->acc_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->acc_time; } static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct cpu_info *)hdr)->lp_time; - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->lp_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->lp_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->lp_time; } static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) + if (type == DIAG204_INFO_SIMPLE) return 0; /* online_time not available in simple info */ - else /* INFO_EXT */ - return ((struct x_cpu_info *)hdr)->online_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->online_time; } /* Physical header */ -struct phys_hdr { - char reserved1[1]; - __u8 cpus; - char reserved2[6]; - char mgm_name[8]; -} __attribute__ ((packed)); - -struct x_phys_hdr { - char reserved1[1]; - __u8 cpus; - char reserved2[6]; - char mgm_name[8]; - char reserved3[80]; -} __attribute__ ((packed)); - static inline int phys_hdr__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct phys_hdr); - else /* INFO_EXT */ - return sizeof(struct x_phys_hdr); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_hdr); } static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_hdr *)hdr)->cpus; - else /* INFO_EXT */ - return ((struct x_phys_hdr *)hdr)->cpus; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_hdr *)hdr)->cpus; } /* Physical CPU info block */ -struct phys_cpu { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - char reserved2[3]; - __u64 mgm_time; - char reserved3[8]; -} __attribute__ ((packed)); - -struct x_phys_cpu { - __u16 cpu_addr; - char reserved1[2]; - __u8 ctidx; - char reserved2[3]; - __u64 mgm_time; - char reserved3[80]; -} __attribute__ ((packed)); - static inline int phys_cpu__size(enum diag204_format type) { - if (type == INFO_SIMPLE) - return sizeof(struct phys_cpu); - else /* INFO_EXT */ - return sizeof(struct x_phys_cpu); + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_cpu); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_cpu); } static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->cpu_addr; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->cpu_addr; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; } static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->mgm_time; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->mgm_time; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->mgm_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; } static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) { - if (type == INFO_SIMPLE) - return ((struct phys_cpu *)hdr)->ctidx; - else /* INFO_EXT */ - return ((struct x_phys_cpu *)hdr)->ctidx; + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->ctidx; } /* Diagnose 204 functions */ - -static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) -{ - register unsigned long _subcode asm("0") = subcode; - register unsigned long _size asm("1") = size; - - asm volatile( - " diag %2,%0,0x204\n" - "0:\n" - EX_TABLE(0b,0b) - : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); - if (_subcode) - return -1; - return _size; -} - -static int diag204(unsigned long subcode, unsigned long size, void *addr) -{ - diag_stat_inc(DIAG_STAT_X204); - return __diag204(subcode, size, addr); -} - /* * For the old diag subcode 4 with simple data format we have to use real * memory. If we use subcode 6 or 7 with extended data format, we can (and @@ -409,12 +261,12 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages) *pages = diag204_buf_pages; return diag204_buf; } - if (fmt == INFO_SIMPLE) { + if (fmt == DIAG204_INFO_SIMPLE) { *pages = 1; return diag204_alloc_rbuf(); - } else {/* INFO_EXT */ - *pages = diag204((unsigned long)SUBC_RSI | - (unsigned long)INFO_EXT, 0, NULL); + } else {/* DIAG204_INFO_EXT */ + *pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); if (*pages <= 0) return ERR_PTR(-ENOSYS); else @@ -441,18 +293,18 @@ static int diag204_probe(void) void *buf; int pages, rc; - buf = diag204_get_buffer(INFO_EXT, &pages); + buf = diag204_get_buffer(DIAG204_INFO_EXT, &pages); if (!IS_ERR(buf)) { - if (diag204((unsigned long)SUBC_STIB7 | - (unsigned long)INFO_EXT, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB7; - diag204_info_type = INFO_EXT; + if (diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB7; + diag204_info_type = DIAG204_INFO_EXT; goto out; } - if (diag204((unsigned long)SUBC_STIB6 | - (unsigned long)INFO_EXT, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB6; - diag204_info_type = INFO_EXT; + if (diag204((unsigned long)DIAG204_SUBC_STIB6 | + (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB6; + diag204_info_type = DIAG204_INFO_EXT; goto out; } diag204_free_buffer(); @@ -460,15 +312,15 @@ static int diag204_probe(void) /* subcodes 6 and 7 failed, now try subcode 4 */ - buf = diag204_get_buffer(INFO_SIMPLE, &pages); + buf = diag204_get_buffer(DIAG204_INFO_SIMPLE, &pages); if (IS_ERR(buf)) { rc = PTR_ERR(buf); goto fail_alloc; } - if (diag204((unsigned long)SUBC_STIB4 | - (unsigned long)INFO_SIMPLE, pages, buf) >= 0) { - diag204_store_sc = SUBC_STIB4; - diag204_info_type = INFO_SIMPLE; + if (diag204((unsigned long)DIAG204_SUBC_STIB4 | + (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) { + diag204_store_sc = DIAG204_SUBC_STIB4; + diag204_info_type = DIAG204_INFO_SIMPLE; goto out; } else { rc = -ENOSYS; @@ -543,9 +395,9 @@ static void diag224_delete_name_table(void) static int diag224_idx2name(int index, char *name) { - memcpy(name, diag224_cpu_names + ((index + 1) * CPU_NAME_LEN), - CPU_NAME_LEN); - name[CPU_NAME_LEN] = 0; + memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), + DIAG204_CPU_NAME_LEN); + name[DIAG204_CPU_NAME_LEN] = 0; strim(name); return 0; } @@ -601,7 +453,7 @@ __init int hypfs_diag_init(void) pr_err("The hardware system does not support hypfs\n"); return -ENODATA; } - if (diag204_info_type == INFO_EXT) { + if (diag204_info_type == DIAG204_INFO_EXT) { rc = hypfs_dbfs_create_file(&dbfs_file_d204); if (rc) return rc; @@ -649,7 +501,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) cpu_info__lp_time(diag204_info_type, cpu_info)); if (IS_ERR(rc)) return PTR_ERR(rc); - if (diag204_info_type == INFO_EXT) { + if (diag204_info_type == DIAG204_INFO_EXT) { rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_info_type, cpu_info)); @@ -665,12 +517,12 @@ static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) { struct dentry *cpus_dir; struct dentry *lpar_dir; - char lpar_name[LPAR_NAME_LEN + 1]; + char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; void *cpu_info; int i; part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); - lpar_name[LPAR_NAME_LEN] = 0; + lpar_name[DIAG204_LPAR_NAME_LEN] = 0; lpar_dir = hypfs_mkdir(systems_dir, lpar_name); if (IS_ERR(lpar_dir)) return lpar_dir; @@ -753,7 +605,8 @@ int hypfs_diag_create_files(struct dentry *root) goto err_out; } } - if (info_blk_hdr__flags(diag204_info_type, time_hdr) & LPAR_PHYS_FLG) { + if (info_blk_hdr__flags(diag204_info_type, time_hdr) & + DIAG204_LPAR_PHYS_FLG) { ptr = hypfs_create_phys_files(root, part_hdr); if (IS_ERR(ptr)) { rc = PTR_ERR(ptr); diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 5fac921..f72744f 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -78,4 +78,131 @@ struct diag210 { extern int diag210(struct diag210 *addr); +/* bit is set in flags, when physical cpu info is included in diag 204 data */ +#define DIAG204_LPAR_PHYS_FLG 0x80 +#define DIAG204_LPAR_NAME_LEN 8 /* lpar name len in diag 204 data */ +#define DIAG204_CPU_NAME_LEN 16 /* type name len of cpus in diag224 name table */ + +/* diag 204 subcodes */ +enum diag204_sc { + DIAG204_SUBC_STIB4 = 4, + DIAG204_SUBC_RSI = 5, + DIAG204_SUBC_STIB6 = 6, + DIAG204_SUBC_STIB7 = 7 +}; + +/* The two available diag 204 data formats */ +enum diag204_format { + DIAG204_INFO_SIMPLE = 0, + DIAG204_INFO_EXT = 0x00010000 +}; + +struct diag204_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod; +} __packed; + +struct diag204_x_info_blk_hdr { + __u8 npar; + __u8 flags; + __u16 tslice; + __u16 phys_cpus; + __u16 this_part; + __u64 curtod1; + __u64 curtod2; + char reserved[40]; +} __packed; + +struct diag204_part_hdr { + __u8 pn; + __u8 cpus; + char reserved[6]; + char part_name[DIAG204_LPAR_NAME_LEN]; +} __packed; + +struct diag204_x_part_hdr { + __u8 pn; + __u8 cpus; + __u8 rcpus; + __u8 pflag; + __u32 mlu; + char part_name[DIAG204_LPAR_NAME_LEN]; + char lpc_name[8]; + char os_name[8]; + __u64 online_cs; + __u64 online_es; + __u8 upid; + char reserved1[3]; + __u32 group_mlu; + char group_name[8]; + char reserved2[32]; +} __packed; + +struct diag204_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; +} __packed; + +struct diag204_x_cpu_info { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + __u8 cflag; + __u16 weight; + __u64 acc_time; + __u64 lp_time; + __u16 min_weight; + __u16 cur_weight; + __u16 max_weight; + char reseved2[2]; + __u64 online_time; + __u64 wait_time; + __u32 pma_weight; + __u32 polar_weight; + char reserved3[40]; +} __packed; + +struct diag204_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; +} __packed; + +struct diag204_x_phys_hdr { + char reserved1[1]; + __u8 cpus; + char reserved2[6]; + char mgm_name[8]; + char reserved3[80]; +} __packed; + +struct diag204_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[8]; +} __packed; + +struct diag204_x_phys_cpu { + __u16 cpu_addr; + char reserved1[2]; + __u8 ctidx; + char reserved2[3]; + __u64 mgm_time; + char reserved3[80]; +} __packed; + +int diag204(unsigned long subcode, unsigned long size, void *addr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 48b37b8..f4ce4a2 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -162,6 +162,28 @@ int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode) } EXPORT_SYMBOL(diag14); +static inline int __diag204(unsigned long subcode, unsigned long size, void *addr) +{ + register unsigned long _subcode asm("0") = subcode; + register unsigned long _size asm("1") = size; + + asm volatile( + " diag %2,%0,0x204\n" + "0:\n" + EX_TABLE(0b,0b) + : "+d" (_subcode), "+d" (_size) : "d" (addr) : "memory"); + if (_subcode) + return -1; + return _size; +} + +int diag204(unsigned long subcode, unsigned long size, void *addr) +{ + diag_stat_inc(DIAG_STAT_X204); + return __diag204(subcode, size, addr); +} +EXPORT_SYMBOL(diag204); + /* * Diagnose 210: Get information about a virtual device */ -- cgit v0.10.2 From e435dc31398e63b992639cf62024d959219db191 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 8 Feb 2016 13:36:22 +0100 Subject: s390: Make cpc_name accessible sclp_ocf.c is the only way to get the cpc name, as it registers the sole event handler for the ocf event. By creating a new global function that copies that name, we make it accessible to the world which longs to retrieve it. Additionally we now also store the cpc name as EBCDIC, so we don't have to convert it to and from ASCII if it is requested in native encoding. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Acked-by: Heiko Carstens Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e4f6f73..49736a0 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -101,5 +101,6 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count); int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count); void sclp_early_detect(void); void _sclp_print_early(const char *); +void sclp_ocf_cpc_name_copy(char *dst); #endif /* _ASM_S390_SCLP_H */ diff --git a/drivers/s390/char/sclp_ocf.c b/drivers/s390/char/sclp_ocf.c index 2553db0..f59b717 100644 --- a/drivers/s390/char/sclp_ocf.c +++ b/drivers/s390/char/sclp_ocf.c @@ -26,7 +26,7 @@ #define OCF_LENGTH_CPC_NAME 8UL static char hmc_network[OCF_LENGTH_HMC_NETWORK + 1]; -static char cpc_name[OCF_LENGTH_CPC_NAME + 1]; +static char cpc_name[OCF_LENGTH_CPC_NAME]; /* in EBCDIC */ static DEFINE_SPINLOCK(sclp_ocf_lock); static struct work_struct sclp_ocf_change_work; @@ -72,9 +72,8 @@ static void sclp_ocf_handler(struct evbuf_header *evbuf) } if (cpc) { size = min(OCF_LENGTH_CPC_NAME, (size_t) cpc->length); + memset(cpc_name, 0, OCF_LENGTH_CPC_NAME); memcpy(cpc_name, cpc + 1, size); - EBCASC(cpc_name, size); - cpc_name[size] = 0; } spin_unlock(&sclp_ocf_lock); schedule_work(&sclp_ocf_change_work); @@ -85,15 +84,23 @@ static struct sclp_register sclp_ocf_event = { .receiver_fn = sclp_ocf_handler, }; +void sclp_ocf_cpc_name_copy(char *dst) +{ + spin_lock_irq(&sclp_ocf_lock); + memcpy(dst, cpc_name, OCF_LENGTH_CPC_NAME); + spin_unlock_irq(&sclp_ocf_lock); +} +EXPORT_SYMBOL(sclp_ocf_cpc_name_copy); + static ssize_t cpc_name_show(struct kobject *kobj, struct kobj_attribute *attr, char *page) { - int rc; + char name[OCF_LENGTH_CPC_NAME + 1]; - spin_lock_irq(&sclp_ocf_lock); - rc = snprintf(page, PAGE_SIZE, "%s\n", cpc_name); - spin_unlock_irq(&sclp_ocf_lock); - return rc; + sclp_ocf_cpc_name_copy(name); + name[OCF_LENGTH_CPC_NAME] = 0; + EBCASC(name, OCF_LENGTH_CPC_NAME); + return snprintf(page, PAGE_SIZE, "%s\n", name); } static struct kobj_attribute cpc_name_attr = -- cgit v0.10.2 From 022bd2d11cc51f62e873a09bcae8016b10950194 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 12 Feb 2016 12:52:49 +0100 Subject: s390: Make diag224 public Diag204's cpu structures only contain the cpu type by means of an index in the diag224 name table. Hence, to be able to use diag204 in any meaningful way, we also need a usable diag224 interface. Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Reviewed-by: David Hildenbrand Acked-by: Heiko Carstens Signed-off-by: Christian Borntraeger diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 1e28414..28f03ca 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -360,20 +360,6 @@ out: /* Diagnose 224 functions */ -static int diag224(void *ptr) -{ - int rc = -EOPNOTSUPP; - - diag_stat_inc(DIAG_STAT_X224); - asm volatile( - " diag %1,%2,0x224\n" - "0: lhi %0,0x0\n" - "1:\n" - EX_TABLE(0b,1b) - : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); - return rc; -} - static int diag224_get_name_table(void) { /* memory must be below 2GB */ diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index f72744f..197e303 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -205,4 +205,5 @@ struct diag204_x_phys_cpu { } __packed; int diag204(unsigned long subcode, unsigned long size, void *addr); +int diag224(void *ptr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index f4ce4a2..a44faf4 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -218,3 +218,18 @@ int diag210(struct diag210 *addr) return ccode; } EXPORT_SYMBOL(diag210); + +int diag224(void *ptr) +{ + int rc = -EOPNOTSUPP; + + diag_stat_inc(DIAG_STAT_X224); + asm volatile( + " diag %1,%2,0x224\n" + "0: lhi %0,0x0\n" + "1:\n" + EX_TABLE(0b,1b) + : "+d" (rc) :"d" (0), "d" (ptr) : "memory"); + return rc; +} +EXPORT_SYMBOL(diag224); -- cgit v0.10.2 From a011eeb2a3d6cd778eb63bea0bf149ebbe658ab5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 9 May 2016 14:14:01 +0200 Subject: KVM: s390: Add operation exception interception handler This commit introduces code that handles operation exception interceptions. With this handler we can emulate instructions by using illegal opcodes. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 37b9017..093ea14 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -255,6 +255,7 @@ struct kvm_vcpu_stat { u32 instruction_stctg; u32 exit_program_interruption; u32 exit_instr_and_program; + u32 exit_operation_exception; u32 deliver_external_call; u32 deliver_emergency_signal; u32 deliver_service_signal; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 2e6b54e..09c13db 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -349,6 +349,15 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; } +static int handle_operexc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_operation_exception++; + trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, + vcpu->arch.sie_block->ipb); + + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); +} + int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { if (kvm_is_ucontrol(vcpu->kvm)) @@ -370,6 +379,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) return handle_validity(vcpu); case 0x28: return handle_stop(vcpu); + case 0x2c: + return handle_operexc(vcpu); case 0x38: return handle_partial_execution(vcpu); default: diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6d8ec3a..f0addec 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instruction", VCPU_STAT(exit_instruction) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 916834d..90d26a6 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -412,6 +412,27 @@ TRACE_EVENT(kvm_s390_handle_stsi, __entry->addr) ); +TRACE_EVENT(kvm_s390_handle_operexc, + TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb), + TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(__u64, instruction) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->instruction = ((__u64)ipa << 48) | + ((__u64)ipb << 16); + ), + + VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)", + __entry->instruction, + __print_symbolic(icpt_insn_decoder(__entry->instruction), + icpt_insn_codes)) + ); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v0.10.2 From a2d57b35c0226102b1f2ffdc2f719fcc30c99bf5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 23 May 2016 15:09:19 +0200 Subject: KVM: s390: Extend diag 204 fields The new store hypervisor information instruction, which we are going to introduce, needs previously unused fields in diag 204 structures. Signed-off-by: Janosch Frank Acked-by: Heiko Carstens Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 197e303..f4000cd 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -97,6 +97,11 @@ enum diag204_format { DIAG204_INFO_EXT = 0x00010000 }; +enum diag204_cpu_flags { + DIAG204_CPU_ONLINE = 0x20, + DIAG204_CPU_CAPPED = 0x40, +}; + struct diag204_info_blk_hdr { __u8 npar; __u8 flags; @@ -136,10 +141,13 @@ struct diag204_x_part_hdr { __u64 online_cs; __u64 online_es; __u8 upid; - char reserved1[3]; + __u8 reserved:3; + __u8 mtid:5; + char reserved1[2]; __u32 group_mlu; char group_name[8]; - char reserved2[32]; + char hardware_group_name[8]; + char reserved2[24]; } __packed; struct diag204_cpu_info { @@ -168,7 +176,9 @@ struct diag204_x_cpu_info { __u64 wait_time; __u32 pma_weight; __u32 polar_weight; - char reserved3[40]; + __u32 cpu_type_cap; + __u32 group_cpu_type_cap; + char reserved3[32]; } __packed; struct diag204_phys_hdr { @@ -199,7 +209,8 @@ struct diag204_x_phys_cpu { __u16 cpu_addr; char reserved1[2]; __u8 ctidx; - char reserved2[3]; + char reserved2[1]; + __u16 weight; __u64 mgm_time; char reserved3[80]; } __packed; -- cgit v0.10.2 From 95ca2cb57985b07f5b136405f80a5106f5b06641 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Mon, 23 May 2016 15:11:58 +0200 Subject: KVM: s390: Add sthyi emulation Store Hypervisor Information is an emulated z/VM instruction that provides a guest with basic information about the layers it is running on. This includes information about the cpu configuration of both the machine and the lpar, as well as their names, machine model and machine type. This information enables an application to determine the maximum capacity of CPs and IFLs available to software. The instruction is available whenever the facility bit 74 is set, otherwise executing it results in an operation exception. It is important to check the validity flags in the sections before using data from any structure member. It is not guaranteed that all members will be valid on all machines / machine configurations. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index f4000cd..8221199 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -215,6 +215,16 @@ struct diag204_x_phys_cpu { char reserved3[80]; } __packed; +struct diag204_x_part_block { + struct diag204_x_part_hdr hdr; + struct diag204_x_cpu_info cpus[]; +} __packed; + +struct diag204_x_phys_block { + struct diag204_x_phys_hdr hdr; + struct diag204_x_phys_cpu cpus[]; +} __packed; + int diag204(unsigned long subcode, unsigned long size, void *addr); int diag224(void *ptr); #endif /* _ASM_S390_DIAG_H */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 093ea14..7233b1c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -154,6 +154,7 @@ struct kvm_s390_sie_block { #define LCTL_CR14 0x0002 __u16 lctl; /* 0x0044 */ __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 #define ICTL_PINT 0x20000000 #define ICTL_LPSW 0x00400000 #define ICTL_STCTL 0x00040000 @@ -279,6 +280,7 @@ struct kvm_vcpu_stat { u32 instruction_stfl; u32 instruction_tprot; u32 instruction_essa; + u32 instruction_sthyi; u32 instruction_sigp_sense; u32 instruction_sigp_sense_running; u32 instruction_sigp_external_call; diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index 8fb5d4a..3ac6343 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -140,6 +140,7 @@ exit_code_ipa0(0xB2, 0x4c, "TAR"), \ exit_code_ipa0(0xB2, 0x50, "CSP"), \ exit_code_ipa0(0xB2, 0x54, "MVPG"), \ + exit_code_ipa0(0xB2, 0x56, "STHYI"), \ exit_code_ipa0(0xB2, 0x58, "BSG"), \ exit_code_ipa0(0xB2, 0x5a, "BSA"), \ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \ diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile index d42fa38..82e73e2 100644 --- a/arch/s390/kvm/Makefile +++ b/arch/s390/kvm/Makefile @@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch ccflags-y := -Ivirt/kvm -Iarch/s390/kvm kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o -kvm-objs += diag.o gaccess.o guestdbg.o +kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o obj-$(CONFIG_KVM) += kvm.o diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 09c13db..9359f65 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -355,6 +355,10 @@ static int handle_operexc(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); + if (vcpu->arch.sie_block->ipa == 0xb256 && + test_kvm_facility(vcpu->kvm, 74)) + return handle_sthyi(vcpu); + return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f0addec..1c10254 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -94,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_stsi", VCPU_STAT(instruction_stsi) }, { "instruction_stfl", VCPU_STAT(instruction_stfl) }, { "instruction_tprot", VCPU_STAT(instruction_tprot) }, + { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, @@ -1189,6 +1190,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask, S390_ARCH_FAC_LIST_SIZE_BYTE); + set_kvm_facility(kvm->arch.model.fac_mask, 74); + set_kvm_facility(kvm->arch.model.fac_list, 74); + kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid(); kvm->arch.model.ibc = sclp.ibc & 0x0fff; @@ -1679,6 +1683,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) } vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; + if (test_kvm_facility(vcpu->kvm, 74)) + vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; if (vcpu->kvm->arch.use_cmma) { rc = kvm_s390_vcpu_setup_cmma(vcpu); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 8621ab0..c5ec4d3 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -250,6 +250,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); +/* implemented in sthyi.c */ +int handle_sthyi(struct kvm_vcpu *vcpu); + /* implemented in kvm-s390.c */ void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod); long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c new file mode 100644 index 0000000..894d562 --- /dev/null +++ b/arch/s390/kvm/sthyi.c @@ -0,0 +1,460 @@ +/* + * store hypervisor information instruction emulation functions. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Copyright IBM Corp. 2016 + * Author(s): Janosch Frank + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "kvm-s390.h" +#include "gaccess.h" +#include "trace.h" + +#define DED_WEIGHT 0xffff +/* + * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string + * as they are justified with spaces. + */ +#define CP 0xc3d7404040404040UL +#define IFL 0xc9c6d34040404040UL + +enum hdr_flags { + HDR_NOT_LPAR = 0x10, + HDR_STACK_INCM = 0x20, + HDR_STSI_UNAV = 0x40, + HDR_PERF_UNAV = 0x80, +}; + +enum mac_validity { + MAC_NAME_VLD = 0x20, + MAC_ID_VLD = 0x40, + MAC_CNT_VLD = 0x80, +}; + +enum par_flag { + PAR_MT_EN = 0x80, +}; + +enum par_validity { + PAR_GRP_VLD = 0x08, + PAR_ID_VLD = 0x10, + PAR_ABS_VLD = 0x20, + PAR_WGHT_VLD = 0x40, + PAR_PCNT_VLD = 0x80, +}; + +struct hdr_sctn { + u8 infhflg1; + u8 infhflg2; /* reserved */ + u8 infhval1; /* reserved */ + u8 infhval2; /* reserved */ + u8 reserved[3]; + u8 infhygct; + u16 infhtotl; + u16 infhdln; + u16 infmoff; + u16 infmlen; + u16 infpoff; + u16 infplen; + u16 infhoff1; + u16 infhlen1; + u16 infgoff1; + u16 infglen1; + u16 infhoff2; + u16 infhlen2; + u16 infgoff2; + u16 infglen2; + u16 infhoff3; + u16 infhlen3; + u16 infgoff3; + u16 infglen3; + u8 reserved2[4]; +} __packed; + +struct mac_sctn { + u8 infmflg1; /* reserved */ + u8 infmflg2; /* reserved */ + u8 infmval1; + u8 infmval2; /* reserved */ + u16 infmscps; + u16 infmdcps; + u16 infmsifl; + u16 infmdifl; + char infmname[8]; + char infmtype[4]; + char infmmanu[16]; + char infmseq[16]; + char infmpman[4]; + u8 reserved[4]; +} __packed; + +struct par_sctn { + u8 infpflg1; + u8 infpflg2; /* reserved */ + u8 infpval1; + u8 infpval2; /* reserved */ + u16 infppnum; + u16 infpscps; + u16 infpdcps; + u16 infpsifl; + u16 infpdifl; + u16 reserved; + char infppnam[8]; + u32 infpwbcp; + u32 infpabcp; + u32 infpwbif; + u32 infpabif; + char infplgnm[8]; + u32 infplgcp; + u32 infplgif; +} __packed; + +struct sthyi_sctns { + struct hdr_sctn hdr; + struct mac_sctn mac; + struct par_sctn par; +} __packed; + +struct cpu_inf { + u64 lpar_cap; + u64 lpar_grp_cap; + u64 lpar_weight; + u64 all_weight; + int cpu_num_ded; + int cpu_num_shd; +}; + +struct lpar_cpu_inf { + struct cpu_inf cp; + struct cpu_inf ifl; +}; + +static inline u64 cpu_id(u8 ctidx, void *diag224_buf) +{ + return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN)); +} + +/* + * Scales the cpu capping from the lpar range to the one expected in + * sthyi data. + * + * diag204 reports a cap in hundredths of processor units. + * z/VM's range for one core is 0 - 0x10000. + */ +static u32 scale_cap(u32 in) +{ + return (0x10000 * in) / 100; +} + +static void fill_hdr(struct sthyi_sctns *sctns) +{ + sctns->hdr.infhdln = sizeof(sctns->hdr); + sctns->hdr.infmoff = sizeof(sctns->hdr); + sctns->hdr.infmlen = sizeof(sctns->mac); + sctns->hdr.infplen = sizeof(sctns->par); + sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen; + sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen; +} + +static void fill_stsi_mac(struct sthyi_sctns *sctns, + struct sysinfo_1_1_1 *sysinfo) +{ + if (stsi(sysinfo, 1, 1, 1)) + return; + + sclp_ocf_cpc_name_copy(sctns->mac.infmname); + + memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype)); + memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu)); + memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman)); + memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq)); + + sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD; +} + +static void fill_stsi_par(struct sthyi_sctns *sctns, + struct sysinfo_2_2_2 *sysinfo) +{ + if (stsi(sysinfo, 2, 2, 2)) + return; + + sctns->par.infppnum = sysinfo->lpar_number; + memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam)); + + sctns->par.infpval1 |= PAR_ID_VLD; +} + +static void fill_stsi(struct sthyi_sctns *sctns) +{ + void *sysinfo; + + /* Errors are handled through the validity bits in the response. */ + sysinfo = (void *)__get_free_page(GFP_KERNEL); + if (!sysinfo) + return; + + fill_stsi_mac(sctns, sysinfo); + fill_stsi_par(sctns, sysinfo); + + free_pages((unsigned long)sysinfo, 0); +} + +static void fill_diag_mac(struct sthyi_sctns *sctns, + struct diag204_x_phys_block *block, + void *diag224_buf) +{ + int i; + + for (i = 0; i < block->hdr.cpus; i++) { + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdcps++; + else + sctns->mac.infmscps++; + break; + case IFL: + if (block->cpus[i].weight == DED_WEIGHT) + sctns->mac.infmdifl++; + else + sctns->mac.infmsifl++; + break; + } + } + sctns->mac.infmval1 |= MAC_CNT_VLD; +} + +/* Returns a pointer to the the next partition block. */ +static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf, + bool this_lpar, + void *diag224_buf, + struct diag204_x_part_block *block) +{ + int i, capped = 0, weight_cp = 0, weight_ifl = 0; + struct cpu_inf *cpu_inf; + + for (i = 0; i < block->hdr.rcpus; i++) { + if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE)) + continue; + + switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) { + case CP: + cpu_inf = &part_inf->cp; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_cp |= block->cpus[i].cur_weight; + break; + case IFL: + cpu_inf = &part_inf->ifl; + if (block->cpus[i].cur_weight < DED_WEIGHT) + weight_ifl |= block->cpus[i].cur_weight; + break; + default: + continue; + } + + if (!this_lpar) + continue; + + capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED; + cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap; + cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap; + + if (block->cpus[i].weight == DED_WEIGHT) + cpu_inf->cpu_num_ded += 1; + else + cpu_inf->cpu_num_shd += 1; + } + + if (this_lpar && capped) { + part_inf->cp.lpar_weight = weight_cp; + part_inf->ifl.lpar_weight = weight_ifl; + } + part_inf->cp.all_weight += weight_cp; + part_inf->ifl.all_weight += weight_ifl; + return (struct diag204_x_part_block *)&block->cpus[i]; +} + +static void fill_diag(struct sthyi_sctns *sctns) +{ + int i, r, pages; + bool this_lpar; + void *diag204_buf; + void *diag224_buf = NULL; + struct diag204_x_info_blk_hdr *ti_hdr; + struct diag204_x_part_block *part_block; + struct diag204_x_phys_block *phys_block; + struct lpar_cpu_inf lpar_inf = {}; + + /* Errors are handled through the validity bits in the response. */ + pages = diag204((unsigned long)DIAG204_SUBC_RSI | + (unsigned long)DIAG204_INFO_EXT, 0, NULL); + if (pages <= 0) + return; + + diag204_buf = vmalloc(PAGE_SIZE * pages); + if (!diag204_buf) + return; + + r = diag204((unsigned long)DIAG204_SUBC_STIB7 | + (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf); + if (r < 0) + goto out; + + diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + if (!diag224_buf || diag224(diag224_buf)) + goto out; + + ti_hdr = diag204_buf; + part_block = diag204_buf + sizeof(*ti_hdr); + + for (i = 0; i < ti_hdr->npar; i++) { + /* + * For the calling lpar we also need to get the cpu + * caps and weights. The time information block header + * specifies the offset to the partition block of the + * caller lpar, so we know when we process its data. + */ + this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part; + part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf, + part_block); + } + + phys_block = (struct diag204_x_phys_block *)part_block; + part_block = diag204_buf + ti_hdr->this_part; + if (part_block->hdr.mtid) + sctns->par.infpflg1 = PAR_MT_EN; + + sctns->par.infpval1 |= PAR_GRP_VLD; + sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap); + sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap); + memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name, + sizeof(sctns->par.infplgnm)); + + sctns->par.infpscps = lpar_inf.cp.cpu_num_shd; + sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded; + sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd; + sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded; + sctns->par.infpval1 |= PAR_PCNT_VLD; + + sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap); + sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap); + sctns->par.infpval1 |= PAR_ABS_VLD; + + /* + * Everything below needs global performance data to be + * meaningful. + */ + if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) { + sctns->hdr.infhflg1 |= HDR_PERF_UNAV; + goto out; + } + + fill_diag_mac(sctns, phys_block, diag224_buf); + + if (lpar_inf.cp.lpar_weight) { + sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 * + lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight; + } + + if (lpar_inf.ifl.lpar_weight) { + sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 * + lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight; + } + sctns->par.infpval1 |= PAR_WGHT_VLD; + +out: + kfree(diag224_buf); + vfree(diag204_buf); +} + +static int sthyi(u64 vaddr) +{ + register u64 code asm("0") = 0; + register u64 addr asm("2") = vaddr; + int cc; + + asm volatile( + ".insn rre,0xB2560000,%[code],%[addr]\n" + "ipm %[cc]\n" + "srl %[cc],28\n" + : [cc] "=d" (cc) + : [code] "d" (code), [addr] "a" (addr) + : "memory", "cc"); + return cc; +} + +int handle_sthyi(struct kvm_vcpu *vcpu) +{ + int reg1, reg2, r = 0; + u64 code, addr, cc = 0; + struct sthyi_sctns *sctns = NULL; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + code = vcpu->run->s.regs.gprs[reg1]; + addr = vcpu->run->s.regs.gprs[reg2]; + + vcpu->stat.instruction_sthyi++; + VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr); + trace_kvm_s390_handle_sthyi(vcpu, code, addr); + + if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + if (code & 0xffff) { + cc = 3; + goto out; + } + + /* + * If the page has not yet been faulted in, we want to do that + * now and not after all the expensive calculations. + */ + r = write_guest(vcpu, addr, reg2, &cc, 1); + if (r) + return kvm_s390_inject_prog_cond(vcpu, r); + + sctns = (void *)get_zeroed_page(GFP_KERNEL); + if (!sctns) + return -ENOMEM; + + /* + * If we are a guest, we don't want to emulate an emulated + * instruction. We ask the hypervisor to provide the data. + */ + if (test_facility(74)) { + cc = sthyi((u64)sctns); + goto out; + } + + fill_hdr(sctns); + fill_stsi(sctns); + fill_diag(sctns); + +out: + if (!cc) { + r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); + if (r) { + free_page((unsigned long)sctns); + return kvm_s390_inject_prog_cond(vcpu, r); + } + } + + free_page((unsigned long)sctns); + vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0; + kvm_s390_set_psw_cc(vcpu, cc); + return r; +} diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 90d26a6..a429ef9 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -433,6 +433,26 @@ TRACE_EVENT(kvm_s390_handle_operexc, icpt_insn_codes)) ); +TRACE_EVENT(kvm_s390_handle_sthyi, + TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr), + TP_ARGS(VCPU_ARGS_COMMON, code, addr), + + TP_STRUCT__entry( + VCPU_FIELD_COMMON + __field(u64, code) + __field(u64, addr) + ), + + TP_fast_assign( + VCPU_ASSIGN_COMMON + __entry->code = code; + __entry->addr = addr; + ), + + VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx", + __entry->code, __entry->addr) + ); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v0.10.2 From 7d0a5e62411a9223512c6af2e4c08a2d7c00fa2e Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Tue, 10 May 2016 15:03:42 +0200 Subject: KVM: s390: Limit sthyi execution Store hypervisor information is a valid instruction not only in supervisor state but also in problem state, i.e. the guest's userspace. Its execution is not only computational and memory intensive, but also has to get hold of the ipte lock to write to the guest's memory. This lock is not intended to be held often and long, especially not from the untrusted guest userspace. Therefore we apply rate limiting of sthyi executions per VM. Signed-off-by: Janosch Frank Acked-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 7233b1c..bcc20dc 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -652,6 +652,7 @@ struct kvm_arch{ wait_queue_head_t ipte_wq; int ipte_lock_count; struct mutex ipte_mutex; + struct ratelimit_state sthyi_limit; spinlock_t start_stop_lock; struct sie_page2 *sie_page2; struct kvm_s390_cpu_model model; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1c10254..44297ff 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1151,6 +1151,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; + ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500); + kvm->arch.use_esca = 0; /* start with basic SCA */ rwlock_init(&kvm->arch.sca_lock); kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index 894d562..bd98b7d 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -403,6 +404,16 @@ int handle_sthyi(struct kvm_vcpu *vcpu) u64 code, addr, cc = 0; struct sthyi_sctns *sctns = NULL; + /* + * STHYI requires extensive locking in the higher hypervisors + * and is very computational/memory expensive. Therefore we + * ratelimit the executions per VM. + */ + if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) { + kvm_s390_retry_instr(vcpu); + return 0; + } + kvm_s390_get_regs_rre(vcpu, ®1, ®2); code = vcpu->run->s.regs.gprs[reg1]; addr = vcpu->run->s.regs.gprs[reg2]; -- cgit v0.10.2 From c1778e515712dae0575657fe6c9511ffcb28a7e0 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Fri, 6 May 2016 15:47:19 +0300 Subject: KVM: s390: Add mnemonic print to kvm_s390_intercept_prog We have a table of mnemonic names for intercepted program interruptions, let's print readable name of the interruption in the kvm_s390_intercept_prog trace event. Signed-off-by: Alexander Yarygin Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index a429ef9..1c4586b 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -185,8 +185,10 @@ TRACE_EVENT(kvm_s390_intercept_prog, __entry->code = code; ), - VCPU_TP_PRINTK("intercepted program interruption %04x", - __entry->code) + VCPU_TP_PRINTK("intercepted program interruption %04x (%s)", + __entry->code, + __print_symbolic(__entry->code, + icpt_prog_codes)) ); /* -- cgit v0.10.2 From 15c9705f0c8af2d19dede9866aec364746b269ef Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 19 Mar 2015 17:36:43 +0100 Subject: KVM: s390: interface to query and configure cpu features For now, we only have an interface to query and configure facilities indicated via STFL(E). However, we also have features indicated via SCLP, that have to be indicated to the guest by user space and usually require KVM support. This patch allows user space to query and configure available cpu features for the guest. Please note that disabling a feature doesn't necessarily mean that it is completely disabled (e.g. ESOP is mostly handled by the SIE). We will try our best to disable it. Most features (e.g. SCLP) can't directly be forwarded, as most of them need in addition to hardware support, support in KVM. As we later on want to turn these features in KVM explicitly on/off (to simulate different behavior), we have to filter all features provided by the hardware and make them configurable. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index a9ea877..0ed6808 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -85,6 +85,33 @@ Returns: -EBUSY in case 1 or more vcpus are already activated (only in write -ENOMEM if not enough memory is available to process the ioctl 0 in case of success +2.3. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_FEAT (r/o) + +Allows user space to retrieve available cpu features. A feature is available if +provided by the hardware and supported by kvm. In theory, cpu features could +even be completely emulated by kvm. + +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; # Bitmap (1 = feature available), MSB 0 bit numbering +}; + +Parameters: address of a buffer to load the feature list from. +Returns: -EFAULT if the given address is not accessible from kernel space. + 0 in case of success. + +2.4. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_FEAT (r/w) + +Allows user space to retrieve or change enabled cpu features for all VCPUs of a +VM. Features that are not available cannot be enabled. + +See 2.3. for a description of the parameter struct. + +Parameters: address of a buffer to store/load the feature list from. +Returns: -EFAULT if the given address is not accessible from kernel space. + -EINVAL if a cpu feature that is not available is to be enabled. + -EBUSY if at least one VCPU has already been defined. + 0 in case of success. + 3. GROUP: KVM_S390_VM_TOD Architectures: s390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index bcc20dc..b2a83a0 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -658,6 +658,8 @@ struct kvm_arch{ struct kvm_s390_cpu_model model; struct kvm_s390_crypto crypto; u64 epoch; + /* subset of available cpu features enabled by user space */ + DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); }; #define KVM_HVA_ERR_BAD (-1UL) diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 3b8e99e..a8559f2 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -93,6 +93,14 @@ struct kvm_s390_vm_cpu_machine { __u64 fac_list[256]; }; +#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2 +#define KVM_S390_VM_CPU_MACHINE_FEAT 3 + +#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 44297ff..6960468 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -132,6 +133,9 @@ unsigned long kvm_s390_fac_list_mask_size(void) return ARRAY_SIZE(kvm_s390_fac_list_mask); } +/* available cpu features supported by kvm */ +static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); + static struct gmap_notifier gmap_notifier; debug_info_t *kvm_s390_dbf; @@ -677,6 +681,29 @@ out: return ret; } +static int kvm_s390_set_processor_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + int ret = -EBUSY; + + if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data))) + return -EFAULT; + if (!bitmap_subset((unsigned long *) data.feat, + kvm_s390_available_cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS)) + return -EINVAL; + + mutex_lock(&kvm->lock); + if (!atomic_read(&kvm->online_vcpus)) { + bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + ret = 0; + } + mutex_unlock(&kvm->lock); + return ret; +} + static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -685,6 +712,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR: ret = kvm_s390_set_processor(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + ret = kvm_s390_set_processor_feat(kvm, attr); + break; } return ret; } @@ -733,6 +763,31 @@ out: return ret; } +static int kvm_s390_get_processor_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + + bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) + return -EFAULT; + return 0; +} + +static int kvm_s390_get_machine_feat(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + struct kvm_s390_vm_cpu_feat data; + + bitmap_copy((unsigned long *) data.feat, + kvm_s390_available_cpu_feat, + KVM_S390_VM_CPU_FEAT_NR_BITS); + if (copy_to_user((void __user *)attr->addr, &data, sizeof(data))) + return -EFAULT; + return 0; +} + static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -744,6 +799,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE: ret = kvm_s390_get_machine(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + ret = kvm_s390_get_processor_feat(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_FEAT: + ret = kvm_s390_get_machine_feat(kvm, attr); + break; } return ret; } @@ -827,6 +888,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_CPU_PROCESSOR: case KVM_S390_VM_CPU_MACHINE: + case KVM_S390_VM_CPU_PROCESSOR_FEAT: + case KVM_S390_VM_CPU_MACHINE_FEAT: ret = 0; break; default: diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c5ec4d3..52aa47e 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -175,6 +175,12 @@ static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) return 0; } +static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr) +{ + WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS); + return test_bit_inv(nr, kvm->arch.cpu_feat); +} + /* are cpu states controlled by user space */ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) { -- cgit v0.10.2 From 22be5a133169e855097936438417ab1b672ad43f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 21 Jan 2016 13:22:54 +0100 Subject: KVM: s390: forward ESOP if available ESOP guarantees that during a protection exception, bit 61 of real location 168-175 will only be set to 1 if it was because of ALCP or DATP. If the exception is due to LAP or KCP, the bit will always be set to 0. The old SOP definition allowed bit 61 to be unpredictable in case of LAP or KCP in some conditions. So ESOP replaces this unpredictability by a guarantee. Therefore, we can directly forward ESOP if it is available on our machine. We don't have to do anything when ESOP is disabled - the guest will simply expect unpredictable values. Our guest access functions are already handling ESOP properly. Please note that future functionality in KVM will require knowledge about ESOP being enabled for a guest or not. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index a8559f2..789c4e2 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -97,6 +97,7 @@ struct kvm_s390_vm_cpu_machine { #define KVM_S390_VM_CPU_MACHINE_FEAT 3 #define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +#define KVM_S390_VM_CPU_FEAT_ESOP 0 struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6960468..2b5c14d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -193,6 +193,17 @@ void kvm_arch_hardware_unsetup(void) &kvm_clock_notifier); } +static void allow_cpu_feat(unsigned long nr) +{ + set_bit_inv(nr, kvm_s390_available_cpu_feat); +} + +static void kvm_s390_cpu_feat_init(void) +{ + if (MACHINE_HAS_ESOP) + allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); +} + int kvm_arch_init(void *opaque) { kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long)); @@ -204,6 +215,8 @@ int kvm_arch_init(void *opaque) return -ENOMEM; } + kvm_s390_cpu_feat_init(); + /* Register floating interrupt controller interface. */ return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC); } -- cgit v0.10.2 From 6167375b558196fdedd38e9867f7bb30ff4dda50 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 19:44:10 +0200 Subject: KVM: s390: gaccess: store guest address on ALC prot exceptions Let's pass the effective guest address to get_vcpu_asce(), so we can properly set the guest address in case we inject an ALC protection exception. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 66938d2..c0da9e9 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -477,7 +477,7 @@ enum { }; static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - ar_t ar, enum gacc_mode mode) + unsigned long ga, ar_t ar, enum gacc_mode mode) { int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); @@ -519,6 +519,7 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, vcpu->arch.pgm.exc_access_id = ar; break; case PGM_PROTECTION: + tec_bits->addr = ga >> PAGE_SHIFT; tec_bits->b60 = 1; tec_bits->b61 = 1; break; @@ -783,7 +784,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); + ga = kvm_s390_logical_to_effective(vcpu, ga); + rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode); if (rc) return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; @@ -854,7 +856,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, gva = kvm_s390_logical_to_effective(vcpu, gva); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - rc = get_vcpu_asce(vcpu, &asce, ar, mode); + rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; -- cgit v0.10.2 From d03193de30e6d99770930c6fbf14f0d5dd5cb2f0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 19:56:46 +0200 Subject: KVM: s390: gaccess: function for preparing translation exceptions Let's provide a function trans_exc() that can be used for handling preparation of translation exceptions on a central basis. We will use that function to replace existing code in gaccess. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index c0da9e9..b6ccb26 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -476,6 +476,68 @@ enum { FSI_FETCH = 2 /* Exception was due to fetch operation */ }; +enum prot_type { + PROT_TYPE_LA = 0, + PROT_TYPE_KEYC = 1, + PROT_TYPE_ALC = 2, + PROT_TYPE_DAT = 3, +}; + +static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, + ar_t ar, enum gacc_mode mode, enum prot_type prot) +{ + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + struct trans_exc_code_bits *tec; + + memset(pgm, 0, sizeof(*pgm)); + pgm->code = code; + tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + + switch (code) { + case PGM_ASCE_TYPE: + case PGM_PAGE_TRANSLATION: + case PGM_REGION_FIRST_TRANS: + case PGM_REGION_SECOND_TRANS: + case PGM_REGION_THIRD_TRANS: + case PGM_SEGMENT_TRANSLATION: + /* + * op_access_id only applies to MOVE_PAGE -> set bit 61 + * exc_access_id has to be set to 0 for some instructions. Both + * cases have to be handled by the caller. We can always store + * exc_access_id, as it is undefined for non-ar cases. + */ + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* FALL THROUGH */ + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_ASTE_SEQUENCE: + case PGM_EXTENDED_AUTHORITY: + pgm->exc_access_id = ar; + break; + case PGM_PROTECTION: + switch (prot) { + case PROT_TYPE_ALC: + tec->b60 = 1; + /* FALL THROUGH */ + case PROT_TYPE_DAT: + tec->b61 = 1; + tec->addr = gva >> PAGE_SHIFT; + tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; + tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as; + /* exc_access_id is undefined for most cases */ + pgm->exc_access_id = ar; + break; + default: /* LA and KEYC set b61 to 0, other params undefined */ + break; + } + break; + } + return code; +} + static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, unsigned long ga, ar_t ar, enum gacc_mode mode) { -- cgit v0.10.2 From 3e3c67f6a327852375247c98b0d153c44e460216 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:00:33 +0200 Subject: KVM: s390: gaccess: convert kvm_s390_check_low_addr_prot_real() Let's use our new function for preparing translation exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index b6ccb26..61dc45e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -979,20 +979,9 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, */ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; if (!ctlreg0.lap || !is_low_address(gra)) return 0; - - memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = FSI_STORE; - tec_bits->as = psw_bits(*psw).as; - tec_bits->addr = gra >> PAGE_SHIFT; - pgm->code = PGM_PROTECTION; - - return pgm->code; + return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA); } -- cgit v0.10.2 From fbcb7d5157718645cc198c6be6b435ab326c1892 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:06:55 +0200 Subject: KVM: s390: gaccess: convert guest_translate_address() Let's use our new function for preparing translation exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 61dc45e..ae9f9e8 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -910,37 +910,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec; union asce asce; int rc; gva = kvm_s390_logical_to_effective(vcpu, gva); - tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode); - tec->addr = gva >> PAGE_SHIFT; if (rc) return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { - if (mode == GACC_STORE) { - rc = pgm->code = PGM_PROTECTION; - return rc; - } + if (mode == GACC_STORE) + return trans_exc(vcpu, PGM_PROTECTION, gva, 0, + mode, PROT_TYPE_LA); } if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ rc = guest_translate(vcpu, gva, gpa, asce, mode); - if (rc > 0) { - if (rc == PGM_PROTECTION) - tec->b61 = 1; - pgm->code = rc; - } + if (rc > 0) + return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT); } else { - rc = 0; *gpa = kvm_s390_real_to_abs(vcpu, gva); if (kvm_is_error_gpa(vcpu->kvm, *gpa)) - rc = pgm->code = PGM_ADDRESSING; + return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0); } return rc; -- cgit v0.10.2 From cde0dcfb5df1dbcd90a8e73130a6b7091bdb493a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:13:35 +0200 Subject: KVM: s390: gaccess: convert guest_page_range() Let's use our new function for preparing translation exceptions. As we will need the correct ar, let's pass that to guest_page_range(). This will also make sure that the guest address is stored in the tec for applicable excptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ae9f9e8..ec6c91e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -792,40 +792,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, unsigned long *pages, unsigned long nr_pages, const union asce asce, enum gacc_mode mode) { - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; - struct trans_exc_code_bits *tec_bits; - int lap_enabled, rc; + int lap_enabled, rc = 0; - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); - tec_bits->addr = ga >> PAGE_SHIFT; - if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) { - pgm->code = PGM_PROTECTION; - return pgm->code; - } + if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) + return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode, + PROT_TYPE_LA); ga &= PAGE_MASK; if (psw_bits(*psw).t) { rc = guest_translate(vcpu, ga, pages, asce, mode); if (rc < 0) return rc; - if (rc == PGM_PROTECTION) - tec_bits->b61 = 1; - if (rc) - pgm->code = rc; } else { *pages = kvm_s390_real_to_abs(vcpu, ga); if (kvm_is_error_gpa(vcpu->kvm, *pages)) - pgm->code = PGM_ADDRESSING; + rc = PGM_ADDRESSING; } - if (pgm->code) - return pgm->code; + if (rc) + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT); ga += PAGE_SIZE; pages++; nr_pages--; @@ -859,7 +850,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode); + rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); -- cgit v0.10.2 From bcfa01d787278476f3e79530d03df9b3f52e6e59 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 31 May 2016 20:21:03 +0200 Subject: KVM: s390: gaccess: convert get_vcpu_asce() Let's use our new function for preparing translation exceptions. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ec6c91e..8e245e7 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -543,13 +543,6 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, { int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); - struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; - struct trans_exc_code_bits *tec_bits; - - memset(pgm, 0, sizeof(*pgm)); - tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw.as; if (!psw.t) { asce->val = 0; @@ -572,22 +565,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, return 0; case PSW_AS_ACCREG: rc = ar_translation(vcpu, asce, ar, mode); - switch (rc) { - case PGM_ALEN_TRANSLATION: - case PGM_ALE_SEQUENCE: - case PGM_ASTE_VALIDITY: - case PGM_ASTE_SEQUENCE: - case PGM_EXTENDED_AUTHORITY: - vcpu->arch.pgm.exc_access_id = ar; - break; - case PGM_PROTECTION: - tec_bits->addr = ga >> PAGE_SHIFT; - tec_bits->b60 = 1; - tec_bits->b61 = 1; - break; - } if (rc > 0) - pgm->code = rc; + return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC); return rc; } return 0; -- cgit v0.10.2 From 1afd43e0fbba4a92effc22977e3a7e64213ee860 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 18 May 2016 15:59:06 +0200 Subject: s390/crypto: allow to query all known cpacf functions KVM will have to query these functions, let's add at least the query capabilities. PCKMO has RRE format, as bit 16-31 are ignored, we can still use the existing function. As PCKMO won't touch the cc, let's force it to 0 upfront. Signed-off-by: David Hildenbrand Acked-by: Ingo Tuchscherer Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h index 1a82cf2..d28621d 100644 --- a/arch/s390/include/asm/cpacf.h +++ b/arch/s390/include/asm/cpacf.h @@ -20,6 +20,9 @@ #define CPACF_KMC 0xb92f /* MSA */ #define CPACF_KIMD 0xb93e /* MSA */ #define CPACF_KLMD 0xb93f /* MSA */ +#define CPACF_PCKMO 0xb928 /* MSA3 */ +#define CPACF_KMF 0xb92a /* MSA4 */ +#define CPACF_KMO 0xb92b /* MSA4 */ #define CPACF_PCC 0xb92c /* MSA4 */ #define CPACF_KMCTR 0xb92d /* MSA4 */ #define CPACF_PPNO 0xb93c /* MSA5 */ @@ -136,6 +139,7 @@ static inline void __cpacf_query(unsigned int opcode, unsigned char *status) register unsigned long r1 asm("1") = (unsigned long) status; asm volatile( + " spm 0\n" /* pckmo doesn't change the cc */ /* Parameter registers are ignored, but may not be 0 */ "0: .insn rrf,%[opc] << 16,2,2,2,0\n" " brc 1,0b\n" /* handle partial completion */ @@ -157,6 +161,12 @@ static inline int cpacf_query(unsigned int opcode, unsigned int func) if (!test_facility(17)) /* check for MSA */ return 0; break; + case CPACF_PCKMO: + if (!test_facility(76)) /* check for MSA3 */ + return 0; + break; + case CPACF_KMF: + case CPACF_KMO: case CPACF_PCC: case CPACF_KMCTR: if (!test_facility(77)) /* check for MSA4 */ -- cgit v0.10.2 From 0a763c780b7cb830c250d00ead975778ab948f40 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 18 May 2016 16:03:47 +0200 Subject: KVM: s390: interface to query and configure cpu subfunctions We have certain instructions that indicate available subfunctions via a query subfunction (crypto functions and ptff), or via a test bit function (plo). By exposing these "subfunction blocks" to user space, we allow user space to 1) query available subfunctions and make sure subfunctions won't get lost during migration - e.g. properly indicate them via a CPU model 2) change the subfunctions to be reported to the guest (even adding unavailable ones) This mechanism works just like the way we indicate the stfl(e) list to user space. This way, user space could even emulate some subfunctions in QEMU in the future. If this is ever applicable, we have to make sure later on, that unsupported subfunctions result in an intercept to QEMU. Please note that support to indicate them to the guest is still missing and requires hardware support. Usually, the IBC takes already care of these subfunctions for migration safety. QEMU should make sure to always set these bits properly according to the machine generation to be emulated. Available subfunctions are only valid in combination with STFLE bits retrieved via KVM_S390_VM_CPU_MACHINE and enabled via KVM_S390_VM_CPU_PROCESSOR. If the applicable bits are available, the indicated subfunctions are guaranteed to be correct. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 0ed6808..8a458f4 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -112,6 +112,63 @@ Returns: -EFAULT if the given address is not accessible from kernel space. -EBUSY if at least one VCPU has already been defined. 0 in case of success. +2.5. ATTRIBUTE: KVM_S390_VM_CPU_MACHINE_SUBFUNC (r/o) + +Allows user space to retrieve available cpu subfunctions without any filtering +done by a set IBC. These subfunctions are indicated to the guest VCPU via +query or "test bit" subfunctions and used e.g. by cpacf functions, plo and ptff. + +A subfunction block is only valid if KVM_S390_VM_CPU_MACHINE contains the +STFL(E) bit introducing the affected instruction. If the affected instruction +indicates subfunctions via a "query subfunction", the response block is +contained in the returned struct. If the affected instruction +indicates subfunctions via a "test bit" mechanism, the subfunction codes are +contained in the returned struct in MSB 0 bit numbering. + +struct kvm_s390_vm_cpu_subfunc { + u8 plo[32]; # always valid (ESA/390 feature) + u8 ptff[16]; # valid with TOD-clock steering + u8 kmac[16]; # valid with Message-Security-Assist + u8 kmc[16]; # valid with Message-Security-Assist + u8 km[16]; # valid with Message-Security-Assist + u8 kimd[16]; # valid with Message-Security-Assist + u8 klmd[16]; # valid with Message-Security-Assist + u8 pckmo[16]; # valid with Message-Security-Assist-Extension 3 + u8 kmctr[16]; # valid with Message-Security-Assist-Extension 4 + u8 kmf[16]; # valid with Message-Security-Assist-Extension 4 + u8 kmo[16]; # valid with Message-Security-Assist-Extension 4 + u8 pcc[16]; # valid with Message-Security-Assist-Extension 4 + u8 ppno[16]; # valid with Message-Security-Assist-Extension 5 + u8 reserved[1824]; # reserved for future instructions +}; + +Parameters: address of a buffer to load the subfunction blocks from. +Returns: -EFAULT if the given address is not accessible from kernel space. + 0 in case of success. + +2.6. ATTRIBUTE: KVM_S390_VM_CPU_PROCESSOR_SUBFUNC (r/w) + +Allows user space to retrieve or change cpu subfunctions to be indicated for +all VCPUs of a VM. This attribute will only be available if kernel and +hardware support are in place. + +The kernel uses the configured subfunction blocks for indication to +the guest. A subfunction block will only be used if the associated STFL(E) bit +has not been disabled by user space (so the instruction to be queried is +actually available for the guest). + +As long as no data has been written, a read will fail. The IBC will be used +to determine available subfunctions in this case, this will guarantee backward +compatibility. + +See 2.5. for a description of the parameter struct. + +Parameters: address of a buffer to store/load the subfunction blocks from. +Returns: -EFAULT if the given address is not accessible from kernel space. + -EINVAL when reading, if there was no write yet. + -EBUSY if at least one VCPU has already been defined. + 0 in case of success. + 3. GROUP: KVM_S390_VM_TOD Architectures: s390 diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 789c4e2..f0818d7 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -102,6 +102,26 @@ struct kvm_s390_vm_cpu_feat { __u64 feat[16]; }; +#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4 +#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5 +/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */ +struct kvm_s390_vm_cpu_subfunc { + __u8 plo[32]; /* always */ + __u8 ptff[16]; /* with TOD-clock steering */ + __u8 kmac[16]; /* with MSA */ + __u8 kmc[16]; /* with MSA */ + __u8 km[16]; /* with MSA */ + __u8 kimd[16]; /* with MSA */ + __u8 klmd[16]; /* with MSA */ + __u8 pckmo[16]; /* with MSA3 */ + __u8 kmctr[16]; /* with MSA4 */ + __u8 kmf[16]; /* with MSA4 */ + __u8 kmo[16]; /* with MSA4 */ + __u8 pcc[16]; /* with MSA4 */ + __u8 ppno[16]; /* with MSA5 */ + __u8 reserved[1824]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2b5c14d..f746a35 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -135,6 +137,8 @@ unsigned long kvm_s390_fac_list_mask_size(void) /* available cpu features supported by kvm */ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); +/* available subfunctions indicated via query / "test bit" */ +static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc; static struct gmap_notifier gmap_notifier; debug_info_t *kvm_s390_dbf; @@ -198,8 +202,52 @@ static void allow_cpu_feat(unsigned long nr) set_bit_inv(nr, kvm_s390_available_cpu_feat); } +static inline int plo_test_bit(unsigned char nr) +{ + register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; + int cc = 3; /* subfunction not available */ + + asm volatile( + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : "d" (r0) + : "cc"); + return cc == 0; +} + static void kvm_s390_cpu_feat_init(void) { + int i; + + for (i = 0; i < 256; ++i) { + if (plo_test_bit(i)) + kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7); + } + + if (test_facility(28)) /* TOD-clock steering */ + etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF); + + if (test_facility(17)) { /* MSA */ + __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac); + __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc); + __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km); + __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd); + __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd); + } + if (test_facility(76)) /* MSA3 */ + __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo); + if (test_facility(77)) { /* MSA4 */ + __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr); + __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf); + __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo); + __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc); + } + if (test_facility(57)) /* MSA5 */ + __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno); + if (MACHINE_HAS_ESOP) allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); } @@ -717,6 +765,16 @@ static int kvm_s390_set_processor_feat(struct kvm *kvm, return ret; } +static int kvm_s390_set_processor_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + /* + * Once supported by kernel + hw, we have to store the subfunctions + * in kvm->arch and remember that user space configured them. + */ + return -ENXIO; +} + static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -728,6 +786,9 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_PROCESSOR_FEAT: ret = kvm_s390_set_processor_feat(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + ret = kvm_s390_set_processor_subfunc(kvm, attr); + break; } return ret; } @@ -801,6 +862,25 @@ static int kvm_s390_get_machine_feat(struct kvm *kvm, return 0; } +static int kvm_s390_get_processor_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + /* + * Once we can actually configure subfunctions (kernel + hw support), + * we have to check if they were already set by user space, if so copy + * them from kvm->arch. + */ + return -ENXIO; +} + +static int kvm_s390_get_machine_subfunc(struct kvm *kvm, + struct kvm_device_attr *attr) +{ + if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc, + sizeof(struct kvm_s390_vm_cpu_subfunc))) + return -EFAULT; + return 0; +} static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) { int ret = -ENXIO; @@ -818,6 +898,12 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE_FEAT: ret = kvm_s390_get_machine_feat(kvm, attr); break; + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: + ret = kvm_s390_get_processor_subfunc(kvm, attr); + break; + case KVM_S390_VM_CPU_MACHINE_SUBFUNC: + ret = kvm_s390_get_machine_subfunc(kvm, attr); + break; } return ret; } @@ -903,8 +989,11 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) case KVM_S390_VM_CPU_MACHINE: case KVM_S390_VM_CPU_PROCESSOR_FEAT: case KVM_S390_VM_CPU_MACHINE_FEAT: + case KVM_S390_VM_CPU_MACHINE_SUBFUNC: ret = 0; break; + /* configuring subfunctions is not supported yet */ + case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: default: ret = -ENXIO; break; -- cgit v0.10.2 From 4013ade3fb2fefa021827d675d8bc1d51f4aef93 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:49:43 +0100 Subject: s390/sclp: detect 64-bit-SCAO facility Let's correctly detect that facility, so we can correctly handle its abscence later on. Reviewed-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 49736a0..5214000 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -59,6 +59,7 @@ struct sclp_info { unsigned char has_hvs : 1; unsigned char has_esca : 1; unsigned char has_sief2 : 1; + unsigned char has_64bscao : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 0ac520d..211eb86a 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -114,6 +114,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_64bscao = !!(sccb->fac116 & 0x80); sclp.has_esca = !!(sccb->fac116 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) -- cgit v0.10.2 From 76a6dd7241ae03c47f44a9605dcd525f31b2124a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:33:49 +0100 Subject: KVM: s390: handle missing 64-bit-SCAO facility Without that facility, we may only use scaol. So fallback to DMA allocation in that case, so we won't overwrite random memory via the SIE. Also disallow ESCA, so we don't have to handle that allocation case. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f746a35..efb902c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -317,8 +317,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS - : KVM_S390_BSCA_CPU_SLOTS; + r = KVM_S390_BSCA_CPU_SLOTS; + if (sclp.has_esca && sclp.has_64bscao) + r = KVM_S390_ESCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; @@ -1295,6 +1296,7 @@ static void sca_dispose(struct kvm *kvm) int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { + gfp_t alloc_flags = GFP_KERNEL; int i, rc; char debug_name[16]; static unsigned long sca_offset; @@ -1319,8 +1321,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500); kvm->arch.use_esca = 0; /* start with basic SCA */ + if (!sclp.has_64bscao) + alloc_flags |= GFP_DMA; rwlock_init(&kvm->arch.sca_lock); - kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); @@ -1567,7 +1571,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) if (id < KVM_S390_BSCA_CPU_SLOTS) return true; - if (!sclp.has_esca) + if (!sclp.has_esca || !sclp.has_64bscao) return false; mutex_lock(&kvm->lock); -- cgit v0.10.2 From b9e28897e6e9f82585ecf6ea45942866ece7d167 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:51:52 +0100 Subject: s390/sclp: detect guest-PER enhancement Let's detect that facility, so we can correctly handle its abscence. Reviewed-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 5214000..076f631 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -33,7 +33,8 @@ struct sclp_core_entry { u8 : 4; u8 sief2 : 1; u8 : 3; - u8 : 3; + u8 : 2; + u8 gpere : 1; u8 siif : 1; u8 sigpif : 1; u8 : 3; @@ -60,6 +61,7 @@ struct sclp_info { unsigned char has_esca : 1; unsigned char has_sief2 : 1; unsigned char has_64bscao : 1; + unsigned char has_gpere : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 211eb86a..a05f2d0 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -146,6 +146,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_siif = cpue->siif; sclp.has_sigpif = cpue->sigpif; sclp.has_sief2 = cpue->sief2; + sclp.has_gpere = cpue->gpere; break; } -- cgit v0.10.2 From 89b5b4de33902a57cb9c8f2d06de4ffbc921de15 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:47:13 +0100 Subject: KVM: s390: guestdbg: signal missing hardware support Without guest-PER enhancement, we can't provide any debugging support. Therefore act like kernel support is missing. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index efb902c..e477c8e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2179,6 +2179,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, if (dbg->control & ~VALID_GUESTDBG_FLAGS) return -EINVAL; + if (!sclp.has_gpere) + return -EINVAL; if (dbg->control & KVM_GUESTDBG_ENABLE) { vcpu->guest_debug = dbg->control; -- cgit v0.10.2 From 09be9cb92bb9e799bdbfd3834595bd6b4703b40b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:55:35 +0100 Subject: s390/sclp: detect cmma Let's detect the Collaborative-memory-management-interpretation facility, aka CMM assist, so we can correctly enable cmma later. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 076f631..fa40ac8 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -62,6 +62,7 @@ struct sclp_info { unsigned char has_sief2 : 1; unsigned char has_64bscao : 1; unsigned char has_gpere : 1; + unsigned char has_cmma : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index a05f2d0..366e1a4 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -115,6 +115,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); sclp.has_64bscao = !!(sccb->fac116 & 0x80); + sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) -- cgit v0.10.2 From c24cc9c8a6ca798427d3ff46b55df8403361df3e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:53:04 +0100 Subject: KVM: s390: enable CMMA if the interpration is available Now that we can detect if collaborative-memory-management interpretation is available, replace the heuristic by a real hardware detection. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e477c8e..005e664 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -485,9 +485,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: - /* enable CMMA only for z10 and later (EDAT_1) */ ret = -EINVAL; - if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1) + if (!sclp.has_cmma) break; ret = -EBUSY; -- cgit v0.10.2 From f9cbd9b02539330ddd349df583fcfc2db8a23b90 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 3 Mar 2016 09:48:47 +0100 Subject: KVM: s390: provide CMMA attributes only if available Let's not provide the device attribute for cmma enabling and clearing if the hardware doesn't support it. This also helps getting rid of the undocumented return value "-EINVAL" in case CMMA is not available when trying to enable it. Also properly document the meaning of -EINVAL for CMMA clearing. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a4482cc..4aac3e5 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2520,6 +2520,7 @@ Parameters: struct kvm_device_attr Returns: 0 on success, -1 on error Errors: ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. EPERM: The attribute cannot (currently) be accessed this way (e.g. read-only attribute, or attribute that only makes sense when the device is in a different state) @@ -2547,6 +2548,7 @@ Parameters: struct kvm_device_attr Returns: 0 on success, -1 on error Errors: ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. Tests whether a device supports a particular attribute. A successful return indicates the attribute is implemented. It does not necessarily diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 8a458f4..b6cda49 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -20,7 +20,8 @@ Enables Collaborative Memory Management Assist (CMMA) for the virtual machine. 1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA Parameters: none -Returns: 0 +Returns: -EINVAL if CMMA was not enabled + 0 otherwise Clear the CMMA status for all guest pages, so any pages the guest marked as unused are again used any may not be reclaimed by the host. diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 005e664..f695c6e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -485,7 +485,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att unsigned int idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: - ret = -EINVAL; + ret = -ENXIO; if (!sclp.has_cmma) break; @@ -499,6 +499,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att mutex_unlock(&kvm->lock); break; case KVM_S390_VM_MEM_CLR_CMMA: + ret = -ENXIO; + if (!sclp.has_cmma) + break; ret = -EINVAL; if (!kvm->arch.use_cmma) break; @@ -964,6 +967,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: case KVM_S390_VM_MEM_CLR_CMMA: + ret = sclp.has_cmma ? 0 : -ENXIO; + break; case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; break; -- cgit v0.10.2 From 5236c751da5e6ccfda4e5d53690a37dfb456997b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:53:46 +0100 Subject: s390/sclp: detect guest-storage-limit-suppression Let's detect that facility. Reviewed-by: Christian Borntraeger Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index fa40ac8..e1450dd 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -63,6 +63,7 @@ struct sclp_info { unsigned char has_64bscao : 1; unsigned char has_gpere : 1; unsigned char has_cmma : 1; + unsigned char has_gsls : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 366e1a4..99fce6b 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -114,6 +114,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_gsls = !!(sccb->fac85 & 0x80); sclp.has_64bscao = !!(sccb->fac116 & 0x80); sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); -- cgit v0.10.2 From efed110446226c725268a1f980806d799990a979 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 16 Apr 2015 12:32:41 +0200 Subject: KVM: s390: handle missing guest-storage-limit-suppression If guest-storage-limit-suppression is not available, we would for now have a valid guest address space with size 0. So let's simply set the origin to 0 and the limit to hamax. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b2a83a0..9eed5c1 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -186,7 +186,9 @@ struct kvm_s390_sie_block { __u32 scaol; /* 0x0064 */ __u8 reserved68[4]; /* 0x0068 */ __u32 todpr; /* 0x006c */ - __u8 reserved70[32]; /* 0x0070 */ + __u8 reserved70[16]; /* 0x0070 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ psw_t gpsw; /* 0x0090 */ __u64 gg14; /* 0x00a0 */ __u64 gg15; /* 0x00a8 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f695c6e..2a23955 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1897,6 +1897,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + /* the real guest size will always be smaller than msl */ + vcpu->arch.sie_block->mso = 0; + vcpu->arch.sie_block->msl = sclp.hamax; + vcpu->arch.sie_block->icpua = id; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; -- cgit v0.10.2 From 72cd82b9e9d075713367ad840c2a9b52b4cd447d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:59:03 +0100 Subject: s390/sclp: detect intervention bypass facility Let's detect if we have the intervention bypass facility installed. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index e1450dd..ef1f427 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -38,7 +38,11 @@ struct sclp_core_entry { u8 siif : 1; u8 sigpif : 1; u8 : 3; - u8 reserved2[10]; + u8 reserved2[3]; + u8 : 2; + u8 ib : 1; + u8 : 5; + u8 reserved3[6]; u8 type; u8 reserved1; } __attribute__((packed)); @@ -64,6 +68,7 @@ struct sclp_info { unsigned char has_gpere : 1; unsigned char has_cmma : 1; unsigned char has_gsls : 1; + unsigned char has_ib : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 99fce6b..2240b61 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -149,6 +149,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_sigpif = cpue->sigpif; sclp.has_sief2 = cpue->sief2; sclp.has_gpere = cpue->gpere; + sclp.has_ib = cpue->ib; break; } -- cgit v0.10.2 From 11ad65b79e8c27cdafe404e33938da270a55858a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:46:26 +0200 Subject: KVM: s390: enable ib only if available Let's enable intervention bypass only if the facility is acutally available. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2a23955..340fb40 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1845,7 +1845,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 8)) vcpu->arch.sie_block->ecb2 |= 0x08; - vcpu->arch.sie_block->eca = 0xC1002000U; + vcpu->arch.sie_block->eca = 0x81002000U; + if (sclp.has_ib) + vcpu->arch.sie_block->eca |= 0x40000000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) -- cgit v0.10.2 From 4a5c3e08271216891ce1b5315cec3dcadbd01cd4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:00:23 +0100 Subject: s390/sclp: detect conditional-external-interception facility Let's detect if we have that facility. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index ef1f427..c91ad19 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -41,7 +41,8 @@ struct sclp_core_entry { u8 reserved2[3]; u8 : 2; u8 ib : 1; - u8 : 5; + u8 cei : 1; + u8 : 4; u8 reserved3[6]; u8 type; u8 reserved1; @@ -69,6 +70,7 @@ struct sclp_info { unsigned char has_cmma : 1; unsigned char has_gsls : 1; unsigned char has_ib : 1; + unsigned char has_cei : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 2240b61..4b330fb 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -150,6 +150,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_sief2 = cpue->sief2; sclp.has_gpere = cpue->gpere; sclp.has_ib = cpue->ib; + sclp.has_cei = cpue->cei; break; } -- cgit v0.10.2 From 48ee7d3a7f8f3ca90dfc5e1103e68c0044051acc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:49:34 +0200 Subject: KVM: s390: enable cei only if available Let's only enable conditional-external-interruption if the facility is actually available. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 340fb40..1a239a6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1845,7 +1845,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 8)) vcpu->arch.sie_block->ecb2 |= 0x08; - vcpu->arch.sie_block->eca = 0x81002000U; + vcpu->arch.sie_block->eca = 0x1002000U; + if (sclp.has_cei) + vcpu->arch.sie_block->eca |= 0x80000000U; if (sclp.has_ib) vcpu->arch.sie_block->eca |= 0x40000000U; if (sclp.has_siif) -- cgit v0.10.2 From a0eb55e6318f1bcfe93b01f0944622f14a6b2977 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:02:25 +0100 Subject: s390/sclp: detect PFMF interpretation facility Let's detect that facility. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index c91ad19..bdb7f22 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -71,6 +71,7 @@ struct sclp_info { unsigned char has_gsls : 1; unsigned char has_ib : 1; unsigned char has_cei : 1; + unsigned char has_pfmfi : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 4b330fb..500cbfd 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -46,7 +46,8 @@ struct read_info_sccb { u64 rnmax2; /* 104-111 */ u8 _pad_112[116 - 112]; /* 112-115 */ u8 fac116; /* 116 */ - u8 _pad_117[119 - 117]; /* 117-118 */ + u8 fac117; /* 117 */ + u8 _pad_118; /* 118 */ u8 fac119; /* 119 */ u16 hcpua; /* 120-121 */ u8 _pad_122[124 - 122]; /* 122-123 */ @@ -118,6 +119,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_64bscao = !!(sccb->fac116 & 0x80); sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); + sclp.has_pfmfi = !!(sccb->fac117 & 0x40); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; -- cgit v0.10.2 From 873b425e4c2fd0ba6617d67a45fbf119b65575b4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:53:47 +0200 Subject: KVM: s390: enable PFMFI only if available Let's enable interpretation of PFMFI only if the facility is actually available. Emulation code still works in case the guest is offered EDAT-1. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1a239a6..d987eb8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1843,7 +1843,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; - if (test_kvm_facility(vcpu->kvm, 8)) + if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) vcpu->arch.sie_block->ecb2 |= 0x08; vcpu->arch.sie_block->eca = 0x1002000U; if (sclp.has_cei) -- cgit v0.10.2 From 9c375490fc812ebdf3259ea2566c271d00544fc2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 13:02:52 +0100 Subject: s390/sclp: detect interlock-and-broadcast-suppression facility Let's detect that facility. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index bdb7f22..99a0150 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -72,6 +72,7 @@ struct sclp_info { unsigned char has_ib : 1; unsigned char has_cei : 1; unsigned char has_pfmfi : 1; + unsigned char has_ibs : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 500cbfd..d5b873c 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -120,6 +120,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_cmma = !!(sccb->fac116 & 0x40); sclp.has_esca = !!(sccb->fac116 & 0x08); sclp.has_pfmfi = !!(sccb->fac117 & 0x40); + sclp.has_ibs = !!(sccb->fac117 & 0x20); sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; -- cgit v0.10.2 From 09a400e78eaf02d8ab8e836edf864e1025c8e2d7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 4 Apr 2016 15:57:08 +0200 Subject: KVM: s390: enable ibs only if available Let's enable interlock-and-broadcast suppression only if the facility is actually available. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d987eb8..ad93b40 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2789,6 +2789,8 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm) static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { + if (!sclp.has_ibs) + return; kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu); kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu); } -- cgit v0.10.2 From bdab09f3d81c3fac6314012ca0eff1206ea067ab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 12 Apr 2016 11:07:49 +0200 Subject: KVM: s390: enable host-protection-interruption only with ESOP host-protection-interruption control was introduced with ESOP. So let's enable it only if we have ESOP and add an explanatory comment why we can live without it. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ad93b40..4e764fa 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1837,7 +1837,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) kvm_s390_vcpu_setup_model(vcpu); - vcpu->arch.sie_block->ecb = 0x02; + /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */ + if (MACHINE_HAS_ESOP) + vcpu->arch.sie_block->ecb |= 0x02; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= 0x04; if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) -- cgit v0.10.2 From f597d24eee2dd9486edaac7a1821f35bc4d349c2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 22 Apr 2016 16:26:49 +0200 Subject: KVM: s390: turn on tx even without ctx Constrained transactional execution is an addon of transactional execution. Let's enable the assist also if only TX is enabled for the guest. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4e764fa..9d0e4d0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1842,7 +1842,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= 0x02; if (test_kvm_facility(vcpu->kvm, 9)) vcpu->arch.sie_block->ecb |= 0x04; - if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) + if (test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) -- cgit v0.10.2 From 1bb78d161feae5b613c80eb822059eec60d2a538 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Jun 2016 09:57:08 +0200 Subject: KVM: s390: provide logging for diagnose 0x500 We might need to debug some virtio things, so better have diagnose 500 logged. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 1ea4095..ce865bd 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) (vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY)) return -EOPNOTSUPP; + VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx", + (u32) vcpu->run->s.regs.gprs[2], + (u32) vcpu->run->s.regs.gprs[3], + vcpu->run->s.regs.gprs[4]); + /* * The layout is as follows: * - gpr 2 contains the subchannel id (passed as addr) -- cgit v0.10.2 From dcc98ea6146e4da27eee2f3e9983500e9618cc23 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 7 Jun 2016 09:37:17 +0200 Subject: KVM: s390: fixup I/O interrupt traces We currently have two issues with the I/O interrupt injection logging: 1. All QEMU versions up to 2.6 have a wrong encoding of device numbers etc for the I/O interrupt type, so the inject VM_EVENT will have wrong data. Let's fix this by using the interrupt parameters and not the interrupt type number. 2. We only log in kvm_s390_inject_vm, but not when coming from kvm_s390_reinject_io_int or from flic. Let's move the logging to the common __inject_io function. We also enhance the logging for delivery to match the data. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5a80af7..d72c4a8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -28,9 +28,6 @@ #include "gaccess.h" #include "trace-s390.h" -#define IOINT_SCHID_MASK 0x0000ffff -#define IOINT_SSID_MASK 0x00030000 -#define IOINT_CSSID_MASK 0x03fc0000 #define PFAULT_INIT 0x0600 #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 @@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt_info, list); if (inti) { - VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type); + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)"); + else + VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); + vcpu->stat.deliver_io_int++; trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, @@ -1415,6 +1419,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } fi->counters[FIRQ_CNTR_IO] += 1; + if (inti->type & KVM_S390_INT_IO_AI_MASK) + VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)"); + else + VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x", + inti->io.subchannel_id >> 8, + inti->io.subchannel_id >> 1 & 0x3, + inti->io.subchannel_nr); isc = int_word_to_isc(inti->io.io_int_word); list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; list_add_tail(&inti->list, list); @@ -1531,13 +1542,6 @@ int kvm_s390_inject_vm(struct kvm *kvm, inti->mchk.mcic = s390int->parm64; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (inti->type & KVM_S390_INT_IO_AI_MASK) - VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)"); - else - VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x", - s390int->type & IOINT_CSSID_MASK, - s390int->type & IOINT_SSID_MASK, - s390int->type & IOINT_SCHID_MASK); inti->io.subchannel_id = s390int->parm >> 16; inti->io.subchannel_nr = s390int->parm & 0x0000ffffu; inti->io.io_int_parm = s390int->parm64 >> 32; -- cgit v0.10.2 From c427c42cd612719e8fb8b5891cc9761e7770024e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 13:51:54 +0200 Subject: s390/mm: don't drop errors in get_guest_storage_key Commit 1e133ab296f3 ("s390/mm: split arch/s390/mm/pgtable.c") changed the return value of get_guest_storage_key to an unsigned char, resulting in -EFAULT getting interpreted as a valid storage key. Cc: stable@vger.kernel.org # 4.6+ Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 18d2beb..42b968a8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -893,7 +893,7 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); -unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4324b87..2a23ca9 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -543,7 +543,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); -unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) { unsigned char key; spinlock_t *ptl; -- cgit v0.10.2 From d3ed1ceeace311af9973d17a07a114bfaf0ca1b1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Mar 2016 11:53:35 +0100 Subject: s390/mm: set and get guest storage key mmap locking Move the mmap semaphore locking out of set_guest_storage_key and get_guest_storage_key. This makes the two functions more like the other ptep_xxx operations and allows to avoid repeated semaphore operations if multiple keys are read or written. Reviewed-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9d0e4d0..d0156d7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1050,26 +1050,30 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (!keys) return -ENOMEM; + down_read(¤t->mm->mmap_sem); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; - goto out; + break; } curkey = get_guest_storage_key(current->mm, hva); if (IS_ERR_VALUE(curkey)) { r = curkey; - goto out; + break; } keys[i] = curkey; } + up_read(¤t->mm->mmap_sem); + + if (!r) { + r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, + sizeof(uint8_t) * args->count); + if (r) + r = -EFAULT; + } - r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, - sizeof(uint8_t) * args->count); - if (r) - r = -EFAULT; -out: kvfree(keys); return r; } @@ -1106,24 +1110,26 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) if (r) goto out; + down_read(¤t->mm->mmap_sem); for (i = 0; i < args->count; i++) { hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) { r = -EFAULT; - goto out; + break; } /* Lowest order bit is reserved */ if (keys[i] & 0x01) { r = -EINVAL; - goto out; + break; } r = set_guest_storage_key(current->mm, hva, (unsigned long)keys[i], 0); if (r) - goto out; + break; } + up_read(¤t->mm->mmap_sem); out: kvfree(keys); return r; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 95916fa..c6deed7 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -728,9 +728,12 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; - if (set_guest_storage_key(current->mm, useraddr, + down_read(¤t->mm->mmap_sem); + rc = set_guest_storage_key(current->mm, useraddr, vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, - vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) + vcpu->run->s.regs.gprs[reg1] & PFMF_NQ); + up_read(¤t->mm->mmap_sem); + if (rc) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 2a23ca9..7612a7c 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -506,12 +506,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_t old, new; pte_t *ptep; - down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); + if (unlikely(!ptep)) return -EFAULT; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | @@ -538,7 +535,6 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); return 0; } EXPORT_SYMBOL(set_guest_storage_key); @@ -550,14 +546,11 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) pgste_t pgste; pte_t *ptep; - down_read(&mm->mmap_sem); ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); + if (unlikely(!ptep)) return -EFAULT; - } - pgste = pgste_get_lock(ptep); + pgste = pgste_get_lock(ptep); if (pte_val(*ptep) & _PAGE_INVALID) { key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; @@ -572,10 +565,8 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) if (pgste_val(pgste) & PGSTE_GC_BIT) key |= _PAGE_CHANGED; } - pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); return key; } EXPORT_SYMBOL(get_guest_storage_key); -- cgit v0.10.2 From 8d6037a7b4f21708451d4aec14828f9ebe77b37a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2016 11:15:32 +0200 Subject: s390/mm: simplify get_guest_storage_key We can safe a few LOC and make that function easier to understand by rewriting existing code. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7612a7c..4c8d572 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -551,20 +551,11 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return -EFAULT; pgste = pgste_get_lock(ptep); - if (pte_val(*ptep) & _PAGE_INVALID) { - key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; - key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; - key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; - key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; - } else { + key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); - - /* Reflect guest's logical view, not physical */ - if (pgste_val(pgste) & PGSTE_GR_BIT) - key |= _PAGE_REFERENCED; - if (pgste_val(pgste) & PGSTE_GC_BIT) - key |= _PAGE_CHANGED; - } + /* Reflect guest's logical view, not physical */ + key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); return key; -- cgit v0.10.2 From 154c8c19c35b6da94a623cb793458e203572083d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2016 11:22:34 +0200 Subject: s390/mm: return key via pointer in get_guest_storage_key Let's just split returning the key and reporting errors. This makes calling code easier and avoids bugs as happened already. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 42b968a8..91f0e7b 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -893,7 +893,8 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); +int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char *key); /* * Certain architectures need to do special things when PTEs diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d0156d7..ad166c6 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1029,7 +1029,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) { uint8_t *keys; uint64_t hva; - unsigned long curkey; int i, r = 0; if (args->flags != 0) @@ -1058,12 +1057,9 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) break; } - curkey = get_guest_storage_key(current->mm, hva); - if (IS_ERR_VALUE(curkey)) { - r = curkey; + r = get_guest_storage_key(current->mm, hva, &keys[i]); + if (r) break; - } - keys[i] = curkey; } up_read(¤t->mm->mmap_sem); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 4c8d572..3e35298 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -539,9 +539,9 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char *key) { - unsigned char key; spinlock_t *ptl; pgste_t pgste; pte_t *ptep; @@ -551,14 +551,14 @@ unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) return -EFAULT; pgste = pgste_get_lock(ptep); - key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + *key = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; if (!(pte_val(*ptep) & _PAGE_INVALID)) - key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + *key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); /* Reflect guest's logical view, not physical */ - key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + *key |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; pgste_set_unlock(ptep, pgste); pte_unmap_unlock(ptep, ptl); - return key; + return 0; } EXPORT_SYMBOL(get_guest_storage_key); #endif -- cgit v0.10.2 From fe69eabf8deb85ae8b2958830ea3b2911e332820 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 9 May 2016 13:08:07 +0200 Subject: KVM: s390: storage keys fit into a char No need to convert the storage key into an unsigned long, the target function expects a char as argument. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ad166c6..49c6039 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1120,8 +1120,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) break; } - r = set_guest_storage_key(current->mm, hva, - (unsigned long)keys[i], 0); + r = set_guest_storage_key(current->mm, hva, keys[i], 0); if (r) break; } -- cgit v0.10.2 From 6164a2e90a5b6c5c32ccfe7a1baff80d603d702d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Apr 2016 10:09:47 +0200 Subject: KVM: s390: pfmf: fix end address calculation The current calculation is wrong if absolute != real address. Let's just calculate the start address for 4k frames upfront. Otherwise, the calculated end address will be wrong, resulting in wrong memory location/storage keys getting touched. To keep low-address protection working (using the effective address), we have to move the check. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c6deed7..bfba983 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -682,8 +682,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); + if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { + if (kvm_s390_check_low_addr_prot_real(vcpu, start)) + return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + } + switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { case 0x00000000: + /* only 4k frames specify a real address */ + start = kvm_s390_real_to_abs(vcpu, start); end = (start + (1UL << 12)) & ~((1UL << 12) - 1); break; case 0x00001000: @@ -701,20 +708,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } - if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_prot_real(vcpu, start)) - return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); - } - while (start < end) { - unsigned long useraddr, abs_addr; + unsigned long useraddr; /* Translate guest address to host address */ - if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0) - abs_addr = kvm_s390_real_to_abs(vcpu, start); - else - abs_addr = start; - useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr)); + useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); if (kvm_is_error_hva(useraddr)) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); -- cgit v0.10.2 From 9a68f0af8cd907452fa6c33343d38cdacff96294 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Apr 2016 12:09:58 +0200 Subject: KVM: s390: pfmf: MR and MC are ignored without CSSKE These two bits are simply ignored when the conditional-SSKE facility is not installed. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index bfba983..5c926b7 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -675,10 +675,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) !test_kvm_facility(vcpu->kvm, 14)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - /* No support for conditional-SSKE */ - if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC)) - return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); -- cgit v0.10.2 From 2c26d1d23abd9a67d056c95a0823132a71edc477 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 13 Apr 2016 15:47:21 +0200 Subject: KVM: s390: pfmf: take care of amode when setting reg2 Depending on the addressing mode, we must not overwrite bit 0-31 of the register. In addition, 24 bit and 31 bit have to set certain bits to 0, which is guaranteed by converting the end address to an effective address. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 5c926b7..71fa603 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -733,8 +733,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) start += PAGE_SIZE; } - if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) - vcpu->run->s.regs.gprs[reg2] = end; + if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) { + vcpu->run->s.regs.gprs[reg2] = end; + } else { + vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL; + end = kvm_s390_logical_to_effective(vcpu, end); + vcpu->run->s.regs.gprs[reg2] |= end; + } + } return 0; } -- cgit v0.10.2 From 1824c723ac90f9870ebafae4b3b3e5f4b82ffeef Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 09:43:11 +0200 Subject: KVM: s390: pfmf: support conditional-sske facility We already indicate that facility but don't implement it in our pfmf interception handler. Let's add a new storage key handling function for conditionally setting the guest storage key. As we will reuse this function later on, let's directly implement returning the old key via parameter and indicating if any change happened via rc. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 91f0e7b..2f6702e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -893,6 +893,9 @@ void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, bool nq); +int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, unsigned char *oldkey, + bool nq, bool mr, bool mc); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 71fa603..752a1ac 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -654,8 +654,10 @@ static int handle_epsw(struct kvm_vcpu *vcpu) static int handle_pfmf(struct kvm_vcpu *vcpu) { + bool mr = false, mc = false, nq; int reg1, reg2; unsigned long start, end; + unsigned char key; vcpu->stat.instruction_pfmf++; @@ -675,6 +677,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) !test_kvm_facility(vcpu->kvm, 14)) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + /* Only provide conditional-SSKE support if enabled for the guest */ + if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK && + test_kvm_facility(vcpu->kvm, 10)) { + mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR; + mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC; + } + + nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ; + key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY; start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; start = kvm_s390_logical_to_effective(vcpu, start); @@ -723,11 +734,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) if (rc) return rc; down_read(¤t->mm->mmap_sem); - rc = set_guest_storage_key(current->mm, useraddr, - vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, - vcpu->run->s.regs.gprs[reg1] & PFMF_NQ); + rc = cond_set_guest_storage_key(current->mm, useraddr, + key, NULL, nq, mr, mc); up_read(¤t->mm->mmap_sem); - if (rc) + if (rc < 0) return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3e35298..e791e8b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -539,6 +539,39 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(set_guest_storage_key); +/** + * Conditionally set a guest storage key (handling csske). + * oldkey will be updated when either mr or mc is set and a pointer is given. + * + * Returns 0 if a guests storage key update wasn't necessary, 1 if the guest + * storage key was updated and -EFAULT on access errors. + */ +int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, unsigned char *oldkey, + bool nq, bool mr, bool mc) +{ + unsigned char tmp, mask = _PAGE_ACC_BITS | _PAGE_FP_BIT; + int rc; + + /* we can drop the pgste lock between getting and setting the key */ + if (mr | mc) { + rc = get_guest_storage_key(current->mm, addr, &tmp); + if (rc) + return rc; + if (oldkey) + *oldkey = tmp; + if (!mr) + mask |= _PAGE_REFERENCED; + if (!mc) + mask |= _PAGE_CHANGED; + if (!((tmp ^ key) & mask)) + return 0; + } + rc = set_guest_storage_key(current->mm, addr, key, nq); + return rc < 0 ? rc : 1; +} +EXPORT_SYMBOL(cond_set_guest_storage_key); + int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { -- cgit v0.10.2 From 695be0e7a24a8875c347437566f2c44ba673580b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 May 2016 14:07:05 +0200 Subject: KVM: s390: pfmf: handle address overflows In theory, end could always end up being < start, if overflowing to 0. Although very unlikely for now, let's just fix it. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 752a1ac..b8327b8 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -715,7 +715,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); } - while (start < end) { + while (start != end) { unsigned long useraddr; /* Translate guest address to host address */ -- cgit v0.10.2 From 238614515287c9400727e4cd7aa958649dcbf05f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Nov 2015 12:56:43 +0100 Subject: s390/sclp: detect storage-key facility Let's correctly detect that facility. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 99a0150..2ad9c20 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -32,7 +32,8 @@ struct sclp_core_entry { u8 reserved0; u8 : 4; u8 sief2 : 1; - u8 : 3; + u8 skey : 1; + u8 : 2; u8 : 2; u8 gpere : 1; u8 siif : 1; @@ -73,6 +74,7 @@ struct sclp_info { unsigned char has_cei : 1; unsigned char has_pfmfi : 1; unsigned char has_ibs : 1; + unsigned char has_skey : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index d5b873c..c71df0c 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -154,6 +154,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.has_gpere = cpue->gpere; sclp.has_ib = cpue->ib; sclp.has_cei = cpue->cei; + sclp.has_skey = cpue->skey; break; } -- cgit v0.10.2 From 11ddcd41bce5c2394b0390584236afdd13656998 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 09:40:09 +0200 Subject: KVM: s390: trace and count all skey intercepts Let's trace and count all skey handling operations, even if lazy skey handling was already activated. Also, don't enable lazy skey handling if anything went wrong while enabling skey handling for the SIE. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index b8327b8..6745c2a 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -152,24 +152,27 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) static int __skey_check_enable(struct kvm_vcpu *vcpu) { int rc = 0; + + trace_kvm_s390_skey_related_inst(vcpu); if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) return rc; rc = s390_enable_skey(); - VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest"); - trace_kvm_s390_skey_related_inst(vcpu); - vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc); + if (!rc) + vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); return rc; } static int handle_skey(struct kvm_vcpu *vcpu) { - int rc = __skey_check_enable(vcpu); + int rc; + vcpu->stat.instruction_storage_key++; + rc = __skey_check_enable(vcpu); if (rc) return rc; - vcpu->stat.instruction_storage_key++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 1c4586b..4fc9d4e 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -41,7 +41,7 @@ TRACE_EVENT(kvm_s390_skey_related_inst, TP_fast_assign( VCPU_ASSIGN_COMMON ), - VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu") + VCPU_TP_PRINTK("%s", "storage key related instruction") ); TRACE_EVENT(kvm_s390_major_guest_pfault, -- cgit v0.10.2 From a7e19ab55ffdd82f1a8d12694b9a0c0beeef534c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 10 May 2016 09:50:21 +0200 Subject: KVM: s390: handle missing storage-key facility Without the storage-key facility, SIE won't interpret SSKE, ISKE and RRBE for us. So let's add proper interception handlers that will be called if lazy sske cannot be enabled. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 53eacbd..f874e7d5 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -109,13 +109,14 @@ static inline unsigned char page_get_storage_key(unsigned long addr) static inline int page_reset_referenced(unsigned long addr) { - unsigned int ipm; + int cc; asm volatile( " rrbe 0,%1\n" " ipm %0\n" - : "=d" (ipm) : "a" (addr) : "cc"); - return !!(ipm & 0x20000000); + " srl %0,28\n" + : "=d" (cc) : "a" (addr) : "cc"); + return cc; } /* Bits int the storage key */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 2f6702e..9951e7e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -896,6 +896,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char key, unsigned char *oldkey, bool nq, bool mr, bool mc); +int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr); int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 6745c2a..3db3be1 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -27,6 +27,7 @@ #include #include #include +#include #include "gaccess.h" #include "kvm-s390.h" #include "trace.h" @@ -164,8 +165,7 @@ static int __skey_check_enable(struct kvm_vcpu *vcpu) return rc; } - -static int handle_skey(struct kvm_vcpu *vcpu) +static int try_handle_skey(struct kvm_vcpu *vcpu) { int rc; @@ -173,12 +173,146 @@ static int handle_skey(struct kvm_vcpu *vcpu) rc = __skey_check_enable(vcpu); if (rc) return rc; - + if (sclp.has_skey) { + /* with storage-key facility, SIE interprets it for us */ + kvm_s390_retry_instr(vcpu); + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return -EAGAIN; + } if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); + return 0; +} - kvm_s390_retry_instr(vcpu); - VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); +static int handle_iske(struct kvm_vcpu *vcpu) +{ + unsigned long addr; + unsigned char key; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + addr = kvm_s390_real_to_abs(vcpu, addr); + addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(¤t->mm->mmap_sem); + rc = get_guest_storage_key(current->mm, addr, &key); + up_read(¤t->mm->mmap_sem); + if (rc) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + vcpu->run->s.regs.gprs[reg1] &= ~0xff; + vcpu->run->s.regs.gprs[reg1] |= key; + return 0; +} + +static int handle_rrbe(struct kvm_vcpu *vcpu) +{ + unsigned long addr; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + addr = kvm_s390_logical_to_effective(vcpu, addr); + addr = kvm_s390_real_to_abs(vcpu, addr); + addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr)); + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(¤t->mm->mmap_sem); + rc = reset_guest_reference_bit(current->mm, addr); + up_read(¤t->mm->mmap_sem); + if (rc < 0) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + kvm_s390_set_psw_cc(vcpu, rc); + return 0; +} + +#define SSKE_NQ 0x8 +#define SSKE_MR 0x4 +#define SSKE_MC 0x2 +#define SSKE_MB 0x1 +static int handle_sske(struct kvm_vcpu *vcpu) +{ + unsigned char m3 = vcpu->arch.sie_block->ipb >> 28; + unsigned long start, end; + unsigned char key, oldkey; + int reg1, reg2; + int rc; + + rc = try_handle_skey(vcpu); + if (rc) + return rc != -EAGAIN ? rc : 0; + + if (!test_kvm_facility(vcpu->kvm, 8)) + m3 &= ~SSKE_MB; + if (!test_kvm_facility(vcpu->kvm, 10)) + m3 &= ~(SSKE_MC | SSKE_MR); + if (!test_kvm_facility(vcpu->kvm, 14)) + m3 &= ~SSKE_NQ; + + kvm_s390_get_regs_rre(vcpu, ®1, ®2); + + key = vcpu->run->s.regs.gprs[reg1] & 0xfe; + start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; + start = kvm_s390_logical_to_effective(vcpu, start); + if (m3 & SSKE_MB) { + /* start already designates an absolute address */ + end = (start + (1UL << 20)) & ~((1UL << 20) - 1); + } else { + start = kvm_s390_real_to_abs(vcpu, start); + end = start + PAGE_SIZE; + } + + while (start != end) { + unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start)); + + if (kvm_is_error_hva(addr)) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + down_read(¤t->mm->mmap_sem); + rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey, + m3 & SSKE_NQ, m3 & SSKE_MR, + m3 & SSKE_MC); + up_read(¤t->mm->mmap_sem); + if (rc < 0) + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + start += PAGE_SIZE; + }; + + if (m3 & (SSKE_MC | SSKE_MR)) { + if (m3 & SSKE_MB) { + /* skey in reg1 is unpredictable */ + kvm_s390_set_psw_cc(vcpu, 3); + } else { + kvm_s390_set_psw_cc(vcpu, rc); + vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL; + vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8; + } + } + if (m3 & SSKE_MB) { + if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) + vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK; + else + vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL; + end = kvm_s390_logical_to_effective(vcpu, end); + vcpu->run->s.regs.gprs[reg2] |= end; + } return 0; } @@ -586,9 +720,9 @@ static const intercept_handler_t b2_handlers[256] = { [0x11] = handle_store_prefix, [0x12] = handle_store_cpu_address, [0x21] = handle_ipte_interlock, - [0x29] = handle_skey, - [0x2a] = handle_skey, - [0x2b] = handle_skey, + [0x29] = handle_iske, + [0x2a] = handle_rrbe, + [0x2b] = handle_sske, [0x2c] = handle_test_block, [0x30] = handle_io_inst, [0x31] = handle_io_inst, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e791e8b..fa286d0 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -572,6 +572,43 @@ int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(cond_set_guest_storage_key); +/** + * Reset a guest reference bit (rrbe), returning the reference and changed bit. + * + * Returns < 0 in case of error, otherwise the cc to be reported to the guest. + */ +int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + int cc = 0; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return -EFAULT; + + new = old = pgste_get_lock(ptep); + /* Reset guest reference bit only */ + pgste_val(new) &= ~PGSTE_GR_BIT; + + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + cc = page_reset_referenced(pte_val(*ptep) & PAGE_MASK); + /* Merge real referenced bit into host-set */ + pgste_val(new) |= ((unsigned long) cc << 53) & PGSTE_HR_BIT; + } + /* Reflect guest's logical view, not physical */ + cc |= (pgste_val(old) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 49; + /* Changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & PGSTE_GR_BIT) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + return 0; +} +EXPORT_SYMBOL(reset_guest_reference_bit); + int get_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned char *key) { -- cgit v0.10.2