summaryrefslogtreecommitdiff
path: root/arch/s390/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r--arch/s390/kvm/Makefile2
-rw-r--r--arch/s390/kvm/diag.c5
-rw-r--r--arch/s390/kvm/gaccess.c152
-rw-r--r--arch/s390/kvm/intercept.c15
-rw-r--r--arch/s390/kvm/interrupt.c26
-rw-r--r--arch/s390/kvm/kvm-s390.c255
-rw-r--r--arch/s390/kvm/kvm-s390.h9
-rw-r--r--arch/s390/kvm/priv.c217
-rw-r--r--arch/s390/kvm/sthyi.c471
-rw-r--r--arch/s390/kvm/trace.h49
10 files changed, 1055 insertions, 146 deletions
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index d42fa38..82e73e2 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -12,6 +12,6 @@ common-objs = $(KVM)/kvm_main.o $(KVM)/eventfd.o $(KVM)/async_pf.o $(KVM)/irqch
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-objs := $(common-objs) kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-objs += diag.o gaccess.o guestdbg.o
+kvm-objs += diag.o gaccess.o guestdbg.o sthyi.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 1ea4095..ce865bd 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -212,6 +212,11 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
(vcpu->run->s.regs.gprs[1] != KVM_S390_VIRTIO_CCW_NOTIFY))
return -EOPNOTSUPP;
+ VCPU_EVENT(vcpu, 4, "diag 0x500 schid 0x%8.8x queue 0x%x cookie 0x%llx",
+ (u32) vcpu->run->s.regs.gprs[2],
+ (u32) vcpu->run->s.regs.gprs[3],
+ vcpu->run->s.regs.gprs[4]);
+
/*
* The layout is as follows:
* - gpr 2 contains the subchannel id (passed as addr)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 66938d2..8e245e7 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -476,18 +476,73 @@ enum {
FSI_FETCH = 2 /* Exception was due to fetch operation */
};
-static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
- ar_t ar, enum gacc_mode mode)
+enum prot_type {
+ PROT_TYPE_LA = 0,
+ PROT_TYPE_KEYC = 1,
+ PROT_TYPE_ALC = 2,
+ PROT_TYPE_DAT = 3,
+};
+
+static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva,
+ ar_t ar, enum gacc_mode mode, enum prot_type prot)
{
- int rc;
- struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- struct trans_exc_code_bits *tec_bits;
+ struct trans_exc_code_bits *tec;
memset(pgm, 0, sizeof(*pgm));
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw.as;
+ pgm->code = code;
+ tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+
+ switch (code) {
+ case PGM_ASCE_TYPE:
+ case PGM_PAGE_TRANSLATION:
+ case PGM_REGION_FIRST_TRANS:
+ case PGM_REGION_SECOND_TRANS:
+ case PGM_REGION_THIRD_TRANS:
+ case PGM_SEGMENT_TRANSLATION:
+ /*
+ * op_access_id only applies to MOVE_PAGE -> set bit 61
+ * exc_access_id has to be set to 0 for some instructions. Both
+ * cases have to be handled by the caller. We can always store
+ * exc_access_id, as it is undefined for non-ar cases.
+ */
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* FALL THROUGH */
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_EXTENDED_AUTHORITY:
+ pgm->exc_access_id = ar;
+ break;
+ case PGM_PROTECTION:
+ switch (prot) {
+ case PROT_TYPE_ALC:
+ tec->b60 = 1;
+ /* FALL THROUGH */
+ case PROT_TYPE_DAT:
+ tec->b61 = 1;
+ tec->addr = gva >> PAGE_SHIFT;
+ tec->fsi = mode == GACC_STORE ? FSI_STORE : FSI_FETCH;
+ tec->as = psw_bits(vcpu->arch.sie_block->gpsw).as;
+ /* exc_access_id is undefined for most cases */
+ pgm->exc_access_id = ar;
+ break;
+ default: /* LA and KEYC set b61 to 0, other params undefined */
+ break;
+ }
+ break;
+ }
+ return code;
+}
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+ unsigned long ga, ar_t ar, enum gacc_mode mode)
+{
+ int rc;
+ struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw);
if (!psw.t) {
asce->val = 0;
@@ -510,21 +565,8 @@ static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
return 0;
case PSW_AS_ACCREG:
rc = ar_translation(vcpu, asce, ar, mode);
- switch (rc) {
- case PGM_ALEN_TRANSLATION:
- case PGM_ALE_SEQUENCE:
- case PGM_ASTE_VALIDITY:
- case PGM_ASTE_SEQUENCE:
- case PGM_EXTENDED_AUTHORITY:
- vcpu->arch.pgm.exc_access_id = ar;
- break;
- case PGM_PROTECTION:
- tec_bits->b60 = 1;
- tec_bits->b61 = 1;
- break;
- }
if (rc > 0)
- pgm->code = rc;
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_ALC);
return rc;
}
return 0;
@@ -729,40 +771,31 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
return 1;
}
-static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
+static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar,
unsigned long *pages, unsigned long nr_pages,
const union asce asce, enum gacc_mode mode)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec_bits;
- int lap_enabled, rc;
+ int lap_enabled, rc = 0;
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
- tec_bits->addr = ga >> PAGE_SHIFT;
- if (mode == GACC_STORE && lap_enabled && is_low_address(ga)) {
- pgm->code = PGM_PROTECTION;
- return pgm->code;
- }
+ if (mode == GACC_STORE && lap_enabled && is_low_address(ga))
+ return trans_exc(vcpu, PGM_PROTECTION, ga, ar, mode,
+ PROT_TYPE_LA);
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
rc = guest_translate(vcpu, ga, pages, asce, mode);
if (rc < 0)
return rc;
- if (rc == PGM_PROTECTION)
- tec_bits->b61 = 1;
- if (rc)
- pgm->code = rc;
} else {
*pages = kvm_s390_real_to_abs(vcpu, ga);
if (kvm_is_error_gpa(vcpu->kvm, *pages))
- pgm->code = PGM_ADDRESSING;
+ rc = PGM_ADDRESSING;
}
- if (pgm->code)
- return pgm->code;
+ if (rc)
+ return trans_exc(vcpu, rc, ga, ar, mode, PROT_TYPE_DAT);
ga += PAGE_SIZE;
pages++;
nr_pages--;
@@ -783,7 +816,8 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
if (!len)
return 0;
- rc = get_vcpu_asce(vcpu, &asce, ar, mode);
+ ga = kvm_s390_logical_to_effective(vcpu, ga);
+ rc = get_vcpu_asce(vcpu, &asce, ga, ar, mode);
if (rc)
return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
@@ -795,7 +829,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, mode);
+ rc = guest_page_range(vcpu, ga, ar, pages, nr_pages, asce, mode);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -846,37 +880,28 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long *gpa, enum gacc_mode mode)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec;
union asce asce;
int rc;
gva = kvm_s390_logical_to_effective(vcpu, gva);
- tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- rc = get_vcpu_asce(vcpu, &asce, ar, mode);
- tec->addr = gva >> PAGE_SHIFT;
+ rc = get_vcpu_asce(vcpu, &asce, gva, ar, mode);
if (rc)
return rc;
if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
- if (mode == GACC_STORE) {
- rc = pgm->code = PGM_PROTECTION;
- return rc;
- }
+ if (mode == GACC_STORE)
+ return trans_exc(vcpu, PGM_PROTECTION, gva, 0,
+ mode, PROT_TYPE_LA);
}
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
rc = guest_translate(vcpu, gva, gpa, asce, mode);
- if (rc > 0) {
- if (rc == PGM_PROTECTION)
- tec->b61 = 1;
- pgm->code = rc;
- }
+ if (rc > 0)
+ return trans_exc(vcpu, rc, gva, 0, mode, PROT_TYPE_DAT);
} else {
- rc = 0;
*gpa = kvm_s390_real_to_abs(vcpu, gva);
if (kvm_is_error_gpa(vcpu->kvm, *gpa))
- rc = pgm->code = PGM_ADDRESSING;
+ return trans_exc(vcpu, rc, gva, PGM_ADDRESSING, mode, 0);
}
return rc;
@@ -915,20 +940,9 @@ int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
*/
int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
- struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
- psw_t *psw = &vcpu->arch.sie_block->gpsw;
- struct trans_exc_code_bits *tec_bits;
union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
if (!ctlreg0.lap || !is_low_address(gra))
return 0;
-
- memset(pgm, 0, sizeof(*pgm));
- tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = FSI_STORE;
- tec_bits->as = psw_bits(*psw).as;
- tec_bits->addr = gra >> PAGE_SHIFT;
- pgm->code = PGM_PROTECTION;
-
- return pgm->code;
+ return trans_exc(vcpu, PGM_PROTECTION, gra, 0, GACC_STORE, PROT_TYPE_LA);
}
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 2e6b54e..9359f65 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -349,6 +349,19 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+static int handle_operexc(struct kvm_vcpu *vcpu)
+{
+ vcpu->stat.exit_operation_exception++;
+ trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
+ vcpu->arch.sie_block->ipb);
+
+ if (vcpu->arch.sie_block->ipa == 0xb256 &&
+ test_kvm_facility(vcpu->kvm, 74))
+ return handle_sthyi(vcpu);
+
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
{
if (kvm_is_ucontrol(vcpu->kvm))
@@ -370,6 +383,8 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return handle_validity(vcpu);
case 0x28:
return handle_stop(vcpu);
+ case 0x2c:
+ return handle_operexc(vcpu);
case 0x38:
return handle_partial_execution(vcpu);
default:
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5a80af7..d72c4a8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -28,9 +28,6 @@
#include "gaccess.h"
#include "trace-s390.h"
-#define IOINT_SCHID_MASK 0x0000ffff
-#define IOINT_SSID_MASK 0x00030000
-#define IOINT_CSSID_MASK 0x03fc0000
#define PFAULT_INIT 0x0600
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
@@ -821,7 +818,14 @@ static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt_info,
list);
if (inti) {
- VCPU_EVENT(vcpu, 4, "deliver: I/O 0x%llx", inti->type);
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VCPU_EVENT(vcpu, 4, "%s", "deliver: I/O (AI)");
+ else
+ VCPU_EVENT(vcpu, 4, "deliver: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
+
vcpu->stat.deliver_io_int++;
trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
inti->type,
@@ -1415,6 +1419,13 @@ static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
fi->counters[FIRQ_CNTR_IO] += 1;
+ if (inti->type & KVM_S390_INT_IO_AI_MASK)
+ VM_EVENT(kvm, 4, "%s", "inject: I/O (AI)");
+ else
+ VM_EVENT(kvm, 4, "inject: I/O %x ss %x schid %04x",
+ inti->io.subchannel_id >> 8,
+ inti->io.subchannel_id >> 1 & 0x3,
+ inti->io.subchannel_nr);
isc = int_word_to_isc(inti->io.io_int_word);
list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
list_add_tail(&inti->list, list);
@@ -1531,13 +1542,6 @@ int kvm_s390_inject_vm(struct kvm *kvm,
inti->mchk.mcic = s390int->parm64;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (inti->type & KVM_S390_INT_IO_AI_MASK)
- VM_EVENT(kvm, 5, "%s", "inject: I/O (AI)");
- else
- VM_EVENT(kvm, 5, "inject: I/O css %x ss %x schid %04x",
- s390int->type & IOINT_CSSID_MASK,
- s390int->type & IOINT_SSID_MASK,
- s390int->type & IOINT_SCHID_MASK);
inti->io.subchannel_id = s390int->parm >> 16;
inti->io.subchannel_nr = s390int->parm & 0x0000ffffu;
inti->io.io_int_parm = s390int->parm64 >> 32;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6d8ec3a..49c6039 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -26,6 +26,7 @@
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
+#include <linux/bitmap.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/etr.h>
@@ -35,6 +36,8 @@
#include <asm/switch_to.h>
#include <asm/isc.h>
#include <asm/sclp.h>
+#include <asm/cpacf.h>
+#include <asm/etr.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -63,6 +66,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "exit_instruction", VCPU_STAT(exit_instruction) },
{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
+ { "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
@@ -93,6 +97,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
+ { "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
@@ -130,6 +135,11 @@ unsigned long kvm_s390_fac_list_mask_size(void)
return ARRAY_SIZE(kvm_s390_fac_list_mask);
}
+/* available cpu features supported by kvm */
+static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+/* available subfunctions indicated via query / "test bit" */
+static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
+
static struct gmap_notifier gmap_notifier;
debug_info_t *kvm_s390_dbf;
@@ -187,6 +197,61 @@ void kvm_arch_hardware_unsetup(void)
&kvm_clock_notifier);
}
+static void allow_cpu_feat(unsigned long nr)
+{
+ set_bit_inv(nr, kvm_s390_available_cpu_feat);
+}
+
+static inline int plo_test_bit(unsigned char nr)
+{
+ register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
+ int cc = 3; /* subfunction not available */
+
+ asm volatile(
+ /* Parameter registers are ignored for "test bit" */
+ " plo 0,0,0,0(0)\n"
+ " ipm %0\n"
+ " srl %0,28\n"
+ : "=d" (cc)
+ : "d" (r0)
+ : "cc");
+ return cc == 0;
+}
+
+static void kvm_s390_cpu_feat_init(void)
+{
+ int i;
+
+ for (i = 0; i < 256; ++i) {
+ if (plo_test_bit(i))
+ kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
+ }
+
+ if (test_facility(28)) /* TOD-clock steering */
+ etr_ptff(kvm_s390_available_subfunc.ptff, ETR_PTFF_QAF);
+
+ if (test_facility(17)) { /* MSA */
+ __cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
+ __cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
+ __cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
+ __cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
+ __cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+ }
+ if (test_facility(76)) /* MSA3 */
+ __cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+ if (test_facility(77)) { /* MSA4 */
+ __cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
+ __cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
+ __cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
+ __cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+ }
+ if (test_facility(57)) /* MSA5 */
+ __cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+
+ if (MACHINE_HAS_ESOP)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
+}
+
int kvm_arch_init(void *opaque)
{
kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
@@ -198,6 +263,8 @@ int kvm_arch_init(void *opaque)
return -ENOMEM;
}
+ kvm_s390_cpu_feat_init();
+
/* Register floating interrupt controller interface. */
return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
}
@@ -250,8 +317,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
- r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS
- : KVM_S390_BSCA_CPU_SLOTS;
+ r = KVM_S390_BSCA_CPU_SLOTS;
+ if (sclp.has_esca && sclp.has_64bscao)
+ r = KVM_S390_ESCA_CPU_SLOTS;
break;
case KVM_CAP_NR_MEMSLOTS:
r = KVM_USER_MEM_SLOTS;
@@ -417,9 +485,8 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
unsigned int idx;
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
- /* enable CMMA only for z10 and later (EDAT_1) */
- ret = -EINVAL;
- if (!MACHINE_IS_LPAR || !MACHINE_HAS_EDAT1)
+ ret = -ENXIO;
+ if (!sclp.has_cmma)
break;
ret = -EBUSY;
@@ -432,6 +499,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
mutex_unlock(&kvm->lock);
break;
case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = -ENXIO;
+ if (!sclp.has_cmma)
+ break;
ret = -EINVAL;
if (!kvm->arch.use_cmma)
break;
@@ -675,6 +745,39 @@ out:
return ret;
}
+static int kvm_s390_set_processor_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+ int ret = -EBUSY;
+
+ if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
+ return -EFAULT;
+ if (!bitmap_subset((unsigned long *) data.feat,
+ kvm_s390_available_cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS))
+ return -EINVAL;
+
+ mutex_lock(&kvm->lock);
+ if (!atomic_read(&kvm->online_vcpus)) {
+ bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ ret = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ /*
+ * Once supported by kernel + hw, we have to store the subfunctions
+ * in kvm->arch and remember that user space configured them.
+ */
+ return -ENXIO;
+}
+
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@@ -683,6 +786,12 @@ static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_PROCESSOR:
ret = kvm_s390_set_processor(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ ret = kvm_s390_set_processor_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ ret = kvm_s390_set_processor_subfunc(kvm, attr);
+ break;
}
return ret;
}
@@ -731,6 +840,50 @@ out:
return ret;
}
+static int kvm_s390_get_processor_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+ return -EFAULT;
+ return 0;
+}
+
+static int kvm_s390_get_machine_feat(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ struct kvm_s390_vm_cpu_feat data;
+
+ bitmap_copy((unsigned long *) data.feat,
+ kvm_s390_available_cpu_feat,
+ KVM_S390_VM_CPU_FEAT_NR_BITS);
+ if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
+ return -EFAULT;
+ return 0;
+}
+
+static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ /*
+ * Once we can actually configure subfunctions (kernel + hw support),
+ * we have to check if they were already set by user space, if so copy
+ * them from kvm->arch.
+ */
+ return -ENXIO;
+}
+
+static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
+ struct kvm_device_attr *attr)
+{
+ if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
+ sizeof(struct kvm_s390_vm_cpu_subfunc)))
+ return -EFAULT;
+ return 0;
+}
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
int ret = -ENXIO;
@@ -742,6 +895,18 @@ static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
case KVM_S390_VM_CPU_MACHINE:
ret = kvm_s390_get_machine(kvm, attr);
break;
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ ret = kvm_s390_get_processor_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_FEAT:
+ ret = kvm_s390_get_machine_feat(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
+ ret = kvm_s390_get_processor_subfunc(kvm, attr);
+ break;
+ case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
+ ret = kvm_s390_get_machine_subfunc(kvm, attr);
+ break;
}
return ret;
}
@@ -802,6 +967,8 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_MEM_ENABLE_CMMA:
case KVM_S390_VM_MEM_CLR_CMMA:
+ ret = sclp.has_cmma ? 0 : -ENXIO;
+ break;
case KVM_S390_VM_MEM_LIMIT_SIZE:
ret = 0;
break;
@@ -825,8 +992,13 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
switch (attr->attr) {
case KVM_S390_VM_CPU_PROCESSOR:
case KVM_S390_VM_CPU_MACHINE:
+ case KVM_S390_VM_CPU_PROCESSOR_FEAT:
+ case KVM_S390_VM_CPU_MACHINE_FEAT:
+ case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
ret = 0;
break;
+ /* configuring subfunctions is not supported yet */
+ case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
default:
ret = -ENXIO;
break;
@@ -857,7 +1029,6 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
uint8_t *keys;
uint64_t hva;
- unsigned long curkey;
int i, r = 0;
if (args->flags != 0)
@@ -878,26 +1049,27 @@ static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (!keys)
return -ENOMEM;
+ down_read(&current->mm->mmap_sem);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
- goto out;
+ break;
}
- curkey = get_guest_storage_key(current->mm, hva);
- if (IS_ERR_VALUE(curkey)) {
- r = curkey;
- goto out;
- }
- keys[i] = curkey;
+ r = get_guest_storage_key(current->mm, hva, &keys[i]);
+ if (r)
+ break;
+ }
+ up_read(&current->mm->mmap_sem);
+
+ if (!r) {
+ r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+ sizeof(uint8_t) * args->count);
+ if (r)
+ r = -EFAULT;
}
- r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
- sizeof(uint8_t) * args->count);
- if (r)
- r = -EFAULT;
-out:
kvfree(keys);
return r;
}
@@ -934,24 +1106,25 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
if (r)
goto out;
+ down_read(&current->mm->mmap_sem);
for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i);
if (kvm_is_error_hva(hva)) {
r = -EFAULT;
- goto out;
+ break;
}
/* Lowest order bit is reserved */
if (keys[i] & 0x01) {
r = -EINVAL;
- goto out;
+ break;
}
- r = set_guest_storage_key(current->mm, hva,
- (unsigned long)keys[i], 0);
+ r = set_guest_storage_key(current->mm, hva, keys[i], 0);
if (r)
- goto out;
+ break;
}
+ up_read(&current->mm->mmap_sem);
out:
kvfree(keys);
return r;
@@ -1128,6 +1301,7 @@ static void sca_dispose(struct kvm *kvm)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ gfp_t alloc_flags = GFP_KERNEL;
int i, rc;
char debug_name[16];
static unsigned long sca_offset;
@@ -1149,9 +1323,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
rc = -ENOMEM;
+ ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
+
kvm->arch.use_esca = 0; /* start with basic SCA */
+ if (!sclp.has_64bscao)
+ alloc_flags |= GFP_DMA;
rwlock_init(&kvm->arch.sca_lock);
- kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
+ kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
@@ -1188,6 +1366,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
S390_ARCH_FAC_LIST_SIZE_BYTE);
+ set_kvm_facility(kvm->arch.model.fac_mask, 74);
+ set_kvm_facility(kvm->arch.model.fac_list, 74);
+
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;
@@ -1395,7 +1576,7 @@ static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
if (id < KVM_S390_BSCA_CPU_SLOTS)
return true;
- if (!sclp.has_esca)
+ if (!sclp.has_esca || !sclp.has_64bscao)
return false;
mutex_lock(&kvm->lock);
@@ -1657,15 +1838,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
kvm_s390_vcpu_setup_model(vcpu);
- vcpu->arch.sie_block->ecb = 0x02;
+ /* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
+ if (MACHINE_HAS_ESOP)
+ vcpu->arch.sie_block->ecb |= 0x02;
if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= 0x04;
- if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
+ if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;
- if (test_kvm_facility(vcpu->kvm, 8))
+ if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
vcpu->arch.sie_block->ecb2 |= 0x08;
- vcpu->arch.sie_block->eca = 0xC1002000U;
+ vcpu->arch.sie_block->eca = 0x1002000U;
+ if (sclp.has_cei)
+ vcpu->arch.sie_block->eca |= 0x80000000U;
+ if (sclp.has_ib)
+ vcpu->arch.sie_block->eca |= 0x40000000U;
if (sclp.has_siif)
vcpu->arch.sie_block->eca |= 1;
if (sclp.has_sigpif)
@@ -1678,6 +1865,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
}
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ if (test_kvm_facility(vcpu->kvm, 74))
+ vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1715,6 +1904,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+ /* the real guest size will always be smaller than msl */
+ vcpu->arch.sie_block->mso = 0;
+ vcpu->arch.sie_block->msl = sclp.hamax;
+
vcpu->arch.sie_block->icpua = id;
spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -2001,6 +2194,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
if (dbg->control & ~VALID_GUESTDBG_FLAGS)
return -EINVAL;
+ if (!sclp.has_gpere)
+ return -EINVAL;
if (dbg->control & KVM_GUESTDBG_ENABLE) {
vcpu->guest_debug = dbg->control;
@@ -2597,6 +2792,8 @@ static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
+ if (!sclp.has_ibs)
+ return;
kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 8621ab0..52aa47e 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -175,6 +175,12 @@ static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
return 0;
}
+static inline int test_kvm_cpu_feat(struct kvm *kvm, unsigned long nr)
+{
+ WARN_ON_ONCE(nr >= KVM_S390_VM_CPU_FEAT_NR_BITS);
+ return test_bit_inv(nr, kvm->arch.cpu_feat);
+}
+
/* are cpu states controlled by user space */
static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
{
@@ -250,6 +256,9 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
+/* implemented in sthyi.c */
+int handle_sthyi(struct kvm_vcpu *vcpu);
+
/* implemented in kvm-s390.c */
void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod);
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 95916fa..3db3be1 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -27,6 +27,7 @@
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/compat.h>
+#include <asm/sclp.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
@@ -152,30 +153,166 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
static int __skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc = 0;
+
+ trace_kvm_s390_skey_related_inst(vcpu);
if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
return rc;
rc = s390_enable_skey();
- VCPU_EVENT(vcpu, 3, "%s", "enabling storage keys for guest");
- trace_kvm_s390_skey_related_inst(vcpu);
- vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
+ if (!rc)
+ vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
return rc;
}
-
-static int handle_skey(struct kvm_vcpu *vcpu)
+static int try_handle_skey(struct kvm_vcpu *vcpu)
{
- int rc = __skey_check_enable(vcpu);
+ int rc;
+ vcpu->stat.instruction_storage_key++;
+ rc = __skey_check_enable(vcpu);
if (rc)
return rc;
- vcpu->stat.instruction_storage_key++;
-
+ if (sclp.has_skey) {
+ /* with storage-key facility, SIE interprets it for us */
+ kvm_s390_retry_instr(vcpu);
+ VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+ return -EAGAIN;
+ }
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+ return 0;
+}
- kvm_s390_retry_instr(vcpu);
- VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
+static int handle_iske(struct kvm_vcpu *vcpu)
+{
+ unsigned long addr;
+ unsigned char key;
+ int reg1, reg2;
+ int rc;
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+ addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = get_guest_storage_key(current->mm, addr, &key);
+ up_read(&current->mm->mmap_sem);
+ if (rc)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ vcpu->run->s.regs.gprs[reg1] &= ~0xff;
+ vcpu->run->s.regs.gprs[reg1] |= key;
+ return 0;
+}
+
+static int handle_rrbe(struct kvm_vcpu *vcpu)
+{
+ unsigned long addr;
+ int reg1, reg2;
+ int rc;
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ addr = kvm_s390_logical_to_effective(vcpu, addr);
+ addr = kvm_s390_real_to_abs(vcpu, addr);
+ addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(addr));
+ if (kvm_is_error_hva(addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = reset_guest_reference_bit(current->mm, addr);
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ kvm_s390_set_psw_cc(vcpu, rc);
+ return 0;
+}
+
+#define SSKE_NQ 0x8
+#define SSKE_MR 0x4
+#define SSKE_MC 0x2
+#define SSKE_MB 0x1
+static int handle_sske(struct kvm_vcpu *vcpu)
+{
+ unsigned char m3 = vcpu->arch.sie_block->ipb >> 28;
+ unsigned long start, end;
+ unsigned char key, oldkey;
+ int reg1, reg2;
+ int rc;
+
+ rc = try_handle_skey(vcpu);
+ if (rc)
+ return rc != -EAGAIN ? rc : 0;
+
+ if (!test_kvm_facility(vcpu->kvm, 8))
+ m3 &= ~SSKE_MB;
+ if (!test_kvm_facility(vcpu->kvm, 10))
+ m3 &= ~(SSKE_MC | SSKE_MR);
+ if (!test_kvm_facility(vcpu->kvm, 14))
+ m3 &= ~SSKE_NQ;
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+
+ key = vcpu->run->s.regs.gprs[reg1] & 0xfe;
+ start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
+ start = kvm_s390_logical_to_effective(vcpu, start);
+ if (m3 & SSKE_MB) {
+ /* start already designates an absolute address */
+ end = (start + (1UL << 20)) & ~((1UL << 20) - 1);
+ } else {
+ start = kvm_s390_real_to_abs(vcpu, start);
+ end = start + PAGE_SIZE;
+ }
+
+ while (start != end) {
+ unsigned long addr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
+
+ if (kvm_is_error_hva(addr))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ down_read(&current->mm->mmap_sem);
+ rc = cond_set_guest_storage_key(current->mm, addr, key, &oldkey,
+ m3 & SSKE_NQ, m3 & SSKE_MR,
+ m3 & SSKE_MC);
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ start += PAGE_SIZE;
+ };
+
+ if (m3 & (SSKE_MC | SSKE_MR)) {
+ if (m3 & SSKE_MB) {
+ /* skey in reg1 is unpredictable */
+ kvm_s390_set_psw_cc(vcpu, 3);
+ } else {
+ kvm_s390_set_psw_cc(vcpu, rc);
+ vcpu->run->s.regs.gprs[reg1] &= ~0xff00UL;
+ vcpu->run->s.regs.gprs[reg1] |= (u64) oldkey << 8;
+ }
+ }
+ if (m3 & SSKE_MB) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT)
+ vcpu->run->s.regs.gprs[reg2] &= ~PAGE_MASK;
+ else
+ vcpu->run->s.regs.gprs[reg2] &= ~0xfffff000UL;
+ end = kvm_s390_logical_to_effective(vcpu, end);
+ vcpu->run->s.regs.gprs[reg2] |= end;
+ }
return 0;
}
@@ -583,9 +720,9 @@ static const intercept_handler_t b2_handlers[256] = {
[0x11] = handle_store_prefix,
[0x12] = handle_store_cpu_address,
[0x21] = handle_ipte_interlock,
- [0x29] = handle_skey,
- [0x2a] = handle_skey,
- [0x2b] = handle_skey,
+ [0x29] = handle_iske,
+ [0x2a] = handle_rrbe,
+ [0x2b] = handle_sske,
[0x2c] = handle_test_block,
[0x30] = handle_io_inst,
[0x31] = handle_io_inst,
@@ -654,8 +791,10 @@ static int handle_epsw(struct kvm_vcpu *vcpu)
static int handle_pfmf(struct kvm_vcpu *vcpu)
{
+ bool mr = false, mc = false, nq;
int reg1, reg2;
unsigned long start, end;
+ unsigned char key;
vcpu->stat.instruction_pfmf++;
@@ -675,15 +814,27 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
!test_kvm_facility(vcpu->kvm, 14))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- /* No support for conditional-SSKE */
- if (vcpu->run->s.regs.gprs[reg1] & (PFMF_MR | PFMF_MC))
- return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ /* Only provide conditional-SSKE support if enabled for the guest */
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK &&
+ test_kvm_facility(vcpu->kvm, 10)) {
+ mr = vcpu->run->s.regs.gprs[reg1] & PFMF_MR;
+ mc = vcpu->run->s.regs.gprs[reg1] & PFMF_MC;
+ }
+ nq = vcpu->run->s.regs.gprs[reg1] & PFMF_NQ;
+ key = vcpu->run->s.regs.gprs[reg1] & PFMF_KEY;
start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
start = kvm_s390_logical_to_effective(vcpu, start);
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+ if (kvm_s390_check_low_addr_prot_real(vcpu, start))
+ return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+ }
+
switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
case 0x00000000:
+ /* only 4k frames specify a real address */
+ start = kvm_s390_real_to_abs(vcpu, start);
end = (start + (1UL << 12)) & ~((1UL << 12) - 1);
break;
case 0x00001000:
@@ -701,20 +852,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
}
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
- if (kvm_s390_check_low_addr_prot_real(vcpu, start))
- return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
- }
-
- while (start < end) {
- unsigned long useraddr, abs_addr;
+ while (start != end) {
+ unsigned long useraddr;
/* Translate guest address to host address */
- if ((vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) == 0)
- abs_addr = kvm_s390_real_to_abs(vcpu, start);
- else
- abs_addr = start;
- useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(abs_addr));
+ useraddr = gfn_to_hva(vcpu->kvm, gpa_to_gfn(start));
if (kvm_is_error_hva(useraddr))
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
@@ -728,16 +870,25 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
if (rc)
return rc;
- if (set_guest_storage_key(current->mm, useraddr,
- vcpu->run->s.regs.gprs[reg1] & PFMF_KEY,
- vcpu->run->s.regs.gprs[reg1] & PFMF_NQ))
+ down_read(&current->mm->mmap_sem);
+ rc = cond_set_guest_storage_key(current->mm, useraddr,
+ key, NULL, nq, mr, mc);
+ up_read(&current->mm->mmap_sem);
+ if (rc < 0)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
}
start += PAGE_SIZE;
}
- if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC)
- vcpu->run->s.regs.gprs[reg2] = end;
+ if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
+ if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_AMODE_64BIT) {
+ vcpu->run->s.regs.gprs[reg2] = end;
+ } else {
+ vcpu->run->s.regs.gprs[reg2] &= ~0xffffffffUL;
+ end = kvm_s390_logical_to_effective(vcpu, end);
+ vcpu->run->s.regs.gprs[reg2] |= end;
+ }
+ }
return 0;
}
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
new file mode 100644
index 0000000..bd98b7d
--- /dev/null
+++ b/arch/s390/kvm/sthyi.c
@@ -0,0 +1,471 @@
+/*
+ * store hypervisor information instruction emulation functions.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Janosch Frank <frankja@linux.vnet.ibm.com>
+ */
+#include <linux/kvm_host.h>
+#include <linux/errno.h>
+#include <linux/pagemap.h>
+#include <linux/vmalloc.h>
+#include <linux/ratelimit.h>
+
+#include <asm/kvm_host.h>
+#include <asm/asm-offsets.h>
+#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/sysinfo.h>
+#include <asm/ebcdic.h>
+
+#include "kvm-s390.h"
+#include "gaccess.h"
+#include "trace.h"
+
+#define DED_WEIGHT 0xffff
+/*
+ * CP and IFL as EBCDIC strings, SP/0x40 determines the end of string
+ * as they are justified with spaces.
+ */
+#define CP 0xc3d7404040404040UL
+#define IFL 0xc9c6d34040404040UL
+
+enum hdr_flags {
+ HDR_NOT_LPAR = 0x10,
+ HDR_STACK_INCM = 0x20,
+ HDR_STSI_UNAV = 0x40,
+ HDR_PERF_UNAV = 0x80,
+};
+
+enum mac_validity {
+ MAC_NAME_VLD = 0x20,
+ MAC_ID_VLD = 0x40,
+ MAC_CNT_VLD = 0x80,
+};
+
+enum par_flag {
+ PAR_MT_EN = 0x80,
+};
+
+enum par_validity {
+ PAR_GRP_VLD = 0x08,
+ PAR_ID_VLD = 0x10,
+ PAR_ABS_VLD = 0x20,
+ PAR_WGHT_VLD = 0x40,
+ PAR_PCNT_VLD = 0x80,
+};
+
+struct hdr_sctn {
+ u8 infhflg1;
+ u8 infhflg2; /* reserved */
+ u8 infhval1; /* reserved */
+ u8 infhval2; /* reserved */
+ u8 reserved[3];
+ u8 infhygct;
+ u16 infhtotl;
+ u16 infhdln;
+ u16 infmoff;
+ u16 infmlen;
+ u16 infpoff;
+ u16 infplen;
+ u16 infhoff1;
+ u16 infhlen1;
+ u16 infgoff1;
+ u16 infglen1;
+ u16 infhoff2;
+ u16 infhlen2;
+ u16 infgoff2;
+ u16 infglen2;
+ u16 infhoff3;
+ u16 infhlen3;
+ u16 infgoff3;
+ u16 infglen3;
+ u8 reserved2[4];
+} __packed;
+
+struct mac_sctn {
+ u8 infmflg1; /* reserved */
+ u8 infmflg2; /* reserved */
+ u8 infmval1;
+ u8 infmval2; /* reserved */
+ u16 infmscps;
+ u16 infmdcps;
+ u16 infmsifl;
+ u16 infmdifl;
+ char infmname[8];
+ char infmtype[4];
+ char infmmanu[16];
+ char infmseq[16];
+ char infmpman[4];
+ u8 reserved[4];
+} __packed;
+
+struct par_sctn {
+ u8 infpflg1;
+ u8 infpflg2; /* reserved */
+ u8 infpval1;
+ u8 infpval2; /* reserved */
+ u16 infppnum;
+ u16 infpscps;
+ u16 infpdcps;
+ u16 infpsifl;
+ u16 infpdifl;
+ u16 reserved;
+ char infppnam[8];
+ u32 infpwbcp;
+ u32 infpabcp;
+ u32 infpwbif;
+ u32 infpabif;
+ char infplgnm[8];
+ u32 infplgcp;
+ u32 infplgif;
+} __packed;
+
+struct sthyi_sctns {
+ struct hdr_sctn hdr;
+ struct mac_sctn mac;
+ struct par_sctn par;
+} __packed;
+
+struct cpu_inf {
+ u64 lpar_cap;
+ u64 lpar_grp_cap;
+ u64 lpar_weight;
+ u64 all_weight;
+ int cpu_num_ded;
+ int cpu_num_shd;
+};
+
+struct lpar_cpu_inf {
+ struct cpu_inf cp;
+ struct cpu_inf ifl;
+};
+
+static inline u64 cpu_id(u8 ctidx, void *diag224_buf)
+{
+ return *((u64 *)(diag224_buf + (ctidx + 1) * DIAG204_CPU_NAME_LEN));
+}
+
+/*
+ * Scales the cpu capping from the lpar range to the one expected in
+ * sthyi data.
+ *
+ * diag204 reports a cap in hundredths of processor units.
+ * z/VM's range for one core is 0 - 0x10000.
+ */
+static u32 scale_cap(u32 in)
+{
+ return (0x10000 * in) / 100;
+}
+
+static void fill_hdr(struct sthyi_sctns *sctns)
+{
+ sctns->hdr.infhdln = sizeof(sctns->hdr);
+ sctns->hdr.infmoff = sizeof(sctns->hdr);
+ sctns->hdr.infmlen = sizeof(sctns->mac);
+ sctns->hdr.infplen = sizeof(sctns->par);
+ sctns->hdr.infpoff = sctns->hdr.infhdln + sctns->hdr.infmlen;
+ sctns->hdr.infhtotl = sctns->hdr.infpoff + sctns->hdr.infplen;
+}
+
+static void fill_stsi_mac(struct sthyi_sctns *sctns,
+ struct sysinfo_1_1_1 *sysinfo)
+{
+ if (stsi(sysinfo, 1, 1, 1))
+ return;
+
+ sclp_ocf_cpc_name_copy(sctns->mac.infmname);
+
+ memcpy(sctns->mac.infmtype, sysinfo->type, sizeof(sctns->mac.infmtype));
+ memcpy(sctns->mac.infmmanu, sysinfo->manufacturer, sizeof(sctns->mac.infmmanu));
+ memcpy(sctns->mac.infmpman, sysinfo->plant, sizeof(sctns->mac.infmpman));
+ memcpy(sctns->mac.infmseq, sysinfo->sequence, sizeof(sctns->mac.infmseq));
+
+ sctns->mac.infmval1 |= MAC_ID_VLD | MAC_NAME_VLD;
+}
+
+static void fill_stsi_par(struct sthyi_sctns *sctns,
+ struct sysinfo_2_2_2 *sysinfo)
+{
+ if (stsi(sysinfo, 2, 2, 2))
+ return;
+
+ sctns->par.infppnum = sysinfo->lpar_number;
+ memcpy(sctns->par.infppnam, sysinfo->name, sizeof(sctns->par.infppnam));
+
+ sctns->par.infpval1 |= PAR_ID_VLD;
+}
+
+static void fill_stsi(struct sthyi_sctns *sctns)
+{
+ void *sysinfo;
+
+ /* Errors are handled through the validity bits in the response. */
+ sysinfo = (void *)__get_free_page(GFP_KERNEL);
+ if (!sysinfo)
+ return;
+
+ fill_stsi_mac(sctns, sysinfo);
+ fill_stsi_par(sctns, sysinfo);
+
+ free_pages((unsigned long)sysinfo, 0);
+}
+
+static void fill_diag_mac(struct sthyi_sctns *sctns,
+ struct diag204_x_phys_block *block,
+ void *diag224_buf)
+{
+ int i;
+
+ for (i = 0; i < block->hdr.cpus; i++) {
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdcps++;
+ else
+ sctns->mac.infmscps++;
+ break;
+ case IFL:
+ if (block->cpus[i].weight == DED_WEIGHT)
+ sctns->mac.infmdifl++;
+ else
+ sctns->mac.infmsifl++;
+ break;
+ }
+ }
+ sctns->mac.infmval1 |= MAC_CNT_VLD;
+}
+
+/* Returns a pointer to the the next partition block. */
+static struct diag204_x_part_block *lpar_cpu_inf(struct lpar_cpu_inf *part_inf,
+ bool this_lpar,
+ void *diag224_buf,
+ struct diag204_x_part_block *block)
+{
+ int i, capped = 0, weight_cp = 0, weight_ifl = 0;
+ struct cpu_inf *cpu_inf;
+
+ for (i = 0; i < block->hdr.rcpus; i++) {
+ if (!(block->cpus[i].cflag & DIAG204_CPU_ONLINE))
+ continue;
+
+ switch (cpu_id(block->cpus[i].ctidx, diag224_buf)) {
+ case CP:
+ cpu_inf = &part_inf->cp;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_cp |= block->cpus[i].cur_weight;
+ break;
+ case IFL:
+ cpu_inf = &part_inf->ifl;
+ if (block->cpus[i].cur_weight < DED_WEIGHT)
+ weight_ifl |= block->cpus[i].cur_weight;
+ break;
+ default:
+ continue;
+ }
+
+ if (!this_lpar)
+ continue;
+
+ capped |= block->cpus[i].cflag & DIAG204_CPU_CAPPED;
+ cpu_inf->lpar_cap |= block->cpus[i].cpu_type_cap;
+ cpu_inf->lpar_grp_cap |= block->cpus[i].group_cpu_type_cap;
+
+ if (block->cpus[i].weight == DED_WEIGHT)
+ cpu_inf->cpu_num_ded += 1;
+ else
+ cpu_inf->cpu_num_shd += 1;
+ }
+
+ if (this_lpar && capped) {
+ part_inf->cp.lpar_weight = weight_cp;
+ part_inf->ifl.lpar_weight = weight_ifl;
+ }
+ part_inf->cp.all_weight += weight_cp;
+ part_inf->ifl.all_weight += weight_ifl;
+ return (struct diag204_x_part_block *)&block->cpus[i];
+}
+
+static void fill_diag(struct sthyi_sctns *sctns)
+{
+ int i, r, pages;
+ bool this_lpar;
+ void *diag204_buf;
+ void *diag224_buf = NULL;
+ struct diag204_x_info_blk_hdr *ti_hdr;
+ struct diag204_x_part_block *part_block;
+ struct diag204_x_phys_block *phys_block;
+ struct lpar_cpu_inf lpar_inf = {};
+
+ /* Errors are handled through the validity bits in the response. */
+ pages = diag204((unsigned long)DIAG204_SUBC_RSI |
+ (unsigned long)DIAG204_INFO_EXT, 0, NULL);
+ if (pages <= 0)
+ return;
+
+ diag204_buf = vmalloc(PAGE_SIZE * pages);
+ if (!diag204_buf)
+ return;
+
+ r = diag204((unsigned long)DIAG204_SUBC_STIB7 |
+ (unsigned long)DIAG204_INFO_EXT, pages, diag204_buf);
+ if (r < 0)
+ goto out;
+
+ diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA);
+ if (!diag224_buf || diag224(diag224_buf))
+ goto out;
+
+ ti_hdr = diag204_buf;
+ part_block = diag204_buf + sizeof(*ti_hdr);
+
+ for (i = 0; i < ti_hdr->npar; i++) {
+ /*
+ * For the calling lpar we also need to get the cpu
+ * caps and weights. The time information block header
+ * specifies the offset to the partition block of the
+ * caller lpar, so we know when we process its data.
+ */
+ this_lpar = (void *)part_block - diag204_buf == ti_hdr->this_part;
+ part_block = lpar_cpu_inf(&lpar_inf, this_lpar, diag224_buf,
+ part_block);
+ }
+
+ phys_block = (struct diag204_x_phys_block *)part_block;
+ part_block = diag204_buf + ti_hdr->this_part;
+ if (part_block->hdr.mtid)
+ sctns->par.infpflg1 = PAR_MT_EN;
+
+ sctns->par.infpval1 |= PAR_GRP_VLD;
+ sctns->par.infplgcp = scale_cap(lpar_inf.cp.lpar_grp_cap);
+ sctns->par.infplgif = scale_cap(lpar_inf.ifl.lpar_grp_cap);
+ memcpy(sctns->par.infplgnm, part_block->hdr.hardware_group_name,
+ sizeof(sctns->par.infplgnm));
+
+ sctns->par.infpscps = lpar_inf.cp.cpu_num_shd;
+ sctns->par.infpdcps = lpar_inf.cp.cpu_num_ded;
+ sctns->par.infpsifl = lpar_inf.ifl.cpu_num_shd;
+ sctns->par.infpdifl = lpar_inf.ifl.cpu_num_ded;
+ sctns->par.infpval1 |= PAR_PCNT_VLD;
+
+ sctns->par.infpabcp = scale_cap(lpar_inf.cp.lpar_cap);
+ sctns->par.infpabif = scale_cap(lpar_inf.ifl.lpar_cap);
+ sctns->par.infpval1 |= PAR_ABS_VLD;
+
+ /*
+ * Everything below needs global performance data to be
+ * meaningful.
+ */
+ if (!(ti_hdr->flags & DIAG204_LPAR_PHYS_FLG)) {
+ sctns->hdr.infhflg1 |= HDR_PERF_UNAV;
+ goto out;
+ }
+
+ fill_diag_mac(sctns, phys_block, diag224_buf);
+
+ if (lpar_inf.cp.lpar_weight) {
+ sctns->par.infpwbcp = sctns->mac.infmscps * 0x10000 *
+ lpar_inf.cp.lpar_weight / lpar_inf.cp.all_weight;
+ }
+
+ if (lpar_inf.ifl.lpar_weight) {
+ sctns->par.infpwbif = sctns->mac.infmsifl * 0x10000 *
+ lpar_inf.ifl.lpar_weight / lpar_inf.ifl.all_weight;
+ }
+ sctns->par.infpval1 |= PAR_WGHT_VLD;
+
+out:
+ kfree(diag224_buf);
+ vfree(diag204_buf);
+}
+
+static int sthyi(u64 vaddr)
+{
+ register u64 code asm("0") = 0;
+ register u64 addr asm("2") = vaddr;
+ int cc;
+
+ asm volatile(
+ ".insn rre,0xB2560000,%[code],%[addr]\n"
+ "ipm %[cc]\n"
+ "srl %[cc],28\n"
+ : [cc] "=d" (cc)
+ : [code] "d" (code), [addr] "a" (addr)
+ : "memory", "cc");
+ return cc;
+}
+
+int handle_sthyi(struct kvm_vcpu *vcpu)
+{
+ int reg1, reg2, r = 0;
+ u64 code, addr, cc = 0;
+ struct sthyi_sctns *sctns = NULL;
+
+ /*
+ * STHYI requires extensive locking in the higher hypervisors
+ * and is very computational/memory expensive. Therefore we
+ * ratelimit the executions per VM.
+ */
+ if (!__ratelimit(&vcpu->kvm->arch.sthyi_limit)) {
+ kvm_s390_retry_instr(vcpu);
+ return 0;
+ }
+
+ kvm_s390_get_regs_rre(vcpu, &reg1, &reg2);
+ code = vcpu->run->s.regs.gprs[reg1];
+ addr = vcpu->run->s.regs.gprs[reg2];
+
+ vcpu->stat.instruction_sthyi++;
+ VCPU_EVENT(vcpu, 3, "STHYI: fc: %llu addr: 0x%016llx", code, addr);
+ trace_kvm_s390_handle_sthyi(vcpu, code, addr);
+
+ if (reg1 == reg2 || reg1 & 1 || reg2 & 1 || addr & ~PAGE_MASK)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ if (code & 0xffff) {
+ cc = 3;
+ goto out;
+ }
+
+ /*
+ * If the page has not yet been faulted in, we want to do that
+ * now and not after all the expensive calculations.
+ */
+ r = write_guest(vcpu, addr, reg2, &cc, 1);
+ if (r)
+ return kvm_s390_inject_prog_cond(vcpu, r);
+
+ sctns = (void *)get_zeroed_page(GFP_KERNEL);
+ if (!sctns)
+ return -ENOMEM;
+
+ /*
+ * If we are a guest, we don't want to emulate an emulated
+ * instruction. We ask the hypervisor to provide the data.
+ */
+ if (test_facility(74)) {
+ cc = sthyi((u64)sctns);
+ goto out;
+ }
+
+ fill_hdr(sctns);
+ fill_stsi(sctns);
+ fill_diag(sctns);
+
+out:
+ if (!cc) {
+ r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
+ if (r) {
+ free_page((unsigned long)sctns);
+ return kvm_s390_inject_prog_cond(vcpu, r);
+ }
+ }
+
+ free_page((unsigned long)sctns);
+ vcpu->run->s.regs.gprs[reg2 + 1] = cc ? 4 : 0;
+ kvm_s390_set_psw_cc(vcpu, cc);
+ return r;
+}
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 916834d..4fc9d4e 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -41,7 +41,7 @@ TRACE_EVENT(kvm_s390_skey_related_inst,
TP_fast_assign(
VCPU_ASSIGN_COMMON
),
- VCPU_TP_PRINTK("%s", "first instruction related to skeys on vcpu")
+ VCPU_TP_PRINTK("%s", "storage key related instruction")
);
TRACE_EVENT(kvm_s390_major_guest_pfault,
@@ -185,8 +185,10 @@ TRACE_EVENT(kvm_s390_intercept_prog,
__entry->code = code;
),
- VCPU_TP_PRINTK("intercepted program interruption %04x",
- __entry->code)
+ VCPU_TP_PRINTK("intercepted program interruption %04x (%s)",
+ __entry->code,
+ __print_symbolic(__entry->code,
+ icpt_prog_codes))
);
/*
@@ -412,6 +414,47 @@ TRACE_EVENT(kvm_s390_handle_stsi,
__entry->addr)
);
+TRACE_EVENT(kvm_s390_handle_operexc,
+ TP_PROTO(VCPU_PROTO_COMMON, __u16 ipa, __u32 ipb),
+ TP_ARGS(VCPU_ARGS_COMMON, ipa, ipb),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(__u64, instruction)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->instruction = ((__u64)ipa << 48) |
+ ((__u64)ipb << 16);
+ ),
+
+ VCPU_TP_PRINTK("operation exception on instruction %016llx (%s)",
+ __entry->instruction,
+ __print_symbolic(icpt_insn_decoder(__entry->instruction),
+ icpt_insn_codes))
+ );
+
+TRACE_EVENT(kvm_s390_handle_sthyi,
+ TP_PROTO(VCPU_PROTO_COMMON, u64 code, u64 addr),
+ TP_ARGS(VCPU_ARGS_COMMON, code, addr),
+
+ TP_STRUCT__entry(
+ VCPU_FIELD_COMMON
+ __field(u64, code)
+ __field(u64, addr)
+ ),
+
+ TP_fast_assign(
+ VCPU_ASSIGN_COMMON
+ __entry->code = code;
+ __entry->addr = addr;
+ ),
+
+ VCPU_TP_PRINTK("STHYI fc: %llu addr: %016llx",
+ __entry->code, __entry->addr)
+ );
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */