From b75f4c9afac2604feb971441116c07a24ecca1ec Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Tue, 3 Mar 2015 09:54:41 +0100 Subject: KVM: s390: Zero out current VMDB of STSI before including level3 data. s390 documentation requires words 0 and 10-15 to be reserved and stored as zeros. As we fill out all other fields, we can memset the full structure. Signed-off-by: Ekaterina Tumanova Cc: stable@vger.kernel.org Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3511169..c7fee9d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -467,6 +467,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) for (n = mem->count - 1; n > 0 ; n--) memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + memset(&mem->vm[0], 0, sizeof(mem->vm[0])); mem->vm[0].cpus_total = cpus; mem->vm[0].cpus_configured = cpus; mem->vm[0].cpus_standby = 0; -- cgit v0.10.2 From 261520dcfcba93ca5dfe671b88ffab038cd940c8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Feb 2015 15:53:42 +0100 Subject: KVM: s390: fix handling of write errors in the tpi handler If the I/O interrupt could not be written to the guest provided area (e.g. access exception), a program exception was injected into the guest but "inti" wasn't freed, therefore resulting in a memory leak. In addition, the I/O interrupt wasn't reinjected. Therefore the dequeued interrupt is lost. This patch fixes the problem while cleaning up the function and making the cc and rc logic easier to handle. Signed-off-by: David Hildenbrand Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index c7fee9d..be7138e 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -229,18 +229,19 @@ static int handle_tpi(struct kvm_vcpu *vcpu) struct kvm_s390_interrupt_info *inti; unsigned long len; u32 tpi_data[3]; - int cc, rc; + int rc; u64 addr; - rc = 0; addr = kvm_s390_get_base_disp_s(vcpu); if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - cc = 0; + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); - if (!inti) - goto no_interrupt; - cc = 1; + if (!inti) { + kvm_s390_set_psw_cc(vcpu, 0); + return 0; + } + tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr; tpi_data[1] = inti->io.io_int_parm; tpi_data[2] = inti->io.io_int_word; @@ -251,30 +252,35 @@ static int handle_tpi(struct kvm_vcpu *vcpu) */ len = sizeof(tpi_data) - 4; rc = write_guest(vcpu, addr, &tpi_data, len); - if (rc) - return kvm_s390_inject_prog_cond(vcpu, rc); + if (rc) { + rc = kvm_s390_inject_prog_cond(vcpu, rc); + goto reinject_interrupt; + } } else { /* * Store the three-word I/O interruption code into * the appropriate lowcore area. */ len = sizeof(tpi_data); - if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) + if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) { + /* failed writes to the low core are not recoverable */ rc = -EFAULT; + goto reinject_interrupt; + } } + + /* irq was successfully handed to the guest */ + kfree(inti); + kvm_s390_set_psw_cc(vcpu, 1); + return 0; +reinject_interrupt: /* * If we encounter a problem storing the interruption code, the * instruction is suppressed from the guest's view: reinject the * interrupt. */ - if (!rc) - kfree(inti); - else - kvm_s390_reinject_io_int(vcpu->kvm, inti); -no_interrupt: - /* Set condition code and we're done. */ - if (!rc) - kvm_s390_set_psw_cc(vcpu, cc); + kvm_s390_reinject_io_int(vcpu->kvm, inti); + /* don't set the cc, a pgm irq was injected or we drop to user space */ return rc ? -EFAULT : 0; } -- cgit v0.10.2 From 15462e37ca848abac7477dece65f8af25febd744 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 4 Feb 2015 15:59:11 +0100 Subject: KVM: s390: reinjection of irqs can fail in the tpi handler The reinjection of an I/O interrupt can fail if the list is at the limit and between the dequeue and the reinjection, another I/O interrupt is injected (e.g. if user space floods kvm with I/O interrupts). This patch avoids this memory leak and returns -EFAULT in this special case. This error is not recoverable, so let's fail hard. This can later be avoided by not dequeuing the interrupt but working directly on the locked list. Signed-off-by: David Hildenbrand Cc: stable@vger.kernel.org # 3.16+ Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 073b5f3..e7a46e8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1332,10 +1332,10 @@ int kvm_s390_inject_vm(struct kvm *kvm, return rc; } -void kvm_s390_reinject_io_int(struct kvm *kvm, +int kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { - __inject_vm(kvm, inti); + return __inject_vm(kvm, inti); } int s390int_to_s390irq(struct kvm_s390_interrupt *s390int, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c34109a..6995a30 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -151,8 +151,8 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); -void kvm_s390_reinject_io_int(struct kvm *kvm, - struct kvm_s390_interrupt_info *inti); +int kvm_s390_reinject_io_int(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); /* implemented in intercept.c */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index be7138e..b982fbc 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -279,7 +279,10 @@ reinject_interrupt: * instruction is suppressed from the guest's view: reinject the * interrupt. */ - kvm_s390_reinject_io_int(vcpu->kvm, inti); + if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) { + kfree(inti); + rc = -EFAULT; + } /* don't set the cc, a pgm irq was injected or we drop to user space */ return rc ? -EFAULT : 0; } -- cgit v0.10.2 From a9a846fd5c1723820c97cef56989ea14eea4b30e Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 5 Feb 2015 09:06:56 +0100 Subject: KVM: s390: Nullify instruction for certain program exceptions When certain program exceptions (e.g. DAT access exceptions) occur, the current instruction has to be nullified, i.e. the old PSW that gets written into the low-core has to point to the beginning of the instruction again, and not to the beginning of the next instruction. Thus we have to rewind the PSW before writing it into the low-core. The list of nullifying exceptions can be found in the POP, chapter 6, figure 6-1 ("Interruption Action"). Signed-off-by: Thomas Huth Reviewed-by: Jens Freimann Reviewed-by: David Hildenbrand Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index e7a46e8..98a3131 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -484,7 +484,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_pgm_info pgm_info; - int rc = 0; + int rc = 0, nullifying = false; u16 ilc = get_ilc(vcpu); spin_lock(&li->lock); @@ -509,6 +509,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) case PGM_LX_TRANSLATION: case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY: + nullifying = true; + /* fall through */ case PGM_SPACE_SWITCH: rc = put_guest_lc(vcpu, pgm_info.trans_exc_code, (u64 *)__LC_TRANS_EXC_CODE); @@ -521,6 +523,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) case PGM_EXTENDED_AUTHORITY: rc = put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); + nullifying = true; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: @@ -534,6 +537,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *)__LC_EXC_ACCESS_ID); rc |= put_guest_lc(vcpu, pgm_info.op_access_id, (u8 *)__LC_OP_ACCESS_ID); + nullifying = true; break; case PGM_MONITOR: rc = put_guest_lc(vcpu, pgm_info.mon_class_nr, @@ -551,6 +555,15 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, pgm_info.exc_access_id, (u8 *)__LC_EXC_ACCESS_ID); break; + case PGM_STACK_FULL: + case PGM_STACK_EMPTY: + case PGM_STACK_SPECIFICATION: + case PGM_STACK_TYPE: + case PGM_STACK_OPERATION: + case PGM_TRACE_TABEL: + case PGM_CRYPTO_OPERATION: + nullifying = true; + break; } if (pgm_info.code & PGM_PER) { @@ -564,6 +577,9 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) (u8 *) __LC_PER_ACCESS_ID); } + if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST) + kvm_s390_rewind_psw(vcpu, ilc); + rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); -- cgit v0.10.2 From 492d8642eaefbd47f6fb0e8265f058c02720e5c8 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 10 Feb 2015 16:11:01 +0100 Subject: KVM: s390: Forward PSW to next instruction for addressing exceptions When the SIE exited by a DAT access exceptions which we can not resolve, the guest tried to access a page which is out of bounds and can not be paged-in. In this case we have to signal the bad access by injecting an address exception. However, address exceptions are either suppressing or terminating, i.e. the PSW has to point to the next instruction when the exception is delivered. Since the originating DAT access exception is nullifying, the PSW still points to the offending instruction instead, so we've got to forward the PSW to the next instruction. Having fixed this issue, we can now also enable the TPROT interpretation facility again which had been disabled because of this problem. Signed-off-by: Thomas Huth Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f6579cf..7ac40aa 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1148,8 +1148,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp_has_sigpif()) vcpu->arch.sie_block->eca |= 0x10000000U; - vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE | - ICTL_TPROT; + vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (kvm_s390_cmma_enabled(vcpu->kvm)) { rc = kvm_s390_vcpu_setup_cmma(vcpu); @@ -1726,6 +1725,31 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) return 0; } +static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) +{ + psw_t *psw = &vcpu->arch.sie_block->gpsw; + u8 opcode; + int rc; + + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + + /* + * We want to inject an addressing exception, which is defined as a + * suppressing or terminating exception. However, since we came here + * by a DAT access exception, the PSW still points to the faulting + * instruction since DAT exceptions are nullifying. So we've got + * to look up the current opcode to get the length of the instruction + * to be able to forward the PSW. + */ + rc = read_guest(vcpu, psw->addr, &opcode, 1); + if (rc) + return kvm_s390_inject_prog_cond(vcpu, rc); + psw->addr = __rewind_psw(*psw, -insn_length(opcode)); + + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); +} + static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { int rc = -1; @@ -1757,11 +1781,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) } } - if (rc == -1) { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - trace_kvm_s390_sie_fault(vcpu); - rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } + if (rc == -1) + rc = vcpu_post_run_fault_in_sie(vcpu); memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); -- cgit v0.10.2 From 33b412acd32d403a8de9511f236f9b4f31551868 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 11 Feb 2015 10:38:46 +0100 Subject: KVM: s390: Use insn_length() to calculate length of instruction The common s390 function insn_length() results in slightly smaller (and thus hopefully faster) code than the calculation of the instruction length via a lookup-table. So let's use that function in the interrupt delivery code, too. Signed-off-by: Thomas Huth Reviewed-by: Jens Freimann Reviewed-by: David Hildenbrand Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 98a3131..9561e1d 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1,7 +1,7 @@ /* * handling kvm guest interrupts * - * Copyright IBM Corp. 2008,2014 + * Copyright IBM Corp. 2008, 2015 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include "kvm-s390.h" @@ -265,8 +266,6 @@ static void __set_intercept_indicator(struct kvm_vcpu *vcpu, static u16 get_ilc(struct kvm_vcpu *vcpu) { - const unsigned short table[] = { 2, 4, 4, 6 }; - switch (vcpu->arch.sie_block->icptcode) { case ICPT_INST: case ICPT_INSTPROGI: @@ -274,7 +273,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu) case ICPT_PARTEXEC: case ICPT_IOINST: /* last instruction only stored for these icptcodes */ - return table[vcpu->arch.sie_block->ipa >> 14]; + return insn_length(vcpu->arch.sie_block->ipa >> 8); case ICPT_PROGI: return vcpu->arch.sie_block->pgmilc; default: -- cgit v0.10.2 From 91520f1af8a01d349d19911238fc3dbed3fa58d2 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 27 Feb 2015 14:32:11 +0100 Subject: KVM: s390: perform vcpu model setup in a function The function kvm_s390_vcpu_setup_model() now performs all cpu model realated setup tasks for a vcpu. Besides cpuid and ibc initialization, facility list assignment takes place during the setup step as well. The model setup has been pulled to the begin of vcpu setup to allow kvm facility tests. There is no need to protect the cpu model setup with a lock since the attributes can't be changed anymore as soon the first vcpu is online. Signed-off-by: Michael Mueller Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7ac40aa..f3517e7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1130,6 +1130,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) return 0; } +static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) +{ + struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model; + + vcpu->arch.cpu_id = model->cpu_id; + vcpu->arch.sie_block->ibc = model->ibc; + vcpu->arch.sie_block->fac = (int) (long) model->fac->list; +} + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { int rc = 0; @@ -1138,6 +1147,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) CPUSTAT_SM | CPUSTAT_STOPPED | CPUSTAT_GED); + kvm_s390_vcpu_setup_model(vcpu); + vcpu->arch.sie_block->ecb = 6; if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73)) vcpu->arch.sie_block->ecb |= 0x10; @@ -1158,11 +1169,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup; - mutex_lock(&vcpu->kvm->lock); - vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id; - vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc; - mutex_unlock(&vcpu->kvm->lock); - kvm_s390_vcpu_crypto_setup(vcpu); return rc; @@ -1205,7 +1211,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); } - vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->list; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; -- cgit v0.10.2 From 16b0fc13d60293ce073c8f9baa53469744b4d74a Mon Sep 17 00:00:00 2001 From: Yannick Guerrini Date: Thu, 26 Feb 2015 23:16:44 +0100 Subject: KVM: s390: Fix trivial typo in comments Change 'architecuture' to 'architecture' Signed-off-by: Yannick Guerrini Message-Id: <1424989004-14412-1-git-send-email-yguerrini@tomshardware.fr> Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 267523c..633fe9b 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -333,7 +333,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @write: indicates if access is a write access * * Translate a guest virtual address into a guest absolute address by means - * of dynamic address translation as specified by the architecuture. + * of dynamic address translation as specified by the architecture. * If the resulting absolute address is not available in the configuration * an addressing exception is indicated and @gpa will not be changed. * -- cgit v0.10.2 From 16a0c4c3aa68423b306fb82f6b9a2794e76f6fc0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 21 Nov 2014 15:39:06 +0100 Subject: KVM: s390: fix instruction interception trace point trace-cmd fails to parse the instruction interception trace point: "Error: expected type 5 but read 4 failed to read event print fmt for kvm_s390_intercept_instruction" The result is an unformatted string in the output, with a warning: "kvm_s390_intercept_instruction: [FAILED TO PARSE]..." So let's add parentheses around the instruction parser macro to fix the format parsing. Acked-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h index d4096fd..ee69c08 100644 --- a/arch/s390/include/uapi/asm/sie.h +++ b/arch/s390/include/uapi/asm/sie.h @@ -230,7 +230,7 @@ * and returns a key, which can be used to find a mnemonic name * of the instruction in the icpt_insn_codes table. */ -#define icpt_insn_decoder(insn) \ +#define icpt_insn_decoder(insn) ( \ INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \ INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \ INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \ @@ -239,6 +239,6 @@ INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \ INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \ INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \ - INSN_DECODE(insn) + INSN_DECODE(insn)) #endif /* _UAPI_ASM_S390_SIE_H */ -- cgit v0.10.2 From 1f289a8429022f112be9817a81ff07308eb78a9c Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Tue, 3 Mar 2015 19:05:43 +0300 Subject: KVM: s390: Use the read_guest_abs() in guest debug functions The guest debug functions work on absolute addresses and should use the read_guest_abs() function rather than general read_guest() that works with logical addresses. Cc: David Hildenbrand Signed-off-by: Alexander Yarygin Reviewed-by: David Hildenbrand Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index 3e8d409..e97b345 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -191,8 +191,8 @@ static int __import_wp_info(struct kvm_vcpu *vcpu, if (!wp_info->old_data) return -ENOMEM; /* try to backup the original value */ - ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data, - wp_info->len); + ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data, + wp_info->len); if (ret) { kfree(wp_info->old_data); wp_info->old_data = NULL; @@ -362,8 +362,8 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu) continue; /* refetch the wp data and compare it to the old value */ - if (!read_guest(vcpu, wp_info->phys_addr, temp, - wp_info->len)) { + if (!read_guest_abs(vcpu, wp_info->phys_addr, temp, + wp_info->len)) { if (memcmp(temp, wp_info->old_data, wp_info->len)) { kfree(temp); return wp_info; -- cgit v0.10.2 From 68c557501b008515cb86c9a36c75f4e82e14a819 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Mon, 9 Jun 2014 10:57:26 -0400 Subject: KVM: s390: Allocate and save/restore vector registers Define and allocate space for both the host and guest views of the vector registers for a given vcpu. The 32 vector registers occupy 128 bits each (512 bytes total), but architecturally are paired with 512 additional bytes of reserved space for future expansion. The kvm_sync_regs structs containing the registers are union'ed with 1024 bytes of padding in the common kvm_run struct. The addition of 1024 bytes of new register information clearly exceeds the existing union, so an expansion of that padding is required. When changing environments, we need to appropriately save and restore the vector registers viewed by both the host and guest, into and out of the sync_regs space. The floating point registers overlay the upper half of vector registers 0-15, so there's a bit of data duplication here that needs to be carefully avoided. Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b112efc..ee47998e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3248,3 +3248,13 @@ All other orders will be handled completely in user space. Only privileged operation exceptions will be checked for in the kernel (or even in the hardware prior to interception). If this capability is not enabled, the old way of handling SIGP orders is used (partially in kernel and user space). + +7.3 KVM_CAP_S390_VECTOR_REGISTERS + +Architectures: s390 +Parameters: none +Returns: 0 on success, negative value on error + +Allows use of the vector registers introduced with z13 processor, and +provides for the synchronization between host and user space. Will +return -EINVAL if the machine does not support vectors. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f407bbf..3449a38 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -183,11 +183,17 @@ struct kvm_s390_itdb { __u8 data[256]; } __packed; +struct kvm_s390_vregs { + __vector128 vrs[32]; + __u8 reserved200[512]; /* for future vector expansion */ +} __packed; + struct sie_page { struct kvm_s390_sie_block sie_block; __u8 reserved200[1024]; /* 0x0200 */ struct kvm_s390_itdb itdb; /* 0x0600 */ - __u8 reserved700[2304]; /* 0x0700 */ + __u8 reserved700[1280]; /* 0x0700 */ + struct kvm_s390_vregs vregs; /* 0x0c00 */ } __packed; struct kvm_vcpu_stat { @@ -465,6 +471,7 @@ struct kvm_vcpu_arch { s390_fp_regs host_fpregs; unsigned int host_acrs[NUM_ACRS]; s390_fp_regs guest_fpregs; + struct kvm_s390_vregs *host_vregs; struct kvm_s390_local_interrupt local_int; struct hrtimer ckc_timer; struct kvm_s390_pgm_info pgm; @@ -551,6 +558,7 @@ struct kvm_arch{ int css_support; int use_irqchip; int use_cmma; + int use_vectors; int user_cpu_state_ctrl; int user_sigp; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index 9c77e60..ef1a5fc 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -150,6 +150,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_CRS (1UL << 3) #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) +#define KVM_SYNC_VRS (1UL << 6) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -164,6 +165,9 @@ struct kvm_sync_regs { __u64 pft; /* pfault token [PFAULT] */ __u64 pfs; /* pfault select [PFAULT] */ __u64 pfc; /* pfault compare [PFAULT] */ + __u64 vrs[32][2]; /* vector registers */ + __u8 reserved[512]; /* for future vector expansion */ + __u32 fpc; /* only valid with vector registers */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f3517e7..a8fe3ab 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -185,6 +185,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_COW: r = MACHINE_HAS_ESOP; break; + case KVM_CAP_S390_VECTOR_REGISTERS: + r = MACHINE_HAS_VX; + break; default: r = 0; } @@ -265,6 +268,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) kvm->arch.user_sigp = 1; r = 0; break; + case KVM_CAP_S390_VECTOR_REGISTERS: + kvm->arch.use_vectors = MACHINE_HAS_VX; + r = MACHINE_HAS_VX ? 0 : -EINVAL; + break; default: r = -EINVAL; break; @@ -942,6 +949,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.css_support = 0; kvm->arch.use_irqchip = 0; + kvm->arch.use_vectors = 0; kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); @@ -1035,6 +1043,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 129)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; if (kvm_is_ucontrol(vcpu->kvm)) return __kvm_ucontrol_vcpu_init(vcpu); @@ -1045,10 +1055,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); - save_fp_regs(vcpu->arch.host_fpregs.fprs); + if (vcpu->kvm->arch.use_vectors) + save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); + else + save_fp_regs(vcpu->arch.host_fpregs.fprs); save_access_regs(vcpu->arch.host_acrs); - restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + if (vcpu->kvm->arch.use_vectors) { + restore_fp_ctl(&vcpu->run->s.regs.fpc); + restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + } else { + restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + restore_fp_regs(vcpu->arch.guest_fpregs.fprs); + } restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); @@ -1058,11 +1076,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); + if (vcpu->kvm->arch.use_vectors) { + save_fp_ctl(&vcpu->run->s.regs.fpc); + save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + } else { + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + } save_access_regs(vcpu->run->s.regs.acrs); restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); - restore_fp_regs(vcpu->arch.host_fpregs.fprs); + if (vcpu->kvm->arch.use_vectors) + restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); + else + restore_fp_regs(vcpu->arch.host_fpregs.fprs); restore_access_regs(vcpu->arch.host_acrs); } @@ -1196,6 +1222,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block = &sie_page->sie_block; vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.host_vregs = &sie_page->vregs; vcpu->arch.sie_block->icpua = id; if (!kvm_is_ucontrol(kvm)) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8055706..82634a4 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -324,7 +324,7 @@ struct kvm_run { __u64 kvm_dirty_regs; union { struct kvm_sync_regs regs; - char padding[1024]; + char padding[2048]; } s; }; @@ -760,6 +760,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_PPC_ENABLE_HCALL 104 #define KVM_CAP_CHECK_EXTENSION_VM 105 #define KVM_CAP_S390_USER_SIGP 106 +#define KVM_CAP_S390_VECTOR_REGISTERS 107 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 403c8648cb191ef88555bfa72deaa969c0367f41 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Mon, 2 Feb 2015 15:01:06 -0500 Subject: KVM: s390: Vector exceptions A new exception type for vector instructions is introduced with the new processor, but is handled exactly like a Data Exception which is already handled by the system. Signed-off-by: Eric Farman Reviewed-by: David Hildenbrand Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3449a38..fb1fd39 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -276,6 +276,7 @@ struct kvm_vcpu_stat { #define PGM_SPECIAL_OPERATION 0x13 #define PGM_OPERAND 0x15 #define PGM_TRACE_TABEL 0x16 +#define PGM_VECTOR_PROCESSING 0x1b #define PGM_SPACE_SWITCH 0x1c #define PGM_HFP_SQUARE_ROOT 0x1d #define PGM_PC_TRANSLATION_SPEC 0x1f diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index bebd215..08ae10a 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -165,6 +165,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu, pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn; pgm_info->mon_code = vcpu->arch.sie_block->tecmc; break; + case PGM_VECTOR_PROCESSING: case PGM_DATA: pgm_info->data_exc_code = vcpu->arch.sie_block->dxc; break; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 9561e1d..036d375 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -544,6 +544,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) rc |= put_guest_lc(vcpu, pgm_info.mon_code, (u64 *)__LC_MON_CODE); break; + case PGM_VECTOR_PROCESSING: case PGM_DATA: rc = put_guest_lc(vcpu, pgm_info.data_exc_code, (u32 *)__LC_DATA_EXC_CODE); -- cgit v0.10.2 From cd7b4b61063eb55ab7a5f96523e028c9e0914694 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Thu, 12 Feb 2015 09:06:34 -0500 Subject: KVM: s390: Add new SIGP order to kernel counters The new SIGP order Store Additional Status at Address is totally handled by user space, but we should still record the occurrence of this order in the kernel code. Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index fb1fd39..3fe2597 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -244,6 +244,7 @@ struct kvm_vcpu_stat { u32 instruction_sigp_stop; u32 instruction_sigp_stop_store_status; u32 instruction_sigp_store_status; + u32 instruction_sigp_store_adtl_status; u32 instruction_sigp_arch; u32 instruction_sigp_prefix; u32 instruction_sigp_restart; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a8fe3ab..c0ae03a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -87,6 +87,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, + { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 23b1e86..755a733 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -393,6 +393,9 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code) case SIGP_STORE_STATUS_AT_ADDRESS: vcpu->stat.instruction_sigp_store_status++; break; + case SIGP_STORE_ADDITIONAL_STATUS: + vcpu->stat.instruction_sigp_store_adtl_status++; + break; case SIGP_SET_PREFIX: vcpu->stat.instruction_sigp_prefix++; break; -- cgit v0.10.2 From bc17de7c966504b287a1dceb76a523d8b7816731 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Mon, 14 Apr 2014 16:01:09 -0400 Subject: KVM: s390: Machine Check Store additional status in the machine check handler, in order to collect status (such as vector registers) that is not defined by store status. Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Reviewed-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index e07e916..8dc4db1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -171,6 +171,7 @@ int main(void) #else /* CONFIG_32BIT */ DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code)); DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address)); + DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr)); DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2)); DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area)); DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste)); diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 036d375..2afec60 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -351,6 +351,7 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_mchk_info mchk; + unsigned long adtl_status_addr; int rc; spin_lock(&li->lock); @@ -371,6 +372,9 @@ static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) mchk.cr14, mchk.mcic); rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); + rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, + &adtl_status_addr, sizeof(unsigned long)); + rc |= kvm_s390_vcpu_store_adtl_status(vcpu, adtl_status_addr); rc |= put_guest_lc(vcpu, mchk.mcic, (u64 __user *) __LC_MCCK_CODE); rc |= put_guest_lc(vcpu, mchk.failing_storage_address, diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c0ae03a..0c045cf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2031,6 +2031,35 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return kvm_s390_store_status_unloaded(vcpu, addr); } +/* + * store additional status at address + */ +int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, + unsigned long gpa) +{ + /* Only bits 0-53 are used for address formation */ + if (!(gpa & ~0x3ff)) + return 0; + + return write_guest_abs(vcpu, gpa & ~0x3ff, + (void *)&vcpu->run->s.regs.vrs, 512); +} + +int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + if (!test_kvm_facility(vcpu->kvm, 129)) + return 0; + + /* + * The guest VXRS are in the host VXRs due to the lazy + * copying in vcpu load/put. Let's update our copies before we save + * it into the save area. + */ + save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); + + return kvm_s390_store_adtl_status_unloaded(vcpu, addr); +} + static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu) { kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 6995a30..fda3f31 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -177,7 +177,10 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable); int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu, + unsigned long addr); int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu); void s390_vcpu_block(struct kvm_vcpu *vcpu); -- cgit v0.10.2 From 13211ea7b47db3d8ee2ff258a9a973a6d3aa3d43 Mon Sep 17 00:00:00 2001 From: Eric Farman Date: Wed, 30 Apr 2014 13:39:46 -0400 Subject: KVM: s390: Enable vector support for capable guest We finally have all the pieces in place, so let's include the vector facility bit in the mask of available hardware facilities for the guest to recognize. Also, enable the vector functionality in the guest control blocks, to avoid a possible vector data exception that would otherwise occur when a vector instruction is issued by the guest operating system. Signed-off-by: Eric Farman Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3fe2597..347a333 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -172,7 +172,9 @@ struct kvm_s390_sie_block { __u32 fac; /* 0x01a0 */ __u8 reserved1a4[20]; /* 0x01a4 */ __u64 cbrlo; /* 0x01b8 */ - __u8 reserved1c0[30]; /* 0x01c0 */ + __u8 reserved1c0[8]; /* 0x01c0 */ + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0c045cf..02e03c8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -104,6 +104,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { unsigned long kvm_s390_fac_list_mask[] = { 0xff82fffbf4fc2000UL, 0x005c000000000000UL, + 0x4000000000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) @@ -1186,6 +1187,10 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp_has_sigpif()) vcpu->arch.sie_block->eca |= 0x10000000U; + if (vcpu->kvm->arch.use_vectors) { + vcpu->arch.sie_block->eca |= 0x00020000; + vcpu->arch.sie_block->ecd |= 0x20000000; + } vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (kvm_s390_cmma_enabled(vcpu->kvm)) { -- cgit v0.10.2 From 668f198f40d1cc89c2330c6ad56f3b397b05a0bc Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Fri, 20 Feb 2015 16:02:10 -0600 Subject: KVM: SVM: use kvm_register_write()/read() KVM has nice wrappers to access the register values, clean up a few places that should use them but currently do not. Signed-off-by: David Kaplan [forward port and testing] Signed-off-by: Joel Schopp Acked-by: Borislav Petkov Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index cc618c8..93dda3c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2757,11 +2757,11 @@ static int invlpga_interception(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu = &svm->vcpu; - trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX], - vcpu->arch.regs[VCPU_REGS_RAX]); + trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX), + kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */ - kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]); + kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; skip_emulated_instruction(&svm->vcpu); @@ -2770,7 +2770,7 @@ static int invlpga_interception(struct vcpu_svm *svm) static int skinit_interception(struct vcpu_svm *svm) { - trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]); + trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX)); kvm_queue_exception(&svm->vcpu, UD_VECTOR); return 1; @@ -3133,7 +3133,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data) static int rdmsr_interception(struct vcpu_svm *svm) { - u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; + u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); u64 data; if (svm_get_msr(&svm->vcpu, ecx, &data)) { @@ -3142,8 +3142,8 @@ static int rdmsr_interception(struct vcpu_svm *svm) } else { trace_kvm_msr_read(ecx, data); - svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff; - svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; + kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff); + kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32); svm->next_rip = kvm_rip_read(&svm->vcpu) + 2; skip_emulated_instruction(&svm->vcpu); } @@ -3246,9 +3246,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) static int wrmsr_interception(struct vcpu_svm *svm) { struct msr_data msr; - u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; - u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u) - | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); + u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX); + u64 data = kvm_read_edx_eax(&svm->vcpu); msr.data = data; msr.index = ecx; -- cgit v0.10.2 From bfda0e849102108eeedbeb71077859cdc853b7cd Mon Sep 17 00:00:00 2001 From: Kevin Mulvey Date: Fri, 20 Feb 2015 08:21:36 -0500 Subject: KVM: white space formatting in kvm_main.c Better alignment of loop using tabs rather than spaces, this makes checkpatch.pl happier. Signed-off-by: Kevin Mulvey Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a109370..36ab89d 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1742,7 +1742,7 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) int offset = offset_in_page(gpa); int ret; - while ((seg = next_segment(len, offset)) != 0) { + while ((seg = next_segment(len, offset)) != 0) { ret = kvm_clear_guest_page(kvm, gfn, offset, seg); if (ret < 0) return ret; -- cgit v0.10.2 From ae548c5c806497b3495019f550f93dee03f6c15a Mon Sep 17 00:00:00 2001 From: Kevin Mulvey Date: Fri, 20 Feb 2015 08:21:37 -0500 Subject: KVM: fix checkpatch.pl errors in kvm/irqchip.c Fix whitespace around while Signed-off-by: Kevin Mulvey Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 7f256f3..1d56a90 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -105,7 +105,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, i = kvm_irq_map_gsi(kvm, irq_set, irq); srcu_read_unlock(&kvm->irq_srcu, idx); - while(i--) { + while (i--) { int r; r = irq_set[i].set(&irq_set[i], kvm, irq_source_id, level, line_status); -- cgit v0.10.2 From 5bda6eed2e3626f40f2602a8fed72007f1fafaf8 Mon Sep 17 00:00:00 2001 From: Wincy Van Date: Wed, 24 Dec 2014 11:14:29 +0800 Subject: KVM: ioapic: Record edge-triggered interrupts delivery status This patch fixes the bug discussed in https://www.mail-archive.com/kvm@vger.kernel.org/msg109813.html This patch uses a new field named irr_delivered to record the delivery status of edge-triggered interrupts, and clears the delivered interrupts in kvm_get_ioapic. So it has the same effect of commit 0bc830b05c667218d703f2026ec866c49df974fc ("KVM: ioapic: clear IRR for edge-triggered interrupts at delivery") while avoids the bug of Windows guests. Signed-off-by: Wincy Van Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index b1947e0..a2e9d96 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq, old_irr = ioapic->irr; ioapic->irr |= mask; + if (edge) + ioapic->irr_delivered &= ~mask; if ((edge && old_irr == ioapic->irr) || (!edge && entry.fields.remote_irr)) { ret = 0; @@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status) irqe.shorthand = 0; if (irqe.trig_mode == IOAPIC_EDGE_TRIG) - ioapic->irr &= ~(1 << irq); + ioapic->irr_delivered |= 1 << irq; if (irq == RTC_GSI && line_status) { /* @@ -597,6 +599,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS; ioapic->ioregsel = 0; ioapic->irr = 0; + ioapic->irr_delivered = 0; ioapic->id = 0; memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS); rtc_irq_eoi_tracking_reset(ioapic); @@ -654,6 +657,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(state, ioapic, sizeof(struct kvm_ioapic_state)); + state->irr &= ~ioapic->irr_delivered; spin_unlock(&ioapic->lock); return 0; } @@ -667,6 +671,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state) spin_lock(&ioapic->lock); memcpy(ioapic, state, sizeof(struct kvm_ioapic_state)); ioapic->irr = 0; + ioapic->irr_delivered = 0; update_handled_vectors(ioapic); kvm_vcpu_request_scan_ioapic(kvm); kvm_ioapic_inject_all(ioapic, state->irr); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index c2e36d9..38d8402 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -77,6 +77,7 @@ struct kvm_ioapic { struct rtc_status rtc_status; struct delayed_work eoi_inject; u32 irq_eoi[IOAPIC_NUM_PINS]; + u32 irr_delivered; }; #ifdef DEBUG -- cgit v0.10.2 From 0fa9778895635ab3824caf34fd573562dd2b999c Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Feb 2015 16:50:10 +0100 Subject: KVM: make halt_poll_ns static halt_poll_ns is used only locally. Make it static. Signed-off-by: Christian Borntraeger Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 36ab89d..b1d6a16 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -66,7 +66,7 @@ MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); -unsigned int halt_poll_ns = 0; +static unsigned int halt_poll_ns; module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* -- cgit v0.10.2 From 548ef28449c0c06f92194c40ff0eaed248cb4b75 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 24 Feb 2015 21:29:25 +0100 Subject: KVM: Get rid of kvm_kvfree() kvm_kvfree() provides exactly the same functionality as the new common kvfree() function - so let's simply replace the kvm function with the common function. Signed-off-by: Thomas Huth Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..c5f7e03 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7429,7 +7429,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) { - kvm_kvfree(free->arch.rmap[i]); + kvfree(free->arch.rmap[i]); free->arch.rmap[i] = NULL; } if (i == 0) @@ -7437,7 +7437,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, if (!dont || free->arch.lpage_info[i - 1] != dont->arch.lpage_info[i - 1]) { - kvm_kvfree(free->arch.lpage_info[i - 1]); + kvfree(free->arch.lpage_info[i - 1]); free->arch.lpage_info[i - 1] = NULL; } } @@ -7491,12 +7491,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, out_free: for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) { - kvm_kvfree(slot->arch.rmap[i]); + kvfree(slot->arch.rmap[i]); slot->arch.rmap[i] = NULL; if (i == 0) continue; - kvm_kvfree(slot->arch.lpage_info[i - 1]); + kvfree(slot->arch.lpage_info[i - 1]); slot->arch.lpage_info[i - 1] = NULL; } return -ENOMEM; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d12b210..0f574eb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -658,7 +658,6 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); void *kvm_kvzalloc(unsigned long size); -void kvm_kvfree(const void *addr); #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b1d6a16..07064dc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -539,20 +539,12 @@ void *kvm_kvzalloc(unsigned long size) return kzalloc(size, GFP_KERNEL); } -void kvm_kvfree(const void *addr) -{ - if (is_vmalloc_addr(addr)) - vfree(addr); - else - kfree(addr); -} - static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) { if (!memslot->dirty_bitmap) return; - kvm_kvfree(memslot->dirty_bitmap); + kvfree(memslot->dirty_bitmap); memslot->dirty_bitmap = NULL; } -- cgit v0.10.2 From 893bdbf16574e781504ea2a767ff8919d1394e52 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:19 +0800 Subject: KVM: Fix WARNINGs for 'sizeof(X)' instead of 'sizeof X' in kvm_main.c There are many WARNINGs like this: WARNING: sizeof tr should be sizeof(tr) + if (copy_from_user(&tr, argp, sizeof tr)) In kvm_main.c many places are using 'sizeof(X)', and the other places are using 'sizeof X', while the kernel recommands to use 'sizeof(X)', so this patch will replace all 'sizeof X' to 'sizeof(X)' to make them consistent and at the same time to reduce the WARNINGs noise when we are checking new patches. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 07064dc..38738c2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2197,7 +2197,7 @@ out_free1: if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &mp_state, sizeof mp_state)) + if (copy_to_user(argp, &mp_state, sizeof(mp_state))) goto out; r = 0; break; @@ -2206,7 +2206,7 @@ out_free1: struct kvm_mp_state mp_state; r = -EFAULT; - if (copy_from_user(&mp_state, argp, sizeof mp_state)) + if (copy_from_user(&mp_state, argp, sizeof(mp_state))) goto out; r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, &mp_state); break; @@ -2215,13 +2215,13 @@ out_free1: struct kvm_translation tr; r = -EFAULT; - if (copy_from_user(&tr, argp, sizeof tr)) + if (copy_from_user(&tr, argp, sizeof(tr))) goto out; r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr); if (r) goto out; r = -EFAULT; - if (copy_to_user(argp, &tr, sizeof tr)) + if (copy_to_user(argp, &tr, sizeof(tr))) goto out; r = 0; break; @@ -2230,7 +2230,7 @@ out_free1: struct kvm_guest_debug dbg; r = -EFAULT; - if (copy_from_user(&dbg, argp, sizeof dbg)) + if (copy_from_user(&dbg, argp, sizeof(dbg))) goto out; r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, &dbg); break; @@ -2244,14 +2244,14 @@ out_free1: if (argp) { r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, - sizeof kvm_sigmask)) + sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof sigset) + if (kvm_sigmask.len != sizeof(sigset)) goto out; r = -EFAULT; if (copy_from_user(&sigset, sigmask_arg->sigset, - sizeof sigset)) + sizeof(sigset))) goto out; p = &sigset; } @@ -2313,14 +2313,14 @@ static long kvm_vcpu_compat_ioctl(struct file *filp, if (argp) { r = -EFAULT; if (copy_from_user(&kvm_sigmask, argp, - sizeof kvm_sigmask)) + sizeof(kvm_sigmask))) goto out; r = -EINVAL; - if (kvm_sigmask.len != sizeof csigset) + if (kvm_sigmask.len != sizeof(csigset)) goto out; r = -EFAULT; if (copy_from_user(&csigset, sigmask_arg->sigset, - sizeof csigset)) + sizeof(csigset))) goto out; sigset_from_compat(&sigset, &csigset); r = kvm_vcpu_ioctl_set_sigmask(vcpu, &sigset); @@ -2516,7 +2516,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (copy_from_user(&kvm_userspace_mem, argp, - sizeof kvm_userspace_mem)) + sizeof(kvm_userspace_mem))) goto out; r = kvm_vm_ioctl_set_memory_region(kvm, &kvm_userspace_mem); @@ -2526,7 +2526,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_dirty_log log; r = -EFAULT; - if (copy_from_user(&log, argp, sizeof log)) + if (copy_from_user(&log, argp, sizeof(log))) goto out; r = kvm_vm_ioctl_get_dirty_log(kvm, &log); break; @@ -2535,7 +2535,7 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_REGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof zone)) + if (copy_from_user(&zone, argp, sizeof(zone))) goto out; r = kvm_vm_ioctl_register_coalesced_mmio(kvm, &zone); break; @@ -2543,7 +2543,7 @@ static long kvm_vm_ioctl(struct file *filp, case KVM_UNREGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; r = -EFAULT; - if (copy_from_user(&zone, argp, sizeof zone)) + if (copy_from_user(&zone, argp, sizeof(zone))) goto out; r = kvm_vm_ioctl_unregister_coalesced_mmio(kvm, &zone); break; @@ -2553,7 +2553,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_irqfd data; r = -EFAULT; - if (copy_from_user(&data, argp, sizeof data)) + if (copy_from_user(&data, argp, sizeof(data))) goto out; r = kvm_irqfd(kvm, &data); break; @@ -2562,7 +2562,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_ioeventfd data; r = -EFAULT; - if (copy_from_user(&data, argp, sizeof data)) + if (copy_from_user(&data, argp, sizeof(data))) goto out; r = kvm_ioeventfd(kvm, &data); break; @@ -2583,7 +2583,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_msi msi; r = -EFAULT; - if (copy_from_user(&msi, argp, sizeof msi)) + if (copy_from_user(&msi, argp, sizeof(msi))) goto out; r = kvm_send_userspace_msi(kvm, &msi); break; @@ -2595,7 +2595,7 @@ static long kvm_vm_ioctl(struct file *filp, struct kvm_irq_level irq_event; r = -EFAULT; - if (copy_from_user(&irq_event, argp, sizeof irq_event)) + if (copy_from_user(&irq_event, argp, sizeof(irq_event))) goto out; r = kvm_vm_ioctl_irq_line(kvm, &irq_event, @@ -2605,7 +2605,7 @@ static long kvm_vm_ioctl(struct file *filp, r = -EFAULT; if (ioctl == KVM_IRQ_LINE_STATUS) { - if (copy_to_user(argp, &irq_event, sizeof irq_event)) + if (copy_to_user(argp, &irq_event, sizeof(irq_event))) goto out; } -- cgit v0.10.2 From a642a1756752421e5f6661d951943b53225c03eb Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:20 +0800 Subject: KVM: Fix WARNING: labels should not be indented in kvm_main.c WARNING: labels should not be indented + out_free_irq_routing: This patch fixes this WARNING to reduce noise when checking new patches in kvm_main.c. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 38738c2..4146d01 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2638,7 +2638,7 @@ static long kvm_vm_ioctl(struct file *filp, goto out_free_irq_routing; r = kvm_set_irq_routing(kvm, entries, routing.nr, routing.flags); - out_free_irq_routing: +out_free_irq_routing: vfree(entries); break; } -- cgit v0.10.2 From f4fee93270abbf862aab268111ac1e12934130c4 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:21 +0800 Subject: KVM: Fix ERROR: do not initialise statics to 0 or NULL in kvm_main.c ERROR: do not initialise statics to 0 or NULL +static int kvm_usage_count = 0; The kvm_usage_count will be placed to .bss segment when linking, so not need to set it to 0 here obviously. This patch fixes this ERROR to reduce noise when checking new patches in kvm_main.c. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4146d01..7d2c2ac3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -80,7 +80,7 @@ static DEFINE_RAW_SPINLOCK(kvm_count_lock); LIST_HEAD(vm_list); static cpumask_var_t cpus_hardware_enabled; -static int kvm_usage_count = 0; +static int kvm_usage_count; static atomic_t hardware_enable_failed; struct kmem_cache *kvm_vcpu_cache; -- cgit v0.10.2 From ee543159d5e0cfa9f3a349ac4e3da01a0ec66c78 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:22 +0800 Subject: KVM: EXPORT_SYMBOL should immediately follow its function WARNING: EXPORT_SYMBOL(foo); should immediately follow its function/variable +EXPORT_SYMBOL_GPL(gfn_to_page); This patch fixes these warnings to reduce noise when checking new patches in kvm_main.c. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7d2c2ac3..f9ad307 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1473,7 +1473,6 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn) return kvm_pfn_to_page(pfn); } - EXPORT_SYMBOL_GPL(gfn_to_page); void kvm_release_page_clean(struct page *page) -- cgit v0.10.2 From f95ef0cd0257852198b31ffeb527ef2f72caa1aa Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:23 +0800 Subject: KVM: Missing blank line after declarations in kvm_main.c There are many Warnings like this: WARNING: Missing a blank line after declarations + struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; This patch fixes these warnings to reduce noise when checking new patches in kvm_main.c. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f9ad307..ba7fc2e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1508,6 +1508,7 @@ void kvm_set_pfn_dirty(pfn_t pfn) { if (!kvm_is_reserved_pfn(pfn)) { struct page *page = pfn_to_page(pfn); + if (!PageReserved(page)) SetPageDirty(page); } @@ -1791,6 +1792,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) start = cur = ktime_get(); if (halt_poll_ns) { ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns); + do { /* * This sets KVM_REQ_UNHALT if an interrupt @@ -2126,6 +2128,7 @@ static long kvm_vcpu_ioctl(struct file *filp, /* The thread running this VCPU changed. */ struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); + rcu_assign_pointer(vcpu->pid, newpid); if (oldpid) synchronize_rcu(); @@ -2533,6 +2536,7 @@ static long kvm_vm_ioctl(struct file *filp, #ifdef KVM_COALESCED_MMIO_PAGE_OFFSET case KVM_REGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; if (copy_from_user(&zone, argp, sizeof(zone))) goto out; @@ -2541,6 +2545,7 @@ static long kvm_vm_ioctl(struct file *filp, } case KVM_UNREGISTER_COALESCED_MMIO: { struct kvm_coalesced_mmio_zone zone; + r = -EFAULT; if (copy_from_user(&zone, argp, sizeof(zone))) goto out; @@ -3259,6 +3264,7 @@ struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn) static void kvm_sched_in(struct preempt_notifier *pn, int cpu) { struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn); + if (vcpu->preempted) vcpu->preempted = false; -- cgit v0.10.2 From b7d409deb9322138d031c2122d1fcba9af9c508c Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:24 +0800 Subject: KVM: no space before tabs in kvm_main.c WARNING: please, no space before tabs + * ^I^Ikvm->lock --> kvm->slots_lock --> kvm->irq_lock$ WARNING: please, no space before tabs +^I^I * ^I- gfn_to_hva (kvm_read_guest, gfn_to_pfn)$ WARNING: please, no space before tabs +^I^I * ^I- kvm_is_visible_gfn (mmu_check_roots)$ This patch fixes these warnings to reduce noise when checking new patches in kvm_main.c. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index ba7fc2e..8f76c50 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -72,7 +72,7 @@ module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); /* * Ordering of locks: * - * kvm->lock --> kvm->slots_lock --> kvm->irq_lock + * kvm->lock --> kvm->slots_lock --> kvm->irq_lock */ DEFINE_SPINLOCK(kvm_lock); @@ -880,8 +880,8 @@ int __kvm_set_memory_region(struct kvm *kvm, * or moved, memslot will be created. * * validation of sp->gfn happens in: - * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) - * - kvm_is_visible_gfn (mmu_check_roots) + * - gfn_to_hva (kvm_read_guest, gfn_to_pfn) + * - kvm_is_visible_gfn (mmu_check_roots) */ kvm_arch_flush_shadow_memslot(kvm, slot); -- cgit v0.10.2 From 20e87b72244f922f420d83b0b15b42d17b92acae Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:25 +0800 Subject: KVM: Fix indentation in kvm_main.c ERROR: code indent should use tabs where possible + const struct kvm_io_range *r2)$ WARNING: please, no spaces at the start of a line + const struct kvm_io_range *r2)$ This patch fixes this ERROR & WARNING to reduce noise when checking new patches in kvm_main.c. Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8f76c50..e7d1bf8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2940,7 +2940,7 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) } static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1, - const struct kvm_io_range *r2) + const struct kvm_io_range *r2) { if (r1->addr < r2->addr) return -1; -- cgit v0.10.2 From 1170adc6dd9e94d3cefb6eefe1f44b308d882515 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Thu, 26 Feb 2015 14:58:26 +0800 Subject: KVM: Use pr_info/pr_err in kvm_main.c WARNING: Prefer [subsystem eg: netdev]_info([subsystem]dev, ... then dev_info(dev, ... then pr_info(... to printk(KERN_INFO ... + printk(KERN_INFO "kvm: exiting hardware virtualization\n"); WARNING: Prefer [subsystem eg: netdev]_err([subsystem]dev, ... then dev_err(dev, ... then pr_err(... to printk(KERN_ERR ... + printk(KERN_ERR "kvm: misc device register failed\n"); Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e7d1bf8..49900fc 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2817,8 +2817,7 @@ static void hardware_enable_nolock(void *junk) if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); atomic_inc(&hardware_enable_failed); - printk(KERN_INFO "kvm: enabling virtualization on " - "CPU%d failed\n", cpu); + pr_info("kvm: enabling virtualization on CPU%d failed\n", cpu); } } @@ -2894,12 +2893,12 @@ static int kvm_cpu_hotplug(struct notifier_block *notifier, unsigned long val, val &= ~CPU_TASKS_FROZEN; switch (val) { case CPU_DYING: - printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n", + pr_info("kvm: disabling virtualization on CPU%d\n", cpu); hardware_disable(); break; case CPU_STARTING: - printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n", + pr_info("kvm: enabling virtualization on CPU%d\n", cpu); hardware_enable(); break; @@ -2916,7 +2915,7 @@ static int kvm_reboot(struct notifier_block *notifier, unsigned long val, * * And Intel TXT required VMX off for all cpu when system shutdown. */ - printk(KERN_INFO "kvm: exiting hardware virtualization\n"); + pr_info("kvm: exiting hardware virtualization\n"); kvm_rebooting = true; on_each_cpu(hardware_disable_nolock, NULL, 1); return NOTIFY_OK; @@ -3346,7 +3345,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = misc_register(&kvm_dev); if (r) { - printk(KERN_ERR "kvm: misc device register failed\n"); + pr_err("kvm: misc device register failed\n"); goto out_unreg; } @@ -3357,7 +3356,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, r = kvm_init_debug(); if (r) { - printk(KERN_ERR "kvm: create debugfs files failed\n"); + pr_err("kvm: create debugfs files failed\n"); goto out_undebugfs; } -- cgit v0.10.2 From 5cb56059c94ddfaf92567a1c6443deec8363ae1c Mon Sep 17 00:00:00 2001 From: Joel Schopp Date: Mon, 2 Mar 2015 13:43:31 -0600 Subject: kvm: x86: make kvm_emulate_* consistant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently kvm_emulate() skips the instruction but kvm_emulate_* sometimes don't. The end reult is the caller ends up doing the skip themselves. Let's make them consistant. Signed-off-by: Joel Schopp Reviewed-by: Radim Krčmář Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a236e39..bf5a160 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -933,6 +933,7 @@ struct x86_emulate_ctxt; int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port); void kvm_emulate_cpuid(struct kvm_vcpu *vcpu); int kvm_emulate_halt(struct kvm_vcpu *vcpu); +int kvm_vcpu_halt(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 93dda3c..16d6e5c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm) static int halt_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 1; - skip_emulated_instruction(&svm->vcpu); return kvm_emulate_halt(&svm->vcpu); } static int vmmcall_interception(struct vcpu_svm *svm) { svm->next_rip = kvm_rip_read(&svm->vcpu) + 3; - skip_emulated_instruction(&svm->vcpu); kvm_emulate_hypercall(&svm->vcpu); return 1; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b20b4..fbd9499 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5000,7 +5000,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, if (emulate_instruction(vcpu, 0) == EMULATE_DONE) { if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); } return 1; } @@ -5527,13 +5527,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu) static int handle_halt(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); return kvm_emulate_halt(vcpu); } static int handle_vmcall(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_hypercall(vcpu); return 1; } @@ -5564,7 +5562,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu) static int handle_wbinvd(struct kvm_vcpu *vcpu) { - skip_emulated_instruction(vcpu); kvm_emulate_wbinvd(vcpu); return 1; } @@ -5903,7 +5900,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; - ret = kvm_emulate_halt(vcpu); + ret = kvm_vcpu_halt(vcpu); goto out; } @@ -9518,7 +9515,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmcs12->launch_state = 1; if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) - return kvm_emulate_halt(vcpu); + return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c5f7e03..d1a1fea 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4706,7 +4706,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address) kvm_mmu_invlpg(emul_to_vcpu(ctxt), address); } -int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) { if (!need_emulate_wbinvd(vcpu)) return X86EMUL_CONTINUE; @@ -4723,11 +4723,19 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) wbinvd(); return X86EMUL_CONTINUE; } + +int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->skip_emulated_instruction(vcpu); + return kvm_emulate_wbinvd_noskip(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd); + + static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) { - kvm_emulate_wbinvd(emul_to_vcpu(ctxt)); + kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) @@ -5817,7 +5825,7 @@ void kvm_arch_exit(void) free_percpu(shared_msrs); } -int kvm_emulate_halt(struct kvm_vcpu *vcpu) +int kvm_vcpu_halt(struct kvm_vcpu *vcpu) { ++vcpu->stat.halt_exits; if (irqchip_in_kernel(vcpu->kvm)) { @@ -5828,6 +5836,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) return 0; } } +EXPORT_SYMBOL_GPL(kvm_vcpu_halt); + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->skip_emulated_instruction(vcpu); + return kvm_vcpu_halt(vcpu); +} EXPORT_SYMBOL_GPL(kvm_emulate_halt); int kvm_hv_hypercall(struct kvm_vcpu *vcpu) @@ -5912,6 +5927,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) unsigned long nr, a0, a1, a2, a3, ret; int op_64_bit, r = 1; + kvm_x86_ops->skip_emulated_instruction(vcpu); + if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); -- cgit v0.10.2 From dab429a798a8ab3377136e09dda55ea75a41648d Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Mon, 2 Mar 2015 13:43:37 -0600 Subject: kvm: svm: make wbinvd faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to re-decode WBINVD since we know what it is from the intercept. Signed-off-by: David Kaplan [extracted from larger unlrelated patch, forward ported, tested,style cleanup] Signed-off-by: Joel Schopp Reviewed-by: Radim Krčmář Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 16d6e5c..8f66585 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2774,6 +2774,12 @@ static int skinit_interception(struct vcpu_svm *svm) return 1; } +static int wbinvd_interception(struct vcpu_svm *svm) +{ + kvm_emulate_wbinvd(&svm->vcpu); + return 1; +} + static int xsetbv_interception(struct vcpu_svm *svm) { u64 new_bv = kvm_read_edx_eax(&svm->vcpu); @@ -3373,7 +3379,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_STGI] = stgi_interception, [SVM_EXIT_CLGI] = clgi_interception, [SVM_EXIT_SKINIT] = skinit_interception, - [SVM_EXIT_WBINVD] = emulate_on_interception, + [SVM_EXIT_WBINVD] = wbinvd_interception, [SVM_EXIT_MONITOR] = monitor_interception, [SVM_EXIT_MWAIT] = mwait_interception, [SVM_EXIT_XSETBV] = xsetbv_interception, -- cgit v0.10.2 From 0f37247574b3ef5b130116bbf7c0f9eb8a4c78c2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Mar 2015 14:47:44 +0000 Subject: KVM: vgic: add virt-capable compatible strings Several dts only list "arm,cortex-a7-gic" or "arm,gic-400" in their GIC compatible list, and while this is correct (and supported by the GIC driver), KVM will fail to detect that it can support these cases. This patch adds the missing strings to the VGIC code. The of_device_id entries are padded to keep the probe function data aligned. Signed-off-by: Mark Rutland Cc: Andre Przywara Cc: Christoffer Dall Cc: Marc Zyngier Cc: Michal Simek Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0cc6ab6..86cec79 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1865,8 +1865,10 @@ static struct notifier_block vgic_cpu_nb = { }; static const struct of_device_id vgic_ids[] = { - { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, - { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, + { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-400", .data = vgic_v2_probe, }, + { .compatible = "arm,gic-v3", .data = vgic_v3_probe, }, {}, }; -- cgit v0.10.2 From 69ff5c619cb350f43fbab2a491b4b66de7e96959 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Mar 2015 12:26:06 +0100 Subject: KVM: arm/arm64: prefer IS_ENABLED to a static variable IS_ENABLED gives compile-time checking and keeps the code clearer. The one exception is inside kvm_vm_ioctl_check_extension, where the established idiom is to wrap the case labels with an #ifdef. Signed-off-by: Christoffer Dall Signed-off-by: Paolo Bonzini diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5560f74..e0e9434 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); static u8 kvm_next_vmid; static DEFINE_SPINLOCK(kvm_vmid_lock); -static bool vgic_present; - static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -172,9 +170,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { +#ifdef CONFIG_KVM_ARM_VGIC case KVM_CAP_IRQCHIP: - r = vgic_present; - break; +#endif case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: @@ -831,7 +829,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: - if (!vgic_present) + if (!IS_ENABLED(CONFIG_KVM_ARM_VGIC)) return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: @@ -847,10 +845,9 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { - if (vgic_present) - return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); - else + if (!IS_ENABLED(CONFIG_KVM_ARM_VGIC)) return -ENXIO; + return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { struct kvm_arm_device_addr dev_addr; @@ -1035,10 +1032,6 @@ static int init_hyp_mode(void) if (err) goto out_free_context; -#ifdef CONFIG_KVM_ARM_VGIC - vgic_present = true; -#endif - /* * Init HYP architected timer support */ -- cgit v0.10.2 From 662d9715840aef44dcb573b0f9fab9e8319c868a Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 11 Mar 2015 14:21:31 +0100 Subject: arm/arm64: KVM: Kill CONFIG_KVM_ARM_{VGIC,TIMER} We can definitely decide at run-time whether to use the GIC and timers or not, and the extra code and data structures that we allocate space for is really negligable with this config option, so I don't think it's worth the extra complexity of always having to define stub static inlines. The !CONFIG_KVM_ARM_VGIC/TIMER case is pretty much an untested code path anyway, so we're better off just getting rid of it. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 2d2d608..488eaac 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -190,7 +190,6 @@ int main(void) DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); -#ifdef CONFIG_KVM_ARM_VGIC DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); @@ -200,14 +199,11 @@ int main(void) DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); -#ifdef CONFIG_KVM_ARM_TIMER DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); -#endif DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); -#endif DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); #endif return 0; diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 338ace7..7b6347b 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on MMU && OF select PREEMPT_NOTIFIERS select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT @@ -26,10 +27,11 @@ config KVM select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU - depends on ARM_VIRT_EXT && ARM_LPAE + select MMU_NOTIFIER + select HAVE_KVM_IRQCHIP + depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- - Support hosting virtualized guest machines. You will also - need to select one or more of the processor modules below. + Support hosting virtualized guest machines. This module provides access to the hardware capabilities through a character device node named /dev/kvm. @@ -37,10 +39,7 @@ config KVM If unsure, say N. config KVM_ARM_HOST - bool "KVM host support for ARM cpus." - depends on KVM - depends on MMU - select MMU_NOTIFIER + bool ---help--- Provides host support for ARM processors. @@ -55,20 +54,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool "KVM support for Virtual GIC" - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool "KVM support for Architected Timers" - depends on KVM_ARM_VGIC && ARM_ARCH_TIMER - select HAVE_KVM_IRQCHIP - default y - ---help--- - Adds support for the Architected Timers in virtual machines - endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 443b8be..60be7be 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -20,7 +20,7 @@ kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +obj-y += $(KVM)/arm/vgic.o +obj-y += $(KVM)/arm/vgic-v2.o +obj-y += $(KVM)/arm/vgic-v2-emul.o +obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e0e9434..37b46c5 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -170,9 +170,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { -#ifdef CONFIG_KVM_ARM_VGIC case KVM_CAP_IRQCHIP: -#endif case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: @@ -829,8 +827,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: - if (!IS_ENABLED(CONFIG_KVM_ARM_VGIC)) - return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -845,8 +841,6 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { - if (!IS_ENABLED(CONFIG_KVM_ARM_VGIC)) - return -ENXIO; return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 384bab6..d503fbb 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } -#ifndef CONFIG_KVM_ARM_TIMER - -#define NUM_TIMER_REGS 0 - -static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) -{ - return 0; -} - -static bool is_timer_reg(u64 index) -{ - return false; -} - -#else - #define NUM_TIMER_REGS 3 static bool is_timer_reg(u64 index) @@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return 0; } -#endif - static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { void __user *uaddr = (void __user *)(long)reg->addr; diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S index 14d4883..35e4a3a 100644 --- a/arch/arm/kvm/interrupts_head.S +++ b/arch/arm/kvm/interrupts_head.S @@ -402,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0 * Assumes vcpu pointer in vcpu reg */ .macro save_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm /* @@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6 ) * Assumes vcpu pointer in vcpu reg */ .macro restore_vgic_state -#ifdef CONFIG_KVM_ARM_VGIC /* Get VGIC VCTRL base into r2 */ ldr r2, [vcpu, #VCPU_KVM] ldr r2, [r2, #KVM_VGIC_VCTRL] @@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6 ) subs r4, r4, #1 bne 1b 2: -#endif .endm #define CNTHCTL_PL1PCTEN (1 << 0) @@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6 ) * Clobbers r2-r5 */ .macro save_timer_state -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6 ) mcrr p15, 4, r2, r2, c14 @ CNTVOFF 1: -#endif @ Allow physical timer/counter access for the host mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN) @@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6 ) bic r2, r2, #CNTHCTL_PL1PCEN mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL -#ifdef CONFIG_KVM_ARM_TIMER ldr r4, [vcpu, #VCPU_KVM] ldr r2, [r4, #KVM_TIMER_ENABLED] cmp r2, #0 @@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6 ) and r2, r2, #3 mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL 1: -#endif .endm .equ vmentry, 0 diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index f5590c8..ee43750 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -18,6 +18,7 @@ if VIRTUALIZATION config KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on OF select MMU_NOTIFIER select PREEMPT_NOTIFIERS select ANON_INODES @@ -25,8 +26,7 @@ config KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST - select KVM_ARM_VGIC - select KVM_ARM_TIMER + select HAVE_KVM_IRQCHIP select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU ---help--- @@ -50,17 +50,4 @@ config KVM_ARM_MAX_VCPUS large, so only choose a reasonable number that you expect to actually use. -config KVM_ARM_VGIC - bool - depends on KVM_ARM_HOST && OF - select HAVE_KVM_IRQCHIP - ---help--- - Adds support for a hardware assisted, in-kernel GIC emulation. - -config KVM_ARM_TIMER - bool - depends on KVM_ARM_VGIC - ---help--- - Adds support for the Architected Timers in virtual machines. - endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 4e6e09e..c92b26a 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o -kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o +kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index b3f45a5..a74e4c2 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -24,17 +24,14 @@ #include struct arch_timer_kvm { -#ifdef CONFIG_KVM_ARM_TIMER /* Is the timer enabled */ bool enabled; /* Virtual offset */ cycle_t cntvoff; -#endif }; struct arch_timer_cpu { -#ifdef CONFIG_KVM_ARM_TIMER /* Registers: control register, timer value */ u32 cntv_ctl; /* Saved/restored */ cycle_t cntv_cval; /* Saved/restored */ @@ -55,10 +52,8 @@ struct arch_timer_cpu { /* Timer IRQ */ const struct kvm_irq_level *irq; -#endif }; -#ifdef CONFIG_KVM_ARM_TIMER int kvm_timer_hyp_init(void); void kvm_timer_enable(struct kvm *kvm); void kvm_timer_init(struct kvm *kvm); @@ -72,30 +67,4 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); -#else -static inline int kvm_timer_hyp_init(void) -{ - return 0; -}; - -static inline void kvm_timer_enable(struct kvm *kvm) {} -static inline void kvm_timer_init(struct kvm *kvm) {} -static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *irq) {} -static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) -{ - return 0; -} - -static inline u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) -{ - return 0; -} -#endif - #endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7c55dd5..b81630b 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -148,7 +148,6 @@ struct vgic_vm_ops { }; struct vgic_dist { -#ifdef CONFIG_KVM_ARM_VGIC spinlock_t lock; bool in_kernel; bool ready; @@ -237,7 +236,6 @@ struct vgic_dist { unsigned long *irq_pending_on_cpu; struct vgic_vm_ops vm_ops; -#endif }; struct vgic_v2_cpu_if { @@ -265,7 +263,6 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { -#ifdef CONFIG_KVM_ARM_VGIC /* per IRQ to LR mapping */ u8 *vgic_irq_lr_map; @@ -284,7 +281,6 @@ struct vgic_cpu { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; }; -#endif }; #define LR_EMPTY 0xff @@ -297,7 +293,6 @@ struct kvm_vcpu; struct kvm_run; struct kvm_exit_mmio; -#ifdef CONFIG_KVM_ARM_VGIC int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_map_resources(struct kvm *kvm); @@ -334,84 +329,4 @@ static inline int vgic_v3_probe(struct device_node *vgic_node, } #endif -#else -static inline int kvm_vgic_hyp_init(void) -{ - return 0; -} - -static inline int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) -{ - return 0; -} - -static inline int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) -{ - return -ENXIO; -} - -static inline int kvm_vgic_map_resources(struct kvm *kvm) -{ - return 0; -} - -static inline int kvm_vgic_create(struct kvm *kvm, u32 type) -{ - return 0; -} - -static inline void kvm_vgic_destroy(struct kvm *kvm) -{ -} - -static inline void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) -{ -} - -static inline int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) {} -static inline void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) {} - -static inline int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - return 0; -} - -static inline int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - return 0; -} - -static inline bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - return false; -} - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 0; -} - -static inline bool vgic_initialized(struct kvm *kvm) -{ - return true; -} - -static inline bool vgic_ready(struct kvm *kvm) -{ - return true; -} - -static inline int kvm_vgic_get_max_vcpus(void) -{ - return KVM_MAX_VCPUS; -} -#endif - #endif -- cgit v0.10.2 From df2bd1ac03dfc19e955a43f796cfe9f9cf49c75f Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 4 Mar 2015 11:14:32 +0100 Subject: KVM: arm/arm64: unset CONFIG_HAVE_KVM_IRQCHIP CONFIG_HAVE_KVM_IRQCHIP is needed to support IRQ routing (along with irq_comm.c and irqchip.c usage). This is not the case for arm/arm64 currently. This patch unsets the flag for both arm and arm64. Signed-off-by: Eric Auger Reviewed-by: Andre Przywara Acked-by: Christoffer Dall Acked-by: Will Deacon Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 7b6347b..83a448e 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -28,7 +28,6 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU select MMU_NOTIFIER - select HAVE_KVM_IRQCHIP depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index ee43750..05f56ce 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -26,7 +26,6 @@ config KVM select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO select KVM_ARM_HOST - select HAVE_KVM_IRQCHIP select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU ---help--- -- cgit v0.10.2 From 01c94e64f5a6f298774bdbde435e577821119fc0 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 4 Mar 2015 11:14:33 +0100 Subject: KVM: introduce kvm_arch_intc_initialized and use it in irqfd Introduce __KVM_HAVE_ARCH_INTC_INITIALIZED define and associated kvm_arch_intc_initialized function. This latter allows to test whether the virtual interrupt controller is initialized and ready to accept virtual IRQ injection. On some architectures, the virtual interrupt controller is dynamically instantiated, justifying that kind of check. The new function can now be used by irqfd to check whether the virtual interrupt controller is ready on KVM_IRQFD request. If not, KVM_IRQFD returns -EAGAIN. Signed-off-by: Eric Auger Acked-by: Christoffer Dall Reviewed-by: Andre Przywara Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d12b210..ae9c720 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -700,6 +700,20 @@ static inline wait_queue_head_t *kvm_arch_vcpu_wq(struct kvm_vcpu *vcpu) #endif } +#ifdef __KVM_HAVE_ARCH_INTC_INITIALIZED +/* + * returns true if the virtual interrupt controller is initialized and + * ready to accept virtual IRQ. On some architectures the virtual interrupt + * controller is dynamically instantiated and this is not always true. + */ +bool kvm_arch_intc_initialized(struct kvm *kvm); +#else +static inline bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return true; +} +#endif + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 148b239..fc5f43e 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -311,6 +311,9 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) unsigned int events; int idx; + if (!kvm_arch_intc_initialized(kvm)) + return -EAGAIN; + irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL); if (!irqfd) return -ENOMEM; -- cgit v0.10.2 From c1426e4c5add09042840013dfa5565e6be6d412e Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 4 Mar 2015 11:14:34 +0100 Subject: KVM: arm/arm64: implement kvm_arch_intc_initialized On arm/arm64 the VGIC is dynamically instantiated and it is useful to expose its state, especially for irqfd setup. This patch defines __KVM_HAVE_ARCH_INTC_INITIALIZED and implements kvm_arch_intc_initialized. Signed-off-by: Eric Auger Acked-by: Christoffer Dall Reviewed-by: Andre Przywara Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 41008cd..902a7d1 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -27,6 +27,8 @@ #include #include +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 37b46c5..5e893eb 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -448,6 +448,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return 0; } +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return vgic_initialized(kvm); +} + static void vcpu_pause(struct kvm_vcpu *vcpu) { wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 8ac3c70f..967fb1c 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -28,6 +28,8 @@ #include #include +#define __KVM_HAVE_ARCH_INTC_INITIALIZED + #if defined(CONFIG_KVM_ARM_MAX_VCPUS) #define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS #else -- cgit v0.10.2 From 649cf73994e8ac69dfe3e7a35fba9acf051e7fe6 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 4 Mar 2015 11:14:35 +0100 Subject: KVM: arm/arm64: remove coarse grain dist locking at kvm_vgic_sync_hwstate To prepare for irqfd addition, coarse grain locking is removed at kvm_vgic_sync_hwstate level and finer grain locking is introduced in vgic_process_maintenance only. Signed-off-by: Eric Auger Acked-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 86cec79..897c849 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1081,6 +1081,7 @@ epilog: static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool level_pending = false; kvm_debug("STATUS = %08x\n", status); @@ -1098,6 +1099,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) struct vgic_lr vlr = vgic_get_lr(vcpu, lr); WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); vgic_irq_clear_queued(vcpu, vlr.irq); WARN_ON(vlr.state & LR_STATE_MASK); vlr.state = 0; @@ -1125,6 +1127,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) vgic_cpu_irq_clear(vcpu, vlr.irq); } + spin_unlock(&dist->lock); + /* * Despite being EOIed, the LR may not have * been marked as empty. @@ -1139,10 +1143,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) return level_pending; } -/* - * Sync back the VGIC state after a guest run. The distributor lock is - * needed so we don't get preempted in the middle of the state processing. - */ +/* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; @@ -1189,14 +1190,10 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - if (!irqchip_in_kernel(vcpu->kvm)) return; - spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); - spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 174178fed338edba66ab9580af0c5d9e1a4e5019 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 4 Mar 2015 11:14:36 +0100 Subject: KVM: arm/arm64: add irqfd support This patch enables irqfd on arm/arm64. Both irqfd and resamplefd are supported. Injection is implemented in vgic.c without routing. This patch enables CONFIG_HAVE_KVM_EVENTFD and CONFIG_HAVE_KVM_IRQFD. KVM_CAP_IRQFD is now advertised. KVM_CAP_IRQFD_RESAMPLE capability automatically is advertised as soon as CONFIG_HAVE_KVM_IRQFD is set. Irqfd injection is restricted to SPI. The rationale behind not supporting PPI irqfd injection is that any device using a PPI would be a private-to-the-CPU device (timer for instance), so its state would have to be context-switched along with the VCPU and would require in-kernel wiring anyhow. It is not a relevant use case for irqfds. Signed-off-by: Eric Auger Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b112efc..b265d8e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2234,7 +2234,7 @@ into the hash PTE second double word). 4.75 KVM_IRQFD Capability: KVM_CAP_IRQFD -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irqfd (in) Returns: 0 on success, -1 on error @@ -2260,6 +2260,10 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. +On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared +Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is +given by gsi + 32. + 4.76 KVM_PPC_ALLOCATE_HTAB Capability: KVM_CAP_PPC_ALLOC_HTAB diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 0db25bc..2499867 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -198,6 +198,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 83a448e..f1f79d1 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -28,6 +28,8 @@ config KVM select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU select MMU_NOTIFIER + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 60be7be..a093bf1 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -15,7 +15,7 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt) AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt) KVM := ../../../virt/kvm -kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 5e893eb..cc96619 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -171,6 +171,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: + case KVM_CAP_IRQFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 3ef77a4..c154c0b7 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -191,6 +191,9 @@ struct kvm_arch_memory_slot { /* Highest supported SPI, from VGIC_NR_IRQS */ #define KVM_ARM_IRQ_GIC_MAX 127 +/* One single KVM irqchip, ie. the VGIC */ +#define KVM_NR_IRQCHIPS 1 + /* PSCI interface */ #define KVM_PSCI_FN_BASE 0x95c1ba5e #define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n)) diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 05f56ce..5105e29 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -28,6 +28,8 @@ config KVM select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU + select HAVE_KVM_EVENTFD + select HAVE_KVM_IRQFD ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index c92b26a..b22c636 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 897c849..c000e97 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -31,6 +31,7 @@ #include #include #include +#include /* * How the whole thing works (courtesy of Christoffer Dall): @@ -1083,6 +1084,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) u32 status = vgic_get_interrupt_status(vcpu); struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool level_pending = false; + struct kvm *kvm = vcpu->kvm; kvm_debug("STATUS = %08x\n", status); @@ -1118,6 +1120,17 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) */ vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + /* + * kvm_notify_acked_irq calls kvm_set_irq() + * to reset the IRQ level. Need to release the + * lock for kvm_set_irq to grab it. + */ + spin_unlock(&dist->lock); + + kvm_notify_acked_irq(kvm, 0, + vlr.irq - VGIC_NR_PRIVATE_IRQS); + spin_lock(&dist->lock); + /* Any additional pending interrupt? */ if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { vgic_cpu_irq_set(vcpu, vlr.irq); @@ -1913,3 +1926,38 @@ out_free_irq: free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); return ret; } + +int kvm_irq_map_gsi(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *entries, + int gsi) +{ + return gsi; +} + +int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ + return pin; +} + +int kvm_set_irq(struct kvm *kvm, int irq_source_id, + u32 irq, int level, bool line_status) +{ + unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; + + trace_kvm_set_irq(irq, level, irq_source_id); + + BUG_ON(!vgic_initialized(kvm)); + + if (spi > kvm->arch.vgic.nr_irqs) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi, level); + +} + +/* MSI not implemented yet */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + return 0; +} -- cgit v0.10.2 From 1d2ebaccc741a299abfafb848414b01d190f4e33 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Mar 2015 18:16:50 +0000 Subject: arm/arm64: KVM: Allow handle_hva_to_gpa to return a value So far, handle_hva_to_gpa was never required to return a value. As we prepare to age pages at Stage-2, we need to be able to return a value from the iterator (kvm_test_age_hva). Adapt the code to handle this situation. No semantic change. Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 3e6859b..ffa06e0 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1377,15 +1377,16 @@ out_unlock: return ret; } -static void handle_hva_to_gpa(struct kvm *kvm, - unsigned long start, - unsigned long end, - void (*handler)(struct kvm *kvm, - gpa_t gpa, void *data), - void *data) +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, void *data), + void *data) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; + int ret = 0; slots = kvm_memslots(kvm); @@ -1409,14 +1410,17 @@ static void handle_hva_to_gpa(struct kvm *kvm, for (; gfn < gfn_end; ++gfn) { gpa_t gpa = gfn << PAGE_SHIFT; - handler(kvm, gpa, data); + ret |= handler(kvm, gpa, data); } } + + return ret; } -static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) { unmap_stage2_range(kvm, gpa, PAGE_SIZE); + return 0; } int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) @@ -1442,7 +1446,7 @@ int kvm_unmap_hva_range(struct kvm *kvm, return 0; } -static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) { pte_t *pte = (pte_t *)data; @@ -1454,6 +1458,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data) * through this calling path. */ stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; } -- cgit v0.10.2 From 35307b9a5f7ebcc8d8db41c73b69c131b48ace2b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Mar 2015 18:16:51 +0000 Subject: arm/arm64: KVM: Implement Stage-2 page aging Until now, KVM/arm didn't care much for page aging (who was swapping anyway?), and simply provided empty hooks to the core KVM code. With server-type systems now being available, things are quite different. This patch implements very simple support for page aging, by clearing the Access flag in the Stage-2 page tables. On access fault, the current fault handling will write the PTE or PMD again, putting the Access flag back on. It should be possible to implement a much faster handling for Access faults, but that's left for a later patch. With this in place, performance in VMs is degraded much more gracefully. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 816db0b..d995821 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -185,6 +185,7 @@ #define HSR_COND (0xfU << HSR_COND_SHIFT) #define FSC_FAULT (0x04) +#define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 902a7d1..d71607c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -167,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index ffa06e0..1831aa2 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1299,6 +1299,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, out_unlock: spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret; } @@ -1333,7 +1334,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Check the stage-2 fault is trans. fault or write fault */ fault_status = kvm_vcpu_trap_get_fault_type(vcpu); - if (fault_status != FSC_FAULT && fault_status != FSC_PERM) { + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", kvm_vcpu_trap_get_class(vcpu), (unsigned long)kvm_vcpu_trap_get_fault(vcpu), @@ -1475,6 +1477,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); } +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + if (pmd_young(*pmd)) { + *pmd = pmd_mkold(*pmd); + return 1; + } + + return 0; + } + + pte = pte_offset_kernel(pmd, gpa); + if (pte_none(*pte)) + return 0; + + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); /* Just a page... */ + return 1; + } + + return 0; +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data) +{ + pmd_t *pmd; + pte_t *pte; + + pmd = stage2_get_pmd(kvm, NULL, gpa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + return 0; + + if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */ + return pmd_young(*pmd); + + pte = pte_offset_kernel(pmd, gpa); + if (!pte_none(*pte)) /* Just a page... */ + return pte_young(*pte); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) { mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 6817664..c09f37f 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -210,6 +210,39 @@ TRACE_EVENT(kvm_set_spte_hva, TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) ); +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) +); + TRACE_EVENT(kvm_hvc, TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), TP_ARGS(vcpu_pc, r0, imm), diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 92bbae3..7052245 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -90,6 +90,7 @@ #define ESR_ELx_FSC (0x3F) #define ESR_ELx_FSC_TYPE (0x3C) #define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_ACCESS (0x08) #define ESR_ELx_FSC_FAULT (0x04) #define ESR_ELx_FSC_PERM (0x0C) #define ESR_ELx_CV (UL(1) << 24) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 94674eb..9e5543e 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -187,6 +187,7 @@ /* For compatibility with fault code shared with 32-bit */ #define FSC_FAULT ESR_ELx_FSC_FAULT +#define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 967fb1c..f0f58c9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -179,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); /* We do not have shadow page tables, hence the empty hooks */ -static inline int kvm_age_hva(struct kvm *kvm, unsigned long start, - unsigned long end) -{ - return 0; -} - -static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - return 0; -} - static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, unsigned long address) { -- cgit v0.10.2 From aeda9130c38e2e0e77c1aaa65292c2f5a81107a8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 12 Mar 2015 18:16:52 +0000 Subject: arm/arm64: KVM: Optimize handling of Access Flag faults Now that we have page aging in Stage-2, it becomes obvious that we're doing way too much work handling the fault. The page is not going anywhere (it is still mapped), the page tables are already allocated, and all we want is to flip a bit in the PMD or PTE. Also, we can avoid any form of TLB invalidation, since a page with the AF bit off is not allowed to be cached. An obvious solution is to have a separate handler for FSC_ACCESS, where we pride ourselves to only do the very minimum amount of work. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 1831aa2..56c8b03 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1304,6 +1304,46 @@ out_unlock: return ret; } +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pmd_t *pmd; + pte_t *pte; + pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa); + if (!pmd || pmd_none(*pmd)) /* Nothing there */ + goto out; + + if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + goto out; + } + + pte = pte_offset_kernel(pmd, fault_ipa); + if (pte_none(*pte)) /* Nothing there either */ + goto out; + + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1371,6 +1411,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) /* Userspace should not be able to register out-of-bounds IPAs */ VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE); + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); if (ret == 0) ret = 1; diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index c09f37f..0ec3539 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -68,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault, __entry->hxfar, __entry->vcpu_pc) ); +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + TRACE_EVENT(kvm_irq_line, TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), TP_ARGS(type, vcpu_idx, irq_num, level), -- cgit v0.10.2 From 1662e862a87110f742a144210c59dc0e8a112bc9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 12 Mar 2015 14:59:26 +0100 Subject: KVM: MAINTAINERS: add file arch/x86/kernel/kvm.c|kvmclock.c The KVM list should be CCed on changes for arch/x86/kernel/kvm.c and arch/x86/kernel/kvmclock.c Signed-off-by: Christian Borntraeger Acked-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti diff --git a/MAINTAINERS b/MAINTAINERS index 69cc89f..15e4015 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5575,6 +5575,8 @@ S: Supported F: Documentation/*/kvm*.txt F: Documentation/virtual/kvm/ F: arch/*/kvm/ +F: arch/x86/kernel/kvm.c +F: arch/x86/kernel/kvmclock.c F: arch/*/include/asm/kvm* F: include/linux/kvm* F: include/uapi/linux/kvm* -- cgit v0.10.2 From 5e57518d99725e8b4ee34cc94669afb79e4cfe4e Mon Sep 17 00:00:00 2001 From: David Kaplan Date: Fri, 6 Mar 2015 14:44:35 -0600 Subject: x86: svm: use cr_interception for SVM_EXIT_CR0_SEL_WRITE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Another patch in my war on emulate_on_interception() use as a svm exit handler. These were pulled out of a larger patch at the suggestion of Radim Krcmar, see https://lkml.org/lkml/2015/2/25/559 Changes since v1: * fixed typo introduced after test, retested Signed-off-by: David Kaplan [separated out just cr_interception part from larger removal of INTERCEPT_CR0_WRITE, forward ported, tested] Signed-off-by: Joel Schopp Reviewed-by: Radim Krčmář Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8f66585..001e630 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2944,7 +2944,10 @@ static int cr_interception(struct vcpu_svm *svm) return emulate_on_interception(svm); reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK; - cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; + if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE) + cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0; + else + cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0; err = 0; if (cr >= 16) { /* mov to cr */ @@ -3328,7 +3331,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR3] = cr_interception, [SVM_EXIT_READ_CR4] = cr_interception, [SVM_EXIT_READ_CR8] = cr_interception, - [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, + [SVM_EXIT_CR0_SEL_WRITE] = cr_interception, [SVM_EXIT_WRITE_CR0] = cr_interception, [SVM_EXIT_WRITE_CR3] = cr_interception, [SVM_EXIT_WRITE_CR4] = cr_interception, -- cgit v0.10.2 From b34a80517bfcd917bc59d9670d8f465a564af3b9 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 9 Mar 2015 20:27:43 +0100 Subject: KVM: x86: Fix re-execution of patched vmmcall For a very long time (since 2b3d2a20), the path handling a vmmcall instruction of the guest on an Intel host only applied the patch but no longer handled the hypercall. The reverse case, vmcall on AMD hosts, is fine. As both em_vmcall and em_vmmcall actually have to do the same, we can fix the issue by consolidating both into the same handler. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 106c015..c941abe 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -3323,7 +3323,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt) return X86EMUL_CONTINUE; } -static int em_vmcall(struct x86_emulate_ctxt *ctxt) +static int em_hypercall(struct x86_emulate_ctxt *ctxt) { int rc = ctxt->ops->fix_hypercall(ctxt); @@ -3395,17 +3395,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt) return em_lgdt_lidt(ctxt, true); } -static int em_vmmcall(struct x86_emulate_ctxt *ctxt) -{ - int rc; - - rc = ctxt->ops->fix_hypercall(ctxt); - - /* Disable writeback. */ - ctxt->dst.type = OP_NONE; - return rc; -} - static int em_lidt(struct x86_emulate_ctxt *ctxt) { return em_lgdt_lidt(ctxt, false); @@ -3769,7 +3758,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) static const struct opcode group7_rm0[] = { N, - I(SrcNone | Priv | EmulateOnUD, em_vmcall), + I(SrcNone | Priv | EmulateOnUD, em_hypercall), N, N, N, N, N, N, }; @@ -3781,7 +3770,7 @@ static const struct opcode group7_rm1[] = { static const struct opcode group7_rm3[] = { DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa), - II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall), + II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall), DIP(SrcNone | Prot | Priv, vmload, check_svme_pa), DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa), DIP(SrcNone | Prot | Priv, stgi, check_svme), -- cgit v0.10.2 From ae1f57670703656cc9f293722c3b8b6782f8ab3f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 9 Mar 2015 20:56:43 +0100 Subject: KVM: nVMX: Do not emulate #UD while in guest mode While in L2, leave all #UD to L2 and do not try to emulate it. If L1 is interested in doing this, it reports its interest via the exception bitmap, and we never get into handle_exception of L0 anyway. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fbd9499..50c675b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5065,6 +5065,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) } if (is_invalid_opcode(intr_info)) { + if (is_guest_mode(vcpu)) { + kvm_queue_exception(vcpu, UD_VECTOR); + return 1; + } er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD); if (er != EMULATE_DONE) kvm_queue_exception(vcpu, UD_VECTOR); -- cgit v0.10.2 From ecccf0cc722f40e0dcc97872e7a960765119a256 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 13 Mar 2015 17:02:52 +0000 Subject: arm/arm64: KVM: export VCPU power state via MP_STATE ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To cleanly restore an SMP VM we need to ensure that the current pause state of each vcpu is correctly recorded. Things could get confused if the CPU starts running after migration restore completes when it was paused before it state was captured. We use the existing KVM_GET/SET_MP_STATE ioctl to do this. The arm/arm64 interface is a lot simpler as the only valid states are KVM_MP_STATE_RUNNABLE and KVM_MP_STATE_STOPPED. Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index b265d8e..71d10d7 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -997,7 +997,7 @@ for vm-wide capabilities. 4.38 KVM_GET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, s390 +Architectures: x86, s390, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_mp_state (out) Returns: 0 on success; -1 on error @@ -1011,7 +1011,7 @@ uniprocessor guests). Possible values are: - - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86] + - KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86,arm/arm64] - KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP) which has not yet received an INIT signal [x86] - KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is @@ -1020,7 +1020,7 @@ Possible values are: is waiting for an interrupt [x86] - KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector accessible via KVM_GET_VCPU_EVENTS) [x86] - - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390] + - KVM_MP_STATE_STOPPED: the vcpu is stopped [s390,arm/arm64] - KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390] - KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted) [s390] @@ -1031,11 +1031,15 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. +For arm/arm64: + +The only states that are valid are KVM_MP_STATE_STOPPED and +KVM_MP_STATE_RUNNABLE which reflect if the vcpu is paused or not. 4.39 KVM_SET_MP_STATE Capability: KVM_CAP_MP_STATE -Architectures: x86, s390 +Architectures: x86, s390, arm, arm64 Type: vcpu ioctl Parameters: struct kvm_mp_state (in) Returns: 0 on success; -1 on error @@ -1047,6 +1051,10 @@ On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel irqchip, the multiprocessing state must be maintained by userspace on these architectures. +For arm/arm64: + +The only states that are valid are KVM_MP_STATE_STOPPED and +KVM_MP_STATE_RUNNABLE which reflect if the vcpu should be paused or not. 4.40 KVM_SET_IDENTITY_MAP_ADDR diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc96619..9a5f057 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -180,6 +180,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PSCI: case KVM_CAP_ARM_PSCI_0_2: case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -310,13 +311,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + if (vcpu->arch.pause) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - return -EINVAL; + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.pause = false; + break; + case KVM_MP_STATE_STOPPED: + vcpu->arch.pause = true; + break; + default: + return -EINVAL; + } + + return 0; } /** -- cgit v0.10.2 From 71760950bf3dc796e5e53ea3300dec724a09f593 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Fri, 13 Mar 2015 17:02:53 +0000 Subject: arm/arm64: KVM: add a common vgic_queue_irq_to_lr fn MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps re-factor away some of the repetitive code and makes the code flow more nicely. Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c000e97..697ce17 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -950,6 +950,20 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) } } +static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, + int lr_nr, struct vgic_lr vlr) +{ + if (vgic_dist_irq_is_pending(vcpu, irq)) { + vlr.state |= LR_STATE_PENDING; + kvm_debug("Set pending: 0x%x\n", vlr.state); + } + + if (!vgic_irq_is_edge(vcpu, irq)) + vlr.state |= LR_EOI_INT; + + vgic_set_lr(vcpu, lr_nr, vlr); +} + /* * Queue an interrupt to a CPU virtual interface. Return true on success, * or false if it wasn't possible to queue it. @@ -977,8 +991,7 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) if (vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); - vlr.state |= LR_STATE_PENDING; - vgic_set_lr(vcpu, lr, vlr); + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } @@ -995,11 +1008,8 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vlr.irq = irq; vlr.source = sgi_source_id; - vlr.state = LR_STATE_PENDING; - if (!vgic_irq_is_edge(vcpu, irq)) - vlr.state |= LR_EOI_INT; - - vgic_set_lr(vcpu, lr, vlr); + vlr.state = 0; + vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } -- cgit v0.10.2 From 47a98b15ba7cf6a13bd94ab8455d3f586b16420b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 13 Mar 2015 17:02:54 +0000 Subject: arm/arm64: KVM: support for un-queuing active IRQs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migrating active interrupts causes the active state to be lost completely. This implements some additional bitmaps to track the active state on the distributor and export this to user space. Signed-off-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index b81630b..9092fad 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -195,6 +195,9 @@ struct vgic_dist { /* Level-triggered interrupt queued on VCPU interface */ struct vgic_bitmap irq_queued; + /* Interrupt was active when unqueue from VCPU interface */ + struct vgic_bitmap irq_active; + /* Interrupt priority. Not used yet. */ struct vgic_bytemap irq_priority; @@ -235,6 +238,9 @@ struct vgic_dist { /* Bitmap indicating which CPU has something pending */ unsigned long *irq_pending_on_cpu; + /* Bitmap indicating which CPU has active IRQs */ + unsigned long *irq_active_on_cpu; + struct vgic_vm_ops vm_ops; }; @@ -266,9 +272,15 @@ struct vgic_cpu { /* per IRQ to LR mapping */ u8 *vgic_irq_lr_map; - /* Pending interrupts on this VCPU */ + /* Pending/active/both interrupts on this VCPU */ DECLARE_BITMAP( pending_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( active_percpu, VGIC_NR_PRIVATE_IRQS); + DECLARE_BITMAP( pend_act_percpu, VGIC_NR_PRIVATE_IRQS); + + /* Pending/active/both shared interrupts, dynamically sized */ unsigned long *pending_shared; + unsigned long *active_shared; + unsigned long *pend_act_shared; /* Bitmap of used/free list registers */ DECLARE_BITMAP( lr_used, VGIC_V2_MAX_LRS); @@ -306,6 +318,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, bool level); void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio); diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 19c6210..c818662 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -107,6 +107,22 @@ static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, vcpu->vcpu_id); } +static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + +static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, + vcpu->vcpu_id); +} + static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -344,13 +360,13 @@ static const struct kvm_mmio_range vgic_dist_ranges[] = { .base = GIC_DIST_ACTIVE_SET, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_set_active_reg, }, { .base = GIC_DIST_ACTIVE_CLEAR, .len = VGIC_MAX_IRQS / 8, .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, + .handle_mmio = handle_mmio_clear_active_reg, }, { .base = GIC_DIST_PRI, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 697ce17..ffd937c 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -264,6 +264,13 @@ static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); } +static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); +} + static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -278,6 +285,20 @@ static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); } +static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); +} + +static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); +} + static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -521,6 +542,44 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, return false; } +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id) +{ + u32 *reg; + struct vgic_dist *dist = &kvm->arch.vgic; + + reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); + vgic_reg_access(mmio, reg, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); + + if (mmio->is_write) { + vgic_update_state(kvm); + return true; + } + + return false; +} + static u32 vgic_cfg_expand(u16 val) { u32 res = 0; @@ -589,16 +648,12 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, } /** - * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs * - * Move any pending IRQs that have already been assigned to LRs back to the + * Move any IRQs that have already been assigned to LRs back to the * emulated distributor state so that the complete emulated state can be read * from the main emulation structures without investigating the LRs. - * - * Note that IRQs in the active state in the LRs get their pending state moved - * to the distributor but the active state stays in the LRs, because we don't - * track the active state on the distributor side. */ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { @@ -614,12 +669,22 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * 01: pending * 10: active * 11: pending and active - * - * If the LR holds only an active interrupt (not pending) then - * just leave it alone. */ - if ((lr.state & LR_STATE_MASK) == LR_STATE_ACTIVE) - continue; + BUG_ON(!(lr.state & LR_STATE_MASK)); + + /* Reestablish SGI source for pending and active IRQs */ + if (lr.irq < VGIC_NR_SGIS) + add_sgi_source(vcpu, lr.irq, lr.source); + + /* + * If the LR holds an active (10) or a pending and active (11) + * interrupt then move the active state to the + * distributor tracking bit. + */ + if (lr.state & LR_STATE_ACTIVE) { + vgic_irq_set_active(vcpu, lr.irq); + lr.state &= ~LR_STATE_ACTIVE; + } /* * Reestablish the pending state on the distributor and the @@ -627,21 +692,19 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * is fine, then we are only setting a few bits that were * already set. */ - vgic_dist_irq_set_pending(vcpu, lr.irq); - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - lr.state &= ~LR_STATE_PENDING; + if (lr.state & LR_STATE_PENDING) { + vgic_dist_irq_set_pending(vcpu, lr.irq); + lr.state &= ~LR_STATE_PENDING; + } + vgic_set_lr(vcpu, i, lr); /* - * If there's no state left on the LR (it could still be - * active), then the LR does not hold any useful info and can - * be marked as free for other use. + * Mark the LR as free for other use. */ - if (!(lr.state & LR_STATE_MASK)) { - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); - } + BUG_ON(lr.state & LR_STATE_MASK); + vgic_retire_lr(i, lr.irq, vcpu); + vgic_irq_clear_queued(vcpu, lr.irq); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -805,6 +868,36 @@ static int vgic_nr_shared_irqs(struct vgic_dist *dist) return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; } +static int compute_active_for_cpu(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *active, *enabled, *act_percpu, *act_shared; + unsigned long active_private, active_shared; + int nr_shared = vgic_nr_shared_irqs(dist); + int vcpu_id; + + vcpu_id = vcpu->vcpu_id; + act_percpu = vcpu->arch.vgic_cpu.active_percpu; + act_shared = vcpu->arch.vgic_cpu.active_shared; + + active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id); + enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); + bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS); + + active = vgic_bitmap_get_shared_map(&dist->irq_active); + enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); + bitmap_and(act_shared, active, enabled, nr_shared); + bitmap_and(act_shared, act_shared, + vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), + nr_shared); + + active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS); + active_shared = find_first_bit(act_shared, nr_shared); + + return (active_private < VGIC_NR_PRIVATE_IRQS || + active_shared < nr_shared); +} + static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; @@ -836,7 +929,7 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) /* * Update the interrupt state and determine which CPUs have pending - * interrupts. Must be called with distributor lock held. + * or active interrupts. Must be called with distributor lock held. */ void vgic_update_state(struct kvm *kvm) { @@ -850,10 +943,13 @@ void vgic_update_state(struct kvm *kvm) } kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) { - pr_debug("CPU%d has pending interrupts\n", c); + if (compute_pending_for_cpu(vcpu)) set_bit(c, dist->irq_pending_on_cpu); - } + + if (compute_active_for_cpu(vcpu)) + set_bit(c, dist->irq_active_on_cpu); + else + clear_bit(c, dist->irq_active_on_cpu); } } @@ -953,7 +1049,12 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, int lr_nr, struct vgic_lr vlr) { - if (vgic_dist_irq_is_pending(vcpu, irq)) { + if (vgic_irq_is_active(vcpu, irq)) { + vlr.state |= LR_STATE_ACTIVE; + kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); + vgic_irq_clear_active(vcpu, irq); + vgic_update_state(vcpu->kvm); + } else if (vgic_dist_irq_is_pending(vcpu, irq)) { vlr.state |= LR_STATE_PENDING; kvm_debug("Set pending: 0x%x\n", vlr.state); } @@ -1041,39 +1142,49 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + unsigned long *pa_percpu, *pa_shared; int i, vcpu_id; int overflow = 0; + int nr_shared = vgic_nr_shared_irqs(dist); vcpu_id = vcpu->vcpu_id; + pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu; + pa_shared = vcpu->arch.vgic_cpu.pend_act_shared; + + bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu, + VGIC_NR_PRIVATE_IRQS); + bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared, + nr_shared); /* * We may not have any pending interrupt, or the interrupts * may have been serviced from another vcpu. In all cases, * move along. */ - if (!kvm_vgic_vcpu_pending_irq(vcpu)) { - pr_debug("CPU%d has no pending interrupt\n", vcpu_id); + if (!kvm_vgic_vcpu_pending_irq(vcpu) && !kvm_vgic_vcpu_active_irq(vcpu)) goto epilog; - } /* SGIs */ - for_each_set_bit(i, vgic_cpu->pending_percpu, VGIC_NR_SGIS) { + for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) { if (!queue_sgi(vcpu, i)) overflow = 1; } /* PPIs */ - for_each_set_bit_from(i, vgic_cpu->pending_percpu, VGIC_NR_PRIVATE_IRQS) { + for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) { if (!vgic_queue_hwirq(vcpu, i)) overflow = 1; } /* SPIs */ - for_each_set_bit(i, vgic_cpu->pending_shared, vgic_nr_shared_irqs(dist)) { + for_each_set_bit(i, pa_shared, nr_shared) { if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) overflow = 1; } + + + epilog: if (overflow) { vgic_enable_underflow(vcpu); @@ -1229,6 +1340,17 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); } +int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); +} + + void vgic_kick_vcpus(struct kvm *kvm) { struct kvm_vcpu *vcpu; @@ -1401,8 +1523,12 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; kfree(vgic_cpu->pending_shared); + kfree(vgic_cpu->active_shared); + kfree(vgic_cpu->pend_act_shared); kfree(vgic_cpu->vgic_irq_lr_map); vgic_cpu->pending_shared = NULL; + vgic_cpu->active_shared = NULL; + vgic_cpu->pend_act_shared = NULL; vgic_cpu->vgic_irq_lr_map = NULL; } @@ -1412,9 +1538,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8; vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); + vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); - if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) { + if (!vgic_cpu->pending_shared + || !vgic_cpu->active_shared + || !vgic_cpu->pend_act_shared + || !vgic_cpu->vgic_irq_lr_map) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } @@ -1467,10 +1598,12 @@ void kvm_vgic_destroy(struct kvm *kvm) kfree(dist->irq_spi_mpidr); kfree(dist->irq_spi_target); kfree(dist->irq_pending_on_cpu); + kfree(dist->irq_active_on_cpu); dist->irq_sgi_sources = NULL; dist->irq_spi_cpu = NULL; dist->irq_spi_target = NULL; dist->irq_pending_on_cpu = NULL; + dist->irq_active_on_cpu = NULL; dist->nr_cpus = 0; } @@ -1506,6 +1639,7 @@ int vgic_init(struct kvm *kvm) ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); + ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs); ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); @@ -1518,10 +1652,13 @@ int vgic_init(struct kvm *kvm) GFP_KERNEL); dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), GFP_KERNEL); + dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), + GFP_KERNEL); if (!dist->irq_sgi_sources || !dist->irq_spi_cpu || !dist->irq_spi_target || - !dist->irq_pending_on_cpu) { + !dist->irq_pending_on_cpu || + !dist->irq_active_on_cpu) { ret = -ENOMEM; goto out; } diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 1e83bdf..1e5a381 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -107,6 +107,14 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id); +bool vgic_handle_set_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + +bool vgic_handle_clear_active_reg(struct kvm *kvm, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, int vcpu_id); + bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, phys_addr_t offset); -- cgit v0.10.2 From 1a74847885cc87857d631f91cca4d83924f75674 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 13 Mar 2015 17:02:55 +0000 Subject: arm/arm64: KVM: Fix migration race in the arch timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a VCPU is no longer running, we currently check to see if it has a timer scheduled in the future, and if it does, we schedule a host hrtimer to notify is in case the timer expires while the VCPU is still not running. When the hrtimer fires, we mask the guest's timer and inject the timer IRQ (still relying on the guest unmasking the time when it receives the IRQ). This is all good and fine, but when migration a VM (checkpoint/restore) this introduces a race. It is unlikely, but possible, for the following sequence of events to happen: 1. Userspace stops the VM 2. Hrtimer for VCPU is scheduled 3. Userspace checkpoints the VGIC state (no pending timer interrupts) 4. The hrtimer fires, schedules work in a workqueue 5. Workqueue function runs, masks the timer and injects timer interrupt 6. Userspace checkpoints the timer state (timer masked) At restore time, you end up with a masked timer without any timer interrupts and your guest halts never receiving timer interrupts. Fix this by only kicking the VCPU in the workqueue function, and sample the expired state of the timer when entering the guest again and inject the interrupt and mask the timer only then. Signed-off-by: Christoffer Dall Signed-off-by: Alex Bennée Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9a5f057..e98370c 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -266,7 +266,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return 0; + return kvm_timer_should_fire(vcpu); } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index a74e4c2..e596675 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -67,4 +67,6 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); + #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 6e54f35..98c95f2 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -85,13 +85,22 @@ static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) return IRQ_HANDLED; } +/* + * Work function for handling the backup timer that we schedule when a vcpu is + * no longer running, but had a timer programmed to fire in the future. + */ static void kvm_timer_inject_irq_work(struct work_struct *work) { struct kvm_vcpu *vcpu; vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired); vcpu->arch.timer_cpu.armed = false; - kvm_timer_inject_irq(vcpu); + + /* + * If the vcpu is blocked we want to wake it up so that it will see + * the timer has expired when entering the guest. + */ + kvm_vcpu_kick(vcpu); } static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) @@ -102,6 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + cycle_t cval, now; + + if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || + !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) + return false; + + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + return cval <= now; +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer @@ -119,6 +143,13 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * populate the CPU timer again. */ timer_disarm(timer); + + /* + * If the timer expired while we were not scheduled, now is the time + * to inject it. + */ + if (kvm_timer_should_fire(vcpu)) + kvm_timer_inject_irq(vcpu); } /** @@ -134,16 +165,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) cycle_t cval, now; u64 ns; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE)) - return; - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - BUG_ON(timer_is_armed(timer)); - if (cval <= now) { + if (kvm_timer_should_fire(vcpu)) { /* * Timer has already expired while we were not * looking. Inject the interrupt and carry on. @@ -152,6 +176,9 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) return; } + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, &timecounter->frac); timer_arm(timer, ns); -- cgit v0.10.2 From 1e8d242478a471b92b07e7966faa9f618df98e61 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 9 Mar 2015 21:27:12 +0100 Subject: KVM: s390: Spelling s/intance/instance/ Signed-off-by: Geert Uytterhoeven Message-Id: <1425932832-6244-1-git-send-email-geert+renesas@glider.be> Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index fda3f31..83f32a1 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -125,7 +125,7 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) vcpu->arch.sie_block->gpsw.mask |= cc << 44; } -/* test availability of facility in a kvm intance */ +/* test availability of facility in a kvm instance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { return __test_facility(nr, kvm->arch.model.fac->mask) && -- cgit v0.10.2 From 40f5b735e867b8fd3e6090f5a184950c68d227bb Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 12 Mar 2015 13:55:53 +0100 Subject: KVM: s390: cleanup jump lables in kvm_arch_init_vm As all cleanup functions can handle their respective NULL case there is no need to have more than one error jump label. Signed-off-by: Dominik Dingel Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 02e03c8..4075acb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -897,7 +897,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); if (!kvm->arch.dbf) - goto out_nodbf; + goto out_err; /* * The architectural maximum amount of facilities is 16 kbit. To store @@ -909,7 +909,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.model.fac = (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!kvm->arch.model.fac) - goto out_nofac; + goto out_err; /* Populate the facility mask initially. */ memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, @@ -929,7 +929,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff; if (kvm_s390_crypto_init(kvm) < 0) - goto out_crypto; + goto out_err; spin_lock_init(&kvm->arch.float_int.lock); INIT_LIST_HEAD(&kvm->arch.float_int.list); @@ -944,7 +944,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) } else { kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); if (!kvm->arch.gmap) - goto out_nogmap; + goto out_err; kvm->arch.gmap->private = kvm; kvm->arch.gmap->pfault_enabled = 0; } @@ -957,15 +957,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) spin_lock_init(&kvm->arch.start_stop_lock); return 0; -out_nogmap: +out_err: kfree(kvm->arch.crypto.crycb); -out_crypto: free_page((unsigned long)kvm->arch.model.fac); -out_nofac: debug_unregister(kvm->arch.dbf); -out_nodbf: free_page((unsigned long)(kvm->arch.sca)); -out_err: return rc; } -- cgit v0.10.2 From dd9e5b7bdba3250c075a212ff632d31edfa91ae7 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Tue, 3 Mar 2015 14:26:14 +0300 Subject: KVM: s390: Fix low-address protection for real addresses The kvm_s390_check_low_addr_protection() function is used only with real addresses. According to the POP (the "Low-Address Protection" paragraph in chapter 3), if the effective address is real or absolute, the low-address protection procedure should raise a PROTECTION exception only when the low-address protection is enabled in the control register 0 and the address is low. This patch removes ASCE checks from the function and renames it to better reflect its behavior. Cc: Thomas Huth Signed-off-by: Alexander Yarygin Reviewed-by: Thomas Huth Reviewed-by: David Hildenbrand Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 633fe9b..c230904 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -697,28 +697,29 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, } /** - * kvm_s390_check_low_addr_protection - check for low-address protection - * @ga: Guest address + * kvm_s390_check_low_addr_prot_real - check for low-address protection + * @gra: Guest real address * * Checks whether an address is subject to low-address protection and set * up vcpu->arch.pgm accordingly if necessary. * * Return: 0 if no protection exception, or PGM_PROTECTION if protected. */ -int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga) +int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; struct trans_exc_code_bits *tec_bits; + union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; - if (!is_low_address(ga) || !low_address_protection_enabled(vcpu)) + if (!ctlreg0.lap || !is_low_address(gra)) return 0; memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; tec_bits->fsi = FSI_STORE; tec_bits->as = psw_bits(*psw).as; - tec_bits->addr = ga >> PAGE_SHIFT; + tec_bits->addr = gra >> PAGE_SHIFT; pgm->code = PGM_PROTECTION; return pgm->code; diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 0149cf1..20de77e 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -330,6 +330,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, void ipte_lock(struct kvm_vcpu *vcpu); void ipte_unlock(struct kvm_vcpu *vcpu); int ipte_lock_held(struct kvm_vcpu *vcpu); -int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga); +int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra); #endif /* __KVM_S390_GACCESS_H */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index b982fbc..5f26425 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -207,7 +207,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu) kvm_s390_get_regs_rre(vcpu, NULL, ®2); addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK; addr = kvm_s390_logical_to_effective(vcpu, addr); - if (kvm_s390_check_low_addr_protection(vcpu, addr)) + if (kvm_s390_check_low_addr_prot_real(vcpu, addr)) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); addr = kvm_s390_real_to_abs(vcpu, addr); @@ -680,7 +680,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) { - if (kvm_s390_check_low_addr_protection(vcpu, start)) + if (kvm_s390_check_low_addr_prot_real(vcpu, start)) return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); } -- cgit v0.10.2 From 8ae04b8f500b9f46652c63431bf658223d875597 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Mon, 19 Jan 2015 13:24:51 +0300 Subject: KVM: s390: Guest's memory access functions get access registers In access register mode, the write_guest() read_guest() and other functions will invoke the access register translation, which requires an ar, designated by one of the instruction fields. Signed-off-by: Alexander Yarygin Reviewed-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9254aff..89140dd 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu) if (vcpu->run->s.regs.gprs[rx] & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm)); + rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258) @@ -230,7 +230,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { - int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; + int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index c230904..494131e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -578,7 +578,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, return 0; } -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len, int write) { psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -652,7 +652,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * Note: The IPTE lock is not taken during this function, so the caller * has to take care of this. */ -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, int write) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 20de77e..7c2866b 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -156,9 +156,9 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, } int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - unsigned long *gpa, int write); + ar_t ar, unsigned long *gpa, int write); -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len, int write); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, @@ -168,6 +168,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * write_guest - copy data from kernel space to guest space * @vcpu: virtual cpu * @ga: guest address + * @ar: access register * @data: source address in kernel space * @len: number of bytes to copy * @@ -210,16 +211,17 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * if data has been changed in guest space in case of an exception. */ static inline __must_check -int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, data, len, 1); + return access_guest(vcpu, ga, ar, data, len, 1); } /** * read_guest - copy data from guest space to kernel space * @vcpu: virtual cpu * @ga: guest address + * @ar: access register * @data: destination address in kernel space * @len: number of bytes to copy * @@ -229,10 +231,10 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, * data will be copied from guest space to kernel space. */ static inline __must_check -int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data, +int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len) { - return access_guest(vcpu, ga, data, len, 0); + return access_guest(vcpu, ga, ar, data, len, 0); } /** diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 08ae10a..9e3779e 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -320,7 +320,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the source is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2], - &srcaddr, 0); + reg2, &srcaddr, 0); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0); @@ -329,7 +329,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu) /* Make sure that the destination is paged-in */ rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1], - &dstaddr, 1); + reg1, &dstaddr, 1); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4075acb..610e90a 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1776,7 +1776,7 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) * to look up the current opcode to get the length of the instruction * to be able to forward the PSW. */ - rc = read_guest(vcpu, psw->addr, &opcode, 1); + rc = read_guest(vcpu, psw->addr, 0, &opcode, 1); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); psw->addr = __rewind_psw(*psw, -insn_length(opcode)); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 83f32a1..5d54191 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -70,16 +70,22 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } -static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu) +typedef u8 __bitwise ar_t; + +static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + if (ar) + *ar = base2; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, - u64 *address1, u64 *address2) + u64 *address1, u64 *address2, + ar_t *ar_b1, ar_t *ar_b2) { u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; @@ -88,6 +94,11 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, *address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1; *address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; + + if (ar_b1) + *ar_b1 = base1; + if (ar_b2) + *ar_b2 = base2; } static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2) @@ -98,7 +109,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2 *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; } -static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) +static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + @@ -107,14 +118,20 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu) if (disp2 & 0x80000) disp2+=0xfff00000; + if (ar) + *ar = base2; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; } -static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu) +static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + if (ar) + *ar = base2; + return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2; } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 5f26425..f4fe02e 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -36,15 +36,16 @@ static int handle_set_clock(struct kvm_vcpu *vcpu) struct kvm_vcpu *cpup; s64 hostclk, val; int i, rc; + ar_t ar; u64 op2; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - op2 = kvm_s390_get_base_disp_s(vcpu); + op2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (op2 & 7) /* Operand must be on a doubleword boundary */ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, op2, &val, sizeof(val)); + rc = read_guest(vcpu, op2, ar, &val, sizeof(val)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -68,20 +69,21 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) u64 operand2; u32 address; int rc; + ar_t ar; vcpu->stat.instruction_spx++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); /* must be word boundary */ if (operand2 & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); /* get the value */ - rc = read_guest(vcpu, operand2, &address, sizeof(address)); + rc = read_guest(vcpu, operand2, ar, &address, sizeof(address)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -107,13 +109,14 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) u64 operand2; u32 address; int rc; + ar_t ar; vcpu->stat.instruction_stpx++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); /* must be word boundary */ if (operand2 & 3) @@ -122,7 +125,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) address = kvm_s390_get_prefix(vcpu); /* get the value */ - rc = write_guest(vcpu, operand2, &address, sizeof(address)); + rc = write_guest(vcpu, operand2, ar, &address, sizeof(address)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -136,18 +139,19 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) u16 vcpu_id = vcpu->vcpu_id; u64 ga; int rc; + ar_t ar; vcpu->stat.instruction_stap++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_s(vcpu); + ga = kvm_s390_get_base_disp_s(vcpu, &ar); if (ga & 1) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id)); + rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -231,8 +235,9 @@ static int handle_tpi(struct kvm_vcpu *vcpu) u32 tpi_data[3]; int rc; u64 addr; + ar_t ar; - addr = kvm_s390_get_base_disp_s(vcpu); + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -251,7 +256,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) * provided area. */ len = sizeof(tpi_data) - 4; - rc = write_guest(vcpu, addr, &tpi_data, len); + rc = write_guest(vcpu, addr, ar, &tpi_data, len); if (rc) { rc = kvm_s390_inject_prog_cond(vcpu, rc); goto reinject_interrupt; @@ -395,15 +400,16 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) psw_compat_t new_psw; u64 addr; int rc; + ar_t ar; if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - addr = kvm_s390_get_base_disp_s(vcpu); + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); if (!(new_psw.mask & PSW32_MASK_BASE)) @@ -421,14 +427,15 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; int rc; + ar_t ar; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - addr = kvm_s390_get_base_disp_s(vcpu); + addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw)); + rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); vcpu->arch.sie_block->gpsw = new_psw; @@ -442,18 +449,19 @@ static int handle_stidp(struct kvm_vcpu *vcpu) u64 stidp_data = vcpu->arch.stidp_data; u64 operand2; int rc; + ar_t ar; vcpu->stat.instruction_stidp++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (operand2 & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); - rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data)); + rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); @@ -496,6 +504,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) unsigned long mem = 0; u64 operand2; int rc = 0; + ar_t ar; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); @@ -518,7 +527,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return 0; } - operand2 = kvm_s390_get_base_disp_s(vcpu); + operand2 = kvm_s390_get_base_disp_s(vcpu, &ar); if (operand2 & 0xfff) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -542,7 +551,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) break; } - rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE); + rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); if (rc) { rc = kvm_s390_inject_prog_cond(vcpu, rc); goto out; @@ -786,13 +795,14 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u32 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_lctl++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rs(vcpu); + ga = kvm_s390_get_base_disp_rs(vcpu, &ar); if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -801,7 +811,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; - rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; @@ -824,13 +834,14 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u32 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_stctl++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rs(vcpu); + ga = kvm_s390_get_base_disp_rs(vcpu, &ar); if (ga & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -846,7 +857,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) break; reg = (reg + 1) % 16; } while (1); - rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32)); + rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32)); return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } @@ -857,13 +868,14 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u64 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_lctlg++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rsy(vcpu); + ga = kvm_s390_get_base_disp_rsy(vcpu, &ar); if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -872,7 +884,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga); nr_regs = ((reg3 - reg1) & 0xf) + 1; - rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64)); if (rc) return kvm_s390_inject_prog_cond(vcpu, rc); reg = reg1; @@ -894,13 +906,14 @@ static int handle_stctg(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u64 ctl_array[16]; u64 ga; + ar_t ar; vcpu->stat.instruction_stctg++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - ga = kvm_s390_get_base_disp_rsy(vcpu); + ga = kvm_s390_get_base_disp_rsy(vcpu, &ar); if (ga & 7) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); @@ -916,7 +929,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu) break; reg = (reg + 1) % 16; } while (1); - rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64)); + rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64)); return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0; } @@ -941,13 +954,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu) unsigned long hva, gpa; int ret = 0, cc = 0; bool writable; + ar_t ar; vcpu->stat.instruction_tprot++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - kvm_s390_get_base_disp_sse(vcpu, &address1, &address2); + kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL); /* we only handle the Linux memory detection case: * access key == 0 @@ -956,11 +970,11 @@ static int handle_tprot(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT) ipte_lock(vcpu); - ret = guest_translate_address(vcpu, address1, &gpa, 1); + ret = guest_translate_address(vcpu, address1, ar, &gpa, 1); if (ret == PGM_PROTECTION) { /* Write protected? Try again with read-only... */ cc = 1; - ret = guest_translate_address(vcpu, address1, &gpa, 0); + ret = guest_translate_address(vcpu, address1, ar, &gpa, 0); } if (ret) { if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) { diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 755a733..72e58bd 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -434,7 +434,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); - order_code = kvm_s390_get_base_disp_rs(vcpu); + order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); if (handle_sigp_order_in_user_space(vcpu, order_code)) return -EOPNOTSUPP; @@ -476,7 +476,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) int r3 = vcpu->arch.sie_block->ipa & 0x000f; u16 cpu_addr = vcpu->run->s.regs.gprs[r3]; struct kvm_vcpu *dest_vcpu; - u8 order_code = kvm_s390_get_base_disp_rs(vcpu); + u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); -- cgit v0.10.2 From 75a1812230ad7ad16e5a06b5ef2220f765b12da5 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Thu, 22 Jan 2015 12:44:11 +0300 Subject: KVM: s390: Optimize paths where get_vcpu_asce() is invoked During dynamic address translation the get_vcpu_asce() function can be invoked several times. It's ok for usual modes, but will be slow if CPUs are in AR mode. Let's call the get_vcpu_asce() once and pass the result to the called functions. Signed-off-by: Alexander Yarygin Reviewed-by: Thomas Huth Reviewed-by: David Hildenbrand Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 494131e..c74462a 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -330,6 +330,7 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * @vcpu: virtual cpu * @gva: guest virtual address * @gpa: points to where guest physical (absolute) address should be stored + * @asce: effective asce * @write: indicates if access is a write access * * Translate a guest virtual address into a guest absolute address by means @@ -345,7 +346,8 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val) * by the architecture */ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, - unsigned long *gpa, int write) + unsigned long *gpa, const union asce asce, + int write) { union vaddress vaddr = {.addr = gva}; union raddress raddr = {.addr = gva}; @@ -354,12 +356,10 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva, union ctlreg0 ctlreg0; unsigned long ptr; int edat1, edat2; - union asce asce; ctlreg0.val = vcpu->arch.sie_block->gcr[0]; edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8); edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78); - asce.val = get_vcpu_asce(vcpu); if (asce.r) goto real_address; ptr = asce.origin * 4096; @@ -506,15 +506,14 @@ static inline int is_low_address(unsigned long ga) return (ga & ~0x11fful) == 0; } -static int low_address_protection_enabled(struct kvm_vcpu *vcpu) +static int low_address_protection_enabled(struct kvm_vcpu *vcpu, + const union asce asce) { union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]}; psw_t *psw = &vcpu->arch.sie_block->gpsw; - union asce asce; if (!ctlreg0.lap) return 0; - asce.val = get_vcpu_asce(vcpu); if (psw_bits(*psw).t && asce.p) return 0; return 1; @@ -536,7 +535,7 @@ enum { static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, - int write) + const union asce asce, int write) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -547,7 +546,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; tec_bits->as = psw_bits(*psw).as; - lap_enabled = low_address_protection_enabled(vcpu); + lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); tec_bits->addr = ga >> PAGE_SHIFT; @@ -557,7 +556,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, } ga &= PAGE_MASK; if (psw_bits(*psw).t) { - rc = guest_translate(vcpu, ga, pages, write); + rc = guest_translate(vcpu, ga, pages, asce, write); if (rc < 0) return rc; if (rc == PGM_PROTECTION) @@ -604,7 +603,7 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); - rc = guest_page_range(vcpu, ga, pages, nr_pages, write); + rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write); for (idx = 0; idx < nr_pages && !rc; idx++) { gpa = *(pages + idx) + (ga & ~PAGE_MASK); _len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len); @@ -671,16 +670,16 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, tec->as = psw_bits(*psw).as; tec->fsi = write ? FSI_STORE : FSI_FETCH; tec->addr = gva >> PAGE_SHIFT; - if (is_low_address(gva) && low_address_protection_enabled(vcpu)) { + asce.val = get_vcpu_asce(vcpu); + if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { if (write) { rc = pgm->code = PGM_PROTECTION; return rc; } } - asce.val = get_vcpu_asce(vcpu); if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */ - rc = guest_translate(vcpu, gva, gpa, write); + rc = guest_translate(vcpu, gva, gpa, asce, write); if (rc > 0) { if (rc == PGM_PROTECTION) tec->b61 = 1; -- cgit v0.10.2 From 664b4973537068402954bee6e2959b858f263a6f Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Mon, 9 Mar 2015 14:17:25 +0300 Subject: KVM: s390: Add access register mode Access register mode is one of the modes that control dynamic address translation. In this mode the address space is specified by values of the access registers. The effective address-space-control element is obtained from the result of the access register translation. See the "Access-Register Introduction" section of the chapter 5 "Program Execution" in "Principles of Operations" for more details. Signed-off-by: Alexander Yarygin Reviewed-by: Thomas Huth Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index c74462a..ea38d71 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -10,6 +10,7 @@ #include #include "kvm-s390.h" #include "gaccess.h" +#include union asce { unsigned long val; @@ -207,6 +208,54 @@ union raddress { unsigned long pfra : 52; /* Page-Frame Real Address */ }; +union alet { + u32 val; + struct { + u32 reserved : 7; + u32 p : 1; + u32 alesn : 8; + u32 alen : 16; + }; +}; + +union ald { + u32 val; + struct { + u32 : 1; + u32 alo : 24; + u32 all : 7; + }; +}; + +struct ale { + unsigned long i : 1; /* ALEN-Invalid Bit */ + unsigned long : 5; + unsigned long fo : 1; /* Fetch-Only Bit */ + unsigned long p : 1; /* Private Bit */ + unsigned long alesn : 8; /* Access-List-Entry Sequence Number */ + unsigned long aleax : 16; /* Access-List-Entry Authorization Index */ + unsigned long : 32; + unsigned long : 1; + unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */ + unsigned long : 6; + unsigned long astesn : 32; /* ASTE Sequence Number */ +} __packed; + +struct aste { + unsigned long i : 1; /* ASX-Invalid Bit */ + unsigned long ato : 29; /* Authority-Table Origin */ + unsigned long : 1; + unsigned long b : 1; /* Base-Space Bit */ + unsigned long ax : 16; /* Authorization Index */ + unsigned long atl : 12; /* Authority-Table Length */ + unsigned long : 2; + unsigned long ca : 1; /* Controlled-ASN Bit */ + unsigned long ra : 1; /* Reusable-ASN Bit */ + unsigned long asce : 64; /* Address-Space-Control Element */ + unsigned long ald : 32; + unsigned long astesn : 32; + /* .. more fields there */ +} __packed; int ipte_lock_held(struct kvm_vcpu *vcpu) { @@ -307,15 +356,157 @@ void ipte_unlock(struct kvm_vcpu *vcpu) ipte_unlock_simple(vcpu); } -static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu) +static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, + int write) +{ + union alet alet; + struct ale ale; + struct aste aste; + unsigned long ald_addr, authority_table_addr; + union ald ald; + int eax, rc; + u8 authority_table; + + if (ar >= NUM_ACRS) + return -EINVAL; + + save_access_regs(vcpu->run->s.regs.acrs); + alet.val = vcpu->run->s.regs.acrs[ar]; + + if (ar == 0 || alet.val == 0) { + asce->val = vcpu->arch.sie_block->gcr[1]; + return 0; + } else if (alet.val == 1) { + asce->val = vcpu->arch.sie_block->gcr[7]; + return 0; + } + + if (alet.reserved) + return PGM_ALET_SPECIFICATION; + + if (alet.p) + ald_addr = vcpu->arch.sie_block->gcr[5]; + else + ald_addr = vcpu->arch.sie_block->gcr[2]; + ald_addr &= 0x7fffffc0; + + rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald)); + if (rc) + return rc; + + if (alet.alen / 8 > ald.all) + return PGM_ALEN_TRANSLATION; + + if (0x7fffffff - ald.alo * 128 < alet.alen * 16) + return PGM_ADDRESSING; + + rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale, + sizeof(struct ale)); + if (rc) + return rc; + + if (ale.i == 1) + return PGM_ALEN_TRANSLATION; + if (ale.alesn != alet.alesn) + return PGM_ALE_SEQUENCE; + + rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste)); + if (rc) + return rc; + + if (aste.i) + return PGM_ASTE_VALIDITY; + if (aste.astesn != ale.astesn) + return PGM_ASTE_SEQUENCE; + + if (ale.p == 1) { + eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff; + if (ale.aleax != eax) { + if (eax / 16 > aste.atl) + return PGM_EXTENDED_AUTHORITY; + + authority_table_addr = aste.ato * 4 + eax / 4; + + rc = read_guest_real(vcpu, authority_table_addr, + &authority_table, + sizeof(u8)); + if (rc) + return rc; + + if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0) + return PGM_EXTENDED_AUTHORITY; + } + } + + if (ale.fo == 1 && write) + return PGM_PROTECTION; + + asce->val = aste.asce; + return 0; +} + +struct trans_exc_code_bits { + unsigned long addr : 52; /* Translation-exception Address */ + unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ + unsigned long : 6; + unsigned long b60 : 1; + unsigned long b61 : 1; + unsigned long as : 2; /* ASCE Identifier */ +}; + +enum { + FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ + FSI_STORE = 1, /* Exception was due to store operation */ + FSI_FETCH = 2 /* Exception was due to fetch operation */ +}; + +static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, + ar_t ar, int write) { + int rc; + psw_t *psw = &vcpu->arch.sie_block->gpsw; + struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; + struct trans_exc_code_bits *tec_bits; + + memset(pgm, 0, sizeof(*pgm)); + tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; + tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; + tec_bits->as = psw_bits(*psw).as; + + if (!psw_bits(*psw).t) { + asce->val = 0; + asce->r = 1; + return 0; + } + switch (psw_bits(vcpu->arch.sie_block->gpsw).as) { case PSW_AS_PRIMARY: - return vcpu->arch.sie_block->gcr[1]; + asce->val = vcpu->arch.sie_block->gcr[1]; + return 0; case PSW_AS_SECONDARY: - return vcpu->arch.sie_block->gcr[7]; + asce->val = vcpu->arch.sie_block->gcr[7]; + return 0; case PSW_AS_HOME: - return vcpu->arch.sie_block->gcr[13]; + asce->val = vcpu->arch.sie_block->gcr[13]; + return 0; + case PSW_AS_ACCREG: + rc = ar_translation(vcpu, asce, ar, write); + switch (rc) { + case PGM_ALEN_TRANSLATION: + case PGM_ALE_SEQUENCE: + case PGM_ASTE_VALIDITY: + case PGM_ASTE_SEQUENCE: + case PGM_EXTENDED_AUTHORITY: + vcpu->arch.pgm.exc_access_id = ar; + break; + case PGM_PROTECTION: + tec_bits->b60 = 1; + tec_bits->b61 = 1; + break; + } + if (rc > 0) + pgm->code = rc; + return rc; } return 0; } @@ -519,20 +710,6 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -struct trans_exc_code_bits { - unsigned long addr : 52; /* Translation-exception Address */ - unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ - unsigned long : 7; - unsigned long b61 : 1; - unsigned long as : 2; /* ASCE Identifier */ -}; - -enum { - FSI_UNKNOWN = 0, /* Unknown wether fetch or store */ - FSI_STORE = 1, /* Exception was due to store operation */ - FSI_FETCH = 2 /* Exception was due to fetch operation */ -}; - static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, unsigned long *pages, unsigned long nr_pages, const union asce asce, int write) @@ -542,10 +719,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, struct trans_exc_code_bits *tec_bits; int lap_enabled, rc; - memset(pgm, 0, sizeof(*pgm)); tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec_bits->fsi = write ? FSI_STORE : FSI_FETCH; - tec_bits->as = psw_bits(*psw).as; lap_enabled = low_address_protection_enabled(vcpu, asce); while (nr_pages) { ga = kvm_s390_logical_to_effective(vcpu, ga); @@ -590,16 +764,15 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, if (!len) return 0; - /* Access register mode is not supported yet. */ - if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) - return -EOPNOTSUPP; + rc = get_vcpu_asce(vcpu, &asce, ar, write); + if (rc) + return rc; nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1; pages = pages_array; if (nr_pages > ARRAY_SIZE(pages_array)) pages = vmalloc(nr_pages * sizeof(unsigned long)); if (!pages) return -ENOMEM; - asce.val = get_vcpu_asce(vcpu); need_ipte_lock = psw_bits(*psw).t && !asce.r; if (need_ipte_lock) ipte_lock(vcpu); @@ -660,17 +833,12 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, union asce asce; int rc; - /* Access register mode is not supported yet. */ - if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG) - return -EOPNOTSUPP; - gva = kvm_s390_logical_to_effective(vcpu, gva); - memset(pgm, 0, sizeof(*pgm)); tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code; - tec->as = psw_bits(*psw).as; - tec->fsi = write ? FSI_STORE : FSI_FETCH; + rc = get_vcpu_asce(vcpu, &asce, ar, write); tec->addr = gva >> PAGE_SHIFT; - asce.val = get_vcpu_asce(vcpu); + if (rc) + return rc; if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) { if (write) { rc = pgm->code = PGM_PROTECTION; diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 7c2866b..835e557 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -177,8 +177,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * If DAT is off data will be copied to guest real or absolute memory. * If DAT is on data will be copied to the address space as specified by * the address space bits of the PSW: - * Primary, secondory or home space (access register mode is currently not - * implemented). + * Primary, secondary, home space or access register mode. * The addressing mode of the PSW is also inspected, so that address wrap * around is taken into account for 24-, 31- and 64-bit addressing mode, * if the to be copied data crosses page boundaries in guest address space. -- cgit v0.10.2 From 41408c28f283b49202ae374b1c42bc8e9b33a048 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 6 Feb 2015 15:01:21 +0100 Subject: KVM: s390: Add MEMOP ioctls for reading/writing guest memory On s390, we've got to make sure to hold the IPTE lock while accessing logical memory. So let's add an ioctl for reading and writing logical memory to provide this feature for userspace, too. The maximum transfer size of this call is limited to 64kB to prevent that the guest can trigger huge copy_from/to_user transfers. QEMU currently only requests up to one or two pages so far, so 16*4kB seems to be a reasonable limit here. Signed-off-by: Thomas Huth Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index ee47998e..281179d 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2716,6 +2716,52 @@ The fields in each entry are defined as follows: eax, ebx, ecx, edx: the values returned by the cpuid instruction for this function/index combination +4.89 KVM_S390_MEM_OP + +Capability: KVM_CAP_S390_MEM_OP +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_mem_op (in) +Returns: = 0 on success, + < 0 on generic error (e.g. -EFAULT or -ENOMEM), + > 0 if an exception occurred while walking the page tables + +Read or write data from/to the logical (virtual) memory of a VPCU. + +Parameters are specified via the following structure: + +struct kvm_s390_mem_op { + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + __u8 ar; /* the access register number */ + __u8 reserved[31]; /* should be set to 0 */ +}; + +The type of operation is specified in the "op" field. It is either +KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or +KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The +KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check +whether the corresponding memory access would create an access exception +(without touching the data in the memory at the destination). In case an +access exception occurred while walking the MMU tables of the guest, the +ioctl returns a positive error number to indicate the type of exception. +This exception is also raised directly at the corresponding VCPU if the +flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field. + +The start address of the memory region has to be specified in the "gaddr" +field, and the length of the region in the "size" field. "buf" is the buffer +supplied by the userspace application where the read data should be written +to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written +is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL +when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access +register number to be used. + +The "reserved" field is meant for future extensions. It is not used by +KVM with the currently defined set of flags. + 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index ea38d71..a7559f7 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -864,6 +864,28 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, } /** + * check_gva_range - test a range of guest virtual addresses for accessibility + */ +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, + unsigned long length, int is_write) +{ + unsigned long gpa; + unsigned long currlen; + int rc = 0; + + ipte_lock(vcpu); + while (length > 0 && !rc) { + currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE)); + rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write); + gva += currlen; + length -= currlen; + } + ipte_unlock(vcpu); + + return rc; +} + +/** * kvm_s390_check_low_addr_prot_real - check for low-address protection * @gra: Guest real address * diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 835e557..ef03726 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -157,6 +157,8 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data, int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, unsigned long *gpa, int write); +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, + unsigned long length, int is_write); int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, unsigned long len, int write); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 610e90a..b7ecef9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -38,6 +39,8 @@ #include "trace.h" #include "trace-s390.h" +#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ + #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU struct kvm_stats_debugfs_item debugfs_entries[] = { @@ -177,6 +180,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_USER_SIGP: r = 1; break; + case KVM_CAP_S390_MEM_OP: + r = MEM_OP_MAX_SIZE; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; @@ -2185,6 +2191,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return r; } +static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu, + struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + void *tmpbuf = NULL; + int r, srcu_idx; + const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION + | KVM_S390_MEMOP_F_CHECK_ONLY; + + if (mop->flags & ~supported_flags) + return -EINVAL; + + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; + + if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { + tmpbuf = vmalloc(mop->size); + if (!tmpbuf) + return -ENOMEM; + } + + srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); + + switch (mop->op) { + case KVM_S390_MEMOP_LOGICAL_READ: + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false); + break; + } + r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + if (r == 0) { + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } + break; + case KVM_S390_MEMOP_LOGICAL_WRITE: + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true); + break; + } + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + break; + } + r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size); + break; + default: + r = -EINVAL; + } + + srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); + + if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) + kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); + + vfree(tmpbuf); + return r; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2284,6 +2349,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + case KVM_S390_MEM_OP: { + struct kvm_s390_mem_op mem_op; + + if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0) + r = kvm_s390_guest_mem_op(vcpu, &mem_op); + else + r = -EFAULT; + break; + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 82634a4..0e16f2c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -365,6 +365,24 @@ struct kvm_translation { __u8 pad[5]; }; +/* for KVM_S390_MEM_OP */ +struct kvm_s390_mem_op { + /* in */ + __u64 gaddr; /* the guest address */ + __u64 flags; /* flags */ + __u32 size; /* amount of bytes */ + __u32 op; /* type of operation */ + __u64 buf; /* buffer in userspace */ + __u8 ar; /* the access register number */ + __u8 reserved[31]; /* should be set to 0 */ +}; +/* types for kvm_s390_mem_op->op */ +#define KVM_S390_MEMOP_LOGICAL_READ 0 +#define KVM_S390_MEMOP_LOGICAL_WRITE 1 +/* flags for kvm_s390_mem_op->flags */ +#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) +#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) + /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ @@ -761,6 +779,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_CHECK_EXTENSION_VM 105 #define KVM_CAP_S390_USER_SIGP 106 #define KVM_CAP_S390_VECTOR_REGISTERS 107 +#define KVM_CAP_S390_MEM_OP 108 #ifdef KVM_CAP_IRQ_ROUTING @@ -1136,6 +1155,8 @@ struct kvm_s390_ucas_mapping { #define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init) #define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init) #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) +/* Available with KVM_CAP_S390_MEM_OP */ +#define KVM_S390_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_s390_mem_op) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v0.10.2 From e44fc8c9dab215ac0e398622a05574cffd5f5184 Mon Sep 17 00:00:00 2001 From: Ekaterina Tumanova Date: Fri, 30 Jan 2015 16:55:56 +0100 Subject: KVM: s390: introduce post handlers for STSI The Store System Information (STSI) instruction currently collects all information it relays to the caller in the kernel. Some information, however, is only available in user space. An example of this is the guest name: The kernel always sets "KVMGuest", but user space knows the actual guest name. This patch introduces a new exit, KVM_EXIT_S390_STSI, guarded by a capability that can be enabled by user space if it wants to be able to insert such data. User space will be provided with the target buffer and the requested STSI function code. Reviewed-by: Eric Farman Reviewed-by: Christian Borntraeger Signed-off-by: Ekaterina Tumanova Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 281179d..c1fcb7a 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3304,3 +3304,31 @@ Returns: 0 on success, negative value on error Allows use of the vector registers introduced with z13 processor, and provides for the synchronization between host and user space. Will return -EINVAL if the machine does not support vectors. + +7.4 KVM_CAP_S390_USER_STSI + +Architectures: s390 +Parameters: none + +This capability allows post-handlers for the STSI instruction. After +initial handling in the kernel, KVM exits to user space with +KVM_EXIT_S390_STSI to allow user space to insert further data. + +Before exiting to userspace, kvm handlers should fill in s390_stsi field of +vcpu->run: +struct { + __u64 addr; + __u8 ar; + __u8 reserved; + __u8 fc; + __u8 sel1; + __u16 sel2; +} s390_stsi; + +@addr - guest address of STSI SYSIB +@fc - function code +@sel1 - selector 1 +@sel2 - selector 2 +@ar - access register number + +KVM handlers should exit to userspace with rc = -EREMOTE. diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 347a333..2356a8c 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -565,6 +565,7 @@ struct kvm_arch{ int use_vectors; int user_cpu_state_ctrl; int user_sigp; + int user_stsi; struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS]; wait_queue_head_t ipte_wq; int ipte_lock_count; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b7ecef9..fdfa106 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -178,6 +178,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: case KVM_CAP_S390_USER_SIGP: + case KVM_CAP_S390_USER_STSI: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -280,6 +281,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) kvm->arch.use_vectors = MACHINE_HAS_VX; r = MACHINE_HAS_VX ? 0 : -EINVAL; break; + case KVM_CAP_S390_USER_STSI: + kvm->arch.user_stsi = 1; + r = 0; + break; default: r = -EINVAL; break; diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index f4fe02e..5e4658d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -496,6 +496,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) ASCEBC(mem->vm[0].cpi, 16); } +static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar, + u8 fc, u8 sel1, u16 sel2) +{ + vcpu->run->exit_reason = KVM_EXIT_S390_STSI; + vcpu->run->s390_stsi.addr = addr; + vcpu->run->s390_stsi.ar = ar; + vcpu->run->s390_stsi.fc = fc; + vcpu->run->s390_stsi.sel1 = sel1; + vcpu->run->s390_stsi.sel2 = sel2; +} + static int handle_stsi(struct kvm_vcpu *vcpu) { int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28; @@ -556,11 +567,15 @@ static int handle_stsi(struct kvm_vcpu *vcpu) rc = kvm_s390_inject_prog_cond(vcpu, rc); goto out; } + if (vcpu->kvm->arch.user_stsi) { + insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2); + rc = -EREMOTE; + } trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2); free_page(mem); kvm_s390_set_psw_cc(vcpu, 0); vcpu->run->s.regs.gprs[0] = 0; - return 0; + return rc; out_no_data: kvm_s390_set_psw_cc(vcpu, 3); out: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0e16f2c..57445ef 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -172,6 +172,7 @@ struct kvm_pit_config { #define KVM_EXIT_S390_TSCH 22 #define KVM_EXIT_EPR 23 #define KVM_EXIT_SYSTEM_EVENT 24 +#define KVM_EXIT_S390_STSI 25 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -309,6 +310,15 @@ struct kvm_run { __u32 type; __u64 flags; } system_event; + /* KVM_EXIT_S390_STSI */ + struct { + __u64 addr; + __u8 ar; + __u8 reserved; + __u8 fc; + __u8 sel1; + __u16 sel2; + } s390_stsi; /* Fix the size of the union. */ char padding[256]; }; @@ -780,6 +790,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_SIGP 106 #define KVM_CAP_S390_VECTOR_REGISTERS 107 #define KVM_CAP_S390_MEM_OP 108 +#define KVM_CAP_S390_USER_STSI 109 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 30ee2a984f07b00895e0e01d78859b3aff9307c7 Mon Sep 17 00:00:00 2001 From: "Jason J. Herne" Date: Tue, 23 Sep 2014 09:23:01 -0400 Subject: KVM: s390: Create ioctl for Getting/Setting guest storage keys Provide the KVM_S390_GET_SKEYS and KVM_S390_SET_SKEYS ioctl which can be used to get/set guest storage keys. This functionality is needed for live migration of s390 guests that use storage keys. Signed-off-by: Jason J. Herne Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index c1fcb7a..0d7fc66 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2762,6 +2762,64 @@ register number to be used. The "reserved" field is meant for future extensions. It is not used by KVM with the currently defined set of flags. +4.90 KVM_S390_GET_SKEYS + +Capability: KVM_CAP_S390_SKEYS +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_skeys +Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage + keys, negative value on error + +This ioctl is used to get guest storage key values on the s390 +architecture. The ioctl takes parameters via the kvm_s390_skeys struct. + +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; + +The start_gfn field is the number of the first guest frame whose storage keys +you want to get. + +The count field is the number of consecutive frames (starting from start_gfn) +whose storage keys to get. The count field must be at least 1 and the maximum +allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range +will cause the ioctl to return -EINVAL. + +The skeydata_addr field is the address to a buffer large enough to hold count +bytes. This buffer will be filled with storage key data by the ioctl. + +4.91 KVM_S390_SET_SKEYS + +Capability: KVM_CAP_S390_SKEYS +Architectures: s390 +Type: vm ioctl +Parameters: struct kvm_s390_skeys +Returns: 0 on success, negative value on error + +This ioctl is used to set guest storage key values on the s390 +architecture. The ioctl takes parameters via the kvm_s390_skeys struct. +See section on KVM_S390_GET_SKEYS for struct definition. + +The start_gfn field is the number of the first guest frame whose storage keys +you want to set. + +The count field is the number of consecutive frames (starting from start_gfn) +whose storage keys to get. The count field must be at least 1 and the maximum +allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range +will cause the ioctl to return -EINVAL. + +The skeydata_addr field is the address to a buffer containing count bytes of +storage keys. Each byte in the buffer will be set as the storage key for a +single frame starting at start_gfn for count frames. + +Note: If any architecturally invalid key value is found in the given data then +the ioctl will return -EINVAL. + 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fdfa106..0dc22ba 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -179,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MP_STATE: case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: + case KVM_CAP_S390_SKEYS: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -729,6 +730,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr) return ret; } +static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) +{ + uint8_t *keys; + uint64_t hva; + unsigned long curkey; + int i, r = 0; + + if (args->flags != 0) + return -EINVAL; + + /* Is this guest using storage keys? */ + if (!mm_use_skey(current->mm)) + return KVM_S390_GET_SKEYS_NONE; + + /* Enforce sane limit on memory allocation */ + if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) + return -EINVAL; + + keys = kmalloc_array(args->count, sizeof(uint8_t), + GFP_KERNEL | __GFP_NOWARN); + if (!keys) + keys = vmalloc(sizeof(uint8_t) * args->count); + if (!keys) + return -ENOMEM; + + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + goto out; + } + + curkey = get_guest_storage_key(current->mm, hva); + if (IS_ERR_VALUE(curkey)) { + r = curkey; + goto out; + } + keys[i] = curkey; + } + + r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys, + sizeof(uint8_t) * args->count); + if (r) + r = -EFAULT; +out: + kvfree(keys); + return r; +} + +static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args) +{ + uint8_t *keys; + uint64_t hva; + int i, r = 0; + + if (args->flags != 0) + return -EINVAL; + + /* Enforce sane limit on memory allocation */ + if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX) + return -EINVAL; + + keys = kmalloc_array(args->count, sizeof(uint8_t), + GFP_KERNEL | __GFP_NOWARN); + if (!keys) + keys = vmalloc(sizeof(uint8_t) * args->count); + if (!keys) + return -ENOMEM; + + r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr, + sizeof(uint8_t) * args->count); + if (r) { + r = -EFAULT; + goto out; + } + + /* Enable storage key handling for the guest */ + s390_enable_skey(); + + for (i = 0; i < args->count; i++) { + hva = gfn_to_hva(kvm, args->start_gfn + i); + if (kvm_is_error_hva(hva)) { + r = -EFAULT; + goto out; + } + + /* Lowest order bit is reserved */ + if (keys[i] & 0x01) { + r = -EINVAL; + goto out; + } + + r = set_guest_storage_key(current->mm, hva, + (unsigned long)keys[i], 0); + if (r) + goto out; + } +out: + kvfree(keys); + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -788,6 +891,26 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_s390_vm_has_attr(kvm, &attr); break; } + case KVM_S390_GET_SKEYS: { + struct kvm_s390_skeys args; + + r = -EFAULT; + if (copy_from_user(&args, argp, + sizeof(struct kvm_s390_skeys))) + break; + r = kvm_s390_get_skeys(kvm, &args); + break; + } + case KVM_S390_SET_SKEYS: { + struct kvm_s390_skeys args; + + r = -EFAULT; + if (copy_from_user(&args, argp, + sizeof(struct kvm_s390_skeys))) + break; + r = kvm_s390_set_skeys(kvm, &args); + break; + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 57445ef..1162ef7 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -147,6 +147,16 @@ struct kvm_pit_config { #define KVM_PIT_SPEAKER_DUMMY 1 +struct kvm_s390_skeys { + __u64 start_gfn; + __u64 count; + __u64 skeydata_addr; + __u32 flags; + __u32 reserved[9]; +}; +#define KVM_S390_GET_SKEYS_NONE 1 +#define KVM_S390_SKEYS_MAX 1048576 + #define KVM_EXIT_UNKNOWN 0 #define KVM_EXIT_EXCEPTION 1 #define KVM_EXIT_IO 2 @@ -791,6 +801,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_VECTOR_REGISTERS 107 #define KVM_CAP_S390_MEM_OP 108 #define KVM_CAP_S390_USER_STSI 109 +#define KVM_CAP_S390_SKEYS 110 #ifdef KVM_CAP_IRQ_ROUTING @@ -1168,6 +1179,9 @@ struct kvm_s390_ucas_mapping { #define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list) /* Available with KVM_CAP_S390_MEM_OP */ #define KVM_S390_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_s390_mem_op) +/* Available with KVM_CAP_S390_SKEYS */ +#define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys) +#define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v0.10.2 From 400ac6cd73633f61e42a035b910c3db2b590b9d5 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Tue, 17 Mar 2015 11:03:07 +0100 Subject: KVM: s390: drop SIMD bit from kvm_s390_fac_list_mask Setting the SIMD bit in the KVM mask is an issue because it makes the facility visible but not usable to the guest, thus it needs to be removed again. Signed-off-by: Michael Mueller Reviewed-by: Eric Farman Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0dc22ba..42b8a25 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -107,7 +107,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { unsigned long kvm_s390_fac_list_mask[] = { 0xff82fffbf4fc2000UL, 0x005c000000000000UL, - 0x4000000000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) -- cgit v0.10.2 From 18280d8b4bcd4a2b174ee3cd748166c6190acacb Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Mon, 16 Mar 2015 16:05:41 +0100 Subject: KVM: s390: represent SIMD cap in kvm facility The patch represents capability KVM_CAP_S390_VECTOR_REGISTERS by means of the SIMD facility bit. This allows to a) disable the use of SIMD when used in conjunction with a not-SIMD-aware QEMU, b) to enable SIMD when used with a SIMD-aware version of QEMU and c) finally by means of a QEMU version using the future cpu model ioctls. Signed-off-by: Michael Mueller Reviewed-by: Eric Farman Tested-by: Eric Farman Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2356a8c..b8d1e97 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -562,7 +562,6 @@ struct kvm_arch{ int css_support; int use_irqchip; int use_cmma; - int use_vectors; int user_cpu_state_ctrl; int user_sigp; int user_stsi; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 42b8a25..9072127 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -278,8 +278,12 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - kvm->arch.use_vectors = MACHINE_HAS_VX; - r = MACHINE_HAS_VX ? 0 : -EINVAL; + if (MACHINE_HAS_VX) { + set_kvm_facility(kvm->arch.model.fac->mask, 129); + set_kvm_facility(kvm->arch.model.fac->list, 129); + r = 0; + } else + r = -EINVAL; break; case KVM_CAP_S390_USER_STSI: kvm->arch.user_stsi = 1; @@ -1084,7 +1088,6 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.css_support = 0; kvm->arch.use_irqchip = 0; - kvm->arch.use_vectors = 0; kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); @@ -1186,12 +1189,12 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); - if (vcpu->kvm->arch.use_vectors) + if (test_kvm_facility(vcpu->kvm, 129)) save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); else save_fp_regs(vcpu->arch.host_fpregs.fprs); save_access_regs(vcpu->arch.host_acrs); - if (vcpu->kvm->arch.use_vectors) { + if (test_kvm_facility(vcpu->kvm, 129)) { restore_fp_ctl(&vcpu->run->s.regs.fpc); restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); } else { @@ -1207,7 +1210,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); - if (vcpu->kvm->arch.use_vectors) { + if (test_kvm_facility(vcpu->kvm, 129)) { save_fp_ctl(&vcpu->run->s.regs.fpc); save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); } else { @@ -1216,7 +1219,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) } save_access_regs(vcpu->run->s.regs.acrs); restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); - if (vcpu->kvm->arch.use_vectors) + if (test_kvm_facility(vcpu->kvm, 129)) restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); else restore_fp_regs(vcpu->arch.host_fpregs.fprs); @@ -1316,7 +1319,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp_has_sigpif()) vcpu->arch.sie_block->eca |= 0x10000000U; - if (vcpu->kvm->arch.use_vectors) { + if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 5d54191..c5aefef 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -149,6 +149,17 @@ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) __test_facility(nr, kvm->arch.model.fac->list); } +static inline int set_kvm_facility(u64 *fac_list, unsigned long nr) +{ + unsigned char *ptr; + + if (nr >= MAX_FACILITY_BIT) + return -EINVAL; + ptr = (unsigned char *) fac_list + (nr >> 3); + *ptr |= (0x80UL >> (nr & 7)); + return 0; +} + /* are cpu states controlled by user space */ static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm) { -- cgit v0.10.2 From 795a149e78f49c0e260c56cee9978c5d001a84f1 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 13 Mar 2015 17:39:44 +0800 Subject: KVM: x86: Avoid using plain integer as NULL pointer warning This patch fix the following sparse warning: for file arch/x86/kvm/x86.c: warning: Using plain integer as NULL pointer Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d1a1fea..1309263 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5919,7 +5919,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid) lapic_irq.dest_id = apicid; lapic_irq.delivery_mode = APIC_DM_REMRD; - kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL); + kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL); } int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 52eb5a6d576b5bca14797a4085abdd68ad8c0b3f Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Fri, 13 Mar 2015 17:39:45 +0800 Subject: KVM: x86: For the symbols used locally only should be static type This patch fix the following sparse warnings: for arch/x86/kvm/x86.c: warning: symbol 'emulator_read_write' was not declared. Should it be static? warning: symbol 'emulator_write_emulated' was not declared. Should it be static? warning: symbol 'emulator_get_dr' was not declared. Should it be static? warning: symbol 'emulator_set_dr' was not declared. Should it be static? for arch/x86/kvm/pmu.c: warning: symbol 'fixed_pmc_events' was not declared. Should it be static? Signed-off-by: Xiubo Li Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 8e6b7d8..29fbf9d 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping { }; /* mapping between fixed pmc index and arch_events array */ -int fixed_pmc_events[] = {1, 0, 7}; +static int fixed_pmc_events[] = {1, 0, 7}; static bool pmc_is_gp(struct kvm_pmc *pmc) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 001e630..3ef203a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2906,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm) return 1; } -bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val) +static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, + unsigned long val) { unsigned long cr0 = svm->vcpu.arch.cr0; bool ret = false; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1309263..cc2c759 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4476,7 +4476,8 @@ mmio: return X86EMUL_CONTINUE; } -int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr, +static int emulator_read_write(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes, struct x86_exception *exception, const struct read_write_emulator_ops *ops) @@ -4539,7 +4540,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt, exception, &read_emultor); } -int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, +static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt, unsigned long addr, const void *val, unsigned int bytes, @@ -4738,12 +4739,14 @@ static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt) kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt)); } -int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest) +static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long *dest) { return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest); } -int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) +static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, + unsigned long value) { return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value); -- cgit v0.10.2 From faac2458518e20130664d77b657303758f1aaf5a Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Mon, 16 Mar 2015 17:18:25 -0400 Subject: KVM: SVM: Fix confusing message if no exit handlers are installed I hit this path on a AMD box and thought someone was playing a April Fool's joke on me. Signed-off-by: Bandan Das Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 3ef203a..155534c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3562,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code); + WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -- cgit v0.10.2 From 58d2930f4ee335ab703d768cb0318331fc1bb62c Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Tue, 17 Mar 2015 16:19:58 +0900 Subject: KVM: Eliminate extra function calls in kvm_get_dirty_log_protect() When all bits in mask are not set, kvm_arch_mmu_enable_log_dirty_pt_masked() has nothing to do. But since it needs to be called from the generic code, it cannot be inlined, and a few function calls, two when PML is enabled, are wasted. Since it is common to see many pages remain clean, e.g. framebuffers can stay calm for a long time, it is worth eliminating this overhead. Signed-off-by: Takuya Yoshikawa Reviewed-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 49900fc..ce7888a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1053,9 +1053,11 @@ int kvm_get_dirty_log_protect(struct kvm *kvm, mask = xchg(&dirty_bitmap[i], 0); dirty_bitmap_buffer[i] = mask; - offset = i * BITS_PER_LONG; - kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, - mask); + if (mask) { + offset = i * BITS_PER_LONG; + kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, + offset, mask); + } } spin_unlock(&kvm->mmu_lock); -- cgit v0.10.2 From 0a4e6be9ca17c54817cf814b4b5aa60478c6df27 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 23 Mar 2015 20:21:51 -0300 Subject: x86: kvm: Revert "remove sched notifier for cross-cpu migrations" The following point: 2. per-CPU pvclock time info is updated if the underlying CPU changes. Is not true anymore since "KVM: x86: update pvclock area conditionally, on cpu migration". Add task migration notification back. Problem noticed by Andy Lutomirski. Signed-off-by: Marcelo Tosatti CC: stable@kernel.org # 3.11+ diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index d6b078e..25b1cc0 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, struct pvclock_vsyscall_time_info { struct pvclock_vcpu_time_info pvti; + u32 migrate_count; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info) diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 2f355d2..e5ecd20 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); } +static struct pvclock_vsyscall_time_info *pvclock_vdso_info; + +static struct pvclock_vsyscall_time_info * +pvclock_get_vsyscall_user_time_info(int cpu) +{ + if (!pvclock_vdso_info) { + BUG(); + return NULL; + } + + return &pvclock_vdso_info[cpu]; +} + +struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu) +{ + return &pvclock_get_vsyscall_user_time_info(cpu)->pvti; +} + #ifdef CONFIG_X86_64 +static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l, + void *v) +{ + struct task_migration_notifier *mn = v; + struct pvclock_vsyscall_time_info *pvti; + + pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu); + + /* this is NULL when pvclock vsyscall is not initialized */ + if (unlikely(pvti == NULL)) + return NOTIFY_DONE; + + pvti->migrate_count++; + + return NOTIFY_DONE; +} + +static struct notifier_block pvclock_migrate = { + .notifier_call = pvclock_task_migrate, +}; + /* * Initialize the generic pvclock vsyscall state. This will allocate * a/some page(s) for the per-vcpu pvclock information, set up a @@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i, WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE); + pvclock_vdso_info = i; + for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) { __set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx, __pa(i) + (idx*PAGE_SIZE), PAGE_KERNEL_VVAR); } + + register_task_migration_notifier(&pvclock_migrate); + return 0; } #endif diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 9793322..3093376 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode) cycle_t ret; u64 last; u32 version; + u32 migrate_count; u8 flags; unsigned cpu, cpu1; /* - * Note: hypervisor must guarantee that: - * 1. cpu ID number maps 1:1 to per-CPU pvclock time info. - * 2. that per-CPU pvclock time info is updated if the - * underlying CPU changes. - * 3. that version is increased whenever underlying CPU - * changes. - * + * When looping to get a consistent (time-info, tsc) pair, we + * also need to deal with the possibility we can switch vcpus, + * so make sure we always re-fetch time-info for the current vcpu. */ do { cpu = __getcpu() & VGETCPU_CPU_MASK; @@ -104,6 +101,8 @@ static notrace cycle_t vread_pvclock(int *mode) pvti = get_pvti(cpu); + migrate_count = pvti->migrate_count; + version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* @@ -115,7 +114,8 @@ static notrace cycle_t vread_pvclock(int *mode) cpu1 = __getcpu() & VGETCPU_CPU_MASK; } while (unlikely(cpu != cpu1 || (pvti->pvti.version & 1) || - pvti->pvti.version != version)); + pvti->pvti.version != version || + pvti->migrate_count != migrate_count)); if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT))) *mode = VCLOCK_NONE; diff --git a/include/linux/sched.h b/include/linux/sched.h index 6d77432..be98910 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -176,6 +176,14 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); extern void calc_global_load(unsigned long ticks); extern void update_cpu_load_nohz(void); +/* Notifier for when a task gets migrated to a new CPU */ +struct task_migration_notifier { + struct task_struct *task; + int from_cpu; + int to_cpu; +}; +extern void register_task_migration_notifier(struct notifier_block *n); + extern unsigned long get_parent_ip(unsigned long addr); extern void dump_cpu_task(int cpu); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index f0f831e..d0c4209 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -996,6 +996,13 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags) rq_clock_skip_update(rq, true); } +static ATOMIC_NOTIFIER_HEAD(task_migration_notifier); + +void register_task_migration_notifier(struct notifier_block *n) +{ + atomic_notifier_chain_register(&task_migration_notifier, n); +} + #ifdef CONFIG_SMP void set_task_cpu(struct task_struct *p, unsigned int new_cpu) { @@ -1026,10 +1033,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) trace_sched_migrate_task(p, new_cpu); if (task_cpu(p) != new_cpu) { + struct task_migration_notifier tmn; + if (p->sched_class->migrate_task_rq) p->sched_class->migrate_task_rq(p, new_cpu); p->se.nr_migrations++; perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0); + + tmn.task = p; + tmn.from_cpu = task_cpu(p); + tmn.to_cpu = new_cpu; + + atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn); } __set_task_cpu(p, new_cpu); -- cgit v0.10.2 From a123374ff3c6850e1340b6da010bb43668d710e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 19 Mar 2015 21:52:41 +0100 Subject: KVM: x86: inline kvm_ioapic_handles_vector() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An overhead from function call is not appropriate for its size and frequency of execution. Suggested-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index a2e9d96..24f0f17 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -473,13 +473,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, } } -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) -{ - struct kvm_ioapic *ioapic = kvm->arch.vioapic; - smp_rmb(); - return test_bit(vector, ioapic->handled_vectors); -} - void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 38d8402..6e265cf 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -98,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm) return kvm->arch.vioapic; } +static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) +{ + struct kvm_ioapic *ioapic = kvm->arch.vioapic; + smp_rmb(); + return test_bit(vector, ioapic->handled_vectors); +} + void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2); void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode); -bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector); int kvm_ioapic_init(struct kvm *kvm); void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, -- cgit v0.10.2 From e32edf4fd0fa4897e12ca66118ab67bf257e16e4 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Thu, 26 Mar 2015 14:39:28 +0000 Subject: KVM: Redesign kvm_io_bus_ API to pass VCPU structure to the callbacks. This is needed in e.g. ARM vGIC emulation, where the MMIO handling depends on the VCPU that does the access. Signed-off-by: Nikolay Nikolaev Signed-off-by: Andre Przywara Acked-by: Paolo Bonzini Acked-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 39b3a8f..8542f07 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1374,8 +1374,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val) return -ENXIO; } -static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, - int len, void *ptr) +static int kvm_mpic_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; @@ -1415,8 +1416,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, return ret; } -static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, - int len, const void *ptr) +static int kvm_mpic_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *ptr) { struct openpic *opp = container_of(this, struct openpic, mmio); int ret; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 27c0fac..24bfe40 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, bytes, &run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); @@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, idx = srcu_read_lock(&vcpu->kvm->srcu); - ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr, + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr, bytes, &run->mmio.data); srcu_read_unlock(&vcpu->kvm->srcu, idx); diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 9254aff..329ec75 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 3 contains the virtqueue index (passed as datamatch) * - gpr 4 contains the index on the bus (optionally) */ - ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, + ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS, vcpu->run->s.regs.gprs[2] & 0xffffffff, 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 298781d..4dce6f8 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr) (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); } -static int pit_ioport_write(struct kvm_io_device *this, +static int pit_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this, return 0; } -static int pit_ioport_read(struct kvm_io_device *this, +static int pit_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *data) { struct kvm_pit *pit = dev_to_pit(this); @@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this, return 0; } -static int speaker_ioport_write(struct kvm_io_device *this, +static int speaker_ioport_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *data) { struct kvm_pit *pit = speaker_to_pit(this); @@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this, return 0; } -static int speaker_ioport_read(struct kvm_io_device *this, - gpa_t addr, int len, void *data) +static int speaker_ioport_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *data) { struct kvm_pit *pit = speaker_to_pit(this); struct kvm_kpit_state *pit_state = &pit->pit_state; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index cc31f7c..8ff4eaa 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -528,42 +528,42 @@ static int picdev_read(struct kvm_pic *s, return 0; } -static int picdev_master_write(struct kvm_io_device *dev, +static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } -static int picdev_master_read(struct kvm_io_device *dev, +static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_master), addr, len, val); } -static int picdev_slave_write(struct kvm_io_device *dev, +static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } -static int picdev_slave_read(struct kvm_io_device *dev, +static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_slave), addr, len, val); } -static int picdev_eclr_write(struct kvm_io_device *dev, +static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, const void *val) { return picdev_write(container_of(dev, struct kvm_pic, dev_eclr), addr, len, val); } -static int picdev_eclr_read(struct kvm_io_device *dev, +static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, gpa_t addr, int len, void *val) { return picdev_read(container_of(dev, struct kvm_pic, dev_eclr), diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index b1947e0..8bf2e49 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -498,8 +498,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); } -static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, - void *val) +static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, + gpa_t addr, int len, void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 result; @@ -541,8 +541,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, return 0; } -static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, + gpa_t addr, int len, const void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 data; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index e55b5fc..ba57bb7 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1038,7 +1038,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) addr < apic->base_address + LAPIC_MMIO_LENGTH; } -static int apic_mmio_read(struct kvm_io_device *this, +static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, void *data) { struct kvm_lapic *apic = to_lapic(this); @@ -1358,7 +1358,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val) return ret; } -static int apic_mmio_write(struct kvm_io_device *this, +static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t address, int len, const void *data) { struct kvm_lapic *apic = to_lapic(this); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f7b20b4..317da9b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5822,7 +5822,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) gpa_t gpa; gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); - if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { skip_emulated_instruction(vcpu); return 1; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bd7a70b..5573d63 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4115,8 +4115,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, do { n = min(len, 8); if (!(vcpu->arch.apic && - !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v)) - && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v)) + && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v)) break; handled += n; addr += n; @@ -4135,8 +4135,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) do { n = min(len, 8); if (!(vcpu->arch.apic && - !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v)) - && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v)) + !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev, + addr, n, v)) + && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v)) break; trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v); handled += n; @@ -4630,10 +4631,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) int r; if (vcpu->arch.pio.in) - r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port, + r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); else - r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS, + r = kvm_io_bus_write(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port, vcpu->arch.pio.size, pd); return r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ae9c720..9605e46 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -165,12 +165,12 @@ enum kvm_bus { KVM_NR_BUSES }; -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val); -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie); -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, - void *val); +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie); +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, + int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 00d8642..c831a40 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -60,8 +60,9 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) return 1; } -static int coalesced_mmio_write(struct kvm_io_device *this, - gpa_t addr, int len, const void *val) +static int coalesced_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index fc5f43e..26c72f3 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -715,8 +715,8 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val) /* MMIO/PIO writes trigger an event if the addr/val match */ static int -ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len, - const void *val) +ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, + int len, const void *val) { struct _ioeventfd *p = to_ioeventfd(this); diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h index 12fd3ca..9ef709c 100644 --- a/virt/kvm/iodev.h +++ b/virt/kvm/iodev.h @@ -20,6 +20,7 @@ #include struct kvm_io_device; +struct kvm_vcpu; /** * kvm_io_device_ops are called under kvm slots_lock. @@ -27,11 +28,13 @@ struct kvm_io_device; * or non-zero to have it passed to the next device. **/ struct kvm_io_device_ops { - int (*read)(struct kvm_io_device *this, + int (*read)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, void *val); - int (*write)(struct kvm_io_device *this, + int (*write)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, int len, const void *val); @@ -49,16 +52,20 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev, dev->ops = ops; } -static inline int kvm_iodevice_read(struct kvm_io_device *dev, - gpa_t addr, int l, void *v) +static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, void *v) { - return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; + return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; } -static inline int kvm_iodevice_write(struct kvm_io_device *dev, - gpa_t addr, int l, const void *v) +static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, const void *v) { - return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; + return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; } static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a109370..664d67a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2997,7 +2997,7 @@ static int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, return off; } -static int __kvm_io_bus_write(struct kvm_io_bus *bus, +static int __kvm_io_bus_write(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, struct kvm_io_range *range, const void *val) { int idx; @@ -3008,7 +3008,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_write(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_write(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3018,7 +3018,7 @@ static int __kvm_io_bus_write(struct kvm_io_bus *bus, } /* kvm_io_bus_write - called under kvm->slots_lock */ -int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { struct kvm_io_bus *bus; @@ -3030,14 +3030,14 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_write(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_write(vcpu, bus, &range, val); return r < 0 ? r : 0; } /* kvm_io_bus_write_cookie - called under kvm->slots_lock */ -int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, const void *val, long cookie) +int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, + gpa_t addr, int len, const void *val, long cookie) { struct kvm_io_bus *bus; struct kvm_io_range range; @@ -3047,12 +3047,12 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); /* First try the device referenced by cookie. */ if ((cookie >= 0) && (cookie < bus->dev_count) && (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) - if (!kvm_iodevice_write(bus->range[cookie].dev, addr, len, + if (!kvm_iodevice_write(vcpu, bus->range[cookie].dev, addr, len, val)) return cookie; @@ -3060,11 +3060,11 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, * cookie contained garbage; fall back to search and return the * correct cookie value. */ - return __kvm_io_bus_write(bus, &range, val); + return __kvm_io_bus_write(vcpu, bus, &range, val); } -static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, - void *val) +static int __kvm_io_bus_read(struct kvm_vcpu *vcpu, struct kvm_io_bus *bus, + struct kvm_io_range *range, void *val) { int idx; @@ -3074,7 +3074,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, while (idx < bus->dev_count && kvm_io_bus_cmp(range, &bus->range[idx]) == 0) { - if (!kvm_iodevice_read(bus->range[idx].dev, range->addr, + if (!kvm_iodevice_read(vcpu, bus->range[idx].dev, range->addr, range->len, val)) return idx; idx++; @@ -3085,7 +3085,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range, EXPORT_SYMBOL_GPL(kvm_io_bus_write); /* kvm_io_bus_read - called under kvm->slots_lock */ -int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, +int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { struct kvm_io_bus *bus; @@ -3097,8 +3097,8 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, .len = len, }; - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - r = __kvm_io_bus_read(bus, &range, val); + bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); + r = __kvm_io_bus_read(vcpu, bus, &range, val); return r < 0 ? r : 0; } -- cgit v0.10.2 From af669ac6dc3f66bb56fb9612b9826adac6292794 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:29 +0000 Subject: KVM: move iodev.h from virt/kvm/ to include/kvm iodev.h contains definitions for the kvm_io_bus framework. This is needed both by the generic KVM code in virt/kvm as well as by architecture specific code under arch/. Putting the header file in virt/kvm and using local includes in the architecture part seems at least dodgy to me, so let's move the file into include/kvm, so that a more natural "#include " can be used by all of the code. This also solves a problem later when using struct kvm_io_device in arm_vgic.h. Fixing up the FSF address in the GPL header and a wrong include path on the way. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Reviewed-by: Marc Zyngier Reviewed-by: Marcelo Tosatti Signed-off-by: Marc Zyngier diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 8542f07..4703fad 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -34,7 +34,7 @@ #include #include #include -#include "iodev.h" +#include #define MAX_CPU 32 #define MAX_SRC 256 diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index dd1b16b..c84990b 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h @@ -3,7 +3,7 @@ #include -#include "iodev.h" +#include struct kvm_kpit_channel_state { u32 count; /* can be 65536 */ diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index c2e36d9..d9e02ca 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -3,7 +3,7 @@ #include -#include "iodev.h" +#include struct kvm; struct kvm_vcpu; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2d03568..ad68c73 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -27,7 +27,7 @@ #include #include -#include "iodev.h" +#include #include "ioapic.h" #include "lapic.h" diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 0bc6c65..e284c28 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -1,7 +1,7 @@ #ifndef __KVM_X86_LAPIC_H #define __KVM_X86_LAPIC_H -#include "iodev.h" +#include #include diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h new file mode 100644 index 0000000..a6d208b --- /dev/null +++ b/include/kvm/iodev.h @@ -0,0 +1,76 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __KVM_IODEV_H__ +#define __KVM_IODEV_H__ + +#include +#include + +struct kvm_io_device; +struct kvm_vcpu; + +/** + * kvm_io_device_ops are called under kvm slots_lock. + * read and write handlers return 0 if the transaction has been handled, + * or non-zero to have it passed to the next device. + **/ +struct kvm_io_device_ops { + int (*read)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, + int len, + void *val); + int (*write)(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, + int len, + const void *val); + void (*destructor)(struct kvm_io_device *this); +}; + + +struct kvm_io_device { + const struct kvm_io_device_ops *ops; +}; + +static inline void kvm_iodevice_init(struct kvm_io_device *dev, + const struct kvm_io_device_ops *ops) +{ + dev->ops = ops; +} + +static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, void *v) +{ + return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; +} + +static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *dev, gpa_t addr, + int l, const void *v) +{ + return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v) + : -EOPNOTSUPP; +} + +static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) +{ + if (dev->ops->destructor) + dev->ops->destructor(dev); +} + +#endif /* __KVM_IODEV_H__ */ diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index c831a40..571c1ce 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -8,7 +8,7 @@ * */ -#include "iodev.h" +#include #include #include diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 26c72f3..9ff4193 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -36,7 +36,7 @@ #include #include -#include "iodev.h" +#include #ifdef CONFIG_HAVE_KVM_IRQFD /* diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h deleted file mode 100644 index 9ef709c..0000000 --- a/virt/kvm/iodev.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __KVM_IODEV_H__ -#define __KVM_IODEV_H__ - -#include -#include - -struct kvm_io_device; -struct kvm_vcpu; - -/** - * kvm_io_device_ops are called under kvm slots_lock. - * read and write handlers return 0 if the transaction has been handled, - * or non-zero to have it passed to the next device. - **/ -struct kvm_io_device_ops { - int (*read)(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, - gpa_t addr, - int len, - void *val); - int (*write)(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, - gpa_t addr, - int len, - const void *val); - void (*destructor)(struct kvm_io_device *this); -}; - - -struct kvm_io_device { - const struct kvm_io_device_ops *ops; -}; - -static inline void kvm_iodevice_init(struct kvm_io_device *dev, - const struct kvm_io_device_ops *ops) -{ - dev->ops = ops; -} - -static inline int kvm_iodevice_read(struct kvm_vcpu *vcpu, - struct kvm_io_device *dev, gpa_t addr, - int l, void *v) -{ - return dev->ops->read ? dev->ops->read(vcpu, dev, addr, l, v) - : -EOPNOTSUPP; -} - -static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, - struct kvm_io_device *dev, gpa_t addr, - int l, const void *v) -{ - return dev->ops->write ? dev->ops->write(vcpu, dev, addr, l, v) - : -EOPNOTSUPP; -} - -static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) -{ - if (dev->ops->destructor) - dev->ops->destructor(dev); -} - -#endif /* __KVM_IODEV_H__ */ diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 664d67a..c5460b6 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -16,7 +16,7 @@ * */ -#include "iodev.h" +#include #include #include -- cgit v0.10.2 From 5d9d15af1cade35e84979f222b911cbc97106032 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:30 +0000 Subject: KVM: arm/arm64: remove now unneeded include directory from Makefile virt/kvm was never really a good include directory for anything else than locally included headers. With the move of iodev.h there is no need anymore to add this directory the compiler's include path, so remove it from the arm and arm64 kvm Makefile. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index a093bf1..139e46c 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt) plus_virt_def := -DREQUIRES_VIRT=1 endif -ccflags-y += -Ivirt/kvm -Iarch/arm/kvm +ccflags-y += -Iarch/arm/kvm CFLAGS_arm.o := -I. $(plus_virt_def) CFLAGS_mmu.o := -I. diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index b22c636..d5904f8 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -2,7 +2,7 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm +ccflags-y += -Iarch/arm64/kvm CFLAGS_arm.o := -I. CFLAGS_mmu.o := -I. -- cgit v0.10.2 From f0e4b2776c12c1633ccef17f210733d6e1b6b2b3 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:31 +0000 Subject: KVM: x86: remove now unneeded include directory from Makefile virt/kvm was never really a good include directory for anything else than locally included headers. With the move of iodev.h there is no need anymore to add this directory the compiler's include path, so remove it from the x86 kvm Makefile. Signed-off-by: Andre Przywara Reviewed-by: Marcelo Tosatti Signed-off-by: Marc Zyngier diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 08f790d..16e8f96 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -1,5 +1,5 @@ -ccflags-y += -Ivirt/kvm -Iarch/x86/kvm +ccflags-y += -Iarch/x86/kvm CFLAGS_x86.o := -I. CFLAGS_svm.o := -I. -- cgit v0.10.2 From cf50a1eb43b98daa181714e40e22c8e5ad5007d6 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:32 +0000 Subject: KVM: arm/arm64: rename struct kvm_mmio_range to vgic_io_range The name "kvm_mmio_range" is a bit bold, given that it only covers the VGIC's MMIO ranges. To avoid confusion with kvm_io_range, rename it to vgic_io_range. Signed-off-by: Andre Przywara Acked-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index c818662..ddb3135 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -319,7 +319,7 @@ static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } -static const struct kvm_mmio_range vgic_dist_ranges[] = { +static const struct vgic_io_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -647,7 +647,7 @@ static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, * CPU Interface Register accesses - these are not accessed by the VM, but by * user space for saving and restoring VGIC state. */ -static const struct kvm_mmio_range vgic_cpu_ranges[] = { +static const struct vgic_io_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, .len = 12, @@ -674,7 +674,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_device_attr *attr, u32 *reg, bool is_write) { - const struct kvm_mmio_range *r = NULL, *ranges; + const struct vgic_io_range *r = NULL, *ranges; phys_addr_t offset; int ret, cpuid, c; struct kvm_vcpu *vcpu, *tmp_vcpu; diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index b3f1546..14943e3 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -340,7 +340,7 @@ static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_v3_dist_ranges[] = { +static const struct vgic_io_range vgic_v3_dist_ranges[] = { { .base = GICD_CTLR, .len = 0x04, @@ -570,7 +570,7 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct kvm_mmio_range vgic_redist_sgi_ranges[] = { +static const struct vgic_io_range vgic_redist_sgi_ranges[] = { { .base = GICR_IGROUPR0, .len = 0x04, @@ -676,7 +676,7 @@ static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, return false; } -static const struct kvm_mmio_range vgic_redist_ranges[] = { +static const struct vgic_io_range vgic_redist_ranges[] = { { .base = GICR_CTLR, .len = 0x04, @@ -726,7 +726,7 @@ static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long rdbase = dist->vgic_redist_base; int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); int vcpu_id; - const struct kvm_mmio_range *mmio_range; + const struct vgic_io_range *mmio_range; if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { return vgic_handle_mmio_range(vcpu, run, mmio, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index ffd937c..21a3550 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -712,11 +712,11 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) { - const struct kvm_mmio_range *r = ranges; + const struct vgic_io_range *r = ranges; while (r->len) { if (offset >= r->base && @@ -729,7 +729,7 @@ struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, } static bool vgic_validate_access(const struct vgic_dist *dist, - const struct kvm_mmio_range *range, + const struct vgic_io_range *range, unsigned long offset) { int irq; @@ -757,7 +757,7 @@ static bool vgic_validate_access(const struct vgic_dist *dist, static bool call_range_handler(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, unsigned long offset, - const struct kvm_mmio_range *range) + const struct vgic_io_range *range) { u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; @@ -804,10 +804,10 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, */ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, + const struct vgic_io_range *ranges, unsigned long mmio_base) { - const struct kvm_mmio_range *range; + const struct vgic_io_range *range; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; bool updated_state; unsigned long offset; @@ -1984,7 +1984,7 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return r; } -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset) +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) { struct kvm_exit_mmio dev_attr_mmio; diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 1e5a381..6fccb96 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -74,7 +74,7 @@ void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) *((u32 *)mmio->data) = cpu_to_le32(value) & mask; } -struct kvm_mmio_range { +struct vgic_io_range { phys_addr_t base; unsigned long len; int bits_per_irq; @@ -89,13 +89,13 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len, } const -struct kvm_mmio_range *vgic_find_range(const struct kvm_mmio_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset); +struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, + struct kvm_exit_mmio *mmio, + phys_addr_t offset); bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio, - const struct kvm_mmio_range *ranges, + const struct vgic_io_range *ranges, unsigned long mmio_base); bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, @@ -120,7 +120,7 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, void vgic_kick_vcpus(struct kvm *kvm); -int vgic_has_attr_regs(const struct kvm_mmio_range *ranges, phys_addr_t offset); +int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset); int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); -- cgit v0.10.2 From 9f199d0a0eeb8efb564ff41a6b9f819c4c0285ea Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:33 +0000 Subject: KVM: arm/arm64: simplify vgic_find_range() and callers The vgic_find_range() function in vgic.c takes a struct kvm_exit_mmio argument, but actually only used the length field in there. Since we need to get rid of that structure in that part of the code anyway, let's rework the function (and it's callers) to pass the length argument to the function directly. Signed-off-by: Andre Przywara Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index ddb3135..1dd183e 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -715,7 +715,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, default: BUG(); } - r = vgic_find_range(ranges, &mmio, offset); + r = vgic_find_range(ranges, 4, offset); if (unlikely(!r || !r->handle_mmio)) { ret = -ENXIO; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 21a3550..8802ad7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -713,16 +713,13 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) const struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) + int len, gpa_t offset) { - const struct vgic_io_range *r = ranges; - - while (r->len) { - if (offset >= r->base && - (offset + mmio->len) <= (r->base + r->len)) - return r; - r++; + while (ranges->len) { + if (offset >= ranges->base && + (offset + len) <= (ranges->base + ranges->len)) + return ranges; + ranges++; } return NULL; @@ -813,7 +810,7 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long offset; offset = mmio->phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio, offset); + range = vgic_find_range(ranges, mmio->len, offset); if (unlikely(!range || !range->handle_mmio)) { pr_warn("Unhandled access %d %08llx %d\n", mmio->is_write, mmio->phys_addr, mmio->len); @@ -1986,10 +1983,7 @@ int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) { - struct kvm_exit_mmio dev_attr_mmio; - - dev_attr_mmio.len = 4; - if (vgic_find_range(ranges, &dev_attr_mmio, offset)) + if (vgic_find_range(ranges, 4, offset)) return 0; else return -ENXIO; diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 6fccb96..01aa622 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -90,8 +90,7 @@ static inline bool is_in_range(phys_addr_t addr, unsigned long len, const struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - struct kvm_exit_mmio *mmio, - phys_addr_t offset); + int len, gpa_t offset); bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, struct kvm_exit_mmio *mmio, -- cgit v0.10.2 From 6777f77f0f544f686ee3158ff0db6a7d81b7d3a2 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:34 +0000 Subject: KVM: arm/arm64: implement kvm_io_bus MMIO handling for the VGIC Currently we use a lot of VGIC specific code to do the MMIO dispatching. Use the previous reworks to add kvm_io_bus style MMIO handlers. Those are not yet called by the MMIO abort handler, also the actual VGIC emulator function do not make use of it yet, but will be enabled with the following patches. Signed-off-by: Andre Przywara Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 9092fad..f90140c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -24,6 +24,7 @@ #include #include #include +#include #define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 @@ -147,6 +148,14 @@ struct vgic_vm_ops { int (*map_resources)(struct kvm *, const struct vgic_params *); }; +struct vgic_io_device { + gpa_t addr; + int len; + const struct vgic_io_range *reg_ranges; + struct kvm_vcpu *redist_vcpu; + struct kvm_io_device dev; +}; + struct vgic_dist { spinlock_t lock; bool in_kernel; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8802ad7..e968179 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -32,6 +32,8 @@ #include #include #include +#include +#include /* * How the whole thing works (courtesy of Christoffer Dall): @@ -837,6 +839,66 @@ bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, } /** + * vgic_handle_mmio_access - handle an in-kernel MMIO access + * This is called by the read/write KVM IO device wrappers below. + * @vcpu: pointer to the vcpu performing the access + * @this: pointer to the KVM IO device in charge + * @addr: guest physical address of the access + * @len: size of the access + * @val: pointer to the data region + * @is_write: read or write access + * + * returns true if the MMIO access could be performed + */ +static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, gpa_t addr, + int len, void *val, bool is_write) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_io_device *iodev = container_of(this, + struct vgic_io_device, dev); + struct kvm_run *run = vcpu->run; + const struct vgic_io_range *range; + struct kvm_exit_mmio mmio; + bool updated_state; + gpa_t offset; + + offset = addr - iodev->addr; + range = vgic_find_range(iodev->reg_ranges, len, offset); + if (unlikely(!range || !range->handle_mmio)) { + pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); + return -ENXIO; + } + + mmio.phys_addr = addr; + mmio.len = len; + mmio.is_write = is_write; + if (is_write) + memcpy(mmio.data, val, len); + mmio.private = iodev->redist_vcpu; + + spin_lock(&dist->lock); + offset -= range->base; + if (vgic_validate_access(dist, range, offset)) { + updated_state = call_range_handler(vcpu, &mmio, offset, range); + if (!is_write) + memcpy(val, mmio.data, len); + } else { + if (!is_write) + memset(val, 0, len); + updated_state = false; + } + spin_unlock(&dist->lock); + kvm_prepare_mmio(run, &mmio); + kvm_handle_mmio_return(vcpu, run); + + if (updated_state) + vgic_kick_vcpus(vcpu->kvm); + + return 0; +} + +/** * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation * @vcpu: pointer to the vcpu performing the access * @run: pointer to the kvm_run structure @@ -860,6 +922,73 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); } +static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); +} + +static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, + struct kvm_io_device *this, + gpa_t addr, int len, const void *val) +{ + return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, + true); +} + +struct kvm_io_device_ops vgic_io_ops = { + .read = vgic_handle_mmio_read, + .write = vgic_handle_mmio_write, +}; + +/** + * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus + * @kvm: The VM structure pointer + * @base: The (guest) base address for the register frame + * @len: Length of the register frame window + * @ranges: Describing the handler functions for each register + * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call + * @iodev: Points to memory to be passed on to the handler + * + * @iodev stores the parameters of this function to be usable by the handler + * respectively the dispatcher function (since the KVM I/O bus framework lacks + * an opaque parameter). Initialization is done in this function, but the + * reference should be valid and unique for the whole VGIC lifetime. + * If the register frame is not mapped for a specific VCPU, pass -1 to + * @redist_vcpu_id. + */ +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_vcpu_id, + struct vgic_io_device *iodev) +{ + struct kvm_vcpu *vcpu = NULL; + int ret; + + if (redist_vcpu_id >= 0) + vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); + + iodev->addr = base; + iodev->len = len; + iodev->reg_ranges = ranges; + iodev->redist_vcpu = vcpu; + + kvm_iodevice_init(&iodev->dev, &vgic_io_ops); + + mutex_lock(&kvm->slots_lock); + + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, + &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + /* Mark the iodev as invalid if registration fails. */ + if (ret) + iodev->dev.ops = NULL; + + return ret; +} + static int vgic_nr_shared_irqs(struct vgic_dist *dist) { return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 01aa622..28fa3aa 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -20,6 +20,8 @@ #ifndef __KVM_VGIC_H__ #define __KVM_VGIC_H__ +#include + #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) @@ -82,6 +84,11 @@ struct vgic_io_range { phys_addr_t offset); }; +int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, + const struct vgic_io_range *ranges, + int redist_id, + struct vgic_io_device *iodev); + static inline bool is_in_range(phys_addr_t addr, unsigned long len, phys_addr_t baseaddr, unsigned long size) { -- cgit v0.10.2 From a9cf86f62b785202684c3ba92895946f03d910c8 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:35 +0000 Subject: KVM: arm/arm64: prepare GICv2 emulation to be handled by kvm_io_bus Using the framework provided by the recent vgic.c changes we register a kvm_io_bus device when initializing the virtual GICv2. Signed-off-by: Andre Przywara Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f90140c..4523984 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -251,6 +251,7 @@ struct vgic_dist { unsigned long *irq_active_on_cpu; struct vgic_vm_ops vm_ops; + struct vgic_io_device dist_iodev; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 1dd183e..7460b37 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -506,6 +506,7 @@ static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) static int vgic_v2_map_resources(struct kvm *kvm, const struct vgic_params *params) { + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (!irqchip_in_kernel(kvm)) @@ -516,13 +517,17 @@ static int vgic_v2_map_resources(struct kvm *kvm, if (vgic_ready(kvm)) goto out; - if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_cpu_base)) { + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { kvm_err("Need to set vgic cpu and dist addresses first\n"); ret = -ENXIO; goto out; } + vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + KVM_VGIC_V2_DIST_SIZE, + vgic_dist_ranges, -1, &dist->dist_iodev); + /* * Initialize the vgic if this hasn't already been done on demand by * accessing the vgic state from userspace. @@ -530,18 +535,23 @@ static int vgic_v2_map_resources(struct kvm *kvm, ret = vgic_init(kvm); if (ret) { kvm_err("Unable to allocate maps\n"); - goto out; + goto out_unregister; } - ret = kvm_phys_addr_ioremap(kvm, kvm->arch.vgic.vgic_cpu_base, + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, true); if (ret) { kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + goto out_unregister; } - kvm->arch.vgic.ready = true; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + out: if (ret) kvm_vgic_destroy(kvm); -- cgit v0.10.2 From b3a2a9076d3149781c8622d6a98a51045ff946e4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 23 Mar 2015 19:27:19 +0100 Subject: KVM: nVMX: Add support for rdtscp If the guest CPU is supposed to support rdtscp and the host has rdtscp enabled in the secondary execution controls, we can also expose this feature to L1. Just extend nested_vmx_exit_handled to properly route EXIT_REASON_RDTSCP. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index c5f1a1d..1fe9218 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -67,6 +67,7 @@ #define EXIT_REASON_EPT_VIOLATION 48 #define EXIT_REASON_EPT_MISCONFIG 49 #define EXIT_REASON_INVEPT 50 +#define EXIT_REASON_RDTSCP 51 #define EXIT_REASON_PREEMPTION_TIMER 52 #define EXIT_REASON_INVVPID 53 #define EXIT_REASON_WBINVD 54 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 50c675b..fdd9f8b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2467,6 +2467,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_secondary_ctls_low = 0; vmx->nested.nested_vmx_secondary_ctls_high &= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | @@ -7510,7 +7511,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING); - case EXIT_REASON_RDTSC: + case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP: return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING); case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR: case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD: @@ -8517,6 +8518,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) exec_control); } } + if (nested && !vmx->rdtscp_enabled) + vmx->nested.nested_vmx_secondary_ctls_high &= + ~SECONDARY_EXEC_RDTSCP; } /* Exposing INVPCID only when PCID is exposed */ @@ -9146,8 +9150,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exec_control &= ~SECONDARY_EXEC_RDTSCP; /* Take the following fields only from vmcs12 */ exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | - SECONDARY_EXEC_APIC_REGISTER_VIRT); + SECONDARY_EXEC_APIC_REGISTER_VIRT); if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; -- cgit v0.10.2 From acaf6a97d623af123314c2f8ce4cf7254f6b2fc1 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 25 Feb 2015 13:08:05 +0000 Subject: MIPS: lose_fpu(): Disable FPU when MSA enabled The lose_fpu() function only disables the FPU in CP0_Status.CU1 if the FPU is in use and MSA isn't enabled. This isn't necessarily a problem because KSTK_STATUS(current), the version of CP0_Status stored on the kernel stack on entry from user mode, does always get updated and gets restored when returning to user mode, but I don't think it was intended, and it is inconsistent with the case of only the FPU being in use. Sometimes leaving the FPU enabled may also mask kernel bugs where FPU operations are executed when the FPU might not be enabled. So lets disable the FPU in the MSA case too. Fixes: 33c771ba5c5d ("MIPS: save/disable MSA in lose_fpu") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9323/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index dd083e9..9f26b07 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -170,6 +170,7 @@ static inline void lose_fpu(int save) } disable_msa(); clear_thread_flag(TIF_USEDMSA); + __disable_fpu(); } else if (is_fpu_owner()) { if (save) _save_fp(current); -- cgit v0.10.2 From 631afc65e8f4f845945ef9e90236d10cee601498 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 27 Mar 2015 17:00:03 +0000 Subject: MIPS: Push .set mips64r* into the functions needing it The {save,restore}_fp_context{,32} functions require that the assembler allows the use of sdc instructions on any FP register, and this is acomplished by setting the arch to mips64r2 or mips64r6 (using MIPS_ISA_ARCH_LEVEL_RAW). However this has the effect of enabling the assembler to use mips64 instructions in the expansion of pseudo-instructions. This was done in the (now-reverted) commit eec43a224cf1 "MIPS: Save/restore MSA context around signals" which led to my mistakenly believing that there was an assembler bug, when in reality the assembler was just emitting mips64 instructions. Avoid the issue for future commits which will add code to r4k_fpu.S by pushing the .set MIPS_ISA_ARCH_LEVEL_RAW directives into the functions that require it, and remove the spurious assertion declaring the assembler bug. Signed-off-by: Paul Burton [james.hogan@imgtec.com: Rebase on v4.0-rc1 and reword commit message to reflect use of MIPS_ISA_ARCH_LEVEL_RAW] Signed-off-by: James Hogan Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9612/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 0cae459..782dde7 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -326,8 +326,7 @@ SET_HARDFLOAT .insn .word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11) - /* move triggers an assembler bug... */ - or \rd, $1, zero + move \rd, $1 .set pop .endm @@ -337,8 +336,7 @@ SET_HARDFLOAT .insn .word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11) - /* move triggers an assembler bug... */ - or \rd, $1, zero + move \rd, $1 .set pop .endm @@ -346,8 +344,7 @@ .set push .set noat SET_HARDFLOAT - /* move triggers an assembler bug... */ - or $1, \rs, zero + move $1, \rs .word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm @@ -356,8 +353,7 @@ .set push .set noat SET_HARDFLOAT - /* move triggers an assembler bug... */ - or $1, \rs, zero + move $1, \rs .word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 676c503..1d88af2 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -34,7 +34,6 @@ .endm .set noreorder - .set MIPS_ISA_ARCH_LEVEL_RAW LEAF(_save_fp_context) .set push @@ -103,6 +102,7 @@ LEAF(_save_fp_context) /* Save 32-bit process floating point context */ LEAF(_save_fp_context32) .set push + .set MIPS_ISA_ARCH_LEVEL_RAW SET_HARDFLOAT cfc1 t1, fcr31 -- cgit v0.10.2 From f23ce3883a30743a5b779dc6fb90ca8620688a23 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:31 +0000 Subject: MIPS: assume at as source/dest of MSA copy/insert instructions Assuming at ($1) as the source or destination register of copy or insert instructions: - Simplifies the macros providing those instructions for toolchains without MSA support. - Avoids an unnecessary move instruction when at is used as the source or destination register anyway. - Is sufficient for the uses to be introduced in the kernel by a subsequent patch. Note that due to a patch ordering snafu on my part this also fixes the currently broken build with MSA support enabled. The build has been broken since commit c9017757c532 "MIPS: init upper 64b of vector registers when MSA is first used", which this patch should have preceeded. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9161/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 782dde7..91df136 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -227,35 +227,35 @@ .set pop .endm - .macro copy_u_w rd, ws, n + .macro copy_u_w ws, n .set push .set mips32r2 .set msa - copy_u.w \rd, $w\ws[\n] + copy_u.w $1, $w\ws[\n] .set pop .endm - .macro copy_u_d rd, ws, n + .macro copy_u_d ws, n .set push .set mips64r2 .set msa - copy_u.d \rd, $w\ws[\n] + copy_u.d $1, $w\ws[\n] .set pop .endm - .macro insert_w wd, n, rs + .macro insert_w wd, n .set push .set mips32r2 .set msa - insert.w $w\wd[\n], \rs + insert.w $w\wd[\n], $1 .set pop .endm - .macro insert_d wd, n, rs + .macro insert_d wd, n .set push .set mips64r2 .set msa - insert.d $w\wd[\n], \rs + insert.d $w\wd[\n], $1 .set pop .endm #else @@ -320,40 +320,36 @@ .set pop .endm - .macro copy_u_w rd, ws, n + .macro copy_u_w ws, n .set push .set noat SET_HARDFLOAT .insn .word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11) - move \rd, $1 .set pop .endm - .macro copy_u_d rd, ws, n + .macro copy_u_d ws, n .set push .set noat SET_HARDFLOAT .insn .word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11) - move \rd, $1 .set pop .endm - .macro insert_w wd, n, rs + .macro insert_w wd, n .set push .set noat SET_HARDFLOAT - move $1, \rs .word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm - .macro insert_d wd, n, rs + .macro insert_d wd, n .set push .set noat SET_HARDFLOAT - move $1, \rs .word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6) .set pop .endm -- cgit v0.10.2 From a3a49810c55e3489dfb5d72a9b2e41ab1db9ffb9 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:32 +0000 Subject: MIPS: remove MSA macro recursion Recursive macros made the code more concise & worked great for the case where the toolchain doesn't support MSA. However, with toolchains which do support MSA they lead to build failures such as: arch/mips/kernel/r4k_switch.S: Assembler messages: arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w(0+1)[2],$1' arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w(0+1)[3],$1' arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w((0+1)+1)[2],$1' arch/mips/kernel/r4k_switch.S:148: Error: invalid operands `insert.w $w((0+1)+1)[3],$1' ... Drop the recursion from msa_init_all_upper invoking the msa_init_upper macro explicitly for each vector register. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9162/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 91df136..73dec5c 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -444,9 +444,6 @@ insert_w \wd, 2 insert_w \wd, 3 #endif - .if 31-\wd - msa_init_upper (\wd+1) - .endif .endm .macro msa_init_all_upper @@ -455,6 +452,37 @@ SET_HARDFLOAT not $1, zero msa_init_upper 0 + msa_init_upper 1 + msa_init_upper 2 + msa_init_upper 3 + msa_init_upper 4 + msa_init_upper 5 + msa_init_upper 6 + msa_init_upper 7 + msa_init_upper 8 + msa_init_upper 9 + msa_init_upper 10 + msa_init_upper 11 + msa_init_upper 12 + msa_init_upper 13 + msa_init_upper 14 + msa_init_upper 15 + msa_init_upper 16 + msa_init_upper 17 + msa_init_upper 18 + msa_init_upper 19 + msa_init_upper 20 + msa_init_upper 21 + msa_init_upper 22 + msa_init_upper 23 + msa_init_upper 24 + msa_init_upper 25 + msa_init_upper 26 + msa_init_upper 27 + msa_init_upper 28 + msa_init_upper 29 + msa_init_upper 30 + msa_init_upper 31 .set pop .endm -- cgit v0.10.2 From e1bebbab1eaecac77d77033010b5e0f51b737e64 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:33 +0000 Subject: MIPS: wrap cfcmsa & ctcmsa accesses for toolchains with MSA support Uses of the cfcmsa & ctcmsa instructions were not being wrapped by a macro in the case where the toolchain supports MSA, since the arguments exactly match a typical use of the instructions. However using current toolchains this leads to errors such as: arch/mips/kernel/genex.S:437: Error: opcode not supported on this processor: mips32r2 (mips32r2) `cfcmsa $5,1' Thus uses of the instructions must be in the context of a ".set msa" directive, however doing that from the users of the instructions would be messy due to the possibility that the toolchain does not support MSA. Fix this by renaming the macros (prepending an underscore) in order to avoid recursion when attempting to emit the instructions, and provide implementations for the TOOLCHAIN_SUPPORTS_MSA case which ".set msa" as appropriate. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9163/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 73dec5c..a64e424 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -211,6 +211,22 @@ .endm #ifdef TOOLCHAIN_SUPPORTS_MSA + .macro _cfcmsa rd, cs + .set push + .set mips32r2 + .set msa + cfcmsa \rd, $\cs + .set pop + .endm + + .macro _ctcmsa cd, rs + .set push + .set mips32r2 + .set msa + ctcmsa $\cd, \rs + .set pop + .endm + .macro ld_d wd, off, base .set push .set mips32r2 @@ -283,7 +299,7 @@ /* * Temporary until all toolchains in use include MSA support. */ - .macro cfcmsa rd, cs + .macro _cfcmsa rd, cs .set push .set noat SET_HARDFLOAT @@ -293,7 +309,7 @@ .set pop .endm - .macro ctcmsa cd, rs + .macro _ctcmsa cd, rs .set push .set noat SET_HARDFLOAT @@ -391,7 +407,7 @@ .set push .set noat SET_HARDFLOAT - cfcmsa $1, MSA_CSR + _cfcmsa $1, MSA_CSR sw $1, THREAD_MSA_CSR(\thread) .set pop .endm @@ -401,7 +417,7 @@ .set noat SET_HARDFLOAT lw $1, THREAD_MSA_CSR(\thread) - ctcmsa MSA_CSR, $1 + _ctcmsa MSA_CSR, $1 .set pop ld_d 0, THREAD_FPR0, \thread ld_d 1, THREAD_FPR1, \thread -- cgit v0.10.2 From 091be550a70a086c3b4420c6155e733dc410f190 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:34 +0000 Subject: MIPS: clear MSACSR cause bits when handling MSA FP exception Much like for traditional scalar FP exceptions, the cause bits in the MSACSR register need to be cleared following an MSA FP exception. Without doing so the exception will simply be raised again whenever the kernel restores MSACSR from a tasks saved context, leading to undesirable spurious exceptions. Clear the cause bits from the handle_msa_fpe function, mirroring the way handle_fpe clears the cause bits in FCSR. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9164/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 2ebaabe..86e2242 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -368,6 +368,15 @@ NESTED(nmi_handler, PT_SIZE, sp) STI .endm + .macro __build_clear_msa_fpe + _cfcmsa a1, MSA_CSR + li a2, ~(0x3f << 12) + and a1, a1, a2 + _ctcmsa MSA_CSR, a1 + TRACE_IRQS_ON + STI + .endm + .macro __build_clear_ade MFC0 t0, CP0_BADVADDR PTR_S t0, PT_BVADDR(sp) @@ -426,7 +435,7 @@ NESTED(nmi_handler, PT_SIZE, sp) BUILD_HANDLER cpu cpu sti silent /* #11 */ BUILD_HANDLER ov ov sti silent /* #12 */ BUILD_HANDLER tr tr sti silent /* #13 */ - BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */ + BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent /* #14 */ BUILD_HANDLER fpe fpe fpe silent /* #15 */ BUILD_HANDLER ftlb ftlb none silent /* #16 */ BUILD_HANDLER msa msa sti silent /* #21 */ -- cgit v0.10.2 From ad70c13a938daf833cad86830f23865ee37aa5c7 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:35 +0000 Subject: MIPS: Ensure FCSR cause bits are clear after invoking FPU emulator When running the emulator to handle an instruction that raised an FP unimplemented operation exception, the FCSR cause bits were being cleared. This is done to ensure that the kernel does not take an FP exception when later restoring FP context to registers. However, this was not being done when the emulator is invoked in response to a coprocessor unusable exception. This happens in 2 cases: - There is no FPU present in the system. In this case things were OK, since the FP context is never restored to hardware registers and thus no FP exception may be raised when restoring FCSR. - The FPU could not be configured to the mode required by the task. In this case it would be possible for the emulator to set cause bits which are later restored to hardware if the task migrates to a CPU whose associated FPU does support its mode requirements, or if the tasks FP mode requirements change. Consistently clear the cause bits after invoking the emulator, by moving the clearing to process_fpemu_return and ensuring this is always called before the tasks FP context is restored. This will make it easier to catch further paths invoking the emulator in future, as will be introduced in further patches. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9165/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 33984c0..8943ebe 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -701,6 +701,13 @@ asmlinkage void do_ov(struct pt_regs *regs) int process_fpemu_return(int sig, void __user *fault_addr) { + /* + * We can't allow the emulated instruction to leave any of the cause + * bits set in FCSR. If they were then the kernel would take an FP + * exception when restoring FP context. + */ + current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + if (sig == SIGSEGV || sig == SIGBUS) { struct siginfo si = {0}; si.si_addr = fault_addr; @@ -804,18 +811,12 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - /* - * We can't allow the emulated instruction to leave any of - * the cause bit set in $fcr31. - */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + /* If something went wrong, signal */ + process_fpemu_return(sig, fault_addr); /* Restore the hardware register state */ own_fpu(1); /* Using the FPU again. */ - /* If something went wrong, signal */ - process_fpemu_return(sig, fault_addr); - goto out; } else if (fcr31 & FPU_CSR_INV_X) info.si_code = FPE_FLTINV; -- cgit v0.10.2 From ac9ad83bc318635ed7496e9dff30beaa522eaec7 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:36 +0000 Subject: MIPS: prevent FP context set via ptrace being discarded If a ptracee has not used the FPU and the ptracer sets its FP context using PTRACE_POKEUSR, PTRACE_SETFPREGS or PTRACE_SETREGSET then that context will be discarded upon either the ptracee using the FPU or a further write to the context via ptrace. Prevent this loss by recording that the task has "used" math once its FP context has been written to. The context initialisation code that was present for the PTRACE_POKEUSR case is reused for the other 2 cases to provide consistent behaviour for the different ptrace requests. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9166/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 5104528..7da6e32 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -46,6 +46,26 @@ #define CREATE_TRACE_POINTS #include +static void init_fp_ctx(struct task_struct *target) +{ + /* If FP has been used then the target already has context */ + if (tsk_used_math(target)) + return; + + /* Begin with data registers set to all 1s... */ + memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr)); + + /* ...and FCSR zeroed */ + target->thread.fpu.fcr31 = 0; + + /* + * Record that the target has "used" math, such that the context + * just initialised, and any modifications made by the caller, + * aren't discarded. + */ + set_stopped_child_used_math(target); +} + /* * Called by kernel/ptrace.c when detaching.. * @@ -142,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data) if (!access_ok(VERIFY_READ, data, 33 * 8)) return -EIO; + init_fp_ctx(child); fregs = get_fpu_regs(child); for (i = 0; i < 32; i++) { @@ -439,6 +460,8 @@ static int fpr_set(struct task_struct *target, /* XXX fcr31 */ + init_fp_ctx(target); + if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t)) return user_regset_copyin(&pos, &count, &kbuf, &ubuf, &target->thread.fpu, @@ -660,12 +683,7 @@ long arch_ptrace(struct task_struct *child, long request, case FPR_BASE ... FPR_BASE + 31: { union fpureg *fregs = get_fpu_regs(child); - if (!tsk_used_math(child)) { - /* FP not yet used */ - memset(&child->thread.fpu, ~0, - sizeof(child->thread.fpu)); - child->thread.fpu.fcr31 = 0; - } + init_fp_ctx(child); #ifdef CONFIG_32BIT if (test_thread_flag(TIF_32BIT_FPREGS)) { /* -- cgit v0.10.2 From 84ab45b33858a87632e1f5e207e302bf48eaf52e Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 30 Jan 2015 12:09:37 +0000 Subject: MIPS: disable FPU if the mode is unsupported The expected semantics of __enable_fpu are for the FPU to be enabled in the given mode if possible, otherwise for the FPU to be left disabled and SIGFPE returned. The FPU was incorrectly being left enabled in cases where the desired value for FR was unavailable. Without ensuring the FPU is disabled in this case, it would be possible for userland to go on to execute further FP instructions natively in the incorrect mode, rather than those instructions being trapped & emulated as they need to be. Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9167/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 9f26b07..b104ad9 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -48,6 +48,12 @@ enum fpu_mode { #define FPU_FR_MASK 0x1 }; +#define __disable_fpu() \ +do { \ + clear_c0_status(ST0_CU1); \ + disable_fpu_hazard(); \ +} while (0) + static inline int __enable_fpu(enum fpu_mode mode) { int fr; @@ -86,7 +92,12 @@ fr_common: enable_fpu_hazard(); /* check FR has the desired value */ - return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE; + if (!!(read_c0_status() & ST0_FR) == !!fr) + return 0; + + /* unsupported FR value */ + __disable_fpu(); + return SIGFPE; default: BUG(); @@ -95,12 +106,6 @@ fr_common: return SIGFPE; } -#define __disable_fpu() \ -do { \ - clear_c0_status(ST0_CU1); \ - disable_fpu_hazard(); \ -} while (0) - #define clear_fpu_owner() clear_thread_flag(TIF_USEDFPU) static inline int __is_fpu_owner(void) -- cgit v0.10.2 From 466aec5f292be469e15b3dc3d17b731dab93727c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 30 Jan 2015 12:09:38 +0000 Subject: Revert "MIPS: Don't assume 64-bit FP registers for context switch" This reverts commit 02987633df7ba2f62967791dda816eb191d1add3. The basic premise of the patch was incorrect since MSA context (including FP state) is saved using st.d which stores two consecutive 64-bit words in memory rather than a single 128-bit word. This means that even with big endian MSA, the FP state is still in the first 64-bit word. Signed-off-by: James Hogan Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9168/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h index cdac7b3..8038647 100644 --- a/arch/mips/include/asm/asmmacro-32.h +++ b/arch/mips/include/asm/asmmacro-32.h @@ -16,38 +16,38 @@ .set push SET_HARDFLOAT cfc1 \tmp, fcr31 - swc1 $f0, THREAD_FPR0_LS64(\thread) - swc1 $f1, THREAD_FPR1_LS64(\thread) - swc1 $f2, THREAD_FPR2_LS64(\thread) - swc1 $f3, THREAD_FPR3_LS64(\thread) - swc1 $f4, THREAD_FPR4_LS64(\thread) - swc1 $f5, THREAD_FPR5_LS64(\thread) - swc1 $f6, THREAD_FPR6_LS64(\thread) - swc1 $f7, THREAD_FPR7_LS64(\thread) - swc1 $f8, THREAD_FPR8_LS64(\thread) - swc1 $f9, THREAD_FPR9_LS64(\thread) - swc1 $f10, THREAD_FPR10_LS64(\thread) - swc1 $f11, THREAD_FPR11_LS64(\thread) - swc1 $f12, THREAD_FPR12_LS64(\thread) - swc1 $f13, THREAD_FPR13_LS64(\thread) - swc1 $f14, THREAD_FPR14_LS64(\thread) - swc1 $f15, THREAD_FPR15_LS64(\thread) - swc1 $f16, THREAD_FPR16_LS64(\thread) - swc1 $f17, THREAD_FPR17_LS64(\thread) - swc1 $f18, THREAD_FPR18_LS64(\thread) - swc1 $f19, THREAD_FPR19_LS64(\thread) - swc1 $f20, THREAD_FPR20_LS64(\thread) - swc1 $f21, THREAD_FPR21_LS64(\thread) - swc1 $f22, THREAD_FPR22_LS64(\thread) - swc1 $f23, THREAD_FPR23_LS64(\thread) - swc1 $f24, THREAD_FPR24_LS64(\thread) - swc1 $f25, THREAD_FPR25_LS64(\thread) - swc1 $f26, THREAD_FPR26_LS64(\thread) - swc1 $f27, THREAD_FPR27_LS64(\thread) - swc1 $f28, THREAD_FPR28_LS64(\thread) - swc1 $f29, THREAD_FPR29_LS64(\thread) - swc1 $f30, THREAD_FPR30_LS64(\thread) - swc1 $f31, THREAD_FPR31_LS64(\thread) + swc1 $f0, THREAD_FPR0(\thread) + swc1 $f1, THREAD_FPR1(\thread) + swc1 $f2, THREAD_FPR2(\thread) + swc1 $f3, THREAD_FPR3(\thread) + swc1 $f4, THREAD_FPR4(\thread) + swc1 $f5, THREAD_FPR5(\thread) + swc1 $f6, THREAD_FPR6(\thread) + swc1 $f7, THREAD_FPR7(\thread) + swc1 $f8, THREAD_FPR8(\thread) + swc1 $f9, THREAD_FPR9(\thread) + swc1 $f10, THREAD_FPR10(\thread) + swc1 $f11, THREAD_FPR11(\thread) + swc1 $f12, THREAD_FPR12(\thread) + swc1 $f13, THREAD_FPR13(\thread) + swc1 $f14, THREAD_FPR14(\thread) + swc1 $f15, THREAD_FPR15(\thread) + swc1 $f16, THREAD_FPR16(\thread) + swc1 $f17, THREAD_FPR17(\thread) + swc1 $f18, THREAD_FPR18(\thread) + swc1 $f19, THREAD_FPR19(\thread) + swc1 $f20, THREAD_FPR20(\thread) + swc1 $f21, THREAD_FPR21(\thread) + swc1 $f22, THREAD_FPR22(\thread) + swc1 $f23, THREAD_FPR23(\thread) + swc1 $f24, THREAD_FPR24(\thread) + swc1 $f25, THREAD_FPR25(\thread) + swc1 $f26, THREAD_FPR26(\thread) + swc1 $f27, THREAD_FPR27(\thread) + swc1 $f28, THREAD_FPR28(\thread) + swc1 $f29, THREAD_FPR29(\thread) + swc1 $f30, THREAD_FPR30(\thread) + swc1 $f31, THREAD_FPR31(\thread) sw \tmp, THREAD_FCR31(\thread) .set pop .endm @@ -56,38 +56,38 @@ .set push SET_HARDFLOAT lw \tmp, THREAD_FCR31(\thread) - lwc1 $f0, THREAD_FPR0_LS64(\thread) - lwc1 $f1, THREAD_FPR1_LS64(\thread) - lwc1 $f2, THREAD_FPR2_LS64(\thread) - lwc1 $f3, THREAD_FPR3_LS64(\thread) - lwc1 $f4, THREAD_FPR4_LS64(\thread) - lwc1 $f5, THREAD_FPR5_LS64(\thread) - lwc1 $f6, THREAD_FPR6_LS64(\thread) - lwc1 $f7, THREAD_FPR7_LS64(\thread) - lwc1 $f8, THREAD_FPR8_LS64(\thread) - lwc1 $f9, THREAD_FPR9_LS64(\thread) - lwc1 $f10, THREAD_FPR10_LS64(\thread) - lwc1 $f11, THREAD_FPR11_LS64(\thread) - lwc1 $f12, THREAD_FPR12_LS64(\thread) - lwc1 $f13, THREAD_FPR13_LS64(\thread) - lwc1 $f14, THREAD_FPR14_LS64(\thread) - lwc1 $f15, THREAD_FPR15_LS64(\thread) - lwc1 $f16, THREAD_FPR16_LS64(\thread) - lwc1 $f17, THREAD_FPR17_LS64(\thread) - lwc1 $f18, THREAD_FPR18_LS64(\thread) - lwc1 $f19, THREAD_FPR19_LS64(\thread) - lwc1 $f20, THREAD_FPR20_LS64(\thread) - lwc1 $f21, THREAD_FPR21_LS64(\thread) - lwc1 $f22, THREAD_FPR22_LS64(\thread) - lwc1 $f23, THREAD_FPR23_LS64(\thread) - lwc1 $f24, THREAD_FPR24_LS64(\thread) - lwc1 $f25, THREAD_FPR25_LS64(\thread) - lwc1 $f26, THREAD_FPR26_LS64(\thread) - lwc1 $f27, THREAD_FPR27_LS64(\thread) - lwc1 $f28, THREAD_FPR28_LS64(\thread) - lwc1 $f29, THREAD_FPR29_LS64(\thread) - lwc1 $f30, THREAD_FPR30_LS64(\thread) - lwc1 $f31, THREAD_FPR31_LS64(\thread) + lwc1 $f0, THREAD_FPR0(\thread) + lwc1 $f1, THREAD_FPR1(\thread) + lwc1 $f2, THREAD_FPR2(\thread) + lwc1 $f3, THREAD_FPR3(\thread) + lwc1 $f4, THREAD_FPR4(\thread) + lwc1 $f5, THREAD_FPR5(\thread) + lwc1 $f6, THREAD_FPR6(\thread) + lwc1 $f7, THREAD_FPR7(\thread) + lwc1 $f8, THREAD_FPR8(\thread) + lwc1 $f9, THREAD_FPR9(\thread) + lwc1 $f10, THREAD_FPR10(\thread) + lwc1 $f11, THREAD_FPR11(\thread) + lwc1 $f12, THREAD_FPR12(\thread) + lwc1 $f13, THREAD_FPR13(\thread) + lwc1 $f14, THREAD_FPR14(\thread) + lwc1 $f15, THREAD_FPR15(\thread) + lwc1 $f16, THREAD_FPR16(\thread) + lwc1 $f17, THREAD_FPR17(\thread) + lwc1 $f18, THREAD_FPR18(\thread) + lwc1 $f19, THREAD_FPR19(\thread) + lwc1 $f20, THREAD_FPR20(\thread) + lwc1 $f21, THREAD_FPR21(\thread) + lwc1 $f22, THREAD_FPR22(\thread) + lwc1 $f23, THREAD_FPR23(\thread) + lwc1 $f24, THREAD_FPR24(\thread) + lwc1 $f25, THREAD_FPR25(\thread) + lwc1 $f26, THREAD_FPR26(\thread) + lwc1 $f27, THREAD_FPR27(\thread) + lwc1 $f28, THREAD_FPR28(\thread) + lwc1 $f29, THREAD_FPR29(\thread) + lwc1 $f30, THREAD_FPR30(\thread) + lwc1 $f31, THREAD_FPR31(\thread) ctc1 \tmp, fcr31 .set pop .endm diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index a64e424..6156ac8 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -60,22 +60,22 @@ .set push SET_HARDFLOAT cfc1 \tmp, fcr31 - sdc1 $f0, THREAD_FPR0_LS64(\thread) - sdc1 $f2, THREAD_FPR2_LS64(\thread) - sdc1 $f4, THREAD_FPR4_LS64(\thread) - sdc1 $f6, THREAD_FPR6_LS64(\thread) - sdc1 $f8, THREAD_FPR8_LS64(\thread) - sdc1 $f10, THREAD_FPR10_LS64(\thread) - sdc1 $f12, THREAD_FPR12_LS64(\thread) - sdc1 $f14, THREAD_FPR14_LS64(\thread) - sdc1 $f16, THREAD_FPR16_LS64(\thread) - sdc1 $f18, THREAD_FPR18_LS64(\thread) - sdc1 $f20, THREAD_FPR20_LS64(\thread) - sdc1 $f22, THREAD_FPR22_LS64(\thread) - sdc1 $f24, THREAD_FPR24_LS64(\thread) - sdc1 $f26, THREAD_FPR26_LS64(\thread) - sdc1 $f28, THREAD_FPR28_LS64(\thread) - sdc1 $f30, THREAD_FPR30_LS64(\thread) + sdc1 $f0, THREAD_FPR0(\thread) + sdc1 $f2, THREAD_FPR2(\thread) + sdc1 $f4, THREAD_FPR4(\thread) + sdc1 $f6, THREAD_FPR6(\thread) + sdc1 $f8, THREAD_FPR8(\thread) + sdc1 $f10, THREAD_FPR10(\thread) + sdc1 $f12, THREAD_FPR12(\thread) + sdc1 $f14, THREAD_FPR14(\thread) + sdc1 $f16, THREAD_FPR16(\thread) + sdc1 $f18, THREAD_FPR18(\thread) + sdc1 $f20, THREAD_FPR20(\thread) + sdc1 $f22, THREAD_FPR22(\thread) + sdc1 $f24, THREAD_FPR24(\thread) + sdc1 $f26, THREAD_FPR26(\thread) + sdc1 $f28, THREAD_FPR28(\thread) + sdc1 $f30, THREAD_FPR30(\thread) sw \tmp, THREAD_FCR31(\thread) .set pop .endm @@ -84,22 +84,22 @@ .set push .set mips64r2 SET_HARDFLOAT - sdc1 $f1, THREAD_FPR1_LS64(\thread) - sdc1 $f3, THREAD_FPR3_LS64(\thread) - sdc1 $f5, THREAD_FPR5_LS64(\thread) - sdc1 $f7, THREAD_FPR7_LS64(\thread) - sdc1 $f9, THREAD_FPR9_LS64(\thread) - sdc1 $f11, THREAD_FPR11_LS64(\thread) - sdc1 $f13, THREAD_FPR13_LS64(\thread) - sdc1 $f15, THREAD_FPR15_LS64(\thread) - sdc1 $f17, THREAD_FPR17_LS64(\thread) - sdc1 $f19, THREAD_FPR19_LS64(\thread) - sdc1 $f21, THREAD_FPR21_LS64(\thread) - sdc1 $f23, THREAD_FPR23_LS64(\thread) - sdc1 $f25, THREAD_FPR25_LS64(\thread) - sdc1 $f27, THREAD_FPR27_LS64(\thread) - sdc1 $f29, THREAD_FPR29_LS64(\thread) - sdc1 $f31, THREAD_FPR31_LS64(\thread) + sdc1 $f1, THREAD_FPR1(\thread) + sdc1 $f3, THREAD_FPR3(\thread) + sdc1 $f5, THREAD_FPR5(\thread) + sdc1 $f7, THREAD_FPR7(\thread) + sdc1 $f9, THREAD_FPR9(\thread) + sdc1 $f11, THREAD_FPR11(\thread) + sdc1 $f13, THREAD_FPR13(\thread) + sdc1 $f15, THREAD_FPR15(\thread) + sdc1 $f17, THREAD_FPR17(\thread) + sdc1 $f19, THREAD_FPR19(\thread) + sdc1 $f21, THREAD_FPR21(\thread) + sdc1 $f23, THREAD_FPR23(\thread) + sdc1 $f25, THREAD_FPR25(\thread) + sdc1 $f27, THREAD_FPR27(\thread) + sdc1 $f29, THREAD_FPR29(\thread) + sdc1 $f31, THREAD_FPR31(\thread) .set pop .endm @@ -118,22 +118,22 @@ .set push SET_HARDFLOAT lw \tmp, THREAD_FCR31(\thread) - ldc1 $f0, THREAD_FPR0_LS64(\thread) - ldc1 $f2, THREAD_FPR2_LS64(\thread) - ldc1 $f4, THREAD_FPR4_LS64(\thread) - ldc1 $f6, THREAD_FPR6_LS64(\thread) - ldc1 $f8, THREAD_FPR8_LS64(\thread) - ldc1 $f10, THREAD_FPR10_LS64(\thread) - ldc1 $f12, THREAD_FPR12_LS64(\thread) - ldc1 $f14, THREAD_FPR14_LS64(\thread) - ldc1 $f16, THREAD_FPR16_LS64(\thread) - ldc1 $f18, THREAD_FPR18_LS64(\thread) - ldc1 $f20, THREAD_FPR20_LS64(\thread) - ldc1 $f22, THREAD_FPR22_LS64(\thread) - ldc1 $f24, THREAD_FPR24_LS64(\thread) - ldc1 $f26, THREAD_FPR26_LS64(\thread) - ldc1 $f28, THREAD_FPR28_LS64(\thread) - ldc1 $f30, THREAD_FPR30_LS64(\thread) + ldc1 $f0, THREAD_FPR0(\thread) + ldc1 $f2, THREAD_FPR2(\thread) + ldc1 $f4, THREAD_FPR4(\thread) + ldc1 $f6, THREAD_FPR6(\thread) + ldc1 $f8, THREAD_FPR8(\thread) + ldc1 $f10, THREAD_FPR10(\thread) + ldc1 $f12, THREAD_FPR12(\thread) + ldc1 $f14, THREAD_FPR14(\thread) + ldc1 $f16, THREAD_FPR16(\thread) + ldc1 $f18, THREAD_FPR18(\thread) + ldc1 $f20, THREAD_FPR20(\thread) + ldc1 $f22, THREAD_FPR22(\thread) + ldc1 $f24, THREAD_FPR24(\thread) + ldc1 $f26, THREAD_FPR26(\thread) + ldc1 $f28, THREAD_FPR28(\thread) + ldc1 $f30, THREAD_FPR30(\thread) ctc1 \tmp, fcr31 .endm @@ -141,22 +141,22 @@ .set push .set mips64r2 SET_HARDFLOAT - ldc1 $f1, THREAD_FPR1_LS64(\thread) - ldc1 $f3, THREAD_FPR3_LS64(\thread) - ldc1 $f5, THREAD_FPR5_LS64(\thread) - ldc1 $f7, THREAD_FPR7_LS64(\thread) - ldc1 $f9, THREAD_FPR9_LS64(\thread) - ldc1 $f11, THREAD_FPR11_LS64(\thread) - ldc1 $f13, THREAD_FPR13_LS64(\thread) - ldc1 $f15, THREAD_FPR15_LS64(\thread) - ldc1 $f17, THREAD_FPR17_LS64(\thread) - ldc1 $f19, THREAD_FPR19_LS64(\thread) - ldc1 $f21, THREAD_FPR21_LS64(\thread) - ldc1 $f23, THREAD_FPR23_LS64(\thread) - ldc1 $f25, THREAD_FPR25_LS64(\thread) - ldc1 $f27, THREAD_FPR27_LS64(\thread) - ldc1 $f29, THREAD_FPR29_LS64(\thread) - ldc1 $f31, THREAD_FPR31_LS64(\thread) + ldc1 $f1, THREAD_FPR1(\thread) + ldc1 $f3, THREAD_FPR3(\thread) + ldc1 $f5, THREAD_FPR5(\thread) + ldc1 $f7, THREAD_FPR7(\thread) + ldc1 $f9, THREAD_FPR9(\thread) + ldc1 $f11, THREAD_FPR11(\thread) + ldc1 $f13, THREAD_FPR13(\thread) + ldc1 $f15, THREAD_FPR15(\thread) + ldc1 $f17, THREAD_FPR17(\thread) + ldc1 $f19, THREAD_FPR19(\thread) + ldc1 $f21, THREAD_FPR21(\thread) + ldc1 $f23, THREAD_FPR23(\thread) + ldc1 $f25, THREAD_FPR25(\thread) + ldc1 $f27, THREAD_FPR27(\thread) + ldc1 $f29, THREAD_FPR29(\thread) + ldc1 $f31, THREAD_FPR31(\thread) .set pop .endm diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 750d67a..3ee1565 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -167,72 +167,6 @@ void output_thread_fpu_defines(void) OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]); OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]); - /* the least significant 64 bits of each FP register */ - OFFSET(THREAD_FPR0_LS64, task_struct, - thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR1_LS64, task_struct, - thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR2_LS64, task_struct, - thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR3_LS64, task_struct, - thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR4_LS64, task_struct, - thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR5_LS64, task_struct, - thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR6_LS64, task_struct, - thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR7_LS64, task_struct, - thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR8_LS64, task_struct, - thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR9_LS64, task_struct, - thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR10_LS64, task_struct, - thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR11_LS64, task_struct, - thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR12_LS64, task_struct, - thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR13_LS64, task_struct, - thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR14_LS64, task_struct, - thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR15_LS64, task_struct, - thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR16_LS64, task_struct, - thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR17_LS64, task_struct, - thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR18_LS64, task_struct, - thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR19_LS64, task_struct, - thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR20_LS64, task_struct, - thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR21_LS64, task_struct, - thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR22_LS64, task_struct, - thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR23_LS64, task_struct, - thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR24_LS64, task_struct, - thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR25_LS64, task_struct, - thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR26_LS64, task_struct, - thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR27_LS64, task_struct, - thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR28_LS64, task_struct, - thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR29_LS64, task_struct, - thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR30_LS64, task_struct, - thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FPR31_LS64, task_struct, - thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]); - OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31); OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr); BLANK(); -- cgit v0.10.2 From 1f3a2c6e229ccb8df8115b04d16ad4832767cf3a Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 30 Jan 2015 12:09:39 +0000 Subject: MIPS: MSA: Fix big-endian FPR_IDX implementation The maximum word size is 64-bits since MSA state is saved using st.d which stores two 64-bit words, therefore reimplement FPR_IDX using xor, and only within each 64-bit word. Signed-off-by: James Hogan Signed-off-by: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/9169/ Signed-off-by: Ralf Baechle diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h index b5dcbee..9b3b48e 100644 --- a/arch/mips/include/asm/processor.h +++ b/arch/mips/include/asm/processor.h @@ -105,7 +105,7 @@ union fpureg { #ifdef CONFIG_CPU_LITTLE_ENDIAN # define FPR_IDX(width, idx) (idx) #else -# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx)) +# define FPR_IDX(width, idx) ((idx) ^ ((64 / (width)) - 1)) #endif #define BUILD_FPR_ACCESS(width) \ -- cgit v0.10.2 From 98119ad53376885819d93dfb8737b6a9a61ca0ba Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 6 Feb 2015 11:11:56 +0000 Subject: MIPS: KVM: Handle MSA Disabled exceptions from guest Guest user mode can generate a guest MSA Disabled exception on an MSA capable core by simply trying to execute an MSA instruction. Since this exception is unknown to KVM it will be passed on to the guest kernel. However guest Linux kernels prior to v3.15 do not set up an exception handler for the MSA Disabled exception as they don't support any MSA capable cores. This results in a guest OS panic. Since an older processor ID may be being emulated, and MSA support is not advertised to the guest, the correct behaviour is to generate a Reserved Instruction exception in the guest kernel so it can send the guest process an illegal instruction signal (SIGILL), as would happen with a non-MSA-capable core. Fix this as minimally as reasonably possible by preventing kvm_mips_check_privilege() from relaying MSA Disabled exceptions from guest user mode to the guest kernel, and handling the MSA Disabled exception by emulating a Reserved Instruction exception in the guest, via a new handle_msa_disabled() KVM callback. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: # v3.15+ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index ac4fc71..f722b05 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -322,6 +322,7 @@ enum mips_mmu_types { #define T_TRAP 13 /* Trap instruction */ #define T_VCEI 14 /* Virtual coherency exception */ #define T_FPE 15 /* Floating point exception */ +#define T_MSADIS 21 /* MSA disabled exception */ #define T_WATCH 23 /* Watch address reference */ #define T_VCED 31 /* Virtual coherency data */ @@ -578,6 +579,7 @@ struct kvm_mips_callbacks { int (*handle_syscall)(struct kvm_vcpu *vcpu); int (*handle_res_inst)(struct kvm_vcpu *vcpu); int (*handle_break)(struct kvm_vcpu *vcpu); + int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); int (*vcpu_init)(struct kvm_vcpu *vcpu); int (*vcpu_setup)(struct kvm_vcpu *vcpu); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index fb3e8df..838d3a6 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -2176,6 +2176,7 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, case T_SYSCALL: case T_BREAK: case T_RES_INST: + case T_MSADIS: break; case T_COP_UNUSABLE: diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index c9eccf5..f5e7dda 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1119,6 +1119,10 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ret = kvm_mips_callbacks->handle_break(vcpu); break; + case T_MSADIS: + ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); + break; + default: kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", exccode, opc, kvm_get_inst(opc, vcpu), badvaddr, diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index fd7257b..4372cc8 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -330,6 +330,33 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + /* No MSA supported in guest, guest reserved instruction exception */ + er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + + switch (er) { + case EMULATE_DONE: + ret = RESUME_GUEST; + break; + + case EMULATE_FAIL: + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + break; + + default: + BUG(); + } + return ret; +} + static int kvm_trap_emul_vm_init(struct kvm *kvm) { return 0; @@ -470,6 +497,7 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_syscall = kvm_trap_emul_handle_syscall, .handle_res_inst = kvm_trap_emul_handle_res_inst, .handle_break = kvm_trap_emul_handle_break, + .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, .vm_init = kvm_trap_emul_vm_init, .vcpu_init = kvm_trap_emul_vcpu_init, -- cgit v0.10.2 From 64bedffe496820dbb6b53302d80dd0f04db33d8e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 2 Dec 2014 13:44:13 +0000 Subject: MIPS: Clear [MSA]FPE CSR.Cause after notify_die() When handling floating point exceptions (FPEs) and MSA FPEs the Cause bits of the appropriate control and status register (FCSR for FPEs and MSACSR for MSA FPEs) are read and cleared before enabling interrupts, presumably so that it doesn't have to go through the pain of restoring those bits if the process is pre-empted, since writing those bits would cause another immediate exception while still in the kernel. The bits aren't normally ever restored again, since userland never expects to see them set. However for virtualisation it is necessary for the kernel to be able to restore these Cause bits, as the guest may have been interrupted in an FP exception handler but before it could read the Cause bits. This can be done by registering a die notifier, to get notified of the exception when such a value is restored, and if the PC was at the instruction which is used to restore the guest state, the handler can step over it and continue execution. The Cause bits can then remain set without causing further exceptions. For this to work safely a few changes are made: - __build_clear_fpe and __build_clear_msa_fpe no longer clear the Cause bits, and now return from exception level with interrupts disabled instead of enabled. - do_fpe() now clears the Cause bits and enables interrupts after notify_die() is called, so that the notifier can chose to return from exception without this happening. - do_msa_fpe() acts similarly, but now actually makes use of the second argument (msacsr) and calls notify_die() with the new DIE_MSAFP, allowing die notifiers to be informed of MSA FPEs too. Signed-off-by: James Hogan Acked-by: Ralf Baechle Cc: Paul Burton Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kdebug.h b/arch/mips/include/asm/kdebug.h index 6a9af5f..cba22ab 100644 --- a/arch/mips/include/asm/kdebug.h +++ b/arch/mips/include/asm/kdebug.h @@ -10,7 +10,8 @@ enum die_val { DIE_RI, DIE_PAGE_FAULT, DIE_BREAK, - DIE_SSTEPBP + DIE_SSTEPBP, + DIE_MSAFP }; #endif /* _ASM_MIPS_KDEBUG_H */ diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 86e2242..af42e70 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -360,21 +360,15 @@ NESTED(nmi_handler, PT_SIZE, sp) .set mips1 SET_HARDFLOAT cfc1 a1, fcr31 - li a2, ~(0x3f << 12) - and a2, a1 - ctc1 a2, fcr31 .set pop - TRACE_IRQS_ON - STI + CLI + TRACE_IRQS_OFF .endm .macro __build_clear_msa_fpe _cfcmsa a1, MSA_CSR - li a2, ~(0x3f << 12) - and a1, a1, a2 - _ctcmsa MSA_CSR, a1 - TRACE_IRQS_ON - STI + CLI + TRACE_IRQS_OFF .endm .macro __build_clear_ade diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 8943ebe..5b4d711 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -788,6 +788,11 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP) goto out; + + /* Clear FCSR.Cause before enabling interrupts */ + write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X); + local_irq_enable(); + die_if_kernel("FP exception in kernel code", regs); if (fcr31 & FPU_CSR_UNI_X) { @@ -1393,13 +1398,22 @@ out: exception_exit(prev_state); } -asmlinkage void do_msa_fpe(struct pt_regs *regs) +asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr) { enum ctx_state prev_state; prev_state = exception_enter(); + if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0, + regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP) + goto out; + + /* Clear MSACSR.Cause before enabling interrupts */ + write_msa_csr(msacsr & ~MSA_CSR_CAUSEF); + local_irq_enable(); + die_if_kernel("do_msa_fpe invoked from kernel context!", regs); force_sig(SIGFPE, current); +out: exception_exit(prev_state); } -- cgit v0.10.2 From 0a5604272d80c985f87de959f0bb7e36fd53d3c7 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 6 Feb 2015 16:03:57 +0000 Subject: MIPS: KVM: Handle TRAP exceptions from guest kernel Trap instructions are used by Linux to implement BUG_ON(), however KVM doesn't pass trap exceptions on to the guest if they occur in guest kernel mode, instead triggering an internal error "Exception Code: 13, not yet handled". The guest kernel then doesn't get a chance to print the usual BUG message and stack trace. Implement handling of the trap exception so that it gets passed to the guest and the user is left with a more useful log message. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: kvm@vger.kernel.org Cc: linux-mips@linux-mips.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index f722b05..8fc3ba2 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -119,6 +119,7 @@ struct kvm_vcpu_stat { u32 syscall_exits; u32 resvd_inst_exits; u32 break_inst_exits; + u32 trap_inst_exits; u32 flush_dcache_exits; u32 halt_successful_poll; u32 halt_wakeup; @@ -138,6 +139,7 @@ enum kvm_mips_exit_types { SYSCALL_EXITS, RESVD_INST_EXITS, BREAK_INST_EXITS, + TRAP_INST_EXITS, FLUSH_DCACHE_EXITS, MAX_KVM_MIPS_EXIT_TYPES }; @@ -579,6 +581,7 @@ struct kvm_mips_callbacks { int (*handle_syscall)(struct kvm_vcpu *vcpu); int (*handle_res_inst)(struct kvm_vcpu *vcpu); int (*handle_break)(struct kvm_vcpu *vcpu); + int (*handle_trap)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); int (*vcpu_init)(struct kvm_vcpu *vcpu); @@ -713,6 +716,11 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, struct kvm_run *run, struct kvm_vcpu *vcpu); +extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 838d3a6..33e132d 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1970,6 +1970,41 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause, return er; } +enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_TRAP << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver TRAP when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + /* ll/sc, rdhwr, sync emulation */ #define OPCODE 0xfc000000 @@ -2176,6 +2211,7 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, case T_SYSCALL: case T_BREAK: case T_RES_INST: + case T_TRAP: case T_MSADIS: break; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index f5e7dda..399b551 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -48,6 +48,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, + { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, @@ -1119,6 +1120,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ret = kvm_mips_callbacks->handle_break(vcpu); break; + case T_TRAP: + ++vcpu->stat.trap_inst_exits; + trace_kvm_exit(vcpu, TRAP_INST_EXITS); + ret = kvm_mips_callbacks->handle_trap(vcpu); + break; + case T_MSADIS: ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); break; diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c index a74d602..dd90b0a 100644 --- a/arch/mips/kvm/stats.c +++ b/arch/mips/kvm/stats.c @@ -25,6 +25,7 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = { "System Call", "Reserved Inst", "Break Inst", + "Trap Inst", "D-Cache Flushes", }; diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 4372cc8..dc01995 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -330,6 +330,24 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -497,6 +515,7 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_syscall = kvm_trap_emul_handle_syscall, .handle_res_inst = kvm_trap_emul_handle_res_inst, .handle_break = kvm_trap_emul_handle_break, + .handle_trap = kvm_trap_emul_handle_trap, .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, .vm_init = kvm_trap_emul_vm_init, -- cgit v0.10.2 From 1068eaaf2f64ffb44d97fbaa9ff7a4662b76cf9e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 26 Jun 2014 13:56:52 +0100 Subject: MIPS: KVM: Implement PRid CP0 register access Implement access to the guest Processor Identification CP0 register using the KVM_GET_ONE_REG and KVM_SET_ONE_REG ioctls. This allows the owning process to modify and read back the value that is exposed to the guest in this register. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0d7fc66..9a5f8a4 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1967,6 +1967,7 @@ registers, find a list below: MIPS | KVM_REG_MIPS_CP0_STATUS | 32 MIPS | KVM_REG_MIPS_CP0_CAUSE | 32 MIPS | KVM_REG_MIPS_CP0_EPC | 64 + MIPS | KVM_REG_MIPS_CP0_PRID | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32 diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 8fc3ba2..26d91b0 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -42,6 +42,7 @@ #define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) #define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) #define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0) +#define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0) #define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1) #define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0) #define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 399b551..fd620cc 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -505,6 +505,7 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_CP0_STATUS, KVM_REG_MIPS_CP0_CAUSE, KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, KVM_REG_MIPS_CP0_CONFIG, KVM_REG_MIPS_CP0_CONFIG1, KVM_REG_MIPS_CP0_CONFIG2, @@ -574,6 +575,9 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_EPC: v = (long)kvm_read_c0_guest_epc(cop0); break; + case KVM_REG_MIPS_CP0_PRID: + v = (long)kvm_read_c0_guest_prid(cop0); + break; case KVM_REG_MIPS_CP0_ERROREPC: v = (long)kvm_read_c0_guest_errorepc(cop0); break; @@ -687,6 +691,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_EPC: kvm_write_c0_guest_epc(cop0, v); break; + case KVM_REG_MIPS_CP0_PRID: + kvm_write_c0_guest_prid(cop0, v); + break; case KVM_REG_MIPS_CP0_ERROREPC: kvm_write_c0_guest_errorepc(cop0, v); break; -- cgit v0.10.2 From e93d4c159caaf746422c1af4c4f09fce9b456f8c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 26 Jun 2014 13:47:22 +0100 Subject: MIPS: KVM: Sort kvm_mips_get_reg() registers Sort the registers in the kvm_mips_get_reg() switch by register number, which puts ERROREPC after the CONFIG registers. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index fd620cc..b909c00 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -578,9 +578,6 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_PRID: v = (long)kvm_read_c0_guest_prid(cop0); break; - case KVM_REG_MIPS_CP0_ERROREPC: - v = (long)kvm_read_c0_guest_errorepc(cop0); - break; case KVM_REG_MIPS_CP0_CONFIG: v = (long)kvm_read_c0_guest_config(cop0); break; @@ -596,6 +593,9 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_CONFIG7: v = (long)kvm_read_c0_guest_config7(cop0); break; + case KVM_REG_MIPS_CP0_ERROREPC: + v = (long)kvm_read_c0_guest_errorepc(cop0); + break; /* registers to be handled specially */ case KVM_REG_MIPS_CP0_COUNT: case KVM_REG_MIPS_COUNT_CTL: -- cgit v0.10.2 From 58a115bcec06f2de26923bc13e88cb73b780ae41 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 26 Jun 2014 15:21:11 +0100 Subject: MIPS: KVM: Drop pr_info messages on init/exit The information messages when the KVM module is loaded and unloaded are a bit pointless and out of line with other architectures, so lets drop them. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b909c00..0aab83d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1191,7 +1191,6 @@ int __init kvm_mips_init(void) kvm_mips_release_pfn_clean = kvm_release_pfn_clean; kvm_mips_is_error_pfn = is_error_pfn; - pr_info("KVM/MIPS Initialized\n"); return 0; } @@ -1202,8 +1201,6 @@ void __exit kvm_mips_exit(void) kvm_mips_gfn_to_pfn = NULL; kvm_mips_release_pfn_clean = NULL; kvm_mips_is_error_pfn = NULL; - - pr_info("KVM/MIPS unloaded\n"); } module_init(kvm_mips_init); -- cgit v0.10.2 From 7bd4acec42670a18b023392db6f4bfaa4dee179e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 2 Dec 2014 15:47:04 +0000 Subject: MIPS: KVM: Clean up register definitions a little Clean up KVM_GET_ONE_REG / KVM_SET_ONE_REG register definitions for MIPS, to prepare for adding a new group for FPU & MSA vector registers. Definitions are added for common bits in each group of registers, e.g. KVM_REG_MIPS_CP0 = KVM_REG_MIPS | 0x10000, for the coprocessor 0 registers. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 26d91b0..1bd392d 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -21,10 +21,10 @@ /* MIPS KVM register ids */ #define MIPS_CP0_32(_R, _S) \ - (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S))) + (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S))) #define MIPS_CP0_64(_R, _S) \ - (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S))) + (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S))) #define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0) #define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0) diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index 2c04b6d..75d6d85 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -52,61 +52,76 @@ struct kvm_fpu { /* - * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0 + * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various * registers. The id field is broken down as follows: * - * bits[2..0] - Register 'sel' index. - * bits[7..3] - Register 'rd' index. - * bits[15..8] - Must be zero. - * bits[31..16] - 1 -> CP0 registers. - * bits[51..32] - Must be zero. * bits[63..52] - As per linux/kvm.h + * bits[51..32] - Must be zero. + * bits[31..16] - Register set. + * + * Register set = 0: GP registers from kvm_regs (see definitions below). + * + * Register set = 1: CP0 registers. + * bits[15..8] - Must be zero. + * bits[7..3] - Register 'rd' index. + * bits[2..0] - Register 'sel' index. + * + * Register set = 2: KVM specific registers (see definitions below). * * Other sets registers may be added in the future. Each set would * have its own identifier in bits[31..16]. - * - * The registers defined in struct kvm_regs are also accessible, the - * id values for these are below. */ -#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0) -#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1) -#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2) -#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3) -#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4) -#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5) -#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6) -#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7) -#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8) -#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9) -#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10) -#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11) -#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12) -#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13) -#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14) -#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15) -#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16) -#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17) -#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18) -#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19) -#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20) -#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21) -#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22) -#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23) -#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24) -#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25) -#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26) -#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27) -#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28) -#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29) -#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30) -#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31) - -#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32) -#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33) -#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34) - -/* KVM specific control registers */ +#define KVM_REG_MIPS_GP (KVM_REG_MIPS | 0x0000000000000000ULL) +#define KVM_REG_MIPS_CP0 (KVM_REG_MIPS | 0x0000000000010000ULL) +#define KVM_REG_MIPS_KVM (KVM_REG_MIPS | 0x0000000000020000ULL) + + +/* + * KVM_REG_MIPS_GP - General purpose registers from kvm_regs. + */ + +#define KVM_REG_MIPS_R0 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 0) +#define KVM_REG_MIPS_R1 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 1) +#define KVM_REG_MIPS_R2 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 2) +#define KVM_REG_MIPS_R3 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 3) +#define KVM_REG_MIPS_R4 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 4) +#define KVM_REG_MIPS_R5 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 5) +#define KVM_REG_MIPS_R6 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 6) +#define KVM_REG_MIPS_R7 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 7) +#define KVM_REG_MIPS_R8 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 8) +#define KVM_REG_MIPS_R9 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 9) +#define KVM_REG_MIPS_R10 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10) +#define KVM_REG_MIPS_R11 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11) +#define KVM_REG_MIPS_R12 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12) +#define KVM_REG_MIPS_R13 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13) +#define KVM_REG_MIPS_R14 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14) +#define KVM_REG_MIPS_R15 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15) +#define KVM_REG_MIPS_R16 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16) +#define KVM_REG_MIPS_R17 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17) +#define KVM_REG_MIPS_R18 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18) +#define KVM_REG_MIPS_R19 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19) +#define KVM_REG_MIPS_R20 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20) +#define KVM_REG_MIPS_R21 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21) +#define KVM_REG_MIPS_R22 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22) +#define KVM_REG_MIPS_R23 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23) +#define KVM_REG_MIPS_R24 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24) +#define KVM_REG_MIPS_R25 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25) +#define KVM_REG_MIPS_R26 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26) +#define KVM_REG_MIPS_R27 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27) +#define KVM_REG_MIPS_R28 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28) +#define KVM_REG_MIPS_R29 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29) +#define KVM_REG_MIPS_R30 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30) +#define KVM_REG_MIPS_R31 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31) + +#define KVM_REG_MIPS_HI (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32) +#define KVM_REG_MIPS_LO (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33) +#define KVM_REG_MIPS_PC (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34) + + +/* + * KVM_REG_MIPS_KVM - KVM specific control registers. + */ /* * CP0_Count control @@ -118,8 +133,7 @@ struct kvm_fpu { * safely without losing time or guest timer interrupts. * Other: Reserved, do not change. */ -#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ - 0x20000 | 0) +#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0) #define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001 /* @@ -131,15 +145,14 @@ struct kvm_fpu { * emulated. * Modifications to times in the future are rejected. */ -#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ - 0x20000 | 1) +#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1) /* * CP0_Count rate in Hz * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without * discontinuities in CP0_Count. */ -#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \ - 0x20000 | 2) +#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2) + /* * KVM MIPS specific structures and definitions -- cgit v0.10.2 From 2211ee810ac6fdcdb42b7a126e20d1b4e5c55124 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 4 Mar 2015 15:56:47 +0000 Subject: MIPS: KVM: Simplify default guest Config registers Various semi-used definitions exist in kvm_host.h for the default guest config registers. Remove them and use the appropriate values directly when initialising the Config registers. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 1bd392d..6996447 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -265,31 +265,6 @@ struct mips_coproc { #define CP0C3_SM 1 #define CP0C3_TL 0 -/* Have config1, Cacheable, noncoherent, write-back, write allocate*/ -#define MIPS_CONFIG0 \ - ((1 << CP0C0_M) | (0x3 << CP0C0_K0)) - -/* Have config2, no coprocessor2 attached, no MDMX support attached, - no performance counters, watch registers present, - no code compression, EJTAG present, no FPU, no watch registers */ -#define MIPS_CONFIG1 \ -((1 << CP0C1_M) | \ - (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \ - (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \ - (0 << CP0C1_FP)) - -/* Have config3, no tertiary/secondary caches implemented */ -#define MIPS_CONFIG2 \ -((1 << CP0C2_M)) - -/* No config4, no DSP ASE, no large physaddr (PABITS), - no external interrupt controller, no vectored interrupts, - no 1kb pages, no SmartMIPS ASE, no trace logic */ -#define MIPS_CONFIG3 \ -((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \ - (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \ - (0 << CP0C3_SM) | (0 << CP0C3_TL)) - /* MMU types, the first four entries have the same layout as the CP0C0_MT field. */ enum mips_mmu_types { diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index dc01995..bffba00 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -396,8 +396,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) * guest will come up as expected, for now we simulate a MIPS 24kc */ kvm_write_c0_guest_prid(cop0, 0x00019300); - kvm_write_c0_guest_config(cop0, - MIPS_CONFIG0 | (0x1 << CP0C0_AR) | + /* Have config1, Cacheable, noncoherent, write-back, write allocate */ + kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) | + (0x1 << CP0C0_AR) | (MMU_TYPE_R4000 << CP0C0_MT)); /* Read the cache characteristics from the host Config1 Register */ @@ -413,10 +414,12 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) (1 << CP0C1_WR) | (1 << CP0C1_CA)); kvm_write_c0_guest_config1(cop0, config1); - kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2); - /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */ - kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) | - (1 << CP0C3_ULRI)); + /* Have config3, no tertiary/secondary caches implemented */ + kvm_write_c0_guest_config2(cop0, MIPS_CONF_M); + /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */ + + /* No config4, UserLocal */ + kvm_write_c0_guest_config3(cop0, MIPS_CONF3_ULRI); /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); -- cgit v0.10.2 From c771607af959f282704268a209743560d3264eb3 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 26 Jun 2014 15:11:29 +0100 Subject: MIPS: KVM: Add Config4/5 and writing of Config registers Add Config4 and Config5 co-processor 0 registers, and add capability to write the Config1, Config3, Config4, and Config5 registers using the KVM API. Only supported bits can be written, to minimise the chances of the guest being given a configuration from e.g. QEMU that is inconsistent with that being emulated, and as such the handling is in trap_emul.c as it may need to be different for VZ. Currently the only modification permitted is to make Config4 and Config5 exist via the M bits, but other bits will be added for FPU and MSA support in future patches. Care should be taken by userland not to change bits without fully handling the possible extra state that may then exist and which the guest may begin to use and depend on. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 9a5f8a4..3f295a0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1972,6 +1972,8 @@ registers, find a list below: MIPS | KVM_REG_MIPS_CP0_CONFIG1 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG2 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG3 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG4 | 32 + MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32 MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64 MIPS | KVM_REG_MIPS_COUNT_CTL | 64 diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 6996447..3f58ee1 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -48,6 +48,8 @@ #define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1) #define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2) #define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3) +#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4) +#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5) #define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7) #define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0) #define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0) @@ -209,6 +211,8 @@ struct mips_coproc { #define MIPS_CP0_CONFIG1_SEL 1 #define MIPS_CP0_CONFIG2_SEL 2 #define MIPS_CP0_CONFIG3_SEL 3 +#define MIPS_CP0_CONFIG4_SEL 4 +#define MIPS_CP0_CONFIG5_SEL 5 /* Config0 register bits */ #define CP0C0_M 31 @@ -461,11 +465,15 @@ struct kvm_vcpu_arch { #define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1]) #define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2]) #define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3]) +#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4]) +#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5]) #define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7]) #define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val)) #define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val)) #define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val)) #define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val)) +#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val)) +#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val)) #define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val)) #define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0]) #define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val)) @@ -735,6 +743,11 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst, struct kvm_run *run, struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu); +unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu); + /* Dynamic binary translation */ extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc, struct kvm_vcpu *vcpu); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 33e132d..91d5b0e 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -884,6 +884,58 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +/** + * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config1 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu) +{ + /* Read-only */ + return 0; +} + +/** + * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config3 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu) +{ + /* Config4 is optional */ + return MIPS_CONF_M; +} + +/** + * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config4 CP0 + * register, by userland (currently read-only to the guest). + */ +unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) +{ + /* Config5 is optional */ + return MIPS_CONF_M; +} + +/** + * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5 + * @vcpu: Virtual CPU. + * + * Finds the mask of bits which are writable in the guest's Config5 CP0 + * register, by the guest itself. + */ +unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu) +{ + /* Read-only */ + return 0; +} + enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, uint32_t cause, struct kvm_run *run, struct kvm_vcpu *vcpu) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0aab83d..73eecc7 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -510,6 +510,8 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_CP0_CONFIG1, KVM_REG_MIPS_CP0_CONFIG2, KVM_REG_MIPS_CP0_CONFIG3, + KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, KVM_REG_MIPS_CP0_CONFIG7, KVM_REG_MIPS_CP0_ERROREPC, @@ -590,6 +592,12 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_CONFIG3: v = (long)kvm_read_c0_guest_config3(cop0); break; + case KVM_REG_MIPS_CP0_CONFIG4: + v = (long)kvm_read_c0_guest_config4(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + v = (long)kvm_read_c0_guest_config5(cop0); + break; case KVM_REG_MIPS_CP0_CONFIG7: v = (long)kvm_read_c0_guest_config7(cop0); break; @@ -701,6 +709,12 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_CP0_COUNT: case KVM_REG_MIPS_CP0_COMPARE: case KVM_REG_MIPS_CP0_CAUSE: + case KVM_REG_MIPS_CP0_CONFIG: + case KVM_REG_MIPS_CP0_CONFIG1: + case KVM_REG_MIPS_CP0_CONFIG2: + case KVM_REG_MIPS_CP0_CONFIG3: + case KVM_REG_MIPS_CP0_CONFIG4: + case KVM_REG_MIPS_CP0_CONFIG5: case KVM_REG_MIPS_COUNT_CTL: case KVM_REG_MIPS_COUNT_RESUME: case KVM_REG_MIPS_COUNT_HZ: diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index bffba00..8e09684 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -418,8 +418,14 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) kvm_write_c0_guest_config2(cop0, MIPS_CONF_M); /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */ - /* No config4, UserLocal */ - kvm_write_c0_guest_config3(cop0, MIPS_CONF3_ULRI); + /* Have config4, UserLocal */ + kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI); + + /* Have config5 */ + kvm_write_c0_guest_config4(cop0, MIPS_CONF_M); + + /* No config6 */ + kvm_write_c0_guest_config5(cop0, 0); /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); @@ -464,6 +470,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, { struct mips_coproc *cop0 = vcpu->arch.cop0; int ret = 0; + unsigned int cur, change; switch (reg->id) { case KVM_REG_MIPS_CP0_COUNT: @@ -492,6 +499,44 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, kvm_write_c0_guest_cause(cop0, v); } break; + case KVM_REG_MIPS_CP0_CONFIG: + /* read-only for now */ + break; + case KVM_REG_MIPS_CP0_CONFIG1: + cur = kvm_read_c0_guest_config1(cop0); + change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config1(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG2: + /* read-only for now */ + break; + case KVM_REG_MIPS_CP0_CONFIG3: + cur = kvm_read_c0_guest_config3(cop0); + change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config3(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG4: + cur = kvm_read_c0_guest_config4(cop0); + change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config4(cop0, v); + } + break; + case KVM_REG_MIPS_CP0_CONFIG5: + cur = kvm_read_c0_guest_config5(cop0); + change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu); + if (change) { + v = cur ^ change; + kvm_write_c0_guest_config5(cop0, v); + } + break; case KVM_REG_MIPS_COUNT_CTL: ret = kvm_mips_set_count_ctl(vcpu, v); break; -- cgit v0.10.2 From b86ecb3766abd9138289ff2a18381d25b73f4622 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 9 Feb 2015 16:35:20 +0000 Subject: MIPS: KVM: Add vcpu_get_regs/vcpu_set_regs callback Add a vcpu_get_regs() and vcpu_set_regs() callbacks for loading and restoring context which may be in hardware registers. This may include floating point and MIPS SIMD Architecture (MSA) state which may be accessed directly by the guest (but restored lazily by the hypervisor), and also dedicated guest registers as provided by the VZ ASE. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 3f58ee1..fb79d67 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -585,6 +585,8 @@ struct kvm_mips_callbacks { const struct kvm_one_reg *reg, s64 *v); int (*set_one_reg)(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 v); + int (*vcpu_get_regs)(struct kvm_vcpu *vcpu); + int (*vcpu_set_regs)(struct kvm_vcpu *vcpu); }; extern struct kvm_mips_callbacks *kvm_mips_callbacks; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index b6beb0e..aed0ac2 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -733,6 +733,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) } } + /* restore guest state to registers */ + kvm_mips_callbacks->vcpu_set_regs(vcpu); + local_irq_restore(flags); } @@ -751,6 +754,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) vcpu->arch.preempt_entryhi = read_c0_entryhi(); vcpu->arch.last_sched_cpu = cpu; + /* save guest state in registers */ + kvm_mips_callbacks->vcpu_get_regs(vcpu); + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & ASID_VERSION_MASK)) { kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__, diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 8e09684..0d2729d 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -552,6 +552,16 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, return ret; } +static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu) +{ + return 0; +} + +static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu) +{ + return 0; +} + static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { /* exit handlers */ .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, @@ -578,6 +588,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .irq_clear = kvm_mips_irq_clear_cb, .get_one_reg = kvm_trap_emul_get_one_reg, .set_one_reg = kvm_trap_emul_set_one_reg, + .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs, + .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs, }; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) -- cgit v0.10.2 From 98e91b8457d81f53fab990fac6c57e2a43c47627 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 18 Nov 2014 14:09:12 +0000 Subject: MIPS: KVM: Add base guest FPU support Add base code for supporting FPU in MIPS KVM guests. The FPU cannot yet be enabled in the guest, we're just laying the groundwork. Whether the guest's FPU context is loaded is stored in a bit in the fpu_inuse vcpu member. This allows the FPU to be disabled when the guest disables it, but keeping the FPU context loaded so it doesn't have to be reloaded if the guest re-enables it. An fpu_enabled vcpu member stores whether userland has enabled the FPU capability (which will be wired up in a later patch). New assembly code is added for saving and restoring the FPU context, and for saving/clearing and restoring FCSR (which can itself cause an FP exception depending on the value). The FCSR is restored before returning to the guest if the FPU is already enabled, and a die notifier is registered to catch the possible FP exception and step over the ctc1 instruction. The helper function kvm_lose_fpu() is added to save FPU context and disable the FPU, which is used when saving hardware state before a context switch or KVM exit (the vcpu_get_regs() callback). The helper function kvm_own_fpu() is added to enable the FPU and restore the FPU context if it isn't already loaded, which will be used in a later patch when the guest attempts to use the FPU for the first time and triggers a co-processor unusable exception. The helper function kvm_drop_fpu() is added to discard the FPU context and disable the FPU, which will be used in a later patch when the FPU state will become architecturally UNPREDICTABLE (change of FR mode) to force a reload of [stale] context in the new FR mode. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index fb79d67..866edf3 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -357,6 +357,8 @@ struct kvm_mips_tlb { long tlb_lo1; }; +#define KVM_MIPS_FPU_FPU 0x1 + #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { void *host_ebase, *guest_ebase; @@ -378,6 +380,8 @@ struct kvm_vcpu_arch { /* FPU State */ struct mips_fpu_struct fpu; + /* Which FPU state is loaded (KVM_MIPS_FPU_*) */ + unsigned int fpu_inuse; /* COP0 State */ struct mips_coproc *cop0; @@ -424,6 +428,8 @@ struct kvm_vcpu_arch { /* WAIT executed */ int wait; + + u8 fpu_enabled; }; @@ -554,6 +560,19 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg, kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \ } +/* Helpers */ + +static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu) +{ + return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) && + vcpu->fpu_enabled; +} + +static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu) +{ + return kvm_mips_guest_can_have_fpu(vcpu) && + kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP; +} struct kvm_mips_callbacks { int (*handle_cop_unusable)(struct kvm_vcpu *vcpu); @@ -597,6 +616,14 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); /* Trampoline ASM routine to start running in "Guest" context */ extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); +/* FPU context management */ +void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu); +void kvm_own_fpu(struct kvm_vcpu *vcpu); +void kvm_drop_fpu(struct kvm_vcpu *vcpu); +void kvm_lose_fpu(struct kvm_vcpu *vcpu); + /* TLB handling */ uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu); diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index 3ee1565..a12bcf9 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -404,6 +404,44 @@ void output_kvm_defines(void) OFFSET(VCPU_LO, kvm_vcpu_arch, lo); OFFSET(VCPU_HI, kvm_vcpu_arch, hi); OFFSET(VCPU_PC, kvm_vcpu_arch, pc); + BLANK(); + + OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]); + OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]); + OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]); + OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]); + OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]); + OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]); + OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]); + OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]); + OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]); + OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]); + OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]); + OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]); + OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]); + OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]); + OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]); + OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]); + OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]); + OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]); + OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]); + OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]); + OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]); + OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]); + OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]); + OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]); + OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]); + OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]); + OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]); + OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]); + OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]); + OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]); + OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]); + OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]); + + OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31); + BLANK(); + OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0); OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid); OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid); diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 401fe02..78d7bcd 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -7,7 +7,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm kvm-objs := $(common-objs) mips.o emulate.o locore.o \ interrupt.o stats.o commpage.o \ - dyntrans.o trap_emul.o + dyntrans.o trap_emul.o fpu.o obj-$(CONFIG_KVM) += kvm.o obj-y += callback.o tlb.o diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S new file mode 100644 index 0000000..531fbf5 --- /dev/null +++ b/arch/mips/kvm/fpu.S @@ -0,0 +1,122 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * FPU context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include +#include +#include +#include +#include + + .set noreorder + .set noat + +LEAF(__kvm_save_fpu) + .set push + .set mips64r2 + SET_HARDFLOAT + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + sdc1 $f1, VCPU_FPR1(a0) + sdc1 $f3, VCPU_FPR3(a0) + sdc1 $f5, VCPU_FPR5(a0) + sdc1 $f7, VCPU_FPR7(a0) + sdc1 $f9, VCPU_FPR9(a0) + sdc1 $f11, VCPU_FPR11(a0) + sdc1 $f13, VCPU_FPR13(a0) + sdc1 $f15, VCPU_FPR15(a0) + sdc1 $f17, VCPU_FPR17(a0) + sdc1 $f19, VCPU_FPR19(a0) + sdc1 $f21, VCPU_FPR21(a0) + sdc1 $f23, VCPU_FPR23(a0) + sdc1 $f25, VCPU_FPR25(a0) + sdc1 $f27, VCPU_FPR27(a0) + sdc1 $f29, VCPU_FPR29(a0) + sdc1 $f31, VCPU_FPR31(a0) +1: sdc1 $f0, VCPU_FPR0(a0) + sdc1 $f2, VCPU_FPR2(a0) + sdc1 $f4, VCPU_FPR4(a0) + sdc1 $f6, VCPU_FPR6(a0) + sdc1 $f8, VCPU_FPR8(a0) + sdc1 $f10, VCPU_FPR10(a0) + sdc1 $f12, VCPU_FPR12(a0) + sdc1 $f14, VCPU_FPR14(a0) + sdc1 $f16, VCPU_FPR16(a0) + sdc1 $f18, VCPU_FPR18(a0) + sdc1 $f20, VCPU_FPR20(a0) + sdc1 $f22, VCPU_FPR22(a0) + sdc1 $f24, VCPU_FPR24(a0) + sdc1 $f26, VCPU_FPR26(a0) + sdc1 $f28, VCPU_FPR28(a0) + jr ra + sdc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_save_fpu) + +LEAF(__kvm_restore_fpu) + .set push + .set mips64r2 + SET_HARDFLOAT + mfc0 t0, CP0_STATUS + sll t0, t0, 5 # is Status.FR set? + bgez t0, 1f # no: skip odd doubles + nop + ldc1 $f1, VCPU_FPR1(a0) + ldc1 $f3, VCPU_FPR3(a0) + ldc1 $f5, VCPU_FPR5(a0) + ldc1 $f7, VCPU_FPR7(a0) + ldc1 $f9, VCPU_FPR9(a0) + ldc1 $f11, VCPU_FPR11(a0) + ldc1 $f13, VCPU_FPR13(a0) + ldc1 $f15, VCPU_FPR15(a0) + ldc1 $f17, VCPU_FPR17(a0) + ldc1 $f19, VCPU_FPR19(a0) + ldc1 $f21, VCPU_FPR21(a0) + ldc1 $f23, VCPU_FPR23(a0) + ldc1 $f25, VCPU_FPR25(a0) + ldc1 $f27, VCPU_FPR27(a0) + ldc1 $f29, VCPU_FPR29(a0) + ldc1 $f31, VCPU_FPR31(a0) +1: ldc1 $f0, VCPU_FPR0(a0) + ldc1 $f2, VCPU_FPR2(a0) + ldc1 $f4, VCPU_FPR4(a0) + ldc1 $f6, VCPU_FPR6(a0) + ldc1 $f8, VCPU_FPR8(a0) + ldc1 $f10, VCPU_FPR10(a0) + ldc1 $f12, VCPU_FPR12(a0) + ldc1 $f14, VCPU_FPR14(a0) + ldc1 $f16, VCPU_FPR16(a0) + ldc1 $f18, VCPU_FPR18(a0) + ldc1 $f20, VCPU_FPR20(a0) + ldc1 $f22, VCPU_FPR22(a0) + ldc1 $f24, VCPU_FPR24(a0) + ldc1 $f26, VCPU_FPR26(a0) + ldc1 $f28, VCPU_FPR28(a0) + jr ra + ldc1 $f30, VCPU_FPR30(a0) + .set pop + END(__kvm_restore_fpu) + +LEAF(__kvm_restore_fcsr) + .set push + SET_HARDFLOAT + lw t0, VCPU_FCR31(a0) + /* + * The ctc1 must stay at this offset in __kvm_restore_fcsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + ctc1 t0, fcr31 + jr ra + nop + .set pop + END(__kvm_restore_fcsr) diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index 4a68b17..f559404 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -353,6 +353,23 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) LONG_L k0, VCPU_HOST_EBASE(k1) mtc0 k0,CP0_EBASE + /* + * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't + * trigger FPE for pending exceptions. + */ + .set at + and v1, v0, ST0_CU1 + beqz v1, 1f + nop + .set push + SET_HARDFLOAT + cfc1 t0, fcr31 + sw t0, VCPU_FCR31(k1) + ctc1 zero,fcr31 + .set pop + .set noat +1: + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ .set at and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 73eecc7..b26a48d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -1178,12 +1179,133 @@ skip_emul: } } + if (ret == RESUME_GUEST) { + /* + * If FPU is enabled (i.e. the guest's FPU context is live), + * restore FCR31. + * + * This should be before returning to the guest exception + * vector, as it may well cause an FP exception if there are + * pending exception bits unmasked. (see + * kvm_mips_csr_die_notifier() for how that is handled). + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch) && + read_c0_status() & ST0_CU1) + __kvm_restore_fcsr(&vcpu->arch); + } + /* Disable HTW before returning to guest or host */ htw_stop(); return ret; } +/* Enable FPU for guest and restore context */ +void kvm_own_fpu(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + /* + * Enable FPU for guest + * We set FR and FRE according to guest context + */ + sr = kvm_read_c0_guest_status(cop0); + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + enable_fpu_hazard(); + + /* If guest FPU state not active, restore it now */ + if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) { + __kvm_restore_fpu(&vcpu->arch); + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU; + } + + preempt_enable(); +} + +/* Drop FPU without saving it */ +void kvm_drop_fpu(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + clear_c0_status(ST0_CU1 | ST0_FR); + vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; + } + preempt_enable(); +} + +/* Save and disable FPU */ +void kvm_lose_fpu(struct kvm_vcpu *vcpu) +{ + /* + * FPU gets disabled in root context (hardware) when it is disabled in + * guest context (software), but the register state in the hardware may + * still be in use. This is why we explicitly re-enable the hardware + * before saving. + */ + + preempt_disable(); + if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + set_c0_status(ST0_CU1); + enable_fpu_hazard(); + + __kvm_save_fpu(&vcpu->arch); + vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; + + /* Disable FPU */ + clear_c0_status(ST0_CU1 | ST0_FR); + } + preempt_enable(); +} + +/* + * Step over a specific ctc1 to FCSR which is used to restore guest FCSR state + * and may trigger a "harmless" FP exception if cause bits are set in the value + * being written. + */ +static int kvm_mips_csr_die_notify(struct notifier_block *self, + unsigned long cmd, void *ptr) +{ + struct die_args *args = (struct die_args *)ptr; + struct pt_regs *regs = args->regs; + unsigned long pc; + + /* Only interested in FPE */ + if (cmd != DIE_FP) + return NOTIFY_DONE; + + /* Return immediately if guest context isn't active */ + if (!(current->flags & PF_VCPU)) + return NOTIFY_DONE; + + /* Should never get here from user mode */ + BUG_ON(user_mode(regs)); + + pc = instruction_pointer(regs); + switch (cmd) { + case DIE_FP: + /* match 2nd instruction in __kvm_restore_fcsr */ + if (pc != (unsigned long)&__kvm_restore_fcsr + 4) + return NOTIFY_DONE; + break; + } + + /* Move PC forward a little and continue executing */ + instruction_pointer(regs) += 4; + + return NOTIFY_STOP; +} + +static struct notifier_block kvm_mips_csr_die_notifier = { + .notifier_call = kvm_mips_csr_die_notify, +}; + int __init kvm_mips_init(void) { int ret; @@ -1193,6 +1315,8 @@ int __init kvm_mips_init(void) if (ret) return ret; + register_die_notifier(&kvm_mips_csr_die_notifier); + /* * On MIPS, kernel modules are executed from "mapped space", which * requires TLBs. The TLB handling code is statically linked with @@ -1215,6 +1339,8 @@ void __exit kvm_mips_exit(void) kvm_mips_gfn_to_pfn = NULL; kvm_mips_release_pfn_clean = NULL; kvm_mips_is_error_pfn = NULL; + + unregister_die_notifier(&kvm_mips_csr_die_notifier); } module_init(kvm_mips_init); diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 0d2729d..408af24 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -554,6 +554,8 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu) { + kvm_lose_fpu(vcpu); + return 0; } -- cgit v0.10.2 From 6cdc65e31d4f70561d71eeaf34a2a70ab68bf146 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 3 Feb 2015 13:59:38 +0000 Subject: MIPS: KVM: Emulate FPU bits in COP0 interface Emulate FPU related parts of COP0 interface so that the guest will be able to enable/disable the following once the FPU capability has been wired up: - The FPU (Status.CU1) - 64-bit FP register mode (Status.FR) - Hybrid FP register mode (Config5.FRE) Changing Status.CU1 has no immediate effect if the FPU state isn't live, as the FPU state is restored lazily on first use. After that, changes take place immediately in the host Status.CU1, so that the guest can start getting coprocessor unusable exceptions right away for guest FPU operations if it is disabled. The FPU state is saved lazily too, as the FPU may get re-enabled in the near future anyway. Any change to Status.FR causes the FPU state to be discarded and FPU disabled, as the register state is architecturally UNPREDICTABLE after such a change. This should also ensure that the FPU state is fully initialised (with stale state, but that's fine) when it is next used in the new FP mode. Any change to the Config5.FRE bit is immediately updated in the host state so that the guest can get the relevant exceptions right away for single-precision FPU operations. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 91d5b0e..3511bb2 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -893,8 +893,13 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu) */ unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu) { - /* Read-only */ - return 0; + unsigned int mask = 0; + + /* Permit FPU to be present if FPU is supported */ + if (kvm_mips_guest_can_have_fpu(&vcpu->arch)) + mask |= MIPS_CONF1_FP; + + return mask; } /** @@ -932,8 +937,19 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) */ unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu) { - /* Read-only */ - return 0; + unsigned int mask = 0; + + /* + * Permit guest FPU mode changes if FPU is enabled and the relevant + * feature exists according to FIR register. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + if (cpu_has_fre) + mask |= MIPS_CONF5_FRE; + /* We don't support UFR or UFE */ + } + + return mask; } enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, @@ -1073,18 +1089,91 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_mips_write_compare(vcpu, vcpu->arch.gprs[rt]); } else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) { - kvm_write_c0_guest_status(cop0, - vcpu->arch.gprs[rt]); + unsigned int old_val, val, change; + + old_val = kvm_read_c0_guest_status(cop0); + val = vcpu->arch.gprs[rt]; + change = val ^ old_val; + + /* Make sure that the NMI bit is never set */ + val &= ~ST0_NMI; + + /* + * Don't allow CU1 or FR to be set unless FPU + * capability enabled and exists in guest + * configuration. + */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + val &= ~(ST0_CU1 | ST0_FR); + + /* + * Also don't allow FR to be set if host doesn't + * support it. + */ + if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64)) + val &= ~ST0_FR; + + + /* Handle changes in FPU mode */ + preempt_disable(); + /* - * Make sure that CU1 and NMI bits are - * never set + * FPU and Vector register state is made + * UNPREDICTABLE by a change of FR, so don't + * even bother saving it. */ - kvm_clear_c0_guest_status(cop0, - (ST0_CU1 | ST0_NMI)); + if (change & ST0_FR) + kvm_drop_fpu(vcpu); + + /* + * Propagate CU1 (FPU enable) changes + * immediately if the FPU context is already + * loaded. When disabling we leave the context + * loaded so it can be quickly enabled again in + * the near future. + */ + if (change & ST0_CU1 && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + change_c0_status(ST0_CU1, val); + + preempt_enable(); + + kvm_write_c0_guest_status(cop0, val); #ifdef CONFIG_KVM_MIPS_DYN_TRANS - kvm_mips_trans_mtc0(inst, opc, vcpu); + /* + * If FPU present, we need CU1/FR bits to take + * effect fairly soon. + */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + kvm_mips_trans_mtc0(inst, opc, vcpu); #endif + } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) { + unsigned int old_val, val, change, wrmask; + + old_val = kvm_read_c0_guest_config5(cop0); + val = vcpu->arch.gprs[rt]; + + /* Only a few bits are writable in Config5 */ + wrmask = kvm_mips_config5_wrmask(vcpu); + change = (val ^ old_val) & wrmask; + val = old_val ^ change; + + + /* Handle changes in FPU modes */ + preempt_disable(); + + /* + * Propagate FRE changes immediately if the FPU + * context is already loaded. + */ + if (change & MIPS_CONF5_FRE && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + change_c0_config5(MIPS_CONF5_FRE, val); + + preempt_enable(); + + kvm_write_c0_guest_config5(cop0, val); } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) { uint32_t old_cause, new_cause; -- cgit v0.10.2 From 1c0cd66adbac8aa339b9521eceb18b00d1b0699e Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 6 Feb 2015 10:56:27 +0000 Subject: MIPS: KVM: Add FP exception handling Add guest exception handling for floating point exceptions and coprocessor 1 unusable exceptions. Floating point exceptions from the guest need passing to the guest kernel, so for these a guest FPE is emulated. Also, coprocessor 1 unusable exceptions are normally passed straight through to the guest (because no guest FPU was supported), but the hypervisor can now handle them if the guest has its FPU enabled by restoring the guest FPU context and enabling the FPU. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 866edf3..fb264d8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -123,6 +123,7 @@ struct kvm_vcpu_stat { u32 resvd_inst_exits; u32 break_inst_exits; u32 trap_inst_exits; + u32 fpe_exits; u32 flush_dcache_exits; u32 halt_successful_poll; u32 halt_wakeup; @@ -143,6 +144,7 @@ enum kvm_mips_exit_types { RESVD_INST_EXITS, BREAK_INST_EXITS, TRAP_INST_EXITS, + FPE_EXITS, FLUSH_DCACHE_EXITS, MAX_KVM_MIPS_EXIT_TYPES }; @@ -585,6 +587,7 @@ struct kvm_mips_callbacks { int (*handle_res_inst)(struct kvm_vcpu *vcpu); int (*handle_break)(struct kvm_vcpu *vcpu); int (*handle_trap)(struct kvm_vcpu *vcpu); + int (*handle_fpe)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); int (*vcpu_init)(struct kvm_vcpu *vcpu); @@ -734,6 +737,11 @@ extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, struct kvm_run *run, struct kvm_vcpu *vcpu); +extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 3511bb2..fbf169f 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -2146,6 +2146,41 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, return er; } +enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_FPE << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver FPE when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + /* ll/sc, rdhwr, sync emulation */ #define OPCODE 0xfc000000 @@ -2353,6 +2388,7 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, case T_BREAK: case T_RES_INST: case T_TRAP: + case T_FPE: case T_MSADIS: break; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index b26a48d..dd08338 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -50,6 +50,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, + { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, @@ -1148,6 +1149,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ret = kvm_mips_callbacks->handle_trap(vcpu); break; + case T_FPE: + ++vcpu->stat.fpe_exits; + trace_kvm_exit(vcpu, FPE_EXITS); + ret = kvm_mips_callbacks->handle_fpe(vcpu); + break; + case T_MSADIS: ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); break; diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c index dd90b0a..3843828 100644 --- a/arch/mips/kvm/stats.c +++ b/arch/mips/kvm/stats.c @@ -26,6 +26,7 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = { "Reserved Inst", "Break Inst", "Trap Inst", + "FPE", "D-Cache Flushes", }; diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 408af24..421d5b8 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -39,16 +39,30 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) { + struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_run *run = vcpu->run; uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; unsigned long cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; - if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) - er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); - else + if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) { + /* FPU Unusable */ + if (!kvm_mips_guest_has_fpu(&vcpu->arch) || + (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) { + /* + * Unusable/no FPU in guest: + * deliver guest COP1 Unusable Exception + */ + er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu); + } else { + /* Restore FPU state */ + kvm_own_fpu(vcpu); + er = EMULATE_DONE; + } + } else { er = kvm_mips_emulate_inst(cause, opc, run, vcpu); + } switch (er) { case EMULATE_DONE: @@ -348,6 +362,24 @@ static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -576,6 +608,7 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_res_inst = kvm_trap_emul_handle_res_inst, .handle_break = kvm_trap_emul_handle_break, .handle_trap = kvm_trap_emul_handle_trap, + .handle_fpe = kvm_trap_emul_handle_fpe, .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, .vm_init = kvm_trap_emul_vm_init, -- cgit v0.10.2 From 379245cdf1d1efc1eccc38bf0cc985dae232123d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 2 Dec 2014 15:48:24 +0000 Subject: MIPS: KVM: Expose FPU registers Add KVM register numbers for the MIPS FPU registers, and implement access to them with the KVM_GET_ONE_REG / KVM_SET_ONE_REG ioctls when the FPU capability is enabled (exposed in a later patch) and present in the guest according to its Config1.FP bit. The registers are accessible in the current mode of the guest, with each sized access showing what the guest would see with an equivalent access, and like the architecture they may become UNPREDICTABLE if the FR mode is changed. When FR=0, odd doubles are inaccessible as they do not exist in that mode. Signed-off-by: James Hogan Acked-by: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: Jonathan Corbet Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: linux-doc@vger.kernel.org diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 3f295a0..f3c1983 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1979,6 +1979,10 @@ registers, find a list below: MIPS | KVM_REG_MIPS_COUNT_CTL | 64 MIPS | KVM_REG_MIPS_COUNT_RESUME | 64 MIPS | KVM_REG_MIPS_COUNT_HZ | 64 + MIPS | KVM_REG_MIPS_FPR_32(0..31) | 32 + MIPS | KVM_REG_MIPS_FPR_64(0..31) | 64 + MIPS | KVM_REG_MIPS_FCR_IR | 32 + MIPS | KVM_REG_MIPS_FCR_CSR | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: @@ -2032,6 +2036,18 @@ patterns depending on whether they're 32-bit or 64-bit registers: MIPS KVM control registers (see above) have the following id bit patterns: 0x7030 0000 0002 +MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following +id bit patterns depending on the size of the register being accessed. They are +always accessed according to the current guest FPU mode (Status.FR and +Config5.FRE), i.e. as the guest would see them, and they become unpredictable +if the guest FPU mode is changed: + 0x7020 0000 0003 00 <0:3> (32-bit FPU registers) + 0x7030 0000 0003 00 <0:3> (64-bit FPU registers) + +MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the +following id bit patterns: + 0x7020 0000 0003 01 <0:3> + 4.69 KVM_GET_ONE_REG diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index 75d6d85..401e6a6 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -36,18 +36,8 @@ struct kvm_regs { /* * for KVM_GET_FPU and KVM_SET_FPU - * - * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs - * are zero filled. */ struct kvm_fpu { - __u64 fpr[32]; - __u32 fir; - __u32 fccr; - __u32 fexr; - __u32 fenr; - __u32 fcsr; - __u32 pad; }; @@ -68,6 +58,8 @@ struct kvm_fpu { * * Register set = 2: KVM specific registers (see definitions below). * + * Register set = 3: FPU registers (see definitions below). + * * Other sets registers may be added in the future. Each set would * have its own identifier in bits[31..16]. */ @@ -75,6 +67,7 @@ struct kvm_fpu { #define KVM_REG_MIPS_GP (KVM_REG_MIPS | 0x0000000000000000ULL) #define KVM_REG_MIPS_CP0 (KVM_REG_MIPS | 0x0000000000010000ULL) #define KVM_REG_MIPS_KVM (KVM_REG_MIPS | 0x0000000000020000ULL) +#define KVM_REG_MIPS_FPU (KVM_REG_MIPS | 0x0000000000030000ULL) /* @@ -155,6 +148,30 @@ struct kvm_fpu { /* + * KVM_REG_MIPS_FPU - Floating Point registers. + * + * bits[15..8] - Register subset (see definitions below). + * bits[7..5] - Must be zero. + * bits[4..0] - Register number within register subset. + */ + +#define KVM_REG_MIPS_FPR (KVM_REG_MIPS_FPU | 0x0000000000000000ULL) +#define KVM_REG_MIPS_FCR (KVM_REG_MIPS_FPU | 0x0000000000000100ULL) + +/* + * KVM_REG_MIPS_FPR - Floating point / Vector registers. + */ +#define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32 | (n)) +#define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64 | (n)) + +/* + * KVM_REG_MIPS_FCR - Floating point control registers. + */ +#define KVM_REG_MIPS_FCR_IR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 0) +#define KVM_REG_MIPS_FCR_CSR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31) + + +/* * KVM MIPS specific structures and definitions * */ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index dd08338..5e41afe 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -526,10 +526,13 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; int ret; s64 v; + unsigned int idx; switch (reg->id) { + /* General purpose registers */ case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31: v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0]; break; @@ -543,6 +546,38 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, v = (long)vcpu->arch.pc; break; + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + v = get_fpr32(&fpu->fpr[idx], 0); + else + v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + v = get_fpr64(&fpu->fpr[idx], 0); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.fpu_id; + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + v = fpu->fcr31; + break; + + /* Co-processor 0 registers */ case KVM_REG_MIPS_CP0_INDEX: v = (long)kvm_read_c0_guest_index(cop0); break; @@ -636,7 +671,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) { struct mips_coproc *cop0 = vcpu->arch.cop0; - u64 v; + struct mips_fpu_struct *fpu = &vcpu->arch.fpu; + s64 v; + unsigned int idx; if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { u64 __user *uaddr64 = (u64 __user *)(long)reg->addr; @@ -655,6 +692,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, } switch (reg->id) { + /* General purpose registers */ case KVM_REG_MIPS_R0: /* Silently ignore requests to set $0 */ break; @@ -671,6 +709,38 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, vcpu->arch.pc = v; break; + /* Floating point registers */ + case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_32(0); + /* Odd singles in top of even double when FR=0 */ + if (kvm_read_c0_guest_status(cop0) & ST0_FR) + set_fpr32(&fpu->fpr[idx], 0, v); + else + set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v); + break; + case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31): + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_FPR_64(0); + /* Can't access odd doubles in FR=0 mode */ + if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + set_fpr64(&fpu->fpr[idx], 0, v); + break; + case KVM_REG_MIPS_FCR_IR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_FCR_CSR: + if (!kvm_mips_guest_has_fpu(&vcpu->arch)) + return -EINVAL; + fpu->fcr31 = v; + break; + + /* Co-processor 0 registers */ case KVM_REG_MIPS_CP0_INDEX: kvm_write_c0_guest_index(cop0, v); break; -- cgit v0.10.2 From 5fafd8748b366105e08c198892e9fe02ef15c021 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 8 Dec 2014 23:07:56 +0000 Subject: MIPS: KVM: Wire up FPU capability Now that the code is in place for KVM to support FPU in MIPS KVM guests, wire up the new KVM_CAP_MIPS_FPU capability. For backwards compatibility, the capability must be explicitly enabled in order to detect or make use of the FPU from the guest. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: Jonathan Corbet Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: linux-doc@vger.kernel.org diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f3c1983..a1e9bfa 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3312,6 +3312,19 @@ Parameters: none This capability enables the in-kernel irqchip for s390. Please refer to "4.24 KVM_CREATE_IRQCHIP" for details. +6.9 KVM_CAP_MIPS_FPU + +Architectures: mips +Target: vcpu +Parameters: args[0] is reserved for future use (should be 0). + +This capability allows the use of the host Floating Point Unit by the guest. It +allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is +done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed +(depending on the current guest FPU register mode), and the Status.FR, +Config5.FRE bits are accessible via the KVM API and also from the guest, +depending on them being supported by the FPU. + 7. Capabilities that can be enabled on VMs ------------------------------------------ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 5e41afe..7f86cb7 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -797,6 +797,30 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + int r = 0; + + if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap)) + return -EINVAL; + if (cap->flags) + return -EINVAL; + if (cap->args[0]) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_MIPS_FPU: + vcpu->arch.fpu_enabled = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -854,6 +878,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); break; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -ENOIOCTLCMD; } @@ -962,11 +995,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_ONE_REG: + case KVM_CAP_ENABLE_CAP: r = 1; break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_MIPS_FPU: + r = !!cpu_has_fpu; + break; default: r = 0; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1162ef7..ce49688 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -802,6 +802,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_MEM_OP 108 #define KVM_CAP_S390_USER_STSI 109 #define KVM_CAP_S390_SKEYS 110 +#define KVM_CAP_MIPS_FPU 111 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 539cb89fbdfe082d00be6f83d0f2140b7802151c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Thu, 5 Mar 2015 11:43:36 +0000 Subject: MIPS: KVM: Add base guest MSA support Add base code for supporting the MIPS SIMD Architecture (MSA) in MIPS KVM guests. MSA cannot yet be enabled in the guest, we're just laying the groundwork. As with the FPU, whether the guest's MSA context is loaded is stored in another bit in the fpu_inuse vcpu member. This allows MSA to be disabled when the guest disables it, but keeping the MSA context loaded so it doesn't have to be reloaded if the guest re-enables it. New assembly code is added for saving and restoring the MSA context, restoring only the upper half of the MSA context (for if the FPU context is already loaded) and for saving/clearing and restoring MSACSR (which can itself cause an MSA FP exception depending on the value). The MSACSR is restored before returning to the guest if MSA is already enabled, and the existing FP exception die notifier is extended to catch the possible MSA FP exception and step over the ctcmsa instruction. The helper function kvm_own_msa() is added to enable MSA and restore the MSA context if it isn't already loaded, which will be used in a later patch when the guest attempts to use MSA for the first time and triggers an MSA disabled exception. The existing FPU helpers are extended to handle MSA. kvm_lose_fpu() saves the full MSA context if it is loaded (which includes the FPU context) and both kvm_lose_fpu() and kvm_drop_fpu() disable MSA. kvm_own_fpu() also needs to lose any MSA context if FR=0, since there would be a risk of getting reserved instruction exceptions if CU1 is enabled and we later try and save the MSA context. We shouldn't usually hit this case since it will be handled when emulating CU1 changes, however there's nothing to stop the guest modifying the Status register directly via the comm page, which will cause this case to get hit. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index fb264d8..1dc0dca 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -360,6 +360,7 @@ struct kvm_mips_tlb { }; #define KVM_MIPS_FPU_FPU 0x1 +#define KVM_MIPS_FPU_MSA 0x2 #define KVM_MIPS_GUEST_TLB_SIZE 64 struct kvm_vcpu_arch { @@ -432,6 +433,7 @@ struct kvm_vcpu_arch { int wait; u8 fpu_enabled; + u8 msa_enabled; }; @@ -576,6 +578,18 @@ static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu) kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP; } +static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu) +{ + return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) && + vcpu->msa_enabled; +} + +static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu) +{ + return kvm_mips_guest_can_have_msa(vcpu) && + kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA; +} + struct kvm_mips_callbacks { int (*handle_cop_unusable)(struct kvm_vcpu *vcpu); int (*handle_tlb_mod)(struct kvm_vcpu *vcpu); @@ -619,11 +633,16 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); /* Trampoline ASM routine to start running in "Guest" context */ extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu); -/* FPU context management */ +/* FPU/MSA context management */ void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu); void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu); void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu); +void __kvm_save_msa(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu); +void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu); void kvm_own_fpu(struct kvm_vcpu *vcpu); +void kvm_own_msa(struct kvm_vcpu *vcpu); void kvm_drop_fpu(struct kvm_vcpu *vcpu); void kvm_lose_fpu(struct kvm_vcpu *vcpu); diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index a12bcf9..e59fd7c 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -440,6 +440,7 @@ void output_kvm_defines(void) OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]); OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31); + OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr); BLANK(); OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0); diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile index 78d7bcd..637ebbe 100644 --- a/arch/mips/kvm/Makefile +++ b/arch/mips/kvm/Makefile @@ -1,11 +1,13 @@ # Makefile for KVM support for MIPS # -common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) +common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o) EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm -kvm-objs := $(common-objs) mips.o emulate.o locore.o \ +common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o + +kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \ interrupt.o stats.o commpage.o \ dyntrans.o trap_emul.o fpu.o diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index f559404..c567240 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -36,6 +36,8 @@ #define PT_HOST_USERLOCAL PT_EPC #define CP0_DDATA_LO $28,3 +#define CP0_CONFIG3 $16,3 +#define CP0_CONFIG5 $16,5 #define CP0_EBASE $15,1 #define CP0_INTCTL $12,1 @@ -370,6 +372,25 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra) .set noat 1: +#ifdef CONFIG_CPU_HAS_MSA + /* + * If MSA is enabled, save MSACSR and clear it so that later + * instructions don't trigger MSAFPE for pending exceptions. + */ + mfc0 t0, CP0_CONFIG3 + ext t0, t0, 28, 1 /* MIPS_CONF3_MSAP */ + beqz t0, 1f + nop + mfc0 t0, CP0_CONFIG5 + ext t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */ + beqz t0, 1f + nop + _cfcmsa t0, MSA_CSR + sw t0, VCPU_MSA_CSR(k1) + _ctcmsa MSA_CSR, zero +1: +#endif + /* Now that the new EBASE has been loaded, unset BEV and KSU_USER */ .set at and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 7f86cb7..a17f210 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1295,17 +1295,21 @@ skip_emul: if (ret == RESUME_GUEST) { /* - * If FPU is enabled (i.e. the guest's FPU context is live), - * restore FCR31. + * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context + * is live), restore FCR31 / MSACSR. * * This should be before returning to the guest exception - * vector, as it may well cause an FP exception if there are - * pending exception bits unmasked. (see + * vector, as it may well cause an [MSA] FP exception if there + * are pending exception bits unmasked. (see * kvm_mips_csr_die_notifier() for how that is handled). */ if (kvm_mips_guest_has_fpu(&vcpu->arch) && read_c0_status() & ST0_CU1) __kvm_restore_fcsr(&vcpu->arch); + + if (kvm_mips_guest_has_msa(&vcpu->arch) && + read_c0_config5() & MIPS_CONF5_MSAEN) + __kvm_restore_msacsr(&vcpu->arch); } /* Disable HTW before returning to guest or host */ @@ -1322,11 +1326,26 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) preempt_disable(); + sr = kvm_read_c0_guest_status(cop0); + + /* + * If MSA state is already live, it is undefined how it interacts with + * FR=0 FPU state, and we don't want to hit reserved instruction + * exceptions trying to save the MSA state later when CU=1 && FR=1, so + * play it safe and save it first. + * + * In theory we shouldn't ever hit this case since kvm_lose_fpu() should + * get called when guest CU1 is set, however we can't trust the guest + * not to clobber the status register directly via the commpage. + */ + if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + kvm_lose_fpu(vcpu); + /* * Enable FPU for guest * We set FR and FRE according to guest context */ - sr = kvm_read_c0_guest_status(cop0); change_c0_status(ST0_CU1 | ST0_FR, sr); if (cpu_has_fre) { cfg5 = kvm_read_c0_guest_config5(cop0); @@ -1343,10 +1362,73 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } -/* Drop FPU without saving it */ +#ifdef CONFIG_CPU_HAS_MSA +/* Enable MSA for guest and restore context */ +void kvm_own_msa(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + unsigned int sr, cfg5; + + preempt_disable(); + + /* + * Enable FPU if enabled in guest, since we're restoring FPU context + * anyway. We set FR and FRE according to guest context. + */ + if (kvm_mips_guest_has_fpu(&vcpu->arch)) { + sr = kvm_read_c0_guest_status(cop0); + + /* + * If FR=0 FPU state is already live, it is undefined how it + * interacts with MSA state, so play it safe and save it first. + */ + if (!(sr & ST0_FR) && + (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | + KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU) + kvm_lose_fpu(vcpu); + + change_c0_status(ST0_CU1 | ST0_FR, sr); + if (sr & ST0_CU1 && cpu_has_fre) { + cfg5 = kvm_read_c0_guest_config5(cop0); + change_c0_config5(MIPS_CONF5_FRE, cfg5); + } + } + + /* Enable MSA for guest */ + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + + switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) { + case KVM_MIPS_FPU_FPU: + /* + * Guest FPU state already loaded, only restore upper MSA state + */ + __kvm_restore_msa_upper(&vcpu->arch); + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA; + break; + case 0: + /* Neither FPU or MSA already active, restore full MSA state */ + __kvm_restore_msa(&vcpu->arch); + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA; + if (kvm_mips_guest_has_fpu(&vcpu->arch)) + vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU; + break; + default: + break; + } + + preempt_enable(); +} +#endif + +/* Drop FPU & MSA without saving it */ void kvm_drop_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); + if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) { + disable_msa(); + vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA; + } if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { clear_c0_status(ST0_CU1 | ST0_FR); vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU; @@ -1354,18 +1436,29 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } -/* Save and disable FPU */ +/* Save and disable FPU & MSA */ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { /* - * FPU gets disabled in root context (hardware) when it is disabled in - * guest context (software), but the register state in the hardware may - * still be in use. This is why we explicitly re-enable the hardware + * FPU & MSA get disabled in root context (hardware) when it is disabled + * in guest context (software), but the register state in the hardware + * may still be in use. This is why we explicitly re-enable the hardware * before saving. */ preempt_disable(); - if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { + if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) { + set_c0_config5(MIPS_CONF5_MSAEN); + enable_fpu_hazard(); + + __kvm_save_msa(&vcpu->arch); + + /* Disable MSA & FPU */ + disable_msa(); + if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) + clear_c0_status(ST0_CU1 | ST0_FR); + vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA); + } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) { set_c0_status(ST0_CU1); enable_fpu_hazard(); @@ -1379,9 +1472,9 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu) } /* - * Step over a specific ctc1 to FCSR which is used to restore guest FCSR state - * and may trigger a "harmless" FP exception if cause bits are set in the value - * being written. + * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are + * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP + * exception if cause bits are set in the value being written. */ static int kvm_mips_csr_die_notify(struct notifier_block *self, unsigned long cmd, void *ptr) @@ -1390,8 +1483,8 @@ static int kvm_mips_csr_die_notify(struct notifier_block *self, struct pt_regs *regs = args->regs; unsigned long pc; - /* Only interested in FPE */ - if (cmd != DIE_FP) + /* Only interested in FPE and MSAFPE */ + if (cmd != DIE_FP && cmd != DIE_MSAFP) return NOTIFY_DONE; /* Return immediately if guest context isn't active */ @@ -1408,6 +1501,13 @@ static int kvm_mips_csr_die_notify(struct notifier_block *self, if (pc != (unsigned long)&__kvm_restore_fcsr + 4) return NOTIFY_DONE; break; + case DIE_MSAFP: + /* match 2nd/3rd instruction in __kvm_restore_msacsr */ + if (!cpu_has_msa || + pc < (unsigned long)&__kvm_restore_msacsr + 4 || + pc > (unsigned long)&__kvm_restore_msacsr + 8) + return NOTIFY_DONE; + break; } /* Move PC forward a little and continue executing */ diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S new file mode 100644 index 0000000..d02f0c6 --- /dev/null +++ b/arch/mips/kvm/msa.S @@ -0,0 +1,161 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * MIPS SIMD Architecture (MSA) context handling code for KVM. + * + * Copyright (C) 2015 Imagination Technologies Ltd. + */ + +#include +#include +#include +#include + + .set noreorder + .set noat + +LEAF(__kvm_save_msa) + st_d 0, VCPU_FPR0, a0 + st_d 1, VCPU_FPR1, a0 + st_d 2, VCPU_FPR2, a0 + st_d 3, VCPU_FPR3, a0 + st_d 4, VCPU_FPR4, a0 + st_d 5, VCPU_FPR5, a0 + st_d 6, VCPU_FPR6, a0 + st_d 7, VCPU_FPR7, a0 + st_d 8, VCPU_FPR8, a0 + st_d 9, VCPU_FPR9, a0 + st_d 10, VCPU_FPR10, a0 + st_d 11, VCPU_FPR11, a0 + st_d 12, VCPU_FPR12, a0 + st_d 13, VCPU_FPR13, a0 + st_d 14, VCPU_FPR14, a0 + st_d 15, VCPU_FPR15, a0 + st_d 16, VCPU_FPR16, a0 + st_d 17, VCPU_FPR17, a0 + st_d 18, VCPU_FPR18, a0 + st_d 19, VCPU_FPR19, a0 + st_d 20, VCPU_FPR20, a0 + st_d 21, VCPU_FPR21, a0 + st_d 22, VCPU_FPR22, a0 + st_d 23, VCPU_FPR23, a0 + st_d 24, VCPU_FPR24, a0 + st_d 25, VCPU_FPR25, a0 + st_d 26, VCPU_FPR26, a0 + st_d 27, VCPU_FPR27, a0 + st_d 28, VCPU_FPR28, a0 + st_d 29, VCPU_FPR29, a0 + st_d 30, VCPU_FPR30, a0 + st_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_save_msa) + +LEAF(__kvm_restore_msa) + ld_d 0, VCPU_FPR0, a0 + ld_d 1, VCPU_FPR1, a0 + ld_d 2, VCPU_FPR2, a0 + ld_d 3, VCPU_FPR3, a0 + ld_d 4, VCPU_FPR4, a0 + ld_d 5, VCPU_FPR5, a0 + ld_d 6, VCPU_FPR6, a0 + ld_d 7, VCPU_FPR7, a0 + ld_d 8, VCPU_FPR8, a0 + ld_d 9, VCPU_FPR9, a0 + ld_d 10, VCPU_FPR10, a0 + ld_d 11, VCPU_FPR11, a0 + ld_d 12, VCPU_FPR12, a0 + ld_d 13, VCPU_FPR13, a0 + ld_d 14, VCPU_FPR14, a0 + ld_d 15, VCPU_FPR15, a0 + ld_d 16, VCPU_FPR16, a0 + ld_d 17, VCPU_FPR17, a0 + ld_d 18, VCPU_FPR18, a0 + ld_d 19, VCPU_FPR19, a0 + ld_d 20, VCPU_FPR20, a0 + ld_d 21, VCPU_FPR21, a0 + ld_d 22, VCPU_FPR22, a0 + ld_d 23, VCPU_FPR23, a0 + ld_d 24, VCPU_FPR24, a0 + ld_d 25, VCPU_FPR25, a0 + ld_d 26, VCPU_FPR26, a0 + ld_d 27, VCPU_FPR27, a0 + ld_d 28, VCPU_FPR28, a0 + ld_d 29, VCPU_FPR29, a0 + ld_d 30, VCPU_FPR30, a0 + ld_d 31, VCPU_FPR31, a0 + jr ra + nop + END(__kvm_restore_msa) + + .macro kvm_restore_msa_upper wr, off, base + .set push + .set noat +#ifdef CONFIG_64BIT + ld $1, \off(\base) + insert_d \wr, 1 +#elif defined(CONFIG_CPU_LITTLE_ENDIAN) + lw $1, \off(\base) + insert_w \wr, 2 + lw $1, (\off+4)(\base) + insert_w \wr, 3 +#else /* CONFIG_CPU_BIG_ENDIAN */ + lw $1, (\off+4)(\base) + insert_w \wr, 2 + lw $1, \off(\base) + insert_w \wr, 3 +#endif + .set pop + .endm + +LEAF(__kvm_restore_msa_upper) + kvm_restore_msa_upper 0, VCPU_FPR0 +8, a0 + kvm_restore_msa_upper 1, VCPU_FPR1 +8, a0 + kvm_restore_msa_upper 2, VCPU_FPR2 +8, a0 + kvm_restore_msa_upper 3, VCPU_FPR3 +8, a0 + kvm_restore_msa_upper 4, VCPU_FPR4 +8, a0 + kvm_restore_msa_upper 5, VCPU_FPR5 +8, a0 + kvm_restore_msa_upper 6, VCPU_FPR6 +8, a0 + kvm_restore_msa_upper 7, VCPU_FPR7 +8, a0 + kvm_restore_msa_upper 8, VCPU_FPR8 +8, a0 + kvm_restore_msa_upper 9, VCPU_FPR9 +8, a0 + kvm_restore_msa_upper 10, VCPU_FPR10+8, a0 + kvm_restore_msa_upper 11, VCPU_FPR11+8, a0 + kvm_restore_msa_upper 12, VCPU_FPR12+8, a0 + kvm_restore_msa_upper 13, VCPU_FPR13+8, a0 + kvm_restore_msa_upper 14, VCPU_FPR14+8, a0 + kvm_restore_msa_upper 15, VCPU_FPR15+8, a0 + kvm_restore_msa_upper 16, VCPU_FPR16+8, a0 + kvm_restore_msa_upper 17, VCPU_FPR17+8, a0 + kvm_restore_msa_upper 18, VCPU_FPR18+8, a0 + kvm_restore_msa_upper 19, VCPU_FPR19+8, a0 + kvm_restore_msa_upper 20, VCPU_FPR20+8, a0 + kvm_restore_msa_upper 21, VCPU_FPR21+8, a0 + kvm_restore_msa_upper 22, VCPU_FPR22+8, a0 + kvm_restore_msa_upper 23, VCPU_FPR23+8, a0 + kvm_restore_msa_upper 24, VCPU_FPR24+8, a0 + kvm_restore_msa_upper 25, VCPU_FPR25+8, a0 + kvm_restore_msa_upper 26, VCPU_FPR26+8, a0 + kvm_restore_msa_upper 27, VCPU_FPR27+8, a0 + kvm_restore_msa_upper 28, VCPU_FPR28+8, a0 + kvm_restore_msa_upper 29, VCPU_FPR29+8, a0 + kvm_restore_msa_upper 30, VCPU_FPR30+8, a0 + kvm_restore_msa_upper 31, VCPU_FPR31+8, a0 + jr ra + nop + END(__kvm_restore_msa_upper) + +LEAF(__kvm_restore_msacsr) + lw t0, VCPU_MSA_CSR(a0) + /* + * The ctcmsa must stay at this offset in __kvm_restore_msacsr. + * See kvm_mips_csr_die_notify() which handles t0 containing a value + * which triggers an MSA FP Exception, which must be stepped over and + * ignored since the set cause bits must remain there for the guest. + */ + _ctcmsa MSA_CSR, t0 + jr ra + nop + END(__kvm_restore_msacsr) -- cgit v0.10.2 From 2b6009d646887cac8888f1ce8694af0beefce88b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 6 Feb 2015 23:01:00 +0000 Subject: MIPS: KVM: Emulate MSA bits in COP0 interface Emulate MSA related parts of COP0 interface so that the guest will be able to enable/disable MSA (Config5.MSAEn) once the MSA capability has been wired up. As with the FPU (Status.CU1) setting Config5.MSAEn has no immediate effect if the MSA state isn't live, as MSA state is restored lazily on first use. Changes after the MSA state has been restored take immediate effect, so that the guest can start getting MSA disabled exceptions right away for guest MSA operations. The MSA state is saved lazily too, as MSA may get re-enabled in the near future anyway. A special case is also added for when Status.CU1 is set while FR=0 and the MSA state is live. In this case we are at risk of getting reserved instruction exceptions if we try and save the MSA state, so we lose the MSA state sooner while MSA is still usable. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index fbf169f..07f554c 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -912,7 +912,13 @@ unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu) unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu) { /* Config4 is optional */ - return MIPS_CONF_M; + unsigned int mask = MIPS_CONF_M; + + /* Permit MSA to be present if MSA is supported */ + if (kvm_mips_guest_can_have_msa(&vcpu->arch)) + mask |= MIPS_CONF3_MSA; + + return mask; } /** @@ -939,6 +945,10 @@ unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu) { unsigned int mask = 0; + /* Permit MSAEn changes if MSA supported and enabled */ + if (kvm_mips_guest_has_msa(&vcpu->arch)) + mask |= MIPS_CONF5_MSAEN; + /* * Permit guest FPU mode changes if FPU is enabled and the relevant * feature exists according to FIR register. @@ -1126,6 +1136,18 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, kvm_drop_fpu(vcpu); /* + * If MSA state is already live, it is undefined + * how it interacts with FR=0 FPU state, and we + * don't want to hit reserved instruction + * exceptions trying to save the MSA state later + * when CU=1 && FR=1, so play it safe and save + * it first. + */ + if (change & ST0_CU1 && !(val & ST0_FR) && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + kvm_lose_fpu(vcpu); + + /* * Propagate CU1 (FPU enable) changes * immediately if the FPU context is already * loaded. When disabling we leave the context @@ -1160,7 +1182,7 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, val = old_val ^ change; - /* Handle changes in FPU modes */ + /* Handle changes in FPU/MSA modes */ preempt_disable(); /* @@ -1171,6 +1193,17 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc, vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) change_c0_config5(MIPS_CONF5_FRE, val); + /* + * Propagate MSAEn changes immediately if the + * MSA context is already loaded. When disabling + * we leave the context loaded so it can be + * quickly enabled again in the near future. + */ + if (change & MIPS_CONF5_MSAEN && + vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) + change_c0_config5(MIPS_CONF5_MSAEN, + val); + preempt_enable(); kvm_write_c0_guest_config5(cop0, val); -- cgit v0.10.2 From c2537ed9fb8e17d713e5e67fcede047699d25814 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 6 Feb 2015 10:56:27 +0000 Subject: MIPS: KVM: Add MSA exception handling Add guest exception handling for MIPS SIMD Architecture (MSA) floating point exceptions and MSA disabled exceptions. MSA floating point exceptions from the guest need passing to the guest kernel, so for these a guest MSAFPE is emulated. MSA disabled exceptions are normally handled by passing a reserved instruction exception to the guest (because no guest MSA was supported), but the hypervisor can now handle them if the guest has MSA by passing an MSA disabled exception to the guest, or if the guest has MSA enabled by transparently restoring the guest MSA context and enabling MSA and the FPU. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 1dc0dca..4c25823 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -123,7 +123,9 @@ struct kvm_vcpu_stat { u32 resvd_inst_exits; u32 break_inst_exits; u32 trap_inst_exits; + u32 msa_fpe_exits; u32 fpe_exits; + u32 msa_disabled_exits; u32 flush_dcache_exits; u32 halt_successful_poll; u32 halt_wakeup; @@ -144,7 +146,9 @@ enum kvm_mips_exit_types { RESVD_INST_EXITS, BREAK_INST_EXITS, TRAP_INST_EXITS, + MSA_FPE_EXITS, FPE_EXITS, + MSA_DISABLED_EXITS, FLUSH_DCACHE_EXITS, MAX_KVM_MIPS_EXIT_TYPES }; @@ -305,6 +309,7 @@ enum mips_mmu_types { */ #define T_TRAP 13 /* Trap instruction */ #define T_VCEI 14 /* Virtual coherency exception */ +#define T_MSAFPE 14 /* MSA floating point exception */ #define T_FPE 15 /* Floating point exception */ #define T_MSADIS 21 /* MSA disabled exception */ #define T_WATCH 23 /* Watch address reference */ @@ -601,6 +606,7 @@ struct kvm_mips_callbacks { int (*handle_res_inst)(struct kvm_vcpu *vcpu); int (*handle_break)(struct kvm_vcpu *vcpu); int (*handle_trap)(struct kvm_vcpu *vcpu); + int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); int (*handle_fpe)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); @@ -756,11 +762,21 @@ extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, struct kvm_run *run, struct kvm_vcpu *vcpu); +extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, uint32_t *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); +extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu); + extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run); diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 07f554c..6230f37 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -2179,6 +2179,41 @@ enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause, return er; } +enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_MSAFPE << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver MSAFPE when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, uint32_t *opc, struct kvm_run *run, @@ -2214,6 +2249,41 @@ enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause, return er; } +enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause, + uint32_t *opc, + struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + struct kvm_vcpu_arch *arch = &vcpu->arch; + enum emulation_result er = EMULATE_DONE; + + if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) { + /* save old pc */ + kvm_write_c0_guest_epc(cop0, arch->pc); + kvm_set_c0_guest_status(cop0, ST0_EXL); + + if (cause & CAUSEF_BD) + kvm_set_c0_guest_cause(cop0, CAUSEF_BD); + else + kvm_clear_c0_guest_cause(cop0, CAUSEF_BD); + + kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc); + + kvm_change_c0_guest_cause(cop0, (0xff), + (T_MSADIS << CAUSEB_EXCCODE)); + + /* Set PC to the exception entry point */ + arch->pc = KVM_GUEST_KSEG0 + 0x180; + + } else { + kvm_err("Trying to deliver MSADIS when EXL is already set\n"); + er = EMULATE_FAIL; + } + + return er; +} + /* ll/sc, rdhwr, sync emulation */ #define OPCODE 0xfc000000 @@ -2421,6 +2491,7 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause, case T_BREAK: case T_RES_INST: case T_TRAP: + case T_MSAFPE: case T_FPE: case T_MSADIS: break; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index a17f210..e02c7e5 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -50,7 +50,9 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, + { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU }, { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, + { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, @@ -1256,6 +1258,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) ret = kvm_mips_callbacks->handle_trap(vcpu); break; + case T_MSAFPE: + ++vcpu->stat.msa_fpe_exits; + trace_kvm_exit(vcpu, MSA_FPE_EXITS); + ret = kvm_mips_callbacks->handle_msa_fpe(vcpu); + break; + case T_FPE: ++vcpu->stat.fpe_exits; trace_kvm_exit(vcpu, FPE_EXITS); @@ -1263,6 +1271,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; case T_MSADIS: + ++vcpu->stat.msa_disabled_exits; + trace_kvm_exit(vcpu, MSA_DISABLED_EXITS); ret = kvm_mips_callbacks->handle_msa_disabled(vcpu); break; diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c index 3843828..888bb67 100644 --- a/arch/mips/kvm/stats.c +++ b/arch/mips/kvm/stats.c @@ -26,7 +26,9 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = { "Reserved Inst", "Break Inst", "Trap Inst", + "MSA FPE", "FPE", + "MSA Disabled", "D-Cache Flushes", }; diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 421d5b8..d836ed5 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -362,6 +362,24 @@ static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu) return ret; } +static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc; + unsigned long cause = vcpu->arch.host_cp0_cause; + enum emulation_result er = EMULATE_DONE; + int ret = RESUME_GUEST; + + er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu); + if (er == EMULATE_DONE) { + ret = RESUME_GUEST; + } else { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + ret = RESUME_HOST; + } + return ret; +} + static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; @@ -380,16 +398,36 @@ static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu) return ret; } +/** + * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use MSA when it is disabled. + */ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) { + struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_run *run = vcpu->run; uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc; unsigned long cause = vcpu->arch.host_cp0_cause; enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; - /* No MSA supported in guest, guest reserved instruction exception */ - er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + if (!kvm_mips_guest_has_msa(&vcpu->arch) || + (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) { + /* + * No MSA in guest, or FPU enabled and not in FR=1 mode, + * guest reserved instruction exception + */ + er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu); + } else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) { + /* MSA disabled by guest, guest MSA disabled exception */ + er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu); + } else { + /* Restore MSA/FPU state */ + kvm_own_msa(vcpu); + er = EMULATE_DONE; + } switch (er) { case EMULATE_DONE: @@ -608,6 +646,7 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_res_inst = kvm_trap_emul_handle_res_inst, .handle_break = kvm_trap_emul_handle_break, .handle_trap = kvm_trap_emul_handle_trap, + .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe, .handle_fpe = kvm_trap_emul_handle_fpe, .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, -- cgit v0.10.2 From ab86bd600400357ffa0dfdb1797f587476d01352 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 2 Dec 2014 15:48:24 +0000 Subject: MIPS: KVM: Expose MSA registers Add KVM register numbers for the MIPS SIMD Architecture (MSA) registers, and implement access to them with the KVM_GET_ONE_REG / KVM_SET_ONE_REG ioctls when the MSA capability is enabled (exposed in a later patch) and present in the guest according to its Config3.MSAP bit. The MSA vector registers use the same register numbers as the FPU registers except with a different size (128bits). Since MSA depends on Status.FR=1, these registers are inaccessible when Status.FR=0. These registers are returned as a single native endian 128bit value, rather than least significant half first with each 64-bit half native endian as the kernel uses internally. Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Paul Burton Cc: Ralf Baechle Cc: Gleb Natapov Cc: Jonathan Corbet Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: linux-doc@vger.kernel.org diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a1e9bfa..6280987 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1981,8 +1981,11 @@ registers, find a list below: MIPS | KVM_REG_MIPS_COUNT_HZ | 64 MIPS | KVM_REG_MIPS_FPR_32(0..31) | 32 MIPS | KVM_REG_MIPS_FPR_64(0..31) | 64 + MIPS | KVM_REG_MIPS_VEC_128(0..31) | 128 MIPS | KVM_REG_MIPS_FCR_IR | 32 MIPS | KVM_REG_MIPS_FCR_CSR | 32 + MIPS | KVM_REG_MIPS_MSA_IR | 32 + MIPS | KVM_REG_MIPS_MSA_CSR | 32 ARM registers are mapped using the lower 32 bits. The upper 16 of that is the register group type, or coprocessor number: @@ -2040,14 +2043,21 @@ MIPS FPU registers (see KVM_REG_MIPS_FPR_{32,64}() above) have the following id bit patterns depending on the size of the register being accessed. They are always accessed according to the current guest FPU mode (Status.FR and Config5.FRE), i.e. as the guest would see them, and they become unpredictable -if the guest FPU mode is changed: +if the guest FPU mode is changed. MIPS SIMD Architecture (MSA) vector +registers (see KVM_REG_MIPS_VEC_128() above) have similar patterns as they +overlap the FPU registers: 0x7020 0000 0003 00 <0:3> (32-bit FPU registers) 0x7030 0000 0003 00 <0:3> (64-bit FPU registers) + 0x7040 0000 0003 00 <0:3> (128-bit MSA vector registers) MIPS FPU control registers (see KVM_REG_MIPS_FCR_{IR,CSR} above) have the following id bit patterns: 0x7020 0000 0003 01 <0:3> +MIPS MSA control registers (see KVM_REG_MIPS_MSA_{IR,CSR} above) have the +following id bit patterns: + 0x7020 0000 0003 02 <0:3> + 4.69 KVM_GET_ONE_REG diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index 401e6a6..6985eb5 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -58,7 +58,7 @@ struct kvm_fpu { * * Register set = 2: KVM specific registers (see definitions below). * - * Register set = 3: FPU registers (see definitions below). + * Register set = 3: FPU / MSA registers (see definitions below). * * Other sets registers may be added in the future. Each set would * have its own identifier in bits[31..16]. @@ -148,7 +148,7 @@ struct kvm_fpu { /* - * KVM_REG_MIPS_FPU - Floating Point registers. + * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers. * * bits[15..8] - Register subset (see definitions below). * bits[7..5] - Must be zero. @@ -157,12 +157,14 @@ struct kvm_fpu { #define KVM_REG_MIPS_FPR (KVM_REG_MIPS_FPU | 0x0000000000000000ULL) #define KVM_REG_MIPS_FCR (KVM_REG_MIPS_FPU | 0x0000000000000100ULL) +#define KVM_REG_MIPS_MSACR (KVM_REG_MIPS_FPU | 0x0000000000000200ULL) /* * KVM_REG_MIPS_FPR - Floating point / Vector registers. */ #define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32 | (n)) #define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64 | (n)) +#define KVM_REG_MIPS_VEC_128(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n)) /* * KVM_REG_MIPS_FCR - Floating point control registers. @@ -170,6 +172,12 @@ struct kvm_fpu { #define KVM_REG_MIPS_FCR_IR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 0) #define KVM_REG_MIPS_FCR_CSR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31) +/* + * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers. + */ +#define KVM_REG_MIPS_MSA_IR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 0) +#define KVM_REG_MIPS_MSA_CSR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 1) + /* * KVM MIPS specific structures and definitions diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index e02c7e5..35d3146 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -531,6 +531,7 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, struct mips_fpu_struct *fpu = &vcpu->arch.fpu; int ret; s64 v; + s64 vs[2]; unsigned int idx; switch (reg->id) { @@ -579,6 +580,35 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, v = fpu->fcr31; break; + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Can't access MSA registers in FR=0 mode */ + if (!(kvm_read_c0_guest_status(cop0) & ST0_FR)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 0); + vs[1] = get_fpr64(&fpu->fpr[idx], 1); +#else + /* most significant byte first */ + vs[0] = get_fpr64(&fpu->fpr[idx], 1); + vs[1] = get_fpr64(&fpu->fpr[idx], 0); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = boot_cpu_data.msa_id; + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + v = fpu->msacsr; + break; + /* Co-processor 0 registers */ case KVM_REG_MIPS_CP0_INDEX: v = (long)kvm_read_c0_guest_index(cop0); @@ -664,6 +694,10 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, u32 v32 = (u32)v; return put_user(v32, uaddr32); + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_to_user(uaddr, vs, 16); } else { return -EINVAL; } @@ -675,6 +709,7 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, struct mips_coproc *cop0 = vcpu->arch.cop0; struct mips_fpu_struct *fpu = &vcpu->arch.fpu; s64 v; + s64 vs[2]; unsigned int idx; if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) { @@ -689,6 +724,10 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, if (get_user(v32, uaddr32) != 0) return -EFAULT; v = (s64)v32; + } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) { + void __user *uaddr = (void __user *)(long)reg->addr; + + return copy_from_user(vs, uaddr, 16); } else { return -EINVAL; } @@ -742,6 +781,32 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, fpu->fcr31 = v; break; + /* MIPS SIMD Architecture (MSA) registers */ + case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31): + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + idx = reg->id - KVM_REG_MIPS_VEC_128(0); +#ifdef CONFIG_CPU_LITTLE_ENDIAN + /* least significant byte first */ + set_fpr64(&fpu->fpr[idx], 0, vs[0]); + set_fpr64(&fpu->fpr[idx], 1, vs[1]); +#else + /* most significant byte first */ + set_fpr64(&fpu->fpr[idx], 1, vs[0]); + set_fpr64(&fpu->fpr[idx], 0, vs[1]); +#endif + break; + case KVM_REG_MIPS_MSA_IR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + /* Read-only */ + break; + case KVM_REG_MIPS_MSA_CSR: + if (!kvm_mips_guest_has_msa(&vcpu->arch)) + return -EINVAL; + fpu->msacsr = v; + break; + /* Co-processor 0 registers */ case KVM_REG_MIPS_CP0_INDEX: kvm_write_c0_guest_index(cop0, v); -- cgit v0.10.2 From d952bd070f79b6dcbad52c03dbc41cbc8ba086c8 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Mon, 8 Dec 2014 23:07:56 +0000 Subject: MIPS: KVM: Wire up MSA capability Now that the code is in place for KVM to support MIPS SIMD Architecutre (MSA) in MIPS guests, wire up the new KVM_CAP_MIPS_MSA capability. For backwards compatibility, the capability must be explicitly enabled in order to detect or make use of MSA from the guest. The capability is not supported if the hardware supports MSA vector partitioning, since the extra support cannot be tested yet and it extends the state that the userland program would have to save. Signed-off-by: James Hogan Acked-by: Paolo Bonzini Cc: Ralf Baechle Cc: Gleb Natapov Cc: Jonathan Corbet Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: linux-api@vger.kernel.org Cc: linux-doc@vger.kernel.org diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6280987..1490eb0 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3335,6 +3335,18 @@ done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed Config5.FRE bits are accessible via the KVM API and also from the guest, depending on them being supported by the FPU. +6.10 KVM_CAP_MIPS_MSA + +Architectures: mips +Target: vcpu +Parameters: args[0] is reserved for future use (should be 0). + +This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest. +It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest. +Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be +accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from +the guest. + 7. Capabilities that can be enabled on VMs ------------------------------------------ diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 35d3146..bb68e8d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -880,6 +880,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, case KVM_CAP_MIPS_FPU: vcpu->arch.fpu_enabled = true; break; + case KVM_CAP_MIPS_MSA: + vcpu->arch.msa_enabled = true; + break; default: r = -EINVAL; break; @@ -1071,6 +1074,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MIPS_FPU: r = !!cpu_has_fpu; break; + case KVM_CAP_MIPS_MSA: + /* + * We don't support MSA vector partitioning yet: + * 1) It would require explicit support which can't be tested + * yet due to lack of support in current hardware. + * 2) It extends the state that would need to be saved/restored + * by e.g. QEMU for migration. + * + * When vector partitioning hardware becomes available, support + * could be added by requiring a flag when enabling + * KVM_CAP_MIPS_MSA capability to indicate that userland knows + * to save/restore the appropriate extra state. + */ + r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF); + break; default: r = 0; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index ce49688..05a2083 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -803,6 +803,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_STSI 109 #define KVM_CAP_S390_SKEYS 110 #define KVM_CAP_MIPS_FPU 111 +#define KVM_CAP_MIPS_MSA 112 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 2dccb4cdbf8fd4cb1d779a6f7ddd66d193bb5805 Mon Sep 17 00:00:00 2001 From: Petr Matousek Date: Wed, 11 Mar 2015 12:16:09 +0100 Subject: kvm: x86: i8259: return initialized data on invalid-size read If data is read from PIC with invalid access size, the return data stays uninitialized even though success is returned. Fix this by always initializing the data. Signed-off-by: Petr Matousek Reported-by: Nadav Amit Message-Id: <20150311111609.GG8544@dhcp-25-225.brq.redhat.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index cc31f7c..9541ba3 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s, return -EOPNOTSUPP; if (len != 1) { + memset(val, 0, len); pr_pic_unimpl("non byte read\n"); return 0; } -- cgit v0.10.2 From b91aa14d95bf4cf8ed0426bd25c0af1548519696 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 30 Mar 2015 15:39:19 +0300 Subject: KVM: x86: CMOV emulation on legacy mode is wrong On legacy mode CMOV emulation should still clear bits [63:32] even if the assignment is not done. The previous fix 140bad89fd ("KVM: x86: emulation of dword cmov on long-mode should clear [63:32]") was incomplete. Signed-off-by: Nadav Amit Message-Id: <1427719163-5429-2-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index c941abe..62f7a39 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5126,8 +5126,7 @@ twobyte_insn: case 0x40 ... 0x4f: /* cmov */ if (test_cc(ctxt->b, ctxt->eflags)) ctxt->dst.val = ctxt->src.val; - else if (ctxt->mode != X86EMUL_MODE_PROT64 || - ctxt->op_bytes != 4) + else if (ctxt->op_bytes != 4) ctxt->dst.type = OP_NONE; /* no writeback */ break; case 0x80 ... 0x8f: /* jnz rel, etc*/ -- cgit v0.10.2 From 6fd8e1275709a5bb084847eda6730b983538a572 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 30 Mar 2015 15:39:20 +0300 Subject: KVM: x86: POPA emulation may not clear bits [63:32] POPA should assign the values to the registers as usual registers are assigned. In other words, 32-bits register assignments should clear bits [63:32] of the register. Split the code of register assignments that will be used by future changes as well. Signed-off-by: Nadav Amit Message-Id: <1427719163-5429-3-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 62f7a39..4961dc5 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -478,6 +478,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask) *dest = (*dest & ~mask) | (src & mask); } +static void assign_register(unsigned long *reg, u64 val, int bytes) +{ + /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ + switch (bytes) { + case 1: + *(u8 *)reg = (u8)val; + break; + case 2: + *(u16 *)reg = (u16)val; + break; + case 4: + *reg = (u32)val; + break; /* 64b: zero-extend */ + case 8: + *reg = val; + break; + } +} + static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt) { return (1UL << (ctxt->ad_bytes << 3)) - 1; @@ -1691,21 +1710,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt, static void write_register_operand(struct operand *op) { - /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */ - switch (op->bytes) { - case 1: - *(u8 *)op->addr.reg = (u8)op->val; - break; - case 2: - *(u16 *)op->addr.reg = (u16)op->val; - break; - case 4: - *op->addr.reg = (u32)op->val; - break; /* 64b: zero-extend */ - case 8: - *op->addr.reg = op->val; - break; - } + return assign_register(op->addr.reg, op->val, op->bytes); } static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op) @@ -1926,6 +1931,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) { int rc = X86EMUL_CONTINUE; int reg = VCPU_REGS_RDI; + u32 val; while (reg >= VCPU_REGS_RAX) { if (reg == VCPU_REGS_RSP) { @@ -1933,9 +1939,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) --reg; } - rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); + rc = emulate_pop(ctxt, &val, ctxt->op_bytes); if (rc != X86EMUL_CONTINUE) break; + assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes); --reg; } return rc; -- cgit v0.10.2 From 900efe200e317649aecbeaa55619a4fc3adb2251 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Mon, 30 Mar 2015 15:39:21 +0300 Subject: KVM: x86: BSF and BSR emulation change register unnecassarily If the source of BSF and BSR is zero, the destination register should not change. That is how real hardware behaves. If we set the destination even with the same value that we had before, we may clear bits [63:32] unnecassarily. Signed-off-by: Nadav Amit Message-Id: <1427719163-5429-4-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4961dc5..7004577 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -962,6 +962,22 @@ FASTOP2(xadd); FASTOP2R(cmp, cmp_r); +static int em_bsf_c(struct x86_emulate_ctxt *ctxt) +{ + /* If src is zero, do not writeback, but update flags */ + if (ctxt->src.val == 0) + ctxt->dst.type = OP_NONE; + return fastop(ctxt, em_bsf); +} + +static int em_bsr_c(struct x86_emulate_ctxt *ctxt) +{ + /* If src is zero, do not writeback, but update flags */ + if (ctxt->src.val == 0) + ctxt->dst.type = OP_NONE; + return fastop(ctxt, em_bsr); +} + static u8 test_cc(unsigned int condition, unsigned long flags) { u8 rc; @@ -4188,7 +4204,8 @@ static const struct opcode twobyte_table[256] = { N, N, G(BitOp, group8), F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc), - F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr), + I(DstReg | SrcMem | ModRM, em_bsf_c), + I(DstReg | SrcMem | ModRM, em_bsr_c), D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov), /* 0xC0 - 0xC7 */ F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd), -- cgit v0.10.2 From 0efb04406de834d820f7ba150a00d1d3194aa8a6 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 29 Mar 2015 16:33:03 +0300 Subject: KVM: x86: removing redundant eflags bits definitions The eflags are redefined (using other defines) in emulate.c. Use the definition from processor-flags.h as some mess already started. No functional change. Signed-off-by: Nadav Amit Message-Id: <1427635984-8113-2-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bf5a160..7ba3d9d 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -84,8 +84,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define SELECTOR_TI_MASK (1 << 2) #define SELECTOR_RPL_MASK 0x03 -#define IOPL_SHIFT 12 - #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 #define KVM_MMU_HASH_SHIFT 10 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 7004577..e49caba 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -248,27 +248,7 @@ struct mode_dual { struct opcode mode64; }; -/* EFLAGS bit definitions. */ -#define EFLG_ID (1<<21) -#define EFLG_VIP (1<<20) -#define EFLG_VIF (1<<19) -#define EFLG_AC (1<<18) -#define EFLG_VM (1<<17) -#define EFLG_RF (1<<16) -#define EFLG_IOPL (3<<12) -#define EFLG_NT (1<<14) -#define EFLG_OF (1<<11) -#define EFLG_DF (1<<10) -#define EFLG_IF (1<<9) -#define EFLG_TF (1<<8) -#define EFLG_SF (1<<7) -#define EFLG_ZF (1<<6) -#define EFLG_AF (1<<4) -#define EFLG_PF (1<<2) -#define EFLG_CF (1<<0) - #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a -#define EFLG_RESERVED_ONE_MASK 2 enum x86_transfer_type { X86_TRANSFER_NONE, @@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt) * These EFLAGS bits are restored from saved value during emulation, and * any changes are written back to the saved value after emulation. */ -#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF) +#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\ + X86_EFLAGS_PF|X86_EFLAGS_CF) #ifdef CONFIG_X86_64 #define ON64(x) x @@ -1434,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, unsigned int in_page, n; unsigned int count = ctxt->rep_prefix ? address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; - in_page = (ctxt->eflags & EFLG_DF) ? + in_page = (ctxt->eflags & X86_EFLAGS_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count); @@ -1447,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, } if (ctxt->rep_prefix && (ctxt->d & String) && - !(ctxt->eflags & EFLG_DF)) { + !(ctxt->eflags & X86_EFLAGS_DF)) { ctxt->dst.data = rc->data + rc->pos; ctxt->dst.type = OP_MEM_STR; ctxt->dst.count = (rc->end - rc->pos) / size; @@ -1813,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, { int rc; unsigned long val, change_mask; - int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; int cpl = ctxt->ops->cpl(ctxt); rc = emulate_pop(ctxt, &val, len); if (rc != X86EMUL_CONTINUE) return rc; - change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF - | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID; + change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF | + X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT | + X86_EFLAGS_AC | X86_EFLAGS_ID; switch(ctxt->mode) { case X86EMUL_MODE_PROT64: case X86EMUL_MODE_PROT32: case X86EMUL_MODE_PROT16: if (cpl == 0) - change_mask |= EFLG_IOPL; + change_mask |= X86_EFLAGS_IOPL; if (cpl <= iopl) - change_mask |= EFLG_IF; + change_mask |= X86_EFLAGS_IF; break; case X86EMUL_MODE_VM86: if (iopl < 3) return emulate_gp(ctxt, 0); - change_mask |= EFLG_IF; + change_mask |= X86_EFLAGS_IF; break; default: /* real mode */ - change_mask |= (EFLG_IOPL | EFLG_IF); + change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF); break; } @@ -1939,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt) static int em_pushf(struct x86_emulate_ctxt *ctxt) { - ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM; + ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM; return em_push(ctxt); } @@ -1979,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq) if (rc != X86EMUL_CONTINUE) return rc; - ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC); + ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC); ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS); rc = em_push(ctxt); @@ -2045,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) unsigned long temp_eip = 0; unsigned long temp_eflags = 0; unsigned long cs = 0; - unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF | - EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF | - EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */ - unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP; + unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | + X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF | + X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF | + X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF | + X86_EFLAGS_AC | X86_EFLAGS_ID | + X86_EFLAGS_FIXED_BIT; + unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF | + X86_EFLAGS_VIP; /* TODO: Add stack limit check */ @@ -2077,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) ctxt->_eip = temp_eip; - if (ctxt->op_bytes == 4) ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask)); else if (ctxt->op_bytes == 2) { @@ -2086,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) } ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ - ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->eflags |= X86_EFLAGS_FIXED_BIT; ctxt->ops->set_nmi_mask(ctxt, false); return rc; @@ -2168,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); - ctxt->eflags &= ~EFLG_ZF; + ctxt->eflags &= ~X86_EFLAGS_ZF; } else { ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | (u32) reg_read(ctxt, VCPU_REGS_RBX); - ctxt->eflags |= EFLG_ZF; + ctxt->eflags |= X86_EFLAGS_ZF; } return X86EMUL_CONTINUE; } @@ -2245,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) ctxt->src.val = ctxt->dst.orig_val; fastop(ctxt, em_cmp); - if (ctxt->eflags & EFLG_ZF) { + if (ctxt->eflags & X86_EFLAGS_ZF) { /* Success: write back to memory; no update of EAX */ ctxt->src.type = OP_NONE; ctxt->dst.val = ctxt->src.orig_val; @@ -2404,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; - ctxt->eflags |= EFLG_RESERVED_ONE_MASK; + ctxt->eflags |= X86_EFLAGS_FIXED_BIT; #endif } else { /* legacy mode */ ops->get_msr(ctxt, MSR_STAR, &msr_data); ctxt->_eip = (u32)msr_data; - ctxt->eflags &= ~(EFLG_VM | EFLG_IF); + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); } return X86EMUL_CONTINUE; @@ -2448,7 +2434,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) if ((msr_data & 0xfffc) == 0x0) return emulate_gp(ctxt, 0); - ctxt->eflags &= ~(EFLG_VM | EFLG_IF); + ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; ss_sel = cs_sel + 8; if (efer & EFER_LMA) { @@ -2535,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt) return false; if (ctxt->mode == X86EMUL_MODE_VM86) return true; - iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT; + iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT; return ctxt->ops->cpl(ctxt) > iopl; } @@ -2977,7 +2963,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg, struct operand *op) { - int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count; + int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count; register_address_increment(ctxt, reg, df * op->bytes); op->addr.mem.ea = register_address(ctxt, reg); @@ -3516,7 +3502,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt) { u32 flags; - flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF; + flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF | + X86_EFLAGS_SF; flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8; ctxt->eflags &= ~0xffUL; @@ -4772,9 +4759,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt) if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) || (ctxt->b == 0xae) || (ctxt->b == 0xaf)) && (((ctxt->rep_prefix == REPE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == 0)) + ((ctxt->eflags & X86_EFLAGS_ZF) == 0)) || ((ctxt->rep_prefix == REPNE_PREFIX) && - ((ctxt->eflags & EFLG_ZF) == EFLG_ZF)))) + ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF)))) return true; return false; @@ -4926,7 +4913,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) /* All REP prefixes have the same first termination condition */ if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { ctxt->eip = ctxt->_eip; - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; goto done; } } @@ -4976,9 +4963,9 @@ special_insn: } if (ctxt->rep_prefix && (ctxt->d & String)) - ctxt->eflags |= EFLG_RF; + ctxt->eflags |= X86_EFLAGS_RF; else - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; if (ctxt->execute) { if (ctxt->d & Fastop) { @@ -5027,7 +5014,7 @@ special_insn: rc = emulate_int(ctxt, ctxt->src.val); break; case 0xce: /* into */ - if (ctxt->eflags & EFLG_OF) + if (ctxt->eflags & X86_EFLAGS_OF) rc = emulate_int(ctxt, 4); break; case 0xe9: /* jmp rel */ @@ -5040,19 +5027,19 @@ special_insn: break; case 0xf5: /* cmc */ /* complement carry flag from eflags reg */ - ctxt->eflags ^= EFLG_CF; + ctxt->eflags ^= X86_EFLAGS_CF; break; case 0xf8: /* clc */ - ctxt->eflags &= ~EFLG_CF; + ctxt->eflags &= ~X86_EFLAGS_CF; break; case 0xf9: /* stc */ - ctxt->eflags |= EFLG_CF; + ctxt->eflags |= X86_EFLAGS_CF; break; case 0xfc: /* cld */ - ctxt->eflags &= ~EFLG_DF; + ctxt->eflags &= ~X86_EFLAGS_DF; break; case 0xfd: /* std */ - ctxt->eflags |= EFLG_DF; + ctxt->eflags |= X86_EFLAGS_DF; break; default: goto cannot_emulate; @@ -5113,7 +5100,7 @@ writeback: } goto done; /* skip rip writeback */ } - ctxt->eflags &= ~EFLG_RF; + ctxt->eflags &= ~X86_EFLAGS_RF; } ctxt->eip = ctxt->_eip; -- cgit v0.10.2 From b32a99180027ec980af971d548781eac1f6bb9b5 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Sun, 29 Mar 2015 16:33:04 +0300 Subject: KVM: x86: Remove redundant definitions Some constants are redfined in emulate.c. Avoid it. s/SELECTOR_RPL_MASK/SEGMENT_RPL_MASK s/SELECTOR_TI_MASK/SEGMENT_TI_MASK No functional change. Signed-off-by: Nadav Amit Message-Id: <1427635984-8113-3-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7ba3d9d..30b28dc 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -81,9 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } -#define SELECTOR_TI_MASK (1 << 2) -#define SELECTOR_RPL_MASK 0x03 - #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 #define KVM_MMU_HASH_SHIFT 10 diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e49caba..cf7d424 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2435,7 +2435,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) return emulate_gp(ctxt, 0); ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF); - cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK; + cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK; ss_sel = cs_sel + 8; if (efer & EFER_LMA) { cs.d = 0; @@ -2502,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) return emulate_gp(ctxt, 0); break; } - cs_sel |= SELECTOR_RPL_MASK; - ss_sel |= SELECTOR_RPL_MASK; + cs_sel |= SEGMENT_RPL_MASK; + ss_sel |= SEGMENT_RPL_MASK; ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fdd9f8b..63ca692 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3263,8 +3263,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, * default value. */ if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS) - save->selector &= ~SELECTOR_RPL_MASK; - save->dpl = save->selector & SELECTOR_RPL_MASK; + save->selector &= ~SEGMENT_RPL_MASK; + save->dpl = save->selector & SEGMENT_RPL_MASK; save->s = 1; } vmx_set_segment(vcpu, save, seg); @@ -3837,7 +3837,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu) unsigned int cs_rpl; vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); - cs_rpl = cs.selector & SELECTOR_RPL_MASK; + cs_rpl = cs.selector & SEGMENT_RPL_MASK; if (cs.unusable) return false; @@ -3865,7 +3865,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu) unsigned int ss_rpl; vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - ss_rpl = ss.selector & SELECTOR_RPL_MASK; + ss_rpl = ss.selector & SEGMENT_RPL_MASK; if (ss.unusable) return true; @@ -3887,7 +3887,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg) unsigned int rpl; vmx_get_segment(vcpu, &var, seg); - rpl = var.selector & SELECTOR_RPL_MASK; + rpl = var.selector & SEGMENT_RPL_MASK; if (var.unusable) return true; @@ -3914,7 +3914,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu) if (tr.unusable) return false; - if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */ return false; if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */ return false; @@ -3932,7 +3932,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu) if (ldtr.unusable) return true; - if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */ + if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */ return false; if (ldtr.type != 2) return false; @@ -3949,8 +3949,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu) vmx_get_segment(vcpu, &cs, VCPU_SREG_CS); vmx_get_segment(vcpu, &ss, VCPU_SREG_SS); - return ((cs.selector & SELECTOR_RPL_MASK) == - (ss.selector & SELECTOR_RPL_MASK)); + return ((cs.selector & SEGMENT_RPL_MASK) == + (ss.selector & SEGMENT_RPL_MASK)); } /* -- cgit v0.10.2 From 2f729b10bb74f97797beb310113f6182f262d36a Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Sun, 29 Mar 2015 01:27:17 +0300 Subject: KVM: remove useless check of "ret" variable prior to returning the same value A trivial code cleanup. This `if` is redundant. Signed-off-by: Eugene Korenevsky Message-Id: <20150328222717.GA6508@gnote> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index cf7d424..b304728 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2791,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, return ret; ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, X86_TRANSFER_TASK_SWITCH, NULL); - if (ret != X86EMUL_CONTINUE) - return ret; - return X86EMUL_CONTINUE; + return ret; } static int task_switch_32(struct x86_emulate_ctxt *ctxt, -- cgit v0.10.2 From 0ba10d53920d030cd7772a9553b13b5ea1aa4115 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:36 +0000 Subject: KVM: arm/arm64: merge GICv3 RD_base and SGI_base register frames Currently we handle the redistributor registers in two separate MMIO regions, one for the overall behaviour and SPIs and one for the SGIs/PPIs. That latter forces the creation of _two_ KVM I/O bus devices for each redistributor. Since the spec mandates those two pages to be contigious, we could as well merge them and save the churn with the second KVM I/O bus device. Signed-off-by: Andre Przywara Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index 14943e3..2f03a36 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -502,6 +502,43 @@ static const struct vgic_io_range vgic_v3_dist_ranges[] = { {}, }; +static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + /* since we don't support LPIs, this register is zero for now */ + vgic_reg_access(mmio, NULL, offset, + ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); + return false; +} + +static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + u64 mpidr; + struct kvm_vcpu *redist_vcpu = mmio->private; + int target_vcpu_id = redist_vcpu->vcpu_id; + + /* the upper 32 bits contain the affinity value */ + if ((offset & ~3) == 4) { + mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); + reg = compress_mpidr(mpidr); + + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; + } + + reg = redist_vcpu->vcpu_id << 8; + if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) + reg |= GICR_TYPER_LAST; + vgic_reg_access(mmio, ®, offset, + ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); + return false; +} + static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -570,146 +607,107 @@ static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, return vgic_handle_cfg_reg(reg, mmio, offset); } -static const struct vgic_io_range vgic_redist_sgi_ranges[] = { +#define SGI_base(x) ((x) + SZ_64K) + +static const struct vgic_io_range vgic_redist_ranges[] = { + { + .base = GICR_CTLR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_ctlr_redist, + }, { - .base = GICR_IGROUPR0, + .base = GICR_TYPER, + .len = 0x08, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_typer_redist, + }, + { + .base = GICR_IIDR, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_iidr, + }, + { + .base = GICR_WAKER, + .len = 0x04, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_raz_wi, + }, + { + .base = GICR_IDREGS, + .len = 0x30, + .bits_per_irq = 0, + .handle_mmio = handle_mmio_idregs, + }, + { + .base = SGI_base(GICR_IGROUPR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_rao_wi, }, { - .base = GICR_ISENABLER0, + .base = SGI_base(GICR_ISENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_enable_reg_redist, }, { - .base = GICR_ICENABLER0, + .base = SGI_base(GICR_ICENABLER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_enable_reg_redist, }, { - .base = GICR_ISPENDR0, + .base = SGI_base(GICR_ISPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_set_pending_reg_redist, }, { - .base = GICR_ICPENDR0, + .base = SGI_base(GICR_ICPENDR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_clear_pending_reg_redist, }, { - .base = GICR_ISACTIVER0, + .base = SGI_base(GICR_ISACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_ICACTIVER0, + .base = SGI_base(GICR_ICACTIVER0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_IPRIORITYR0, + .base = SGI_base(GICR_IPRIORITYR0), .len = 0x20, .bits_per_irq = 8, .handle_mmio = handle_mmio_priority_reg_redist, }, { - .base = GICR_ICFGR0, + .base = SGI_base(GICR_ICFGR0), .len = 0x08, .bits_per_irq = 2, .handle_mmio = handle_mmio_cfg_reg_redist, }, { - .base = GICR_IGRPMODR0, + .base = SGI_base(GICR_IGRPMODR0), .len = 0x04, .bits_per_irq = 1, .handle_mmio = handle_mmio_raz_wi, }, { - .base = GICR_NSACR, + .base = SGI_base(GICR_NSACR), .len = 0x04, .handle_mmio = handle_mmio_raz_wi, }, {}, }; -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static const struct vgic_io_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - /* * This function splits accesses between the distributor and the two * redistributor parts (private/SPI). As each redistributor is accessible @@ -726,7 +724,6 @@ static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, unsigned long rdbase = dist->vgic_redist_base; int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); int vcpu_id; - const struct vgic_io_range *mmio_range; if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { return vgic_handle_mmio_range(vcpu, run, mmio, @@ -741,13 +738,8 @@ static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); - if (mmio->phys_addr >= rdbase + SGI_BASE_OFFSET) { - rdbase += SGI_BASE_OFFSET; - mmio_range = vgic_redist_sgi_ranges; - } else { - mmio_range = vgic_redist_ranges; - } - return vgic_handle_mmio_range(vcpu, run, mmio, mmio_range, rdbase); + return vgic_handle_mmio_range(vcpu, run, mmio, vgic_redist_ranges, + rdbase); } static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) -- cgit v0.10.2 From fb8f61abab48467ef670ef165ff664cdc94f742e Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Thu, 26 Mar 2015 14:39:37 +0000 Subject: KVM: arm/arm64: prepare GICv3 emulation to use kvm_io_bus MMIO handling Using the framework provided by the recent vgic.c changes, we register a kvm_io_bus device on mapping the virtual GICv3 resources. The distributor mapping is pretty straight forward, but the redistributors need some more love, since they need to be tagged with the respective redistributor (read: VCPU) they are connected with. We use the kvm_io_bus framework to register one devices per VCPU. Signed-off-by: Andre Przywara Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4523984..d6705f4 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -252,6 +252,7 @@ struct vgic_dist { struct vgic_vm_ops vm_ops; struct vgic_io_device dist_iodev; + struct vgic_io_device *redist_iodevs; }; struct vgic_v2_cpu_if { diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index 2f03a36..eb1a797 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -758,6 +758,9 @@ static int vgic_v3_map_resources(struct kvm *kvm, { int ret = 0; struct vgic_dist *dist = &kvm->arch.vgic; + gpa_t rdbase = dist->vgic_redist_base; + struct vgic_io_device *iodevs = NULL; + int i; if (!irqchip_in_kernel(kvm)) return 0; @@ -783,7 +786,41 @@ static int vgic_v3_map_resources(struct kvm *kvm, goto out; } - kvm->arch.vgic.ready = true; + ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, + GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, + -1, &dist->dist_iodev); + if (ret) + goto out; + + iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); + if (!iodevs) { + ret = -ENOMEM; + goto out_unregister; + } + + for (i = 0; i < dist->nr_cpus; i++) { + ret = vgic_register_kvm_io_dev(kvm, rdbase, + SZ_128K, vgic_redist_ranges, + i, &iodevs[i]); + if (ret) + goto out_unregister; + rdbase += GIC_V3_REDIST_SIZE; + } + + dist->redist_iodevs = iodevs; + dist->ready = true; + goto out; + +out_unregister: + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); + if (iodevs) { + for (i = 0; i < dist->nr_cpus; i++) { + if (iodevs[i].dev.ops) + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, + &iodevs[i].dev); + } + } + out: if (ret) kvm_vgic_destroy(kvm); -- cgit v0.10.2 From 950324ab81bf006542f30a1d1ab3d65fcf15cbc1 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Sat, 28 Mar 2015 01:13:13 +0000 Subject: KVM: arm/arm64: rework MMIO abort handling to use KVM MMIO bus Currently we have struct kvm_exit_mmio for encapsulating MMIO abort data to be passed on from syndrome decoding all the way down to the VGIC register handlers. Now as we switch the MMIO handling to be routed through the KVM MMIO bus, it does not make sense anymore to use that structure already from the beginning. So we keep the data in local variables until we put them into the kvm_io_bus framework. Then we fill kvm_exit_mmio in the VGIC only, making it a VGIC private structure. On that way we replace the data buffer in that structure with a pointer pointing to a single location in a local variable, so we get rid of some copying on the way. With all of the virtual GIC emulation code now being registered with the kvm_io_bus, we can remove all of the old MMIO handling code and its dispatching functionality. I didn't bother to rename kvm_exit_mmio (to vgic_mmio or something), because that touches a lot of code lines without any good reason. This is based on an original patch by Nikolay. Signed-off-by: Andre Przywara Cc: Nikolay Nikolaev Reviewed-by: Marc Zyngier Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h index 3f83db2..d8e90c8 100644 --- a/arch/arm/include/asm/kvm_mmio.h +++ b/arch/arm/include/asm/kvm_mmio.h @@ -28,28 +28,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 5d3bfc0..974b1c6 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) return 0; } -static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, - struct kvm_exit_mmio *mmio) +static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) { unsigned long rt; - int len; - bool is_write, sign_extend; + int access_size; + bool sign_extend; if (kvm_vcpu_dabt_isextabt(vcpu)) { /* cache operation on I/O addr, tell guest unsupported */ @@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return 1; } - len = kvm_vcpu_dabt_get_as(vcpu); - if (unlikely(len < 0)) - return len; + access_size = kvm_vcpu_dabt_get_as(vcpu); + if (unlikely(access_size < 0)) + return access_size; - is_write = kvm_vcpu_dabt_iswrite(vcpu); + *is_write = kvm_vcpu_dabt_iswrite(vcpu); sign_extend = kvm_vcpu_dabt_issext(vcpu); rt = kvm_vcpu_dabt_get_rd(vcpu); - mmio->is_write = is_write; - mmio->phys_addr = fault_ipa; - mmio->len = len; + *len = access_size; vcpu->arch.mmio_decode.sign_extend = sign_extend; vcpu->arch.mmio_decode.rt = rt; @@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa) { - struct kvm_exit_mmio mmio; unsigned long data; unsigned long rt; int ret; + bool is_write; + int len; + u8 data_buf[8]; /* - * Prepare MMIO operation. First stash it in a private - * structure that we can use for in-kernel emulation. If the - * kernel can't handle it, copy it into run->mmio and let user - * space do its magic. + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. */ - if (kvm_vcpu_dabt_isvalid(vcpu)) { - ret = decode_hsr(vcpu, fault_ipa, &mmio); + ret = decode_hsr(vcpu, &is_write, &len); if (ret) return ret; } else { @@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, rt = vcpu->arch.mmio_decode.rt; - if (mmio.is_write) { - data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), - mmio.len); + if (is_write) { + data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len); + + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data); + mmio_write_buf(data_buf, len, data); - trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len, - fault_ipa, data); - mmio_write_buf(mmio.data, mmio.len, data); + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } else { - trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len, + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, fault_ipa, 0); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); } - if (vgic_handle_mmio(vcpu, run, &mmio)) + /* Now prepare kvm_run for the potential return to userland. */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + memcpy(run->mmio.data, data_buf, len); + + if (!ret) { + /* We handled the access successfully in the kernel. */ + kvm_handle_mmio_return(vcpu, run); return 1; + } - kvm_prepare_mmio(run, &mmio); + run->exit_reason = KVM_EXIT_MMIO; return 0; } diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 9f52beb..889c908 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -31,28 +31,6 @@ struct kvm_decode { bool sign_extend; }; -/* - * The in-kernel MMIO emulation code wants to use a copy of run->mmio, - * which is an anonymous type. Use our own type instead. - */ -struct kvm_exit_mmio { - phys_addr_t phys_addr; - u8 data[8]; - u32 len; - bool is_write; - void *private; -}; - -static inline void kvm_prepare_mmio(struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - run->mmio.phys_addr = mmio->phys_addr; - run->mmio.len = mmio->len; - run->mmio.is_write = mmio->is_write; - memcpy(run->mmio.data, mmio->data, mmio->len); - run->exit_reason = KVM_EXIT_MMIO; -} - int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run); int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, phys_addr_t fault_ipa); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d6705f4..16ec2c8 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -140,8 +140,6 @@ struct vgic_params { }; struct vgic_vm_ops { - bool (*handle_mmio)(struct kvm_vcpu *, struct kvm_run *, - struct kvm_exit_mmio *); bool (*queue_sgi)(struct kvm_vcpu *, int irq); void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source); int (*init_model)(struct kvm *); @@ -313,8 +311,6 @@ struct vgic_cpu { struct kvm; struct kvm_vcpu; -struct kvm_run; -struct kvm_exit_mmio; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); @@ -330,8 +326,6 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c index 7460b37..1390797 100644 --- a/virt/kvm/arm/vgic-v2-emul.c +++ b/virt/kvm/arm/vgic-v2-emul.c @@ -404,24 +404,6 @@ static const struct vgic_io_range vgic_dist_ranges[] = { {} }; -static bool vgic_v2_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - unsigned long base = vcpu->kvm->arch.vgic.vgic_dist_base; - - if (!is_in_range(mmio->phys_addr, mmio->len, base, - KVM_VGIC_V2_DIST_SIZE)) - return false; - - /* GICv2 does not support accesses wider than 32 bits */ - if (mmio->len > 4) { - kvm_inject_dabt(vcpu, mmio->phys_addr); - return true; - } - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_dist_ranges, base); -} - static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) { struct kvm *kvm = vcpu->kvm; @@ -580,7 +562,6 @@ void vgic_v2_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v2_handle_mmio; dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; dist->vm_ops.init_model = vgic_v2_init_model; @@ -690,6 +671,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, struct kvm_vcpu *vcpu, *tmp_vcpu; struct vgic_dist *vgic; struct kvm_exit_mmio mmio; + u32 data; offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> @@ -711,6 +693,7 @@ static int vgic_attr_regs_access(struct kvm_device *dev, mmio.len = 4; mmio.is_write = is_write; + mmio.data = &data; if (is_write) mmio_data_write(&mmio, ~0, *reg); switch (attr->group) { diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index eb1a797..e9c3a7a 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -708,40 +708,6 @@ static const struct vgic_io_range vgic_redist_ranges[] = { {}, }; -/* - * This function splits accesses between the distributor and the two - * redistributor parts (private/SPI). As each redistributor is accessible - * from any CPU, we have to determine the affected VCPU by taking the faulting - * address into account. We then pass this VCPU to the handler function via - * the private parameter. - */ -#define SGI_BASE_OFFSET SZ_64K -static bool vgic_v3_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long dbase = dist->vgic_dist_base; - unsigned long rdbase = dist->vgic_redist_base; - int nrcpus = atomic_read(&vcpu->kvm->online_vcpus); - int vcpu_id; - - if (is_in_range(mmio->phys_addr, mmio->len, dbase, GIC_V3_DIST_SIZE)) { - return vgic_handle_mmio_range(vcpu, run, mmio, - vgic_v3_dist_ranges, dbase); - } - - if (!is_in_range(mmio->phys_addr, mmio->len, rdbase, - GIC_V3_REDIST_SIZE * nrcpus)) - return false; - - vcpu_id = (mmio->phys_addr - rdbase) / GIC_V3_REDIST_SIZE; - rdbase += (vcpu_id * GIC_V3_REDIST_SIZE); - mmio->private = kvm_get_vcpu(vcpu->kvm, vcpu_id); - - return vgic_handle_mmio_range(vcpu, run, mmio, vgic_redist_ranges, - rdbase); -} - static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) { if (vgic_queue_irq(vcpu, 0, irq)) { @@ -861,7 +827,6 @@ void vgic_v3_init_emulation(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; - dist->vm_ops.handle_mmio = vgic_v3_handle_mmio; dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; dist->vm_ops.init_model = vgic_v3_init_model; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e968179..b70174e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -758,7 +758,6 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, unsigned long offset, const struct vgic_io_range *range) { - u32 *data32 = (void *)mmio->data; struct kvm_exit_mmio mmio32; bool ret; @@ -775,70 +774,17 @@ static bool call_range_handler(struct kvm_vcpu *vcpu, mmio32.private = mmio->private; mmio32.phys_addr = mmio->phys_addr + 4; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[1]; + mmio32.data = &((u32 *)mmio->data)[1]; ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - if (!mmio->is_write) - data32[1] = *(u32 *)mmio32.data; mmio32.phys_addr = mmio->phys_addr; - if (mmio->is_write) - *(u32 *)mmio32.data = data32[0]; + mmio32.data = &((u32 *)mmio->data)[0]; ret |= range->handle_mmio(vcpu, &mmio32, offset); - if (!mmio->is_write) - data32[0] = *(u32 *)mmio32.data; return ret; } /** - * vgic_handle_mmio_range - handle an in-kernel MMIO access - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * @ranges: array of MMIO ranges in a given region - * @mmio_base: base address of that region - * - * returns true if the MMIO access could be performed - */ -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct vgic_io_range *ranges, - unsigned long mmio_base) -{ - const struct vgic_io_range *range; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool updated_state; - unsigned long offset; - - offset = mmio->phys_addr - mmio_base; - range = vgic_find_range(ranges, mmio->len, offset); - if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", - mmio->is_write, mmio->phys_addr, mmio->len); - return false; - } - - spin_lock(&vcpu->kvm->arch.vgic.lock); - offset -= range->base; - if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, mmio, offset, range); - } else { - if (!mmio->is_write) - memset(mmio->data, 0, mmio->len); - updated_state = false; - } - spin_unlock(&vcpu->kvm->arch.vgic.lock); - kvm_prepare_mmio(run, mmio); - kvm_handle_mmio_return(vcpu, run); - - if (updated_state) - vgic_kick_vcpus(vcpu->kvm); - - return true; -} - -/** * vgic_handle_mmio_access - handle an in-kernel MMIO access * This is called by the read/write KVM IO device wrappers below. * @vcpu: pointer to the vcpu performing the access @@ -873,23 +819,24 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, mmio.phys_addr = addr; mmio.len = len; mmio.is_write = is_write; - if (is_write) - memcpy(mmio.data, val, len); + mmio.data = val; mmio.private = iodev->redist_vcpu; spin_lock(&dist->lock); offset -= range->base; if (vgic_validate_access(dist, range, offset)) { updated_state = call_range_handler(vcpu, &mmio, offset, range); - if (!is_write) - memcpy(val, mmio.data, len); } else { if (!is_write) memset(val, 0, len); updated_state = false; } spin_unlock(&dist->lock); - kvm_prepare_mmio(run, &mmio); + run->mmio.is_write = is_write; + run->mmio.len = len; + run->mmio.phys_addr = addr; + memcpy(run->mmio.data, val, len); + kvm_handle_mmio_return(vcpu, run); if (updated_state) @@ -898,30 +845,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, return 0; } -/** - * vgic_handle_mmio - handle an in-kernel MMIO access for the GIC emulation - * @vcpu: pointer to the vcpu performing the access - * @run: pointer to the kvm_run structure - * @mmio: pointer to the data describing the access - * - * returns true if the MMIO access has been performed in kernel space, - * and false if it needs to be emulated in user space. - * Calls the actual handling routine for the selected VGIC model. - */ -bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio) -{ - if (!irqchip_in_kernel(vcpu->kvm)) - return false; - - /* - * This will currently call either vgic_v2_handle_mmio() or - * vgic_v3_handle_mmio(), which in turn will call - * vgic_handle_mmio_range() defined above. - */ - return vcpu->kvm->arch.vgic.vm_ops.handle_mmio(vcpu, run, mmio); -} - static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr, int len, void *val) diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h index 28fa3aa..0df74cb 100644 --- a/virt/kvm/arm/vgic.h +++ b/virt/kvm/arm/vgic.h @@ -59,6 +59,14 @@ void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); +struct kvm_exit_mmio { + phys_addr_t phys_addr; + void *data; + u32 len; + bool is_write; + void *private; +}; + void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, phys_addr_t offset, int mode); bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, @@ -99,11 +107,6 @@ const struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, int len, gpa_t offset); -bool vgic_handle_mmio_range(struct kvm_vcpu *vcpu, struct kvm_run *run, - struct kvm_exit_mmio *mmio, - const struct vgic_io_range *ranges, - unsigned long mmio_base); - bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id, int access); -- cgit v0.10.2 From d44758c0dfc5993a4b9952935a7eae4c91ebb6b4 Mon Sep 17 00:00:00 2001 From: Nikolay Nikolaev Date: Sat, 24 Jan 2015 12:00:02 +0000 Subject: KVM: arm/arm64: enable KVM_CAP_IOEVENTFD As the infrastructure for eventfd has now been merged, report the ioeventfd capability as being supported. Signed-off-by: Nikolay Nikolaev [maz: grouped the case entry with the others, fixed commit log] Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e98370c..6f53645 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -172,6 +172,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_IRQCHIP: case KVM_CAP_IRQFD: + case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: -- cgit v0.10.2 From 2ba459685204af53b034d269d5cdb3059d4b471e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Mar 2015 13:12:32 +0100 Subject: KVM: s390: store the breaking-event address on pgm interrupts If the PER-3 facility is installed, the breaking-event address is to be stored in the low core. There is no facility bit for PER-3 in stfl(e) and Linux always uses the value at address 272 no matter if PER-3 is available or not. We can't hide its existence from the guest. All program interrupts injected via the SIE automatically store this information if the PER-3 facility is available in the hypervisor. Also the itdb contains the address automatically. As there is no switch to turn this mechanism off, let's simply make it consistent and also store the breaking event address in case of manual program interrupt injection. Reviewed-by: Jens Freimann Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2afec60..2361b8e 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -585,6 +585,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) kvm_s390_rewind_psw(vcpu, ilc); rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC); + rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea, + (u64 *) __LC_LAST_BREAK); rc |= put_guest_lc(vcpu, pgm_info.code, (u16 *)__LC_PGM_INT_CODE); rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW, -- cgit v0.10.2 From a3ed8dae6e3db479ca275883ba7fe994170b0ae6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 18 Mar 2015 13:54:31 +0100 Subject: KVM: s390: enable more features that need no hypervisor changes After some review about what these facilities do, the following facilities will work under KVM and can, therefore, be reported to the guest if the cpu model and the host cpu provide this bit. There are plans underway to make the whole bit thing more readable, but its not yet finished. So here are some last bit changes and we enhance the KVM mask with: 9 The sense-running-status facility is installed in the z/Architecture architectural mode. ---> handled by SIE or KVM 10 The conditional-SSKE facility is installed in the z/Architecture architectural mode. ---> handled by SIE. KVM will retry SIE 13 The IPTE-range facility is installed in the z/Architecture architectural mode. ---> handled by SIE. KVM will retry SIE 36 The enhanced-monitor facility is installed in the z/Architecture architectural mode. ---> handled by SIE 47 The CMPSC-enhancement facility is installed in the z/Architecture architectural mode. ---> handled by SIE 48 The decimal-floating-point zoned-conversion facility is installed in the z/Architecture architectural mode. ---> handled by SIE 49 The execution-hint, load-and-trap, miscellaneous- instruction-extensions and processor-assist ---> handled by SIE 51 The local-TLB-clearing facility is installed in the z/Architecture architectural mode. ---> handled by SIE 52 The interlocked-access facility 2 is installed. ---> handled by SIE 53 The load/store-on-condition facility 2 and load-and- zero-rightmost-byte facility are installed in the z/Architecture architectural mode. ---> handled by SIE 57 The message-security-assist-extension-5 facility is installed in the z/Architecture architectural mode. ---> handled by SIE 66 The reset-reference-bits-multiple facility is installed in the z/Architecture architectural mode. ---> handled by SIE. KVM will retry SIE 80 The decimal-floating-point packed-conversion facility is installed in the z/Architecture architectural mode. ---> handled by SIE Signed-off-by: Christian Borntraeger Tested-by: Michael Mueller Acked-by: Cornelia Huck diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9072127..a130885 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -105,8 +105,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { /* upper facilities limit for kvm */ unsigned long kvm_s390_fac_list_mask[] = { - 0xff82fffbf4fc2000UL, - 0x005c000000000000UL, + 0xffe6fffbfcfdfc40UL, + 0x205c800000000000UL, }; unsigned long kvm_s390_fac_list_mask_size(void) -- cgit v0.10.2 From 1d804d079a92138d011900785193b6b00b44bc00 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 30 Mar 2015 16:46:09 -0700 Subject: x86: Use bool function return values of true/false not 1/0 Use the normal return values for bool functions Signed-off-by: Joe Perches Message-Id: <9f593eb2f43b456851cd73f7ed09654ca58fb570.1427759009.git.joe@perches.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index e62cf89..c1adf33 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void) static inline bool kvm_para_available(void) { - return 0; + return false; } static inline unsigned int kvm_arch_para_features(void) diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 4452eed..2622846 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -26,7 +26,7 @@ static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *best; if (!static_cpu_has(X86_FEATURE_XSAVE)) - return 0; + return false; best = kvm_find_cpuid_entry(vcpu, 1, 0); return best && (best->ecx & bit(X86_FEATURE_XSAVE)); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 63ca692..0caaf56 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7314,21 +7314,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, else if (port < 0x10000) bitmap = vmcs12->io_bitmap_b; else - return 1; + return true; bitmap += (port & 0x7fff) / 8; if (last_bitmap != bitmap) if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1)) - return 1; + return true; if (b & (1 << (port & 7))) - return 1; + return true; port++; size--; last_bitmap = bitmap; } - return 0; + return false; } /* @@ -7344,7 +7344,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, gpa_t bitmap; if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS)) - return 1; + return true; /* * The MSR_BITMAP page is divided into four 1024-byte bitmaps, @@ -7363,10 +7363,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, if (msr_index < 1024*8) { unsigned char b; if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1)) - return 1; + return true; return 1 & (b >> (msr_index & 7)); } else - return 1; /* let L1 handle the wrong parameter */ + return true; /* let L1 handle the wrong parameter */ } /* @@ -7388,7 +7388,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, case 0: if (vmcs12->cr0_guest_host_mask & (val ^ vmcs12->cr0_read_shadow)) - return 1; + return true; break; case 3: if ((vmcs12->cr3_target_count >= 1 && @@ -7399,37 +7399,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, vmcs12->cr3_target_value2 == val) || (vmcs12->cr3_target_count >= 4 && vmcs12->cr3_target_value3 == val)) - return 0; + return false; if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) - return 1; + return true; break; case 4: if (vmcs12->cr4_guest_host_mask & (vmcs12->cr4_read_shadow ^ val)) - return 1; + return true; break; case 8: if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING)) - return 1; + return true; break; } break; case 2: /* clts */ if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) && (vmcs12->cr0_read_shadow & X86_CR0_TS)) - return 1; + return true; break; case 1: /* mov from cr */ switch (cr) { case 3: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_CR3_STORE_EXITING) - return 1; + return true; break; case 8: if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_CR8_STORE_EXITING) - return 1; + return true; break; } break; @@ -7440,14 +7440,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, */ if (vmcs12->cr0_guest_host_mask & 0xe & (val ^ vmcs12->cr0_read_shadow)) - return 1; + return true; if ((vmcs12->cr0_guest_host_mask & 0x1) && !(vmcs12->cr0_read_shadow & 0x1) && (val & 0x1)) - return 1; + return true; break; } - return 0; + return false; } /* @@ -7470,43 +7470,43 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) KVM_ISA_VMX); if (vmx->nested.nested_run_pending) - return 0; + return false; if (unlikely(vmx->fail)) { pr_info_ratelimited("%s failed vm entry %x\n", __func__, vmcs_read32(VM_INSTRUCTION_ERROR)); - return 1; + return true; } switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: if (!is_exception(intr_info)) - return 0; + return false; else if (is_page_fault(intr_info)) return enable_ept; else if (is_no_device(intr_info) && !(vmcs12->guest_cr0 & X86_CR0_TS)) - return 0; + return false; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: - return 0; + return false; case EXIT_REASON_TRIPLE_FAULT: - return 1; + return true; case EXIT_REASON_PENDING_INTERRUPT: return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING); case EXIT_REASON_NMI_WINDOW: return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING); case EXIT_REASON_TASK_SWITCH: - return 1; + return true; case EXIT_REASON_CPUID: if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) - return 0; - return 1; + return false; + return true; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); case EXIT_REASON_INVD: - return 1; + return true; case EXIT_REASON_INVLPG: return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING); case EXIT_REASON_RDPMC: @@ -7523,7 +7523,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * VMX instructions trap unconditionally. This allows L1 to * emulate them for its L2 guest, i.e., allows 3-level nesting! */ - return 1; + return true; case EXIT_REASON_CR_ACCESS: return nested_vmx_exit_handled_cr(vcpu, vmcs12); case EXIT_REASON_DR_ACCESS: @@ -7534,7 +7534,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_MSR_WRITE: return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason); case EXIT_REASON_INVALID_STATE: - return 1; + return true; case EXIT_REASON_MWAIT_INSTRUCTION: return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING); case EXIT_REASON_MONITOR_INSTRUCTION: @@ -7544,7 +7544,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) nested_cpu_has2(vmcs12, SECONDARY_EXEC_PAUSE_LOOP_EXITING); case EXIT_REASON_MCE_DURING_VMENTRY: - return 0; + return false; case EXIT_REASON_TPR_BELOW_THRESHOLD: return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: @@ -7553,7 +7553,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_APIC_WRITE: case EXIT_REASON_EOI_INDUCED: /* apic_write and eoi_induced should exit unconditionally. */ - return 1; + return true; case EXIT_REASON_EPT_VIOLATION: /* * L0 always deals with the EPT violation. If nested EPT is @@ -7561,7 +7561,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * missing in the guest EPT table (EPT12), the EPT violation * will be injected with nested_ept_inject_page_fault() */ - return 0; + return false; case EXIT_REASON_EPT_MISCONFIG: /* * L2 never uses directly L1's EPT, but rather L0's own EPT @@ -7569,11 +7569,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) * (EPT on EPT). So any problems with the structure of the * table is L0's fault. */ - return 0; + return false; case EXIT_REASON_WBINVD: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); case EXIT_REASON_XSETBV: - return 1; + return true; case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS: /* * This should never happen, since it is not possible to @@ -7583,7 +7583,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); default: - return 1; + return true; } } -- cgit v0.10.2 From 94aa033efcac47b09db22cb561e135baf37b7887 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 16 Mar 2015 12:17:13 +0100 Subject: KVM: s390: fix get_all_floating_irqs This fixes a bug introduced with commit c05c4186bbe4 ("KVM: s390: add floating irq controller"). get_all_floating_irqs() does copy_to_user() while holding a spin lock. Let's fix this by filling a temporary buffer first and copy it to userspace after giving up the lock. Cc: # 3.18+: 69a8d4562638 KVM: s390: no need to hold... Reviewed-by: David Hildenbrand Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/Documentation/virtual/kvm/devices/s390_flic.txt b/Documentation/virtual/kvm/devices/s390_flic.txt index 4ceef53..d1ad9d5 100644 --- a/Documentation/virtual/kvm/devices/s390_flic.txt +++ b/Documentation/virtual/kvm/devices/s390_flic.txt @@ -27,6 +27,9 @@ Groups: Copies all floating interrupts into a buffer provided by userspace. When the buffer is too small it returns -ENOMEM, which is the indication for userspace to try again with a bigger buffer. + -ENOBUFS is returned when the allocation of a kernelspace buffer has + failed. + -EFAULT is returned when copying data to userspace failed. All interrupts remain pending, i.e. are not deleted from the list of currently pending interrupts. attr->addr contains the userspace address of the buffer into which all diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2361b8e..5ebd500 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1477,61 +1478,66 @@ void kvm_s390_clear_float_irqs(struct kvm *kvm) spin_unlock(&fi->lock); } -static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti, - u8 *addr) +static void inti_to_irq(struct kvm_s390_interrupt_info *inti, + struct kvm_s390_irq *irq) { - struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr; - struct kvm_s390_irq irq = {0}; - - irq.type = inti->type; + irq->type = inti->type; switch (inti->type) { case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: case KVM_S390_INT_SERVICE: - irq.u.ext = inti->ext; + irq->u.ext = inti->ext; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - irq.u.io = inti->io; + irq->u.io = inti->io; break; case KVM_S390_MCHK: - irq.u.mchk = inti->mchk; + irq->u.mchk = inti->mchk; break; - default: - return -EINVAL; } - - if (copy_to_user(uptr, &irq, sizeof(irq))) - return -EFAULT; - - return 0; } -static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len) +static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) { struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; + struct kvm_s390_irq *buf; + int max_irqs; int ret = 0; int n = 0; + if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) + return -EINVAL; + + /* + * We are already using -ENOMEM to signal + * userspace it may retry with a bigger buffer, + * so we need to use something else for this case + */ + buf = vzalloc(len); + if (!buf) + return -ENOBUFS; + + max_irqs = len / sizeof(struct kvm_s390_irq); + fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) { - if (len < sizeof(struct kvm_s390_irq)) { + if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; break; } - ret = copy_irq_to_user(inti, buf); - if (ret) - break; - buf += sizeof(struct kvm_s390_irq); - len -= sizeof(struct kvm_s390_irq); + inti_to_irq(inti, &buf[n]); n++; } - spin_unlock(&fi->lock); + if (!ret && n > 0) { + if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) + ret = -EFAULT; + } + vfree(buf); return ret < 0 ? ret : n; } @@ -1542,7 +1548,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) switch (attr->group) { case KVM_DEV_FLIC_GET_ALL_IRQS: - r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr, + r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr, attr->attr); break; default: -- cgit v0.10.2 From 6d3da241416e6088f83a7ff1f37fb6bb518d9bc8 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Wed, 3 Jul 2013 15:18:35 +0200 Subject: KVM: s390: deliver floating interrupts in order of priority This patch makes interrupt handling compliant to the z/Architecture Principles of Operation with regard to interrupt priorities. Add a bitmap for pending floating interrupts. Each bit relates to a interrupt type and its list. A turned on bit indicates that a list contains items (interrupts) which need to be delivered. When delivering interrupts on a cpu we can merge the existing bitmap for cpu-local interrupts and floating interrupts and have a single mechanism for delivery. Currently we have one list for all kinds of floating interrupts and a corresponding spin lock. This patch adds a separate list per interrupt type. An exception to this are service signal and machine check interrupts, as there can be only one pending interrupt at a time. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b8d1e97..d01fc58 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -344,6 +344,11 @@ enum irq_types { IRQ_PEND_COUNT }; +/* We have 2M for virtio device descriptor pages. Smallest amount of + * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381 + */ +#define KVM_S390_MAX_VIRTIO_IRQS 87381 + /* * Repressible (non-floating) machine check interrupts * subclass bits in MCIC @@ -421,13 +426,32 @@ struct kvm_s390_local_interrupt { unsigned long pending_irqs; }; +#define FIRQ_LIST_IO_ISC_0 0 +#define FIRQ_LIST_IO_ISC_1 1 +#define FIRQ_LIST_IO_ISC_2 2 +#define FIRQ_LIST_IO_ISC_3 3 +#define FIRQ_LIST_IO_ISC_4 4 +#define FIRQ_LIST_IO_ISC_5 5 +#define FIRQ_LIST_IO_ISC_6 6 +#define FIRQ_LIST_IO_ISC_7 7 +#define FIRQ_LIST_PFAULT 8 +#define FIRQ_LIST_VIRTIO 9 +#define FIRQ_LIST_COUNT 10 +#define FIRQ_CNTR_IO 0 +#define FIRQ_CNTR_SERVICE 1 +#define FIRQ_CNTR_VIRTIO 2 +#define FIRQ_CNTR_PFAULT 3 +#define FIRQ_MAX_COUNT 4 + struct kvm_s390_float_interrupt { + unsigned long pending_irqs; spinlock_t lock; - struct list_head list; - atomic_t active; + struct list_head lists[FIRQ_LIST_COUNT]; + int counters[FIRQ_MAX_COUNT]; + struct kvm_s390_mchk_info mchk; + struct kvm_s390_ext_info srv_signal; int next_rr_cpu; unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; - unsigned int irq_count; }; struct kvm_hw_wp_info_arch { diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5ebd500..2872fdb 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" @@ -34,11 +35,6 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 -static int is_ioint(u64 type) -{ - return ((type & 0xfffe0000u) != 0xfffe0000u); -} - int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -74,70 +70,25 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) return 1; } -static u64 int_word_to_isc_bits(u32 int_word) +static inline int is_ioirq(unsigned long irq_type) { - u8 isc = (int_word & 0x38000000) >> 27; + return ((irq_type >= IRQ_PEND_IO_ISC_0) && + (irq_type <= IRQ_PEND_IO_ISC_7)); +} +static uint64_t isc_to_isc_bits(int isc) +{ return (0x80 >> isc) << 24; } -static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static inline u8 int_word_to_isc(u32 int_word) { - switch (inti->type) { - case KVM_S390_INT_EXTERNAL_CALL: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x2000ul) - return 1; - return 0; - case KVM_S390_INT_EMERGENCY: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) - return 1; - return 0; - case KVM_S390_INT_CLOCK_COMP: - return ckc_interrupts_enabled(vcpu); - case KVM_S390_INT_CPU_TIMER: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x400ul) - return 1; - return 0; - case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_INIT: - case KVM_S390_INT_PFAULT_DONE: - case KVM_S390_INT_VIRTIO: - if (psw_extint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[0] & 0x200ul) - return 1; - return 0; - case KVM_S390_PROGRAM_INT: - case KVM_S390_SIGP_STOP: - case KVM_S390_SIGP_SET_PREFIX: - case KVM_S390_RESTART: - return 1; - case KVM_S390_MCHK: - if (psw_mchk_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14) - return 1; - return 0; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (psw_ioint_disabled(vcpu)) - return 0; - if (vcpu->arch.sie_block->gcr[6] & - int_word_to_isc_bits(inti->io.io_int_word)) - return 1; - return 0; - default: - printk(KERN_WARNING "illegal interrupt type %llx\n", - inti->type); - BUG(); - } - return 0; + return (int_word & 0x38000000) >> 27; +} + +static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu) +{ + return vcpu->kvm->arch.float_int.pending_irqs; } static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) @@ -145,12 +96,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu) return vcpu->arch.local_int.pending_irqs; } -static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) +static unsigned long disable_iscs(struct kvm_vcpu *vcpu, + unsigned long active_mask) { - unsigned long active_mask = pending_local_irqs(vcpu); + int i; + + for (i = 0; i <= MAX_ISC; i++) + if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i))) + active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i)); + + return active_mask; +} + +static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu) +{ + unsigned long active_mask; + + active_mask = pending_local_irqs(vcpu); + active_mask |= pending_floating_irqs(vcpu); if (psw_extint_disabled(vcpu)) active_mask &= ~IRQ_PEND_EXT_MASK; + if (psw_ioint_disabled(vcpu)) + active_mask &= ~IRQ_PEND_IO_MASK; + else + active_mask = disable_iscs(vcpu, active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul)) __clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul)) @@ -159,8 +129,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu) __clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul)) __clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); + if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul)) + __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask); if (psw_mchk_disabled(vcpu)) active_mask &= ~IRQ_PEND_MCHK_MASK; + if (!(vcpu->arch.sie_block->gcr[14] & + vcpu->kvm->arch.float_int.mchk.cr14)) + __clear_bit(IRQ_PEND_MCHK_REP, &active_mask); /* * STOP irqs will never be actively delivered. They are triggered via @@ -202,6 +177,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); } +static void set_intercept_indicators_io(struct kvm_vcpu *vcpu) +{ + if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK)) + return; + else if (psw_ioint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_IO_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR6; +} + static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu) { if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK)) @@ -228,43 +213,15 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu) __set_cpuflag(vcpu, CPUSTAT_STOP_INT); } -/* Set interception request for non-deliverable local interrupts */ -static void set_intercept_indicators_local(struct kvm_vcpu *vcpu) +/* Set interception request for non-deliverable interrupts */ +static void set_intercept_indicators(struct kvm_vcpu *vcpu) { + set_intercept_indicators_io(vcpu); set_intercept_indicators_ext(vcpu); set_intercept_indicators_mchk(vcpu); set_intercept_indicators_stop(vcpu); } -static void __set_intercept_indicator(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - switch (inti->type) { - case KVM_S390_INT_SERVICE: - case KVM_S390_INT_PFAULT_DONE: - case KVM_S390_INT_VIRTIO: - if (psw_extint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_EXT_INT); - else - vcpu->arch.sie_block->lctl |= LCTL_CR0; - break; - case KVM_S390_MCHK: - if (psw_mchk_disabled(vcpu)) - vcpu->arch.sie_block->ictl |= ICTL_LPSW; - else - vcpu->arch.sie_block->lctl |= LCTL_CR14; - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - if (psw_ioint_disabled(vcpu)) - __set_cpuflag(vcpu, CPUSTAT_IO_INT); - else - vcpu->arch.sie_block->lctl |= LCTL_CR6; - break; - default: - BUG(); - } -} - static u16 get_ilc(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { @@ -350,42 +307,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu) static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu) { + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_mchk_info mchk; + struct kvm_s390_mchk_info mchk = {}; unsigned long adtl_status_addr; - int rc; + int deliver = 0; + int rc = 0; + spin_lock(&fi->lock); spin_lock(&li->lock); - mchk = li->irq.mchk; + if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) || + test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) { + /* + * If there was an exigent machine check pending, then any + * repressible machine checks that might have been pending + * are indicated along with it, so always clear bits for + * repressible and exigent interrupts + */ + mchk = li->irq.mchk; + clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); + clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); + memset(&li->irq.mchk, 0, sizeof(mchk)); + deliver = 1; + } /* - * If there was an exigent machine check pending, then any repressible - * machine checks that might have been pending are indicated along - * with it, so always clear both bits + * We indicate floating repressible conditions along with + * other pending conditions. Channel Report Pending and Channel + * Subsystem damage are the only two and and are indicated by + * bits in mcic and masked in cr14. */ - clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); - clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); - memset(&li->irq.mchk, 0, sizeof(mchk)); + if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { + mchk.mcic |= fi->mchk.mcic; + mchk.cr14 |= fi->mchk.cr14; + memset(&fi->mchk, 0, sizeof(mchk)); + deliver = 1; + } spin_unlock(&li->lock); + spin_unlock(&fi->lock); - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - mchk.mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, - mchk.cr14, mchk.mcic); - - rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, - &adtl_status_addr, sizeof(unsigned long)); - rc |= kvm_s390_vcpu_store_adtl_status(vcpu, adtl_status_addr); - rc |= put_guest_lc(vcpu, mchk.mcic, - (u64 __user *) __LC_MCCK_CODE); - rc |= put_guest_lc(vcpu, mchk.failing_storage_address, - (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); - rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, - &mchk.fixed_logout, sizeof(mchk.fixed_logout)); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (deliver) { + VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", + mchk.mcic); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_MCHK, + mchk.cr14, mchk.mcic); + + rc = kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_PREFIXED); + rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, + &adtl_status_addr, + sizeof(unsigned long)); + rc |= kvm_s390_vcpu_store_adtl_status(vcpu, + adtl_status_addr); + rc |= put_guest_lc(vcpu, mchk.mcic, + (u64 __user *) __LC_MCCK_CODE); + rc |= put_guest_lc(vcpu, mchk.failing_storage_address, + (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); + rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, + &mchk.fixed_logout, + sizeof(mchk.fixed_logout)); + rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + } return rc ? -EFAULT : 0; } @@ -597,16 +584,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu) return rc ? -EFAULT : 0; } -static int __must_check __deliver_service(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_service(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_ext_info ext; + int rc = 0; + + spin_lock(&fi->lock); + if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) { + spin_unlock(&fi->lock); + return 0; + } + ext = fi->srv_signal; + memset(&fi->srv_signal, 0, sizeof(ext)); + clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); + spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", - inti->ext.ext_params); + ext.ext_params); vcpu->stat.deliver_service_signal++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, 0); + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE, + ext.ext_params, 0); rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE); rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR); @@ -614,106 +612,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, + rc |= put_guest_lc(vcpu, ext.ext_params, (u32 *)__LC_EXT_PARAMS); + return rc ? -EFAULT : 0; } -static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, - KVM_S390_INT_PFAULT_DONE, 0, - inti->ext.ext_params2); + spin_lock(&fi->lock); + inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT], + struct kvm_s390_interrupt_info, + list); + if (inti) { + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + KVM_S390_INT_PFAULT_DONE, 0, + inti->ext.ext_params2); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_PFAULT] -= 1; + } + if (list_empty(&fi->lists[FIRQ_LIST_PFAULT])) + clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs); + spin_unlock(&fi->lock); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + if (inti) { + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, PFAULT_DONE, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + kfree(inti); + } return rc ? -EFAULT : 0; } -static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) +static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu) { - int rc; + struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct kvm_s390_interrupt_info *inti; + int rc = 0; - VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx", - inti->ext.ext_params, inti->ext.ext_params2); - vcpu->stat.deliver_virtio_interrupt++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - inti->ext.ext_params, - inti->ext.ext_params2); + spin_lock(&fi->lock); + inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO], + struct kvm_s390_interrupt_info, + list); + if (inti) { + VCPU_EVENT(vcpu, 4, + "interrupt: virtio parm:%x,parm64:%llx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + inti->type, + inti->ext.ext_params, + inti->ext.ext_params2); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_VIRTIO] -= 1; + } + if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO])) + clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs); + spin_unlock(&fi->lock); - rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE); - rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR); - rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= put_guest_lc(vcpu, inti->ext.ext_params, - (u32 *)__LC_EXT_PARAMS); - rc |= put_guest_lc(vcpu, inti->ext.ext_params2, - (u64 *)__LC_EXT_PARAMS2); + if (inti) { + rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, + (u16 *)__LC_EXT_INT_CODE); + rc |= put_guest_lc(vcpu, VIRTIO_PARAM, + (u16 *)__LC_EXT_CPU_ADDR); + rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= put_guest_lc(vcpu, inti->ext.ext_params, + (u32 *)__LC_EXT_PARAMS); + rc |= put_guest_lc(vcpu, inti->ext.ext_params2, + (u64 *)__LC_EXT_PARAMS2); + kfree(inti); + } return rc ? -EFAULT : 0; } static int __must_check __deliver_io(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) + unsigned long irq_type) { - int rc; + struct list_head *isc_list; + struct kvm_s390_float_interrupt *fi; + struct kvm_s390_interrupt_info *inti = NULL; + int rc = 0; - VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); - vcpu->stat.deliver_io_int++; - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, - ((__u32)inti->io.subchannel_id << 16) | - inti->io.subchannel_nr, - ((__u64)inti->io.io_int_parm << 32) | - inti->io.io_int_word); - - rc = put_guest_lc(vcpu, inti->io.subchannel_id, - (u16 *)__LC_SUBCHANNEL_ID); - rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, - (u16 *)__LC_SUBCHANNEL_NR); - rc |= put_guest_lc(vcpu, inti->io.io_int_parm, - (u32 *)__LC_IO_INT_PARM); - rc |= put_guest_lc(vcpu, inti->io.io_int_word, - (u32 *)__LC_IO_INT_WORD); - rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - return rc ? -EFAULT : 0; -} + fi = &vcpu->kvm->arch.float_int; -static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - struct kvm_s390_mchk_info *mchk = &inti->mchk; - int rc; + spin_lock(&fi->lock); + isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0]; + inti = list_first_entry_or_null(isc_list, + struct kvm_s390_interrupt_info, + list); + if (inti) { + VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type); + vcpu->stat.deliver_io_int++; + trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, + inti->type, + ((__u32)inti->io.subchannel_id << 16) | + inti->io.subchannel_nr, + ((__u64)inti->io.io_int_parm << 32) | + inti->io.io_int_word); + list_del(&inti->list); + fi->counters[FIRQ_CNTR_IO] -= 1; + } + if (list_empty(isc_list)) + clear_bit(irq_type, &fi->pending_irqs); + spin_unlock(&fi->lock); + + if (inti) { + rc = put_guest_lc(vcpu, inti->io.subchannel_id, + (u16 *)__LC_SUBCHANNEL_ID); + rc |= put_guest_lc(vcpu, inti->io.subchannel_nr, + (u16 *)__LC_SUBCHANNEL_NR); + rc |= put_guest_lc(vcpu, inti->io.io_int_parm, + (u32 *)__LC_IO_INT_PARM); + rc |= put_guest_lc(vcpu, inti->io.io_int_word, + (u32 *)__LC_IO_INT_WORD); + rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW, + &vcpu->arch.sie_block->gpsw, + sizeof(psw_t)); + kfree(inti); + } - VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx", - mchk->mcic); - trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK, - mchk->cr14, mchk->mcic); - - rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED); - rc |= put_guest_lc(vcpu, mchk->mcic, - (u64 __user *) __LC_MCCK_CODE); - rc |= put_guest_lc(vcpu, mchk->failing_storage_address, - (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR); - rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA, - &mchk->fixed_logout, sizeof(mchk->fixed_logout)); - rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); - rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW, - &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); return rc ? -EFAULT : 0; } @@ -721,6 +759,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu); static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_MCHK_EX] = __deliver_machine_check, + [IRQ_PEND_MCHK_REP] = __deliver_machine_check, [IRQ_PEND_PROG] = __deliver_prog, [IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal, [IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call, @@ -729,36 +768,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { [IRQ_PEND_RESTART] = __deliver_restart, [IRQ_PEND_SET_PREFIX] = __deliver_set_prefix, [IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init, + [IRQ_PEND_EXT_SERVICE] = __deliver_service, + [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done, + [IRQ_PEND_VIRTIO] = __deliver_virtio, }; -static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu, - struct kvm_s390_interrupt_info *inti) -{ - int rc; - - switch (inti->type) { - case KVM_S390_INT_SERVICE: - rc = __deliver_service(vcpu, inti); - break; - case KVM_S390_INT_PFAULT_DONE: - rc = __deliver_pfault_done(vcpu, inti); - break; - case KVM_S390_INT_VIRTIO: - rc = __deliver_virtio(vcpu, inti); - break; - case KVM_S390_MCHK: - rc = __deliver_mchk_floating(vcpu, inti); - break; - case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: - rc = __deliver_io(vcpu, inti); - break; - default: - BUG(); - } - - return rc; -} - /* Check whether an external call is pending (deliverable or not) */ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { @@ -774,21 +788,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) { - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *inti; int rc; - rc = !!deliverable_local_irqs(vcpu); - - if ((!rc) && atomic_read(&fi->active)) { - spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) - if (__interrupt_is_deliverable(vcpu, inti)) { - rc = 1; - break; - } - spin_unlock(&fi->lock); - } + rc = !!deliverable_irqs(vcpu); if (!rc && kvm_cpu_has_pending_timer(vcpu)) rc = 1; @@ -907,13 +909,10 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int; - struct kvm_s390_interrupt_info *n, *inti = NULL; deliver_irq_t func; - int deliver; int rc = 0; unsigned long irq_type; - unsigned long deliverable_irqs; + unsigned long irqs; __reset_intercept_indicators(vcpu); @@ -923,44 +922,27 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); do { - deliverable_irqs = deliverable_local_irqs(vcpu); + irqs = deliverable_irqs(vcpu); /* bits are in the order of interrupt priority */ - irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT); + irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT); if (irq_type == IRQ_PEND_COUNT) break; - func = deliver_irq_funcs[irq_type]; - if (!func) { - WARN_ON_ONCE(func == NULL); - clear_bit(irq_type, &li->pending_irqs); - continue; + if (is_ioirq(irq_type)) { + rc = __deliver_io(vcpu, irq_type); + } else { + func = deliver_irq_funcs[irq_type]; + if (!func) { + WARN_ON_ONCE(func == NULL); + clear_bit(irq_type, &li->pending_irqs); + continue; + } + rc = func(vcpu); } - rc = func(vcpu); - } while (!rc && irq_type != IRQ_PEND_COUNT); - - set_intercept_indicators_local(vcpu); + if (rc) + break; + } while (!rc); - if (!rc && atomic_read(&fi->active)) { - do { - deliver = 0; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { - if (__interrupt_is_deliverable(vcpu, inti)) { - list_del(&inti->list); - fi->irq_count--; - deliver = 1; - break; - } - __set_intercept_indicator(vcpu, inti); - } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); - if (deliver) { - rc = __deliver_floating_interrupt(vcpu, inti); - kfree(inti); - } - } while (!rc && deliver); - } + set_intercept_indicators(vcpu); return rc; } @@ -1195,80 +1177,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu) return 0; } +static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm, + int isc, u32 schid) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; + struct kvm_s390_interrupt_info *iter; + u16 id = (schid & 0xffff0000U) >> 16; + u16 nr = schid & 0x0000ffffU; + spin_lock(&fi->lock); + list_for_each_entry(iter, isc_list, list) { + if (schid && (id != iter->io.subchannel_id || + nr != iter->io.subchannel_nr)) + continue; + /* found an appropriate entry */ + list_del_init(&iter->list); + fi->counters[FIRQ_CNTR_IO] -= 1; + if (list_empty(isc_list)) + clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); + spin_unlock(&fi->lock); + return iter; + } + spin_unlock(&fi->lock); + return NULL; +} + +/* + * Dequeue and return an I/O interrupt matching any of the interruption + * subclasses as designated by the isc mask in cr6 and the schid (if != 0). + */ struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, - u64 cr6, u64 schid) + u64 isc_mask, u32 schid) +{ + struct kvm_s390_interrupt_info *inti = NULL; + int isc; + + for (isc = 0; isc <= MAX_ISC && !inti; isc++) { + if (isc_mask & isc_to_isc_bits(isc)) + inti = get_io_int(kvm, isc, schid); + } + return inti; +} + +#define SCCB_MASK 0xFFFFFFF8 +#define SCCB_EVENT_PENDING 0x3 + +static int __inject_service(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING; + /* + * Early versions of the QEMU s390 bios will inject several + * service interrupts after another without handling a + * condition code indicating busy. + * We will silently ignore those superfluous sccb values. + * A future version of QEMU will take care of serialization + * of servc requests + */ + if (fi->srv_signal.ext_params & SCCB_MASK) + goto out; + fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK; + set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs); +out: + spin_unlock(&fi->lock); + kfree(inti); + return 0; +} + +static int __inject_virtio(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) { + spin_unlock(&fi->lock); + return -EBUSY; + } + fi->counters[FIRQ_CNTR_VIRTIO] += 1; + list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]); + set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs); + spin_unlock(&fi->lock); + return 0; +} + +static int __inject_pfault_done(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + if (fi->counters[FIRQ_CNTR_PFAULT] >= + (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) { + spin_unlock(&fi->lock); + return -EBUSY; + } + fi->counters[FIRQ_CNTR_PFAULT] += 1; + list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]); + set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs); + spin_unlock(&fi->lock); + return 0; +} + +#define CR_PENDING_SUBCLASS 28 +static int __inject_float_mchk(struct kvm *kvm, + struct kvm_s390_interrupt_info *inti) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + + spin_lock(&fi->lock); + fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS); + fi->mchk.mcic |= inti->mchk.mcic; + set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs); + spin_unlock(&fi->lock); + kfree(inti); + return 0; +} + +static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *inti, *iter; + struct list_head *list; + int isc; - if ((!schid && !cr6) || (schid && cr6)) - return NULL; fi = &kvm->arch.float_int; spin_lock(&fi->lock); - inti = NULL; - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (cr6 && - ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0)) - continue; - if (schid) { - if (((schid & 0x00000000ffff0000) >> 16) != - iter->io.subchannel_id) - continue; - if ((schid & 0x000000000000ffff) != - iter->io.subchannel_nr) - continue; - } - inti = iter; - break; - } - if (inti) { - list_del_init(&inti->list); - fi->irq_count--; + if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) { + spin_unlock(&fi->lock); + return -EBUSY; } - if (list_empty(&fi->list)) - atomic_set(&fi->active, 0); + fi->counters[FIRQ_CNTR_IO] += 1; + + isc = int_word_to_isc(inti->io.io_int_word); + list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc]; + list_add_tail(&inti->list, list); + set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs); spin_unlock(&fi->lock); - return inti; + return 0; } static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) { struct kvm_s390_local_interrupt *li; struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *iter; struct kvm_vcpu *dst_vcpu = NULL; int sigcpu; - int rc = 0; + u64 type = READ_ONCE(inti->type); + int rc; fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) { + + switch (type) { + case KVM_S390_MCHK: + rc = __inject_float_mchk(kvm, inti); + break; + case KVM_S390_INT_VIRTIO: + rc = __inject_virtio(kvm, inti); + break; + case KVM_S390_INT_SERVICE: + rc = __inject_service(kvm, inti); + break; + case KVM_S390_INT_PFAULT_DONE: + rc = __inject_pfault_done(kvm, inti); + break; + case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: + rc = __inject_io(kvm, inti); + break; + default: rc = -EINVAL; - goto unlock_fi; } - fi->irq_count++; - if (!is_ioint(inti->type)) { - list_add_tail(&inti->list, &fi->list); - } else { - u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word); + if (rc) + return rc; - /* Keep I/O interrupts sorted in isc order. */ - list_for_each_entry(iter, &fi->list, list) { - if (!is_ioint(iter->type)) - continue; - if (int_word_to_isc_bits(iter->io.io_int_word) - <= isc_bits) - continue; - break; - } - list_add_tail(&inti->list, &iter->list); - } - atomic_set(&fi->active, 1); - if (atomic_read(&kvm->online_vcpus) == 0) - goto unlock_fi; sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); if (sigcpu == KVM_MAX_VCPUS) { do { @@ -1280,7 +1364,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) dst_vcpu = kvm_get_vcpu(kvm, sigcpu); li = &dst_vcpu->arch.local_int; spin_lock(&li->lock); - switch (inti->type) { + switch (type) { case KVM_S390_MCHK: atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); break; @@ -1293,9 +1377,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti) } spin_unlock(&li->lock); kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu)); -unlock_fi: - spin_unlock(&fi->lock); - return rc; + return 0; + } int kvm_s390_inject_vm(struct kvm *kvm, @@ -1462,20 +1545,14 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return rc; } -void kvm_s390_clear_float_irqs(struct kvm *kvm) +static inline void clear_irq_list(struct list_head *_list) { - struct kvm_s390_float_interrupt *fi; - struct kvm_s390_interrupt_info *n, *inti = NULL; + struct kvm_s390_interrupt_info *inti, *n; - fi = &kvm->arch.float_int; - spin_lock(&fi->lock); - list_for_each_entry_safe(inti, n, &fi->list, list) { + list_for_each_entry_safe(inti, n, _list, list) { list_del(&inti->list); kfree(inti); } - fi->irq_count = 0; - atomic_set(&fi->active, 0); - spin_unlock(&fi->lock); } static void inti_to_irq(struct kvm_s390_interrupt_info *inti, @@ -1486,26 +1563,37 @@ static void inti_to_irq(struct kvm_s390_interrupt_info *inti, case KVM_S390_INT_PFAULT_INIT: case KVM_S390_INT_PFAULT_DONE: case KVM_S390_INT_VIRTIO: - case KVM_S390_INT_SERVICE: irq->u.ext = inti->ext; break; case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX: irq->u.io = inti->io; break; - case KVM_S390_MCHK: - irq->u.mchk = inti->mchk; - break; } } +void kvm_s390_clear_float_irqs(struct kvm *kvm) +{ + struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; + int i; + + spin_lock(&fi->lock); + for (i = 0; i < FIRQ_LIST_COUNT; i++) + clear_irq_list(&fi->lists[i]); + for (i = 0; i < FIRQ_MAX_COUNT; i++) + fi->counters[i] = 0; + spin_unlock(&fi->lock); +}; + static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) { struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; struct kvm_s390_irq *buf; + struct kvm_s390_irq *irq; int max_irqs; int ret = 0; int n = 0; + int i; if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) return -EINVAL; @@ -1523,15 +1611,41 @@ static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len) fi = &kvm->arch.float_int; spin_lock(&fi->lock); - list_for_each_entry(inti, &fi->list, list) { + for (i = 0; i < FIRQ_LIST_COUNT; i++) { + list_for_each_entry(inti, &fi->lists[i], list) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out; + } + inti_to_irq(inti, &buf[n]); + n++; + } + } + if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) { if (n == max_irqs) { /* signal userspace to try again */ ret = -ENOMEM; - break; + goto out; } - inti_to_irq(inti, &buf[n]); + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_INT_SERVICE; + irq->u.ext = fi->srv_signal; n++; } + if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { + if (n == max_irqs) { + /* signal userspace to try again */ + ret = -ENOMEM; + goto out; + } + irq = (struct kvm_s390_irq *) &buf[n]; + irq->type = KVM_S390_MCHK; + irq->u.mchk = fi->mchk; + n++; +} + +out: spin_unlock(&fi->lock); if (!ret && n > 0) { if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n)) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index a130885..dbc9ca3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include "kvm-s390.h" #include "gaccess.h" @@ -1069,7 +1070,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) goto out_err; spin_lock_init(&kvm->arch.float_int.lock); - INIT_LIST_HEAD(&kvm->arch.float_int.list); + for (i = 0; i < FIRQ_LIST_COUNT; i++) + INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]); init_waitqueue_head(&kvm->arch.ipte_wq); mutex_init(&kvm->arch.ipte_mutex); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index c5aefef..343644a 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -178,7 +178,7 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, - u64 cr6, u64 schid); + u64 isc_mask, u32 schid); int kvm_s390_reinject_io_int(struct kvm *kvm, struct kvm_s390_interrupt_info *inti); int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 5e4658d..d22d8ee 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -294,10 +294,13 @@ reinject_interrupt: static int handle_tsch(struct kvm_vcpu *vcpu) { - struct kvm_s390_interrupt_info *inti; + struct kvm_s390_interrupt_info *inti = NULL; + const u64 isc_mask = 0xffUL << 24; /* all iscs set */ - inti = kvm_s390_get_io_int(vcpu->kvm, 0, - vcpu->run->s.regs.gprs[1]); + /* a valid schid has at least one bit set */ + if (vcpu->run->s.regs.gprs[1]) + inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask, + vcpu->run->s.regs.gprs[1]); /* * Prepare exit to userspace. -- cgit v0.10.2 From b4aec92567f3146167cbc262c686ff73730aa4ca Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 1 Dec 2014 15:55:42 +0100 Subject: KVM: s390: cpu timer irq priority We now have a mechanism for delivering interrupts according to their priority. Let's inject them using our new infrastructure (instead of letting only hardware handle them), so we can be sure that the irq priorities are satisfied. For s390, the cpu timer and the clock comparator are to be checked for common code kvm_cpu_has_pending_timer(), although the cpu timer is only stepped when the guest is being executed. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2872fdb..8a0786c 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -70,6 +70,26 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu) return 1; } +static int ckc_irq_pending(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.sie_block->ckc < + get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) + return 0; + return ckc_interrupts_enabled(vcpu); +} + +static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu) +{ + return !psw_extint_disabled(vcpu) && + (vcpu->arch.sie_block->gcr[0] & 0x400ul); +} + +static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.sie_block->cputm >> 63) && + cpu_timer_interrupts_enabled(vcpu); +} + static inline int is_ioirq(unsigned long irq_type) { return ((irq_type >= IRQ_PEND_IO_ISC_0) && @@ -809,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - if (!(vcpu->arch.sie_block->ckc < - get_tod_clock_fast() + vcpu->arch.sie_block->epoch)) - return 0; - if (!ckc_interrupts_enabled(vcpu)) - return 0; - return 1; + return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu); } int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) @@ -918,9 +933,14 @@ int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) /* pending ckc conditions might have been invalidated */ clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); - if (kvm_cpu_has_pending_timer(vcpu)) + if (ckc_irq_pending(vcpu)) set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); + /* pending cpu timer conditions might have been invalidated */ + clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + if (cpu_timer_irq_pending(vcpu)) + set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); + do { irqs = deliverable_irqs(vcpu); /* bits are in the order of interrupt priority */ -- cgit v0.10.2 From 47b43c52ee4b0425449d1b2b1eedca7f6b7a578a Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Tue, 11 Nov 2014 20:57:06 +0100 Subject: KVM: s390: add ioctl to inject local interrupts We have introduced struct kvm_s390_irq a while ago which allows to inject all kinds of interrupts as defined in the Principles of Operation. Add ioctl to inject interrupts with the extended struct kvm_s390_irq Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0d7fc66..a7c651d 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2820,6 +2820,62 @@ single frame starting at start_gfn for count frames. Note: If any architecturally invalid key value is found in the given data then the ioctl will return -EINVAL. +4.92 KVM_S390_IRQ + +Capability: KVM_CAP_S390_INJECT_IRQ +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq (in) +Returns: 0 on success, -1 on error +Errors: + EINVAL: interrupt type is invalid + type is KVM_S390_SIGP_STOP and flag parameter is invalid value + type is KVM_S390_INT_EXTERNAL_CALL and code is bigger + than the maximum of VCPUs + EBUSY: type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped + type is KVM_S390_SIGP_STOP and a stop irq is already pending + type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt + is already pending + +Allows to inject an interrupt to the guest. + +Using struct kvm_s390_irq as a parameter allows +to inject additional payload which is not +possible via KVM_S390_INTERRUPT. + +Interrupt parameters are passed via kvm_s390_irq: + +struct kvm_s390_irq { + __u64 type; + union { + struct kvm_s390_io_info io; + struct kvm_s390_ext_info ext; + struct kvm_s390_pgm_info pgm; + struct kvm_s390_emerg_info emerg; + struct kvm_s390_extcall_info extcall; + struct kvm_s390_prefix_info prefix; + struct kvm_s390_stop_info stop; + struct kvm_s390_mchk_info mchk; + char reserved[64]; + } u; +}; + +type can be one of the following: + +KVM_S390_SIGP_STOP - sigp stop; parameter in .stop +KVM_S390_PROGRAM_INT - program check; parameters in .pgm +KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix +KVM_S390_RESTART - restart; no parameters +KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters +KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters +KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg +KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall +KVM_S390_MCHK - machine check interrupt; parameters in .mchk + + +Note that the vcpu ioctl is asynchronous to vcpu execution. + + 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dbc9ca3..8bc25d4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -177,6 +177,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: + case KVM_CAP_S390_INJECT_IRQ: case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: case KVM_CAP_S390_SKEYS: @@ -2391,6 +2392,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, long r; switch (ioctl) { + case KVM_S390_IRQ: { + struct kvm_s390_irq s390irq; + + r = -EFAULT; + if (copy_from_user(&s390irq, argp, sizeof(s390irq))) + break; + r = kvm_s390_inject_vcpu(vcpu, &s390irq); + break; + } case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int; struct kvm_s390_irq s390irq; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 1162ef7..c0632e8 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -802,6 +802,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_MEM_OP 108 #define KVM_CAP_S390_USER_STSI 109 #define KVM_CAP_S390_SKEYS 110 +#define KVM_CAP_S390_INJECT_IRQ 113 #ifdef KVM_CAP_IRQ_ROUTING @@ -1182,6 +1183,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_S390_SKEYS */ #define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys) #define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) +/* Available with KVM_CAP_S390_INJECT_IRQ */ +#define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a109370..34310a8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2118,7 +2118,7 @@ static long kvm_vcpu_ioctl(struct file *filp, * Special cases: vcpu ioctls that are asynchronous to vcpu execution, * so vcpu_load() would break it. */ - if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT) + if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT) return kvm_arch_vcpu_ioctl(filp, ioctl, arg); #endif -- cgit v0.10.2 From 79e87a103de1eda0cb4d726cd8581798e2d38f3e Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Thu, 19 Mar 2015 15:12:12 +0100 Subject: KVM: s390: refactor vcpu injection function Let's provide a version of kvm_s390_inject_vcpu() that does not acquire the local-interrupt lock and skips waking up the vcpu. To be used in a later patch for vcpu-local interrupt migration, where we are already holding the lock. Reviewed-by: David Hildenbrand Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 8a0786c..bc09880 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1514,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu) spin_unlock(&li->lock); } -int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; int rc; - spin_lock(&li->lock); switch (irq->type) { case KVM_S390_PROGRAM_INT: VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", @@ -1559,6 +1557,17 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) default: rc = -EINVAL; } + + return rc; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + int rc; + + spin_lock(&li->lock); + rc = do_inject_vcpu(vcpu, irq); spin_unlock(&li->lock); if (!rc) kvm_s390_vcpu_wakeup(vcpu); -- cgit v0.10.2 From 816c7667ea97c61884e014cfeedaede5b67b0e58 Mon Sep 17 00:00:00 2001 From: Jens Freimann Date: Mon, 24 Nov 2014 17:13:46 +0100 Subject: KVM: s390: migrate vcpu interrupt state This patch adds support to migrate vcpu interrupts. Two new vcpu ioctls are added which get/set the complete status of pending interrupts in one go. The ioctls are marked as available with the new capability KVM_CAP_S390_IRQ_STATE. We can not use a ONEREG, as the number of pending local interrupts is not constant and depends on the number of CPUs. To retrieve the interrupt state we add an ioctl KVM_S390_GET_IRQ_STATE. Its input parameter is a pointer to a struct kvm_s390_irq_state which has a buffer and length. For all currently pending interrupts, we copy a struct kvm_s390_irq into the buffer and pass it to userspace. To store interrupt state into a buffer provided by userspace, we add an ioctl KVM_S390_SET_IRQ_STATE. It passes a struct kvm_s390_irq_state into the kernel and injects all interrupts contained in the buffer. Signed-off-by: Jens Freimann Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7c651d..18fb763 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2875,6 +2875,67 @@ KVM_S390_MCHK - machine check interrupt; parameters in .mchk Note that the vcpu ioctl is asynchronous to vcpu execution. +4.94 KVM_S390_GET_IRQ_STATE + +Capability: KVM_CAP_S390_IRQ_STATE +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq_state (out) +Returns: >= number of bytes copied into buffer, + -EINVAL if buffer size is 0, + -ENOBUFS if buffer size is too small to fit all pending interrupts, + -EFAULT if the buffer address was invalid + +This ioctl allows userspace to retrieve the complete state of all currently +pending interrupts in a single buffer. Use cases include migration +and introspection. The parameter structure contains the address of a +userspace buffer and its length: + +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + +Userspace passes in the above struct and for each pending interrupt a +struct kvm_s390_irq is copied to the provided buffer. + +If -ENOBUFS is returned the buffer provided was too small and userspace +may retry with a bigger buffer. + +4.95 KVM_S390_SET_IRQ_STATE + +Capability: KVM_CAP_S390_IRQ_STATE +Architectures: s390 +Type: vcpu ioctl +Parameters: struct kvm_s390_irq_state (in) +Returns: 0 on success, + -EFAULT if the buffer address was invalid, + -EINVAL for an invalid buffer length (see below), + -EBUSY if there were already interrupts pending, + errors occurring when actually injecting the + interrupt. See KVM_S390_IRQ. + +This ioctl allows userspace to set the complete state of all cpu-local +interrupts currently pending for the vcpu. It is intended for restoring +interrupt state after a migration. The input parameter is a userspace buffer +containing a struct kvm_s390_irq_state: + +struct kvm_s390_irq_state { + __u64 buf; + __u32 len; + __u32 pad; +}; + +The userspace memory referenced by buf contains a struct kvm_s390_irq +for each interrupt to be injected into the guest. +If one of the interrupts could not be injected for some reason the +ioctl aborts. + +len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0 +and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq), +which is the maximum number of possibly pending cpu-local interrupts. 5. The kvm_run structure ------------------------ diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bc09880..9de4726 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -2123,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, { return -EINVAL; } + +int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len) +{ + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + struct kvm_s390_irq *buf; + int r = 0; + int n; + + buf = vmalloc(len); + if (!buf) + return -ENOMEM; + + if (copy_from_user((void *) buf, irqstate, len)) { + r = -EFAULT; + goto out_free; + } + + /* + * Don't allow setting the interrupt state + * when there are already interrupts pending + */ + spin_lock(&li->lock); + if (li->pending_irqs) { + r = -EBUSY; + goto out_unlock; + } + + for (n = 0; n < len / sizeof(*buf); n++) { + r = do_inject_vcpu(vcpu, &buf[n]); + if (r) + break; + } + +out_unlock: + spin_unlock(&li->lock); +out_free: + vfree(buf); + + return r; +} + +static void store_local_irq(struct kvm_s390_local_interrupt *li, + struct kvm_s390_irq *irq, + unsigned long irq_type) +{ + switch (irq_type) { + case IRQ_PEND_MCHK_EX: + case IRQ_PEND_MCHK_REP: + irq->type = KVM_S390_MCHK; + irq->u.mchk = li->irq.mchk; + break; + case IRQ_PEND_PROG: + irq->type = KVM_S390_PROGRAM_INT; + irq->u.pgm = li->irq.pgm; + break; + case IRQ_PEND_PFAULT_INIT: + irq->type = KVM_S390_INT_PFAULT_INIT; + irq->u.ext = li->irq.ext; + break; + case IRQ_PEND_EXT_EXTERNAL: + irq->type = KVM_S390_INT_EXTERNAL_CALL; + irq->u.extcall = li->irq.extcall; + break; + case IRQ_PEND_EXT_CLOCK_COMP: + irq->type = KVM_S390_INT_CLOCK_COMP; + break; + case IRQ_PEND_EXT_CPU_TIMER: + irq->type = KVM_S390_INT_CPU_TIMER; + break; + case IRQ_PEND_SIGP_STOP: + irq->type = KVM_S390_SIGP_STOP; + irq->u.stop = li->irq.stop; + break; + case IRQ_PEND_RESTART: + irq->type = KVM_S390_RESTART; + break; + case IRQ_PEND_SET_PREFIX: + irq->type = KVM_S390_SIGP_SET_PREFIX; + irq->u.prefix = li->irq.prefix; + break; + } +} + +int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) +{ + uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + unsigned long pending_irqs; + struct kvm_s390_irq irq; + unsigned long irq_type; + int cpuaddr; + int n = 0; + + spin_lock(&li->lock); + pending_irqs = li->pending_irqs; + memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending, + sizeof(sigp_emerg_pending)); + spin_unlock(&li->lock); + + for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) { + memset(&irq, 0, sizeof(irq)); + if (irq_type == IRQ_PEND_EXT_EMERGENCY) + continue; + if (n + sizeof(irq) > len) + return -ENOBUFS; + store_local_irq(&vcpu->arch.local_int, &irq, irq_type); + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + + if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) { + for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) { + memset(&irq, 0, sizeof(irq)); + if (n + sizeof(irq) > len) + return -ENOBUFS; + irq.type = KVM_S390_INT_EMERGENCY; + irq.u.emerg.code = cpuaddr; + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + } + + if ((sigp_ctrl & SIGP_CTRL_C) && + (atomic_read(&vcpu->arch.sie_block->cpuflags) & + CPUSTAT_ECALL_PEND)) { + if (n + sizeof(irq) > len) + return -ENOBUFS; + memset(&irq, 0, sizeof(irq)); + irq.type = KVM_S390_INT_EXTERNAL_CALL; + irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + if (copy_to_user(&buf[n], &irq, sizeof(irq))) + return -EFAULT; + n += sizeof(irq); + } + + return n; +} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8bc25d4..3040b14 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -41,6 +41,9 @@ #include "trace-s390.h" #define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */ +#define LOCAL_IRQS 32 +#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ + (KVM_MAX_VCPUS + LOCAL_IRQS)) #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU @@ -181,6 +184,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: case KVM_CAP_S390_SKEYS: + case KVM_CAP_S390_IRQ_STATE: r = 1; break; case KVM_CAP_S390_MEM_OP: @@ -2500,6 +2504,38 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = -EFAULT; break; } + case KVM_S390_SET_IRQ_STATE: { + struct kvm_s390_irq_state irq_state; + + r = -EFAULT; + if (copy_from_user(&irq_state, argp, sizeof(irq_state))) + break; + if (irq_state.len > VCPU_IRQS_MAX_BUF || + irq_state.len == 0 || + irq_state.len % sizeof(struct kvm_s390_irq) > 0) { + r = -EINVAL; + break; + } + r = kvm_s390_set_irq_state(vcpu, + (void __user *) irq_state.buf, + irq_state.len); + break; + } + case KVM_S390_GET_IRQ_STATE: { + struct kvm_s390_irq_state irq_state; + + r = -EFAULT; + if (copy_from_user(&irq_state, argp, sizeof(irq_state))) + break; + if (irq_state.len == 0) { + r = -EINVAL; + break; + } + r = kvm_s390_get_irq_state(vcpu, + (__u8 __user *) irq_state.buf, + irq_state.len); + break; + } default: r = -ENOTTY; } diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 343644a..ca108b9 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -272,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu); extern struct kvm_device_ops kvm_flic_ops; int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu); void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu); +int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, + void __user *buf, int len); +int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, + __u8 __user *buf, int len); /* implemented in guestdbg.c */ void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c0632e8..c045c72 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -558,6 +558,13 @@ struct kvm_s390_irq { } u; }; +struct kvm_s390_irq_state { + __u64 buf; + __u32 flags; + __u32 len; + __u32 reserved[4]; +}; + /* for KVM_SET_GUEST_DEBUG */ #define KVM_GUESTDBG_ENABLE 0x00000001 @@ -803,6 +810,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_USER_STSI 109 #define KVM_CAP_S390_SKEYS 110 #define KVM_CAP_S390_INJECT_IRQ 113 +#define KVM_CAP_S390_IRQ_STATE 114 #ifdef KVM_CAP_IRQ_ROUTING @@ -1185,6 +1193,9 @@ struct kvm_s390_ucas_mapping { #define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys) /* Available with KVM_CAP_S390_INJECT_IRQ */ #define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq) +/* Available with KVM_CAP_S390_IRQ_STATE */ +#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) +#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) -- cgit v0.10.2 From 35fd68a38d574188835110cde2937d18fe9b46dd Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 8 Apr 2015 14:08:14 +0800 Subject: kvm: x86: fix x86 eflags fixed bit Guest can't be booted w/ ept=0, there is a message dumped as below: If you're running a guest on an Intel machine without unrestricted mode support, the failure can be most likely due to the guest entering an invalid state for Intel VT. For example, the guest maybe running in big real mode which is not supported on less recent Intel processors. EAX=00000011 EBX=f000d2f6 ECX=00006cac EDX=000f8956 ESI=bffbdf62 EDI=00000000 EBP=00006c68 ESP=00006c68 EIP=0000d187 EFL=00000004 [-----P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =e000 000e0000 ffffffff 00809300 DPL=0 DS16 [-WA] CS =f000 000f0000 ffffffff 00809b00 DPL=0 CS16 [-RA] SS =0000 00000000 ffffffff 00809300 DPL=0 DS16 [-WA] DS =0000 00000000 ffffffff 00809300 DPL=0 DS16 [-WA] FS =0000 00000000 ffffffff 00809300 DPL=0 DS16 [-WA] GS =0000 00000000 ffffffff 00809300 DPL=0 DS16 [-WA] LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS32-busy GDT= 000f6a80 00000037 IDT= 000f6abe 00000000 CR0=00000011 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Code=01 1e b8 6a 2e 0f 01 16 74 6a 0f 20 c0 66 83 c8 01 0f 22 c0 <66> ea 8f d1 0f 00 08 00 b8 10 00 00 00 8e d8 8e c0 8e d0 8e e0 8e e8 89 c8 ff e2 89 c1 b8X X86 eflags bit 1 is fixed set, which means that 1 << 1 is set instead of 1, this patch fix it. Signed-off-by: Wanpeng Li Message-Id: <1428473294-6633-1-git-send-email-wanpeng.li@linux.intel.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b304728..630bcb0 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2033,7 +2033,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF | X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF | X86_EFLAGS_AC | X86_EFLAGS_ID | - X86_EFLAGS_FIXED_BIT; + X86_EFLAGS_FIXED; unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF | X86_EFLAGS_VIP; @@ -2072,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt) } ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */ - ctxt->eflags |= X86_EFLAGS_FIXED_BIT; + ctxt->eflags |= X86_EFLAGS_FIXED; ctxt->ops->set_nmi_mask(ctxt, false); return rc; @@ -2390,7 +2390,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data); ctxt->eflags &= ~msr_data; - ctxt->eflags |= X86_EFLAGS_FIXED_BIT; + ctxt->eflags |= X86_EFLAGS_FIXED; #endif } else { /* legacy mode */ -- cgit v0.10.2 From 362c698f8220e636edf1c40b1935715fa57f492f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Feb 2015 12:48:04 +0100 Subject: KVM: x86: extract blocking logic from __vcpu_run Rename the old __vcpu_run to vcpu_run, and extract part of it to a new function vcpu_block. The next patch will add a new condition in vcpu_block, avoid extra indentation. Reviewed-by: David Matlack Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a284c92..6256dfa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6186,7 +6186,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, } /* - * Returns 1 to let __vcpu_run() continue the guest execution loop without + * Returns 1 to let vcpu_run() continue the guest execution loop without * exiting to the userspace. Otherwise, the value will be returned to the * userspace. */ @@ -6404,42 +6404,46 @@ out: return r; } +static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) +{ + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_block(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) + return 1; + + kvm_apic_accept_events(vcpu); + switch(vcpu->arch.mp_state) { + case KVM_MP_STATE_HALTED: + vcpu->arch.pv.pv_unhalted = false; + vcpu->arch.mp_state = + KVM_MP_STATE_RUNNABLE; + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.apf.halted = false; + break; + case KVM_MP_STATE_INIT_RECEIVED: + break; + default: + return -EINTR; + break; + } + return 1; +} -static int __vcpu_run(struct kvm_vcpu *vcpu) +static int vcpu_run(struct kvm_vcpu *vcpu) { int r; struct kvm *kvm = vcpu->kvm; vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - r = 1; - while (r > 0) { + for (;;) { if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) r = vcpu_enter_guest(vcpu); - else { - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_vcpu_block(vcpu); - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) { - kvm_apic_accept_events(vcpu); - switch(vcpu->arch.mp_state) { - case KVM_MP_STATE_HALTED: - vcpu->arch.pv.pv_unhalted = false; - vcpu->arch.mp_state = - KVM_MP_STATE_RUNNABLE; - case KVM_MP_STATE_RUNNABLE: - vcpu->arch.apf.halted = false; - break; - case KVM_MP_STATE_INIT_RECEIVED: - break; - default: - r = -EINTR; - break; - } - } - } - + else + r = vcpu_block(kvm, vcpu); if (r <= 0) break; @@ -6451,6 +6455,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) r = -EINTR; vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.request_irq_exits; + break; } kvm_check_async_pf_completion(vcpu); @@ -6459,6 +6464,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) r = -EINTR; vcpu->run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.signal_exits; + break; } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); @@ -6590,7 +6596,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); - r = __vcpu_run(vcpu); + r = vcpu_run(vcpu); out: post_kvm_run_save(vcpu); -- cgit v0.10.2 From 9c8fd1ba2201c072bd3cf6940e2ca4d0a7aed723 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 6 Feb 2015 12:58:42 +0100 Subject: KVM: x86: optimize delivery of TSC deadline timer interrupt The newly-added tracepoint shows the following results on the tscdeadline_latency test: qemu-kvm-8387 [002] 6425.558974: kvm_vcpu_wakeup: poll time 10407 ns qemu-kvm-8387 [002] 6425.558984: kvm_vcpu_wakeup: poll time 0 ns qemu-kvm-8387 [002] 6425.561242: kvm_vcpu_wakeup: poll time 10477 ns qemu-kvm-8387 [002] 6425.561251: kvm_vcpu_wakeup: poll time 0 ns and so on. This is because we need to go through kvm_vcpu_block again after the timer IRQ is injected. Avoid it by polling once before entering kvm_vcpu_block. On my machine (Xeon E5 Sandy Bridge) this removes about 500 cycles (7%) from the latency of the TSC deadline timer. Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6256dfa..50861dd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6406,12 +6406,13 @@ out: static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { - srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_vcpu_block(vcpu); - vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); - - if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) - return 1; + if (!kvm_arch_vcpu_runnable(vcpu)) { + srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); + kvm_vcpu_block(vcpu); + vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) + return 1; + } kvm_apic_accept_events(vcpu); switch(vcpu->arch.mp_state) { -- cgit v0.10.2 From 3180a7fcbc0ec7ed7cc85ed5015bdd7a8c2176e8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 2 Apr 2015 14:08:20 +0200 Subject: KVM: remove kvm_read_hva and kvm_read_hva_atomic The corresponding write functions just use __copy_to_user. Do the same on the read side. This reverts what's left of commit 86ab8cffb498 (KVM: introduce gfn_to_hva_read/kvm_read_hva/kvm_read_hva_atomic, 2012-08-21) Cc: Xiao Guangrong Signed-off-by: Paolo Bonzini Message-Id: <1427976500-28533-1-git-send-email-pbonzini@redhat.com> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 0d06b7b..aadef26 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1187,16 +1187,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return gfn_to_hva_memslot_prot(slot, gfn, writable); } -static int kvm_read_hva(void *data, void __user *hva, int len) -{ - return __copy_from_user(data, hva, len); -} - -static int kvm_read_hva_atomic(void *data, void __user *hva, int len) -{ - return __copy_from_user_inatomic(data, hva, len); -} - static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, int write, struct page **page) { @@ -1548,7 +1538,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, addr = gfn_to_hva_prot(kvm, gfn, NULL); if (kvm_is_error_hva(addr)) return -EFAULT; - r = kvm_read_hva(data, (void __user *)addr + offset, len); + r = __copy_from_user(data, (void __user *)addr + offset, len); if (r) return -EFAULT; return 0; @@ -1587,7 +1577,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, if (kvm_is_error_hva(addr)) return -EFAULT; pagefault_disable(); - r = kvm_read_hva_atomic(data, (void __user *)addr + offset, len); + r = __copy_from_user_inatomic(data, (void __user *)addr + offset, len); pagefault_enable(); if (r) return -EFAULT; -- cgit v0.10.2 From 80f7fdb1c7f0f9266421f823964fd1962681f6ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 2 Apr 2015 20:44:23 +0200 Subject: x86: vdso: fix pvclock races with task migration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we were migrated right after __getcpu, but before reading the migration_count, we wouldn't notice that we read TSC of a different VCPU, nor that KVM's bug made pvti invalid, as only migration_count on source VCPU is increased. Change vdso instead of updating migration_count on destination. Cc: stable@vger.kernel.org Signed-off-by: Radim Krčmář Fixes: 0a4e6be9ca17 ("x86: kvm: Revert "remove sched notifier for cross-cpu migrations"") Message-Id: <1428000263-11892-1-git-send-email-rkrcmar@redhat.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 3093376..40d2473 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c @@ -99,21 +99,25 @@ static notrace cycle_t vread_pvclock(int *mode) * __getcpu() calls (Gleb). */ - pvti = get_pvti(cpu); + /* Make sure migrate_count will change if we leave the VCPU. */ + do { + pvti = get_pvti(cpu); + migrate_count = pvti->migrate_count; - migrate_count = pvti->migrate_count; + cpu1 = cpu; + cpu = __getcpu() & VGETCPU_CPU_MASK; + } while (unlikely(cpu != cpu1)); version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags); /* * Test we're still on the cpu as well as the version. - * We could have been migrated just after the first - * vgetcpu but before fetching the version, so we - * wouldn't notice a version change. + * - We must read TSC of pvti's VCPU. + * - KVM doesn't follow the versioning protocol, so data could + * change before version if we left the VCPU. */ - cpu1 = __getcpu() & VGETCPU_CPU_MASK; - } while (unlikely(cpu != cpu1 || - (pvti->pvti.version & 1) || + smp_rmb(); + } while (unlikely((pvti->pvti.version & 1) || pvti->pvti.version != version || pvti->migrate_count != migrate_count)); -- cgit v0.10.2 From 80f0e95d1b221688b2608e0a80134e7acccd9a89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 2 Apr 2015 21:11:05 +0200 Subject: KVM: vmx: pass error code with internal error #2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exposing the on-stack error code with internal error is cheap and potentially useful. Signed-off-by: Radim Krčmář Message-Id: <1428001865-32280-1-git-send-email-rkrcmar@redhat.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b5a6425..9e4b12b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5089,9 +5089,10 @@ static int handle_exception(struct kvm_vcpu *vcpu) !(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) { vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX; - vcpu->run->internal.ndata = 2; + vcpu->run->internal.ndata = 3; vcpu->run->internal.data[0] = vect_info; vcpu->run->internal.data[1] = intr_info; + vcpu->run->internal.data[2] = error_code; return 0; } -- cgit v0.10.2 From 5a4f55cde81f1633cb7ae9f0963b722e47acdc36 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Sun, 29 Mar 2015 23:56:12 +0300 Subject: KVM: x86: cache maxphyaddr CPUID leaf in struct kvm_vcpu cpuid_maxphyaddr(), which performs lot of memory accesses is called extensively across KVM, especially in nVMX code. This patch adds a cached value of maxphyaddr to vcpu.arch to reduce the pressure onto CPU cache and simplify the code of cpuid_maxphyaddr() callers. The cached value is initialized in kvm_arch_vcpu_init() and reloaded every time CPUID is updated by usermode. It is obvious that these reloads occur infrequently. Signed-off-by: Eugene Korenevsky Message-Id: <20150329205612.GA1223@gnote> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30b28dc..8d92e3b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -426,6 +426,9 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; + + int maxphyaddr; + /* emulate context */ struct x86_emulate_ctxt emulate_ctxt; @@ -1124,7 +1127,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 8a80737..59b69f6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) ((best->eax & 0xff00) >> 8) != 0) return -EINVAL; + /* Update physical-address width */ + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_pmu_cpuid_update(vcpu); return 0; } @@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu) } } +int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *best; + + best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); + if (!best || best->eax < 0x80000008) + goto not_found; + best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); + if (best) + return best->eax & 0xff; +not_found: + return 36; +} +EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr); + /* when an old userspace process fills a new kernel module */ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, @@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry); -int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid_entry2 *best; - - best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0); - if (!best || best->eax < 0x80000008) - goto not_found; - best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); - if (best) - return best->eax & 0xff; -not_found: - return 36; -} -EXPORT_SYMBOL_GPL(cpuid_maxphyaddr); - /* * If no match is found, check whether we exceed the vCPU's limit * and return the content of the highest valid _standard_ leaf instead. diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 2622846..c3b1ad9 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -20,6 +20,12 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 __user *entries); void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx); +int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu); + +static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.maxphyaddr; +} static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 50861dd..a578629 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7317,6 +7317,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.guest_supported_xcr0 = 0; vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; + vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + kvm_async_pf_hash_reset(vcpu); kvm_pmu_init(vcpu); -- cgit v0.10.2 From 9090422f1ca5270795738549cf91a4ae7cb47662 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Sun, 29 Mar 2015 23:56:27 +0300 Subject: KVM: nVMX: checks for address bits beyond MAXPHYADDR on VM-entry On each VM-entry CPU should check the following VMCS fields for zero bits beyond physical address width: - APIC-access address - virtual-APIC address - posted-interrupt descriptor address This patch adds these checks required by Intel SDM. Signed-off-by: Eugene Korenevsky Message-Id: <20150329205627.GA1244@gnote> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9e4b12b..6f770e8 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8622,10 +8622,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); + int maxphyaddr = cpuid_maxphyaddr(vcpu); if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - /* TODO: Also verify bits beyond physical address width are 0 */ - if (!PAGE_ALIGNED(vmcs12->apic_access_addr)) + if (!PAGE_ALIGNED(vmcs12->apic_access_addr) || + vmcs12->apic_access_addr >> maxphyaddr) return false; /* @@ -8641,8 +8642,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - /* TODO: Also verify bits beyond physical address width are 0 */ - if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr)) + if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) || + vmcs12->virtual_apic_page_addr >> maxphyaddr) return false; if (vmx->nested.virtual_apic_page) /* shouldn't happen */ @@ -8665,7 +8666,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, } if (nested_cpu_has_posted_intr(vmcs12)) { - if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64)) + if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) || + vmcs12->posted_intr_desc_addr >> maxphyaddr) return false; if (vmx->nested.pi_desc_page) { /* shouldn't happen */ @@ -9386,7 +9388,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) } if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { - /*TODO: Also verify bits beyond physical address width are 0*/ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } -- cgit v0.10.2 From 92d71bc6951364c20f7d8c70a83cd93a68a63ea7 Mon Sep 17 00:00:00 2001 From: Eugene Korenevsky Date: Sun, 29 Mar 2015 23:56:44 +0300 Subject: KVM: nVMX: remove unnecessary double caching of MAXPHYADDR After speed-up of cpuid_maxphyaddr() it can be called frequently: instead of heavyweight enumeration of CPUID entries it returns a cached pre-computed value. It is also inlined now. So caching its result became unnecessary and can be removed. Signed-off-by: Eugene Korenevsky Message-Id: <20150329205644.GA1258@gnote> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6f770e8..ddce07e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8866,9 +8866,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu, static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, unsigned long count_field, - unsigned long addr_field, - int maxphyaddr) + unsigned long addr_field) { + int maxphyaddr; u64 count, addr; if (vmcs12_read_any(vcpu, count_field, &count) || @@ -8878,6 +8878,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, } if (count == 0) return 0; + maxphyaddr = cpuid_maxphyaddr(vcpu); if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr || (addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) { pr_warn_ratelimited( @@ -8891,19 +8892,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu, static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - int maxphyaddr; - if (vmcs12->vm_exit_msr_load_count == 0 && vmcs12->vm_exit_msr_store_count == 0 && vmcs12->vm_entry_msr_load_count == 0) return 0; /* Fast path */ - maxphyaddr = cpuid_maxphyaddr(vcpu); if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT, - VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) || + VM_EXIT_MSR_LOAD_ADDR) || nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT, - VM_EXIT_MSR_STORE_ADDR, maxphyaddr) || + VM_EXIT_MSR_STORE_ADDR) || nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT, - VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr)) + VM_ENTRY_MSR_LOAD_ADDR)) return -EINVAL; return 0; } -- cgit v0.10.2 From 19456060315cedc5595a47007f886369871dfbc5 Mon Sep 17 00:00:00 2001 From: Arseny Solokha Date: Tue, 24 Feb 2015 16:05:04 +0700 Subject: kvm/ppc/mpic: drop unused IRQ_testbit Drop unused static procedure which doesn't have callers within its translation unit. It had been already removed independently in QEMU[1] from the OpenPIC implementation borrowed from the kernel. [1] https://lists.gnu.org/archive/html/qemu-devel/2014-06/msg01812.html Signed-off-by: Arseny Solokha Cc: Alexander Graf Cc: Gleb Natapov Cc: Paolo Bonzini Message-Id: <1424768706-23150-3-git-send-email-asolokha@kb.kras.ru> Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 4703fad..6249cdc 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -289,11 +289,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ) clear_bit(n_IRQ, q->queue); } -static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ) -{ - return test_bit(n_IRQ, q->queue); -} - static void IRQ_check(struct openpic *opp, struct irq_queue *q) { int irq = -1; -- cgit v0.10.2 From 03d2249ea60818e97475ac529aa183cf130935bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 12 Feb 2015 19:41:31 +0100 Subject: KVM: x86: use MDA for interrupt matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mixed modes, we musn't deliver xAPIC IPIs like x2APIC and vice versa. Instead of preserving the information in apic_send_ipi(), we regain it by converting all destinations into correct MDA in the slow path. This allows easier reasoning about subsequent matching. Our kvm_apic_broadcast() had an interesting design decision: it didn't consider IOxAPIC 0xff as broadcast in x2APIC mode ... everything worked because IOxAPIC can't set that in physical mode and logical mode considered it as a message for first 8 VCPUs. This patch interprets IOxAPIC 0xff as x2APIC broadcast. Signed-off-by: Radim Krčmář Message-Id: <1423766494-26150-2-git-send-email-rkrcmar@redhat.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 44f7b9a..6956974 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -588,15 +588,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) apic_update_ppr(apic); } -static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest) +static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) { - return dest == (apic_x2apic_mode(apic) ? - X2APIC_BROADCAST : APIC_BROADCAST); + if (apic_x2apic_mode(apic)) + return mda == X2APIC_BROADCAST; + + return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST; } -static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest) +static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) { - return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest); + if (kvm_apic_broadcast(apic, mda)) + return true; + + if (apic_x2apic_mode(apic)) + return mda == kvm_apic_id(apic); + + return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic)); } static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) @@ -613,6 +621,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) && (logical_id & mda & 0xffff) != 0; logical_id = GET_APIC_LOGICAL_ID(logical_id); + mda = GET_APIC_DEST_FIELD(mda); switch (kvm_apic_get_reg(apic, APIC_DFR)) { case APIC_DFR_FLAT: @@ -627,10 +636,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) } } +/* KVM APIC implementation has two quirks + * - dest always begins at 0 while xAPIC MDA has offset 24, + * - IOxAPIC messages have to be delivered (directly) to x2APIC. + */ +static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source, + struct kvm_lapic *target) +{ + bool ipi = source != NULL; + bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); + + if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda) + return X2APIC_BROADCAST; + + return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); +} + bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode) { struct kvm_lapic *target = vcpu->arch.apic; + u32 mda = kvm_apic_mda(dest, source, target); apic_debug("target %p, source %p, dest 0x%x, " "dest_mode 0x%x, short_hand 0x%x\n", @@ -640,9 +666,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, switch (short_hand) { case APIC_DEST_NOSHORT: if (dest_mode == APIC_DEST_PHYSICAL) - return kvm_apic_match_physical_addr(target, dest); + return kvm_apic_match_physical_addr(target, mda); else - return kvm_apic_match_logical_addr(target, dest); + return kvm_apic_match_logical_addr(target, mda); case APIC_DEST_SELF: return target == source; case APIC_DEST_ALLINC: -- cgit v0.10.2 From 9ea369b032d87b88f1a47187b51ad4321dea5766 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 12 Feb 2015 19:41:32 +0100 Subject: KVM: x86: fix mixed APIC mode broadcast MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Broadcast allowed only one global APIC mode, but mixed modes are theoretically possible. x2APIC IPI doesn't mean 0xff as broadcast, the rest does. x2APIC broadcasts are accepted by xAPIC. If we take SDM to be logical, even addreses beginning with 0xff should be accepted, but real hardware disagrees. This patch aims for simple code by considering most of real behavior as undefined. Signed-off-by: Radim Krčmář Message-Id: <1423766494-26150-3-git-send-email-rkrcmar@redhat.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8d92e3b..5b1bc97 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -552,7 +552,7 @@ struct kvm_apic_map { struct rcu_head rcu; u8 ldr_bits; /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask, broadcast; + u32 cid_shift, cid_mask, lid_mask; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6956974..d45f00b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -151,7 +151,6 @@ static void recalculate_apic_map(struct kvm *kvm) new->cid_shift = 8; new->cid_mask = 0; new->lid_mask = 0xff; - new->broadcast = APIC_BROADCAST; kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; @@ -163,7 +162,6 @@ static void recalculate_apic_map(struct kvm *kvm) new->ldr_bits = 32; new->cid_shift = 16; new->cid_mask = new->lid_mask = 0xffff; - new->broadcast = X2APIC_BROADCAST; } else if (kvm_apic_get_reg(apic, APIC_LDR)) { if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { @@ -690,6 +688,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic **dst; int i; bool ret = false; + bool x2apic_ipi = src && apic_x2apic_mode(src); *r = -1; @@ -701,15 +700,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, if (irq->shorthand) return false; + if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST)) + return false; + rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); if (!map) goto out; - if (irq->dest_id == map->broadcast) - goto out; - ret = true; if (irq->dest_mode == APIC_DEST_PHYSICAL) { -- cgit v0.10.2 From 3548a259f6990d8cb4f520e6c14f4b45b1f2fd38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 12 Feb 2015 19:41:33 +0100 Subject: KVM: x86: avoid logical_map when it is invalid MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to support mixed modes and the easiest solution is to avoid optimizing those weird and unlikely scenarios. Signed-off-by: Radim Krčmář Message-Id: <1423766494-26150-4-git-send-email-rkrcmar@redhat.com> [Add comment above KVM_APIC_MODE_* defines. - Paolo] Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5b1bc97..9477b27 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -548,8 +548,20 @@ struct kvm_arch_memory_slot { struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; +/* + * We use as the mode the number of bits allocated in the LDR for the + * logical processor ID. It happens that these are all powers of two. + * This makes it is very easy to detect cases where the APICs are + * configured for multiple modes; in that case, we cannot use the map and + * hence cannot use kvm_irq_delivery_to_apic_fast either. + */ +#define KVM_APIC_MODE_XAPIC_CLUSTER 4 +#define KVM_APIC_MODE_XAPIC_FLAT 8 +#define KVM_APIC_MODE_X2APIC 16 + struct kvm_apic_map { struct rcu_head rcu; + u8 mode; u8 ldr_bits; /* fields bellow are used to decode ldr values in different modes */ u32 cid_shift, cid_mask, lid_mask; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d45f00b..fa4bd89 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -133,6 +133,14 @@ static inline int kvm_apic_id(struct kvm_lapic *apic) return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; } +/* The logical map is definitely wrong if we have multiple + * modes at the same time. (Physical map is always right.) + */ +static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) +{ + return !(map->mode & (map->mode - 1)); +} + static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -162,16 +170,19 @@ static void recalculate_apic_map(struct kvm *kvm) new->ldr_bits = 32; new->cid_shift = 16; new->cid_mask = new->lid_mask = 0xffff; + new->mode |= KVM_APIC_MODE_X2APIC; } else if (kvm_apic_get_reg(apic, APIC_LDR)) { if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) { new->cid_shift = 4; new->cid_mask = 0xf; new->lid_mask = 0xf; + new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; } else { new->cid_shift = 8; new->cid_mask = 0; new->lid_mask = 0xff; + new->mode |= KVM_APIC_MODE_XAPIC_FLAT; } } @@ -201,6 +212,10 @@ static void recalculate_apic_map(struct kvm *kvm) if (aid < ARRAY_SIZE(new->phys_map)) new->phys_map[aid] = apic; + + if (!kvm_apic_logical_map_valid(new)); + continue; + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } @@ -718,7 +733,14 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, dst = &map->phys_map[irq->dest_id]; } else { u32 mda = irq->dest_id << (32 - map->ldr_bits); - u16 cid = apic_cluster_id(map, mda); + u16 cid; + + if (!kvm_apic_logical_map_valid(map)) { + ret = false; + goto out; + } + + cid = apic_cluster_id(map, mda); if (cid >= ARRAY_SIZE(map->logical_map)) goto out; -- cgit v0.10.2 From 3b5a5ffa928a3f875b0d5dd284eeb7c322e1688a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 12 Feb 2015 19:41:34 +0100 Subject: KVM: x86: simplify kvm_apic_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit recalculate_apic_map() uses two passes over all VCPUs. This is a relic from time when we selected a global mode in the first pass and set up the optimized table in the second pass (to have a consistent mode). Recent changes made mixed mode unoptimized and we can do it in one pass. Format of logical MDA is a function of the mode, so we encode it in apic_logical_id() and drop obsoleted variables from the struct. Signed-off-by: Radim Krčmář Message-Id: <1423766494-26150-5-git-send-email-rkrcmar@redhat.com> [Add lid_bits temporary in apic_logical_id. - Paolo] Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9477b27..d0cfdb0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -562,9 +562,6 @@ struct kvm_arch_memory_slot { struct kvm_apic_map { struct rcu_head rcu; u8 mode; - u8 ldr_bits; - /* fields bellow are used to decode ldr values in different modes */ - u32 cid_shift, cid_mask, lid_mask; struct kvm_lapic *phys_map[256]; /* first index is cluster id second is cpu id in a cluster */ struct kvm_lapic *logical_map[16][16]; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index fa4bd89..11a0af1 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -141,6 +141,20 @@ static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map) return !(map->mode & (map->mode - 1)); } +static inline void +apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid) +{ + unsigned lid_bits; + + BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4); + BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8); + BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16); + lid_bits = map->mode; + + *cid = dest_id >> lid_bits; + *lid = dest_id & ((1 << lid_bits) - 1); +} + static void recalculate_apic_map(struct kvm *kvm) { struct kvm_apic_map *new, *old = NULL; @@ -154,49 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm) if (!new) goto out; - new->ldr_bits = 8; - /* flat mode is default */ - new->cid_shift = 8; - new->cid_mask = 0; - new->lid_mask = 0xff; - - kvm_for_each_vcpu(i, vcpu, kvm) { - struct kvm_lapic *apic = vcpu->arch.apic; - - if (!kvm_apic_present(vcpu)) - continue; - - if (apic_x2apic_mode(apic)) { - new->ldr_bits = 32; - new->cid_shift = 16; - new->cid_mask = new->lid_mask = 0xffff; - new->mode |= KVM_APIC_MODE_X2APIC; - } else if (kvm_apic_get_reg(apic, APIC_LDR)) { - if (kvm_apic_get_reg(apic, APIC_DFR) == - APIC_DFR_CLUSTER) { - new->cid_shift = 4; - new->cid_mask = 0xf; - new->lid_mask = 0xf; - new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; - } else { - new->cid_shift = 8; - new->cid_mask = 0; - new->lid_mask = 0xff; - new->mode |= KVM_APIC_MODE_XAPIC_FLAT; - } - } - - /* - * All APICs have to be configured in the same mode by an OS. - * We take advatage of this while building logical id loockup - * table. After reset APICs are in software disabled mode, so if - * we find apic with different setting we assume this is the mode - * OS wants all apics to be in; build lookup table accordingly. - */ - if (kvm_apic_sw_enabled(apic)) - break; - } - kvm_for_each_vcpu(i, vcpu, kvm) { struct kvm_lapic *apic = vcpu->arch.apic; u16 cid, lid; @@ -207,15 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm) aid = kvm_apic_id(apic); ldr = kvm_apic_get_reg(apic, APIC_LDR); - cid = apic_cluster_id(new, ldr); - lid = apic_logical_id(new, ldr); if (aid < ARRAY_SIZE(new->phys_map)) new->phys_map[aid] = apic; - if (!kvm_apic_logical_map_valid(new)); + if (apic_x2apic_mode(apic)) { + new->mode |= KVM_APIC_MODE_X2APIC; + } else if (ldr) { + ldr = GET_APIC_LOGICAL_ID(ldr); + if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT) + new->mode |= KVM_APIC_MODE_XAPIC_FLAT; + else + new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER; + } + + if (!kvm_apic_logical_map_valid(new)) continue; + apic_logical_id(new, ldr, &cid, &lid); + if (lid && cid < ARRAY_SIZE(new->logical_map)) new->logical_map[cid][ffs(lid) - 1] = apic; } @@ -732,7 +713,6 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, dst = &map->phys_map[irq->dest_id]; } else { - u32 mda = irq->dest_id << (32 - map->ldr_bits); u16 cid; if (!kvm_apic_logical_map_valid(map)) { @@ -740,15 +720,13 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, goto out; } - cid = apic_cluster_id(map, mda); + apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap); if (cid >= ARRAY_SIZE(map->logical_map)) goto out; dst = map->logical_map[cid]; - bitmap = apic_logical_id(map, mda); - if (irq->delivery_mode == APIC_DM_LOWEST) { int l = -1; for_each_set_bit(i, &bitmap, 16) { diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e284c28..9d28383 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm) return kvm_x86_ops->vm_has_apicv(kvm); } -static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr) -{ - u16 cid; - ldr >>= 32 - map->ldr_bits; - cid = (ldr >> map->cid_shift) & map->cid_mask; - - return cid; -} - -static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr) -{ - ldr >>= (32 - map->ldr_bits); - return ldr & map->lid_mask; -} - static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) { return vcpu->arch.apic->pending_events; -- cgit v0.10.2 From 58d269d8cccc53643f1a0900cfc0940e85ec9691 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 2 Apr 2015 03:10:36 +0300 Subject: KVM: x86: BSP in MSR_IA32_APICBASE is writable After reset, the CPU can change the BSP, which will be used upon INIT. Reset should return the BSP which QEMU asked for, and therefore handled accordingly. To quote: "If the MP protocol has completed and a BSP is chosen, subsequent INITs (either to a specific processor or system wide) do not cause the MP protocol to be repeated." [Intel SDM 8.4.2: MP Initialization Protocol Requirements and Restrictions] Signed-off-by: Nadav Amit Message-Id: <1427933438-12782-3-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 11a0af1..4a6e58a 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1523,8 +1523,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value) return; } - if (!kvm_vcpu_is_bsp(apic->vcpu)) - value &= ~MSR_IA32_APICBASE_BSP; vcpu->arch.apic_base = value; /* update jump label if enable bit changes */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 155534c..ce741b8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_bsp(&svm->vcpu)) + if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; svm_init_osvw(&svm->vcpu); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ddce07e..8c14d6a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4706,7 +4706,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); kvm_set_cr8(&vmx->vcpu, 0); apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE; - if (kvm_vcpu_is_bsp(&vmx->vcpu)) + if (kvm_vcpu_is_reset_bsp(&vmx->vcpu)) apic_base_msr.data |= MSR_IA32_APICBASE_BSP; apic_base_msr.host_initiated = true; kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a578629..f7a78c6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7269,7 +7269,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pv.pv_unhalted = false; vcpu->arch.emulate_ctxt.ops = &emulate_ops; - if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) + if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; else vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 27bd53b..82af5d0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -982,11 +982,16 @@ static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) #endif /* CONFIG_HAVE_KVM_EVENTFD */ #ifdef CONFIG_KVM_APIC_ARCHITECTURE -static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } +static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.apic_base & MSR_IA32_APICBASE_BSP) != 0; +} + bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); #else -- cgit v0.10.2 From ae561edeb421fbc24f97df7af8607c14009c16b2 Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 2 Apr 2015 03:10:37 +0300 Subject: KVM: x86: DR0-DR3 are not clear on reset DR0-DR3 are not cleared as they should during reset and when they are set from userspace. It appears to be caused by c77fb5fe6f03 ("KVM: x86: Allow the guest to run with dirty debug registers"). Force their reload on these situations. Signed-off-by: Nadav Amit Message-Id: <1427933438-12782-4-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index d0cfdb0..9f1d66e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -340,6 +340,7 @@ struct kvm_pmu { enum { KVM_DEBUGREG_BP_ENABLED = 1, KVM_DEBUGREG_WONT_EXIT = 2, + KVM_DEBUGREG_RELOAD = 4, }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f7a78c6..ad3809d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static void kvm_update_dr0123(struct kvm_vcpu *vcpu) +{ + int i; + + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) { + for (i = 0; i < KVM_NR_DB_REGS; i++) + vcpu->arch.eff_db[i] = vcpu->arch.db[i]; + vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD; + } +} + static void kvm_update_dr6(struct kvm_vcpu *vcpu) { if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) @@ -3150,6 +3161,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, return -EINVAL; memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); + kvm_update_dr0123(vcpu); vcpu->arch.dr6 = dbgregs->dr6; kvm_update_dr6(vcpu); vcpu->arch.dr7 = dbgregs->dr7; @@ -6323,6 +6335,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) set_debugreg(vcpu->arch.eff_db[2], 2); set_debugreg(vcpu->arch.eff_db[3], 3); set_debugreg(vcpu->arch.dr6, 6); + vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } trace_kvm_entry(vcpu->vcpu_id); @@ -7104,6 +7117,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) kvm_clear_exception_queue(vcpu); memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); + kvm_update_dr0123(vcpu); vcpu->arch.dr6 = DR6_INIT; kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; -- cgit v0.10.2 From 1119022c71fb11826041787cf0ebffc1a1b0ba5b Mon Sep 17 00:00:00 2001 From: Nadav Amit Date: Thu, 2 Apr 2015 03:10:38 +0300 Subject: KVM: x86: Clear CR2 on VCPU reset CR2 is not cleared as it should after reset. See Intel SDM table named "IA-32 Processor States Following Power-up, Reset, or INIT". Signed-off-by: Nadav Amit Message-Id: <1427933438-12782-5-git-send-email-namit@cs.technion.ac.il> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ad3809d..faf044d 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7123,6 +7123,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); + vcpu->arch.cr2 = 0; + kvm_make_request(KVM_REQ_EVENT, vcpu); vcpu->arch.apf.msr_val = 0; vcpu->arch.st.msr_val = 0; -- cgit v0.10.2 From 3ea3b7fa9af067982f34b6745584558821eea79d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Fri, 3 Apr 2015 15:40:25 +0800 Subject: kvm: mmu: lazy collapse small sptes into large sptes Dirty logging tracks sptes in 4k granularity, meaning that large sptes have to be split. If live migration is successful, the guest in the source machine will be destroyed and large sptes will be created in the destination. However, the guest continues to run in the source machine (for example if live migration fails), small sptes will remain around and cause bad performance. This patch introduce lazy collapsing of small sptes into large sptes. The rmap will be scanned in ioctl context when dirty logging is stopped, dropping those sptes which can be collapsed into a single large-page spte. Later page faults will create the large-page sptes. Reviewed-by: Xiao Guangrong Signed-off-by: Wanpeng Li Message-Id: <1428046825-6905-1-git-send-email-wanpeng.li@linux.intel.com> Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9f1d66e..dea2e7e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -867,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, struct kvm_memory_slot *memslot); +void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, + struct kvm_memory_slot *memslot); void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot); void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cee7592..146f295 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, kvm_flush_remote_tlbs(kvm); } +static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, + unsigned long *rmapp) +{ + u64 *sptep; + struct rmap_iterator iter; + int need_tlb_flush = 0; + pfn_t pfn; + struct kvm_mmu_page *sp; + + for (sptep = rmap_get_first(*rmapp, &iter); sptep;) { + BUG_ON(!(*sptep & PT_PRESENT_MASK)); + + sp = page_header(__pa(sptep)); + pfn = spte_to_pfn(*sptep); + + /* + * Only EPT supported for now; otherwise, one would need to + * find out efficiently whether the guest page tables are + * also using huge pages. + */ + if (sp->role.direct && + !kvm_is_reserved_pfn(pfn) && + PageTransCompound(pfn_to_page(pfn))) { + drop_spte(kvm, sptep); + sptep = rmap_get_first(*rmapp, &iter); + need_tlb_flush = 1; + } else + sptep = rmap_get_next(&iter); + } + + return need_tlb_flush; +} + +void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + bool flush = false; + unsigned long *rmapp; + unsigned long last_index, index; + gfn_t gfn_start, gfn_end; + + spin_lock(&kvm->mmu_lock); + + gfn_start = memslot->base_gfn; + gfn_end = memslot->base_gfn + memslot->npages - 1; + + if (gfn_start >= gfn_end) + goto out; + + rmapp = memslot->arch.rmap[0]; + last_index = gfn_to_index(gfn_end, memslot->base_gfn, + PT_PAGE_TABLE_LEVEL); + + for (index = 0; index <= last_index; ++index, ++rmapp) { + if (*rmapp) + flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp); + + if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { + if (flush) { + kvm_flush_remote_tlbs(kvm); + flush = false; + } + cond_resched_lock(&kvm->mmu_lock); + } + } + + if (flush) + kvm_flush_remote_tlbs(kvm); + +out: + spin_unlock(&kvm->mmu_lock); +} + void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, struct kvm_memory_slot *memslot) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index faf044d..b8cb1d0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7665,6 +7665,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, new = id_to_memslot(kvm->memslots, mem->slot); /* + * Dirty logging tracks sptes in 4k granularity, meaning that large + * sptes have to be split. If live migration is successful, the guest + * in the source machine will be destroyed and large sptes will be + * created in the destination. However, if the guest continues to run + * in the source machine (for example if live migration fails), small + * sptes will remain around and cause bad performance. + * + * Scan sptes if dirty logging has been stopped, dropping those + * which can be collapsed into a single large-page spte. Later + * page faults will create the large-page sptes. + */ + if ((change != KVM_MR_DELETE) && + (old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + kvm_mmu_zap_collapsible_sptes(kvm, new); + + /* * Set up write protection and/or dirty logging for the new slot. * * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have -- cgit v0.10.2 From ca3f0874723fad81d0c701b63ae3a17a408d5f25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 8 Apr 2015 14:16:48 +0200 Subject: KVM: use slowpath for cross page cached accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_write_guest_cached() does not mark all written pages as dirty and code comments in kvm_gfn_to_hva_cache_init() talk about NULL memslot with cross page accesses. Fix all the easy way. The check is '<= 1' to have the same result for 'len = 0' cache anywhere in the page. (nr_pages_needed is 0 on page boundary.) Fixes: 8f964525a121 ("KVM: Allow cross page reads and writes from cached translations.") Signed-off-by: Radim Krčmář Message-Id: <20150408121648.GA3519@potion.brq.redhat.com> Reviewed-by: Wanpeng Li Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aadef26..91a36e2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1637,8 +1637,8 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->generation = slots->generation; ghc->len = len; ghc->memslot = gfn_to_memslot(kvm, start_gfn); - ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, &nr_pages_avail); - if (!kvm_is_error_hva(ghc->hva) && nr_pages_avail >= nr_pages_needed) { + ghc->hva = gfn_to_hva_many(ghc->memslot, start_gfn, NULL); + if (!kvm_is_error_hva(ghc->hva) && nr_pages_needed <= 1) { ghc->hva += offset; } else { /* -- cgit v0.10.2