From 0a00a775d2a49d16efba721c786859844184599c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Wed, 27 Nov 2013 16:35:22 +0200 Subject: KVM: Change maintainer email address My Red Hat email address will stop working soon. Updated if with other address. Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini diff --git a/MAINTAINERS b/MAINTAINERS index f216db8..b4433da 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4891,7 +4891,7 @@ F: include/linux/sunrpc/ F: include/uapi/linux/sunrpc/ KERNEL VIRTUAL MACHINE (KVM) -M: Gleb Natapov +M: Gleb Natapov M: Paolo Bonzini L: kvm@vger.kernel.org W: http://www.linux-kvm.org -- cgit v0.10.2 From 210b1607012cc9034841a393e0591b2c86d9e26c Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 19 Sep 2013 16:26:18 +0200 Subject: KVM: s390: Removed SIE_INTERCEPT_UCONTROL The SIE_INTERCEPT_UCONTROL can be removed by moving the related code from kvm_arch_vcpu_ioctl_run() to vcpu_post_run(). Signed-off-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 569494e..7f47835 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -732,14 +732,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (exit_reason >= 0) { rc = 0; - } else { - if (kvm_is_ucontrol(vcpu->kvm)) { - rc = SIE_INTERCEPT_UCONTROL; - } else { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - trace_kvm_s390_sie_fault(vcpu); - rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } + } else if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + vcpu->run->s390_ucontrol.pgm_code = 0x10; + rc = -EREMOTE; } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); @@ -833,16 +831,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = -EINTR; } -#ifdef CONFIG_KVM_S390_UCONTROL - if (rc == SIE_INTERCEPT_UCONTROL) { - kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; - kvm_run->s390_ucontrol.trans_exc_code = - current->thread.gmap_addr; - kvm_run->s390_ucontrol.pgm_code = 0x10; - rc = 0; - } -#endif - if (rc == -EOPNOTSUPP) { /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index b44912a..aad541f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -27,8 +27,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ extern unsigned long *vfacilities; -/* negativ values are error codes, positive values for internal conditions */ -#define SIE_INTERCEPT_UCONTROL (1<<0) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ -- cgit v0.10.2 From ac5b03420150241dc2db3cb4aa4f58c1e7e4640f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 7 Oct 2013 17:50:22 +0200 Subject: KVM: s390: Removed VIRTIODESCSPACE VIRTIODESCSPACE is completely unused nowadays and thus can be removed without any problems. 
Signed-off-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index aad541f..fcd25b4 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,9 +19,6 @@ #include #include -/* The current code can have up to 256 pages for virtio */ -#define VIRTIODESCSPACE (256ul * 4096ul) - typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ -- cgit v0.10.2 From f092669e743048f50c714a1af7f8e3478d7b9e1b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 9 Oct 2013 14:15:54 +0200 Subject: KVM: s390: Fix access to CR6 in TPI handler The TPI handler currently uses vcpu->run->s.regs.crs[6] to get the current value of CR6. I think this is wrong, because vcpu->run->s.regs.crs is only updated when kvm_arch_vcpu_ioctl_run() drops back to userspace. So let's change the TPI handler to use vcpu->arch.sie_block->gcr[6] instead. Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2440602..b18fe52 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); cc = 0; - inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); if (!inti) goto no_interrupt; cc = 1; -- cgit v0.10.2 From c95221f69dfa5d3696b2b91374cbd7e5897657c5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 9 Oct 2013 16:49:03 +0200 Subject: KVM: s390: Do not set CC3 for EQBS and SQBS The EQBS and SQBS instructions do not set CC3 for invalid channels, but should throw an operation exception instead when not available. Thus they should not be handled by the handle_io_inst() wrapper but drop to userspace instead (which will then inject the operation exception). Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index b18fe52..05537ab 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, - [0x9c] = handle_io_inst, [0xaf] = handle_pfmf, }; @@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) static const intercept_handler_t eb_handlers[256] = { [0x2f] = handle_lctlg, - [0x8a] = handle_io_inst, }; int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From e879892c725217a4af1012f31ae56be762473216 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 6 Nov 2013 15:46:33 +0100 Subject: KVM: s390: Always store status during SIGP STOP_AND_STORE_STATUS The SIGP order STOP_AND_STORE_STATUS is defined to stop a CPU and store its status. However, we only stored the status if the CPU was still running, so make sure that the status is now also stored if the CPU was already stopped. This fixes the problem that the CPU information was not stored correctly in kdump files, rendering them unreadable. 
Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Cc: stable@vger.kernel.org Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f47835..55eb8de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -873,7 +873,7 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from, * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit * KVM_S390_STORE_STATUS_PREFIXED: -> prefix */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) { unsigned char archmode = 1; int prefix; @@ -891,15 +891,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) } else prefix = 0; - /* - * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy - * copying in vcpu load/put. Lets update our copies before we save - * it into the save area - */ - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); - save_access_regs(vcpu->run->s.regs.acrs); - if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), vcpu->arch.guest_fpregs.fprs, 128, prefix)) return -EFAULT; @@ -944,6 +935,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return 0; } +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + /* + * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy + * copying in vcpu load/put. Lets update our copies before we save + * it into the save area + */ + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + + return kvm_s390_store_status_unloaded(vcpu, addr); +} + static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, struct kvm_enable_cap *cap) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index fcd25b4..36f6b18 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -145,8 +145,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, - unsigned long addr); +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); void s390_vcpu_block(struct kvm_vcpu *vcpu); void s390_vcpu_unblock(struct kvm_vcpu *vcpu); void exit_sie(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index bec398c..6805601 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -130,6 +130,7 @@ unlock: static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) { struct kvm_s390_interrupt_info *inti; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; inti = kzalloc(sizeof(*inti), GFP_ATOMIC); if (!inti) @@ -139,6 +140,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) spin_lock_bh(&li->lock); if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { kfree(inti); + if ((action & ACTION_STORE_ON_STOP) != 0) + rc = -ESHUTDOWN; goto out; } list_add_tail(&inti->list, &li->list); @@ -150,7 +153,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) out: spin_unlock_bh(&li->lock); - return SIGP_CC_ORDER_CODE_ACCEPTED; + return rc; } static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) @@ -174,6 +177,16 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int 
action) unlock: spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + + if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + /* If the CPU has already been stopped, we still have + * to save the status when doing stop-and-store. This + * has to be done after unlocking all spinlocks. */ + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + rc = kvm_s390_store_status_unloaded(dst_vcpu, + KVM_S390_STORE_STATUS_NOADDR); + } + return rc; } -- cgit v0.10.2 From 178bd789775ab29233e0553155253ec8d73af71f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Nov 2013 20:28:18 +0100 Subject: KVM: s390: Fix clock comparator field for STORE STATUS Only the most 7 significant bytes of the clock comparator must be saved to the status area, and the byte at offset 304 has to be zero. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 55eb8de..1bb1dda 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -877,6 +877,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) { unsigned char archmode = 1; int prefix; + u64 clkcomp; if (addr == KVM_S390_STORE_STATUS_NOADDR) { if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) @@ -920,8 +921,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) &vcpu->arch.sie_block->cputm, 8, prefix)) return -EFAULT; + clkcomp = vcpu->arch.sie_block->ckc >> 8; if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), - &vcpu->arch.sie_block->ckc, 8, prefix)) + &clkcomp, 8, prefix)) return -EFAULT; if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), -- cgit v0.10.2 From 743db27c526e0f31cc507959d662e97e2048a86f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 11 Nov 2013 13:56:47 +0100 Subject: KVM: s390: fix diagnose code extraction The diagnose code to be used is the contents of the base register (if not zero), plus the displacement. The current code ignores the base register contents. So let's fix that... Reviewed-by: Cornelia Huck Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 78d967f..5ff29be 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { - int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); -- cgit v0.10.2 From 00e9e435f97b409db8986f9cd35d126ae2d02a0c Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Nov 2013 20:48:51 +0100 Subject: KVM: s390: Add SIGP store-status-at-address order The STORE STATUS AT ADDRESS order of SIGP was still missing. Now it is supported, using the common kvm_s390_store_status() function. 
Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 6805601..c137ed3 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -275,6 +275,37 @@ out_fi: return rc; } +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, + u32 addr, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + int flags; + int rc; + + if (cpu_id < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + spin_lock_bh(&dst_vcpu->arch.local_int.lock); + flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); + spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + if (!(flags & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } + + addr &= 0x7ffffe00; + rc = kvm_s390_store_status_unloaded(dst_vcpu, addr); + if (rc == -EFAULT) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + rc = SIGP_CC_STATUS_STORED; + } + return rc; +} + static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { @@ -379,6 +410,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | ACTION_STOP_ON_STOP); break; + case SIGP_STORE_STATUS_AT_ADDRESS: + rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; case SIGP_SET_ARCHITECTURE: vcpu->stat.instruction_sigp_arch++; rc = __sigp_set_arch(vcpu, parameter); diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 0c991c6..3db76b2 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity, {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \ {SIGP_SET_ARCHITECTURE, "set architecture"}, \ {SIGP_SET_PREFIX, "set prefix"}, \ + {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \ {SIGP_SENSE_RUNNING, "sense running"}, \ {SIGP_RESTART, "restart"} -- cgit v0.10.2 From 36daca9bb36f0395755817d1b0c45ab6fbf0441b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 14 Nov 2013 11:08:20 +0100 Subject: KVM: s390: Removed kvm_s390_inject_sigp_stop() The function kvm_s390_inject_sigp_stop() as been unused since the removal of the old mmu reload code and thus can be removed safely. 
Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 36f6b18..095cf51 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -128,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm, int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c137ed3..c370058 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -190,12 +190,6 @@ unlock: return rc; } -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - return __inject_sigp_stop(li, action); -} - static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) { int rc; -- cgit v0.10.2 From 4fda342cc7f577599c53fd27b99c953c7b1da18a Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 19 Nov 2013 14:59:12 -0500 Subject: arm/arm64: kvm: Use virt_to_idmap instead of virt_to_phys for idmap mappings KVM initialisation fails on architectures implementing virt_to_idmap() because virt_to_phys() on such architectures won't fetch you the correct idmap page. So update the KVM ARM code to use the virt_to_idmap() to fix the issue. Since the KVM code is shared between arm and arm64, we create kvm_virt_to_phys() and handle the redirection in respective headers. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Catalin Marinas Signed-off-by: Santosh Shilimkar Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 77de4a4..2d122ad 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 5809069..659db0e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -916,9 +916,9 @@ int kvm_mmu_init(void) { int err; - hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); - hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); - hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { /* @@ -945,7 +945,7 @@ int kvm_mmu_init(void) */ kvm_flush_dcache_to_poc(init_bounce_page, len); - phys_base = virt_to_phys(init_bounce_page); + phys_base = kvm_virt_to_phys(init_bounce_page); hyp_idmap_vector += phys_base - hyp_idmap_start; hyp_idmap_start = phys_base; hyp_idmap_end = phys_base + len; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 680f74e..7f1f940 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) 
#endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ -- cgit v0.10.2 From d9101fca3d572b8675b7fe3f4ef9f6f99bbf4364 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 13 Nov 2013 11:15:02 +0100 Subject: KVM: s390: diagnose call documentation Add some further documentation on the DIAGNOSE calls we support. Reviewed-by: Thomas Huth Signed-off-by: Cornelia Huck diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index 022198e..d922d73 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -17,6 +17,9 @@ S390: S390 uses diagnose instruction as hypercall (0x500) along with hypercall number in R1. + For further information on the S390 diagnose call as supported by KVM, + refer to Documentation/virtual/kvm/s390-diag.txt. + PowerPC: It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers. Return value is placed in R3. diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt new file mode 100644 index 0000000..f1de4fb --- /dev/null +++ b/Documentation/virtual/kvm/s390-diag.txt @@ -0,0 +1,80 @@ +The s390 DIAGNOSE call on KVM +============================= + +KVM on s390 supports the DIAGNOSE call for making hypercalls, both for +native hypercalls and for selected hypercalls found on other s390 +hypervisors. + +Note that bits are numbered as by the usual s390 convention (most significant +bit on the left). + + +General remarks +--------------- + +DIAGNOSE calls by the guest cause a mandatory intercept. This implies +all supported DIAGNOSE calls need to be handled by either KVM or its +userspace. + +All DIAGNOSE calls supported by KVM use the RS-a format: + +-------------------------------------- +| '83' | R1 | R3 | B2 | D2 | +-------------------------------------- +0 8 12 16 20 31 + +The second-operand address (obtained by the base/displacement calculation) +is not used to address data. Instead, bits 48-63 of this address specify +the function code, and bits 0-47 are ignored. + +The supported DIAGNOSE function codes vary by the userspace used. For +DIAGNOSE function codes not specific to KVM, please refer to the +documentation for the s390 hypervisors defining them. + + +DIAGNOSE function code 'X'500' - KVM virtio functions +----------------------------------------------------- + +If the function code specifies 0x500, various virtio-related functions +are performed. + +General register 1 contains the virtio subfunction code. Supported +virtio subfunctions depend on KVM's userspace. Generally, userspace +provides either s390-virtio (subcodes 0-2) or virtio-ccw (subcode 3). + +Upon completion of the DIAGNOSE instruction, general register 2 contains +the function's return code, which is either a return code or a subcode +specific value. + +Subcode 0 - s390-virtio notification and early console printk + Handled by userspace. + +Subcode 1 - s390-virtio reset + Handled by userspace. + +Subcode 2 - s390-virtio set status + Handled by userspace. + +Subcode 3 - virtio-ccw notification + Handled by either userspace or KVM (ioeventfd case). + + General register 2 contains a subchannel-identification word denoting + the subchannel of the virtio-ccw proxy device to be notified. + + General register 3 contains the number of the virtqueue to be notified. + + General register 4 contains a 64bit identifier for KVM usage (the + kvm_io_bus cookie). If general register 4 does not contain a valid + identifier, it is ignored. 
+ + After completion of the DIAGNOSE call, general register 2 may contain + a 64bit identifier (in the kvm_io_bus cookie case). + + See also the virtio standard for a discussion of this hypercall. + + +DIAGNOSE function code 'X'501 - KVM breakpoint +---------------------------------------------- + +If the function code specifies 0x501, breakpoint functions may be performed. +This function code is handled by userspace. -- cgit v0.10.2 From 949c007acd8b6887cf5f3ac86512a7b12fa245dc Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 26 Nov 2013 12:27:16 +0100 Subject: KVM: s390: Use helper function to set CC in SIGP handler We've got a helper function for setting the condition code now, so let's use it in the SIGP handler, too. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c370058..bc0d85a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -435,7 +435,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) if (rc < 0) return rc; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, rc); return 0; } -- cgit v0.10.2 From b13d3580ee47ba3b2814e90b8a9b8241f7a4ba83 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 21 Nov 2013 16:01:48 +0100 Subject: KVM: s390: Add the SIGP order CONDITIONAL EMERGENCY SIGNAL This patch adds the missing SIGP order "conditional emergency signal" by calling the "emergency signal" SIGP handler if the required conditions are met. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 5a87d16..c002cd5 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -12,6 +12,7 @@ #define SIGP_SET_PREFIX 13 #define SIGP_STORE_STATUS_AT_ADDRESS 14 #define SIGP_SET_ARCHITECTURE 18 +#define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 /* SIGP condition codes */ diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index bc0d85a..eee1402 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -1,7 +1,7 @@ /* * handling interprocessor communication * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -89,6 +89,37 @@ unlock: return rc; } +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, + u16 asn, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; + u16 p_asn, s_asn; + psw_t *psw; + u32 flags; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); + psw = &dst_vcpu->arch.sie_block->gpsw; + p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ + s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ + + /* Deliver the emergency signal? 
*/ + if (!(flags & CPUSTAT_STOPPED) + || (psw->mask & psw_int_mask) != psw_int_mask + || ((flags & CPUSTAT_WAIT) && psw->addr != 0) + || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { + return __sigp_emergency(vcpu, cpu_addr); + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } +} + static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; @@ -417,6 +448,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->run->s.regs.gprs[r1]); break; + case SIGP_COND_EMERGENCY_SIGNAL: + rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; rc = __sigp_sense_running(vcpu, cpu_addr, -- cgit v0.10.2 From cc92d6dea11cd43842e20cd05c066963de586417 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 27 Nov 2013 11:47:10 +0100 Subject: KVM: s390: Reworked SIGP RESTART order When SIGP RESTART detected an illegal CPU address, there is no need to drop to userspace, we can return CC3 to the guest directly instead. Also renamed __sigp_restart() to sigp_check_callable() (since this is a better description of what the function is really doing) and moved a string specific to RESTART to the calling place instead, so that this function gets usable by other SIGP orders, too. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index eee1402..509547d 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -363,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, return rc; } -static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) +/* Test whether the destination CPU is available and not busy */ +static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; @@ -382,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) spin_lock_bh(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; - else - VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace", - cpu_addr); spin_unlock_bh(&li->lock); out: spin_unlock(&fi->lock); @@ -459,10 +457,15 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - rc = __sigp_restart(vcpu, cpu_addr); - if (rc == SIGP_CC_BUSY) - break; - /* user space must know about restart */ + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { + VCPU_EVENT(vcpu, 4, + "sigp restart %x to handle userspace", + cpu_addr); + /* user space must know about restart */ + rc = -EOPNOTSUPP; + } + break; default: return -EOPNOTSUPP; } -- cgit v0.10.2 From 58bc33b2b700f8524772f3fc20272da2187060c8 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 3 Dec 2013 12:54:55 +0100 Subject: KVM: s390: SIGP START has to report BUSY while stopping a CPU Just like the RESTART order, the START order also has to report BUSY while a STOP request is pending, to avoid that the START might be ignored due to a race condition between the STOP and the START order. 
Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index c002cd5..d091aa1 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -5,6 +5,7 @@ #define SIGP_SENSE 1 #define SIGP_EXTERNAL_CALL 2 #define SIGP_EMERGENCY_SIGNAL 3 +#define SIGP_START 4 #define SIGP_STOP 5 #define SIGP_RESTART 6 #define SIGP_STOP_AND_STORE_STATUS 9 diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 509547d..87c2b3a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -455,6 +455,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_sense_running(vcpu, cpu_addr, &vcpu->run->s.regs.gprs[r1]); break; + case SIGP_START: + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) + rc = -EOPNOTSUPP; /* Handle START in user space */ + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; rc = sigp_check_callable(vcpu, cpu_addr); -- cgit v0.10.2 From ff1f3cb4b3ac5d039f02679f34cb1498d110d241 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 9 Dec 2013 18:30:01 +0100 Subject: KVM: s390: ioeventfd: ignore leftmost bits The diagnose 500 subcode 3 contains the 32 bit subchannel id in bits 32-63 (counting from the left). As for other I/O instructions, bits 0-31 should be ignored and thus not be passed to kvm_io_bus_write_cookie(). This fixes a bug where the guest passed non-zero bits 0-31 which the host tried to interpret, leading to ioeventfd notification failures. Cc: stable@vger.kernel.org Signed-off-by: Dominik Dingel Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5ff29be..8216c0e 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 4 contains the index on the bus (optionally) */ ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], + vcpu->run->s.regs.gprs[2] & 0xffffffff, 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); -- cgit v0.10.2 From 2961e8764faad212234e93907a370a7c36a67da5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 25 Nov 2013 15:37:13 +0200 Subject: KVM: VMX: shadow VM_(ENTRY|EXIT)_CONTROLS vmcs field VM_(ENTRY|EXIT)_CONTROLS vmcs fields are read/written on each guest entry but most times it can be avoided since values do not changes. Keep fields copy in memory to avoid unnecessary reads from vmcs. Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b2fe1c2..1024689 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -418,6 +418,8 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. 
For a nested @@ -1326,6 +1328,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) +{ + vmcs_write32(VM_ENTRY_CONTROLS, val); + vmx->vm_entry_controls_shadow = val; +} + +static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) +{ + if (vmx->vm_entry_controls_shadow != val) + vm_entry_controls_init(vmx, val); +} + +static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) +{ + return vmx->vm_entry_controls_shadow; +} + + +static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); +} + +static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); +} + +static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) +{ + vmcs_write32(VM_EXIT_CONTROLS, val); + vmx->vm_exit_controls_shadow = val; +} + +static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) +{ + if (vmx->vm_exit_controls_shadow != val) + vm_exit_controls_init(vmx, val); +} + +static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) +{ + return vmx->vm_exit_controls_shadow; +} + + +static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); +} + +static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); +} + static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) { vmx->segment_cache.bitmask = 0; @@ -1410,11 +1468,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } -static void clear_atomic_switch_msr_special(unsigned long entry, - unsigned long exit) +static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) { - vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); - vmcs_clear_bits(VM_EXIT_CONTROLS, exit); + vm_entry_controls_clearbit(vmx, entry); + vm_exit_controls_clearbit(vmx, exit); } static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) @@ -1425,14 +1483,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) switch (msr) { case MSR_EFER: if (cpu_has_load_ia32_efer) { - clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, + clear_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER); return; } break; case MSR_CORE_PERF_GLOBAL_CTRL: if (cpu_has_load_perf_global_ctrl) { - clear_atomic_switch_msr_special( + clear_atomic_switch_msr_special(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); return; @@ -1453,14 +1512,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); } -static void add_atomic_switch_msr_special(unsigned long entry, - unsigned long exit, unsigned long guest_val_vmcs, - unsigned long host_val_vmcs, u64 guest_val, u64 host_val) +static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit, + unsigned long guest_val_vmcs, unsigned long host_val_vmcs, + u64 guest_val, u64 host_val) { vmcs_write64(guest_val_vmcs, guest_val); vmcs_write64(host_val_vmcs, host_val); - vmcs_set_bits(VM_ENTRY_CONTROLS, entry); - vmcs_set_bits(VM_EXIT_CONTROLS, exit); + vm_entry_controls_setbit(vmx, entry); + 
vm_exit_controls_setbit(vmx, exit); } static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, @@ -1472,7 +1532,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, switch (msr) { case MSR_EFER: if (cpu_has_load_ia32_efer) { - add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, + add_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER, GUEST_IA32_EFER, HOST_IA32_EFER, @@ -1482,7 +1543,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, break; case MSR_CORE_PERF_GLOBAL_CTRL: if (cpu_has_load_perf_global_ctrl) { - add_atomic_switch_msr_special( + add_atomic_switch_msr_special(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, GUEST_IA32_PERF_GLOBAL_CTRL, @@ -3182,14 +3243,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) vmx_load_host_state(to_vmx(vcpu)); vcpu->arch.efer = efer; if (efer & EFER_LMA) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) | - VM_ENTRY_IA32E_MODE); + vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); msr->data = efer; } else { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) & - ~VM_ENTRY_IA32E_MODE); + vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); msr->data = efer & ~EFER_LME; } @@ -3217,9 +3274,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) static void exit_lmode(struct kvm_vcpu *vcpu) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) - & ~VM_ENTRY_IA32E_MODE); + vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); } @@ -4346,10 +4401,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + + vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); /* 22.2.1, 20.8.1 */ - vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); + vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); set_cr4_guest_host_mask(vmx); @@ -7759,12 +7815,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exit_control = vmcs_config.vmexit_ctrl; if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; - vmcs_write32(VM_EXIT_CONTROLS, exit_control); + vm_exit_controls_init(vmx, exit_control); /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are * emulated by vmx_set_efer(), below. 
*/ - vmcs_write32(VM_ENTRY_CONTROLS, + vm_entry_controls_init(vmx, (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); @@ -8186,7 +8242,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->vm_entry_controls = (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | - (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); + (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ @@ -8390,6 +8446,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) vcpu->cpu = cpu; put_cpu(); + vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); + vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vmx_segment_cache_clear(vmx); /* if no vmcs02 cache requested, remove the one we used */ -- cgit v0.10.2 From 6dfacadd5858882eee1983995854d4e1fb1b966e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 4 Dec 2013 08:58:54 +0100 Subject: KVM: nVMX: Add support for activity state HLT We can easily emulate the HLT activity state for L1: If it decides that L2 shall be halted on entry, just invoke the normal emulation of halt after switching to L2. We do not depend on specific host features to provide this, so we can expose the capability unconditionally. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 966502d..2067264 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -100,6 +100,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 +#define VMX_MISC_ACTIVITY_HLT 0x00000040 /* VMCS Encodings */ enum vmcs_field { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1024689..f90320b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2340,6 +2340,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT; nested_vmx_misc_high = 0; } @@ -7938,7 +7939,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } - if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { + if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } @@ -8067,6 +8069,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) prepare_vmcs02(vcpu, vmcs12); + if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) + return kvm_emulate_halt(vcpu); + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet -- cgit v0.10.2 From c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 13 Dec 2013 15:07:21 +0900 Subject: KVM: Use cond_resched() directly and remove useless kvm_resched() Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers to make kvm preemptible"), the remaining stuff in this function is a simple cond_resched() call with an extra need_resched() check which was there to avoid dropping VCPUs unnecessarily. Now it is meaningless. 
Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 985bf80..53f44be 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -702,7 +702,7 @@ again: out: srcu_read_unlock(&vcpu->kvm->srcu, idx); if (r > 0) { - kvm_resched(vcpu); + cond_resched(); idx = srcu_read_lock(&vcpu->kvm->srcu); goto again; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 072287f..3fa99b2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1348,7 +1348,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvm_guest_exit(); preempt_enable(); - kvm_resched(vcpu); + cond_resched(); spin_lock(&vc->lock); now = get_tb(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21ef1ba..4fb1ee6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6125,7 +6125,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_resched(vcpu); + cond_resched(); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9523d2a..4ecf107 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -583,7 +583,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); bool kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -void kvm_resched(struct kvm_vcpu *vcpu); void kvm_load_guest_fpu(struct kvm_vcpu *vcpu); void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a0aa84b..03c97e7 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1710,14 +1710,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) EXPORT_SYMBOL_GPL(kvm_vcpu_kick); #endif /* !CONFIG_S390 */ -void kvm_resched(struct kvm_vcpu *vcpu) -{ - if (!need_resched()) - return; - cond_resched(); -} -EXPORT_SYMBOL_GPL(kvm_resched); - bool kvm_vcpu_yield_to(struct kvm_vcpu *target) { struct pid *pid; -- cgit v0.10.2 From 9357d93952143b178fa9d1f5095b8f273b01a1f1 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 13 Dec 2013 15:08:38 +0900 Subject: KVM: x86: Add comment on vcpu_enter_guest()'s return value Giving proper names to the 0 and 1 was once suggested. But since 0 is returned to the userspace, giving it another name can introduce extra confusion. This patch just explains the meanings instead. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4fb1ee6..1dc0359 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5865,6 +5865,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_apic_update_tmr(vcpu, tmr); } +/* + * Returns 1 to let __vcpu_run() continue the guest execution loop without + * exiting to the userspace. Otherwise, the value will be returned to the + * userspace. + */ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; -- cgit v0.10.2 From beb11fc71370bb49c58d7454d5d9c5a00a7cdb4b Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 21:42:24 +0530 Subject: KVM: Documentation: Fix typo for KVM_ARM_VCPU_INIT ioctl Fix minor typo in "Parameters:" of KVM_ARM_VCPU_INIT documentation. 
Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a30035d..aad3244 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2327,7 +2327,7 @@ current state. "addr" is ignored. Capability: basic Architectures: arm, arm64 Type: vcpu ioctl -Parameters: struct struct kvm_vcpu_init (in) +Parameters: struct kvm_vcpu_init (in) Returns: 0 on success; -1 on error Errors:  EINVAL:    the target is unknown, or the combination of features is invalid. -- cgit v0.10.2 From ca3f257ae570c37d3da30a524a2f61ce602c6c99 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Dec 2013 12:55:46 +0100 Subject: KVM: nVMX: Support direct APIC access from L2 It's a pathological case, but still a valid one: If L1 disables APIC virtualization and also allows L2 to directly write to the APIC page, we have to forcibly enable APIC virtualization while in L2 if the in-kernel APIC is in use. This allows to run the direct interrupt test case in the vmx unit test without x2APIC. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f90320b..31eb577 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7763,6 +7763,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) else vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->nested.apic_access_page)); + } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { + exec_control |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmcs_write64(APIC_ACCESS_ADDR, + page_to_phys(vcpu->kvm->arch.apic_access_page)); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); -- cgit v0.10.2 From 4c4d563b49830a66537c3f51070dad74d7a81d3a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 18 Dec 2013 19:16:24 +0100 Subject: KVM: VMX: Do not skip the instruction if handle_dr injects a fault If kvm_get_dr or kvm_set_dr reports that it raised a fault, we must not advance the instruction pointer. Otherwise the exception will hit the wrong instruction. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 31eb577..9cc5484 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5137,10 +5137,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) reg = DEBUG_REG_ACCESS_REG(exit_qualification); if (exit_qualification & TYPE_MOV_FROM_DR) { unsigned long val; - if (!kvm_get_dr(vcpu, dr, &val)) - kvm_register_write(vcpu, reg, val); + + if (kvm_get_dr(vcpu, dr, &val)) + return 1; + kvm_register_write(vcpu, reg, val); } else - kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); + if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg])) + return 1; + skip_emulated_instruction(vcpu); return 1; } -- cgit v0.10.2 From 989c6b34f6a9480e397b170cc62237e89bf4fdb9 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 19 Dec 2013 15:28:51 -0200 Subject: KVM: MMU: handle invalid root_hpa at __direct_map It is possible for __direct_map to be called on invalid root_hpa (-1), two examples: 1) try_async_pf -> can_do_async_pf -> vmx_interrupt_allowed -> nested_vmx_vmexit 2) vmx_handle_exit -> vmx_interrupt_allowed -> nested_vmx_vmexit Then to load_vmcs12_host_state and kvm_mmu_reset_context. Check for this possibility, let fault exception be regenerated. 
BZ: https://bugzilla.redhat.com/show_bug.cgi?id=924916 Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 40772ef..31a5702 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int emulate = 0; gfn_t pseudo_gfn; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return 0; + for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, -- cgit v0.10.2 From 478a8237f656d86d25b3e4e4bf3c48f590156294 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Nov 2013 17:43:19 -0800 Subject: arm: KVM: Don't return PSCI_INVAL if waitqueue is inactive The current KVM implementation of PSCI returns INVALID_PARAMETERS if the waitqueue for the corresponding CPU is not active. This does not seem correct, since KVM should not care what the specific thread is doing, for example, user space may not have called KVM_RUN on this VCPU yet or the thread may be busy looping to user space because it received a signal; this is really up to the user space implementation. Instead we should check specifically that the CPU is marked as being turned off, regardless of the VCPU thread state, and if it is, we shall simply clear the pause flag on the CPU and wake up the thread if it happens to be blocked for us. Further, the implementation seems to be racy when executing multiple VCPU threads. There really isn't a reasonable user space programming scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init before turning on the boot CPU. Therefore, set the pause flag on the vcpu at VCPU init time (which can reasonably be expected to be completed for all CPUs by user space before running any VCPUs) and clear both this flag and the feature (in case the feature can somehow get set again in the future) and ping the waitqueue on turning on a VCPU using PSCI. Reported-by: Peter Maydell Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2a700e0..151eb91 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -478,15 +478,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Handle the "start in power-off" case by calling into the - * PSCI code. - */ - if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { - *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; - kvm_psci_call(vcpu); - } - return 0; } @@ -700,6 +691,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Handle the "start in power-off" case by marking the VCPU as paused. 
+ */ + if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu->arch.pause = true; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -713,8 +722,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&init, argp, sizeof(init))) return -EFAULT; - return kvm_vcpu_set_target(vcpu, &init); - + return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 0881bf1..448f60e 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) } } - if (!vcpu) + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu || !vcpu->arch.pause) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - wq = kvm_arch_vcpu_wq(vcpu); - if (!waitqueue_active(wq)) - return KVM_PSCI_RET_INVAL; - kvm_reset_vcpu(vcpu); /* Gracefully handle Thumb2 entry point */ @@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ + wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); return KVM_PSCI_RET_SUCCESS; -- cgit v0.10.2 From a1a64387adeeba7a34ce06f2774e81f496ee803b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 16 Nov 2013 10:51:25 -0800 Subject: arm/arm64: KVM: arch_timer: Initialize cntvoff at kvm_init Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs at the first time because that will overwrite any potentially restored values from user space. Cc: Andre Przywara Acked-by: Marc Zynger Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2a700e0..13205bd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -137,6 +137,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_timer_init(kvm); + /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 685fc72..81e9481 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1409,7 +1409,6 @@ int kvm_vgic_init(struct kvm *kvm) for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4) vgic_set_target_reg(kvm, 0, i); - kvm_timer_init(kvm); kvm->arch.vgic.ready = true; out: mutex_unlock(&kvm->lock); -- cgit v0.10.2 From 39735a3a390431bcf60f9174b7d64f787fd6afa9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 13 Dec 2013 14:23:26 +0100 Subject: ARM/KVM: save and restore generic timer registers For migration to work we need to save (and later restore) the state of each core's virtual generic timer. Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export the three needed registers (control, counter, compare value). Though they live in cp15 space, we don't use the existing list, since they need special accessor functions and the arch timer is optional. 
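A minimal userspace sketch of how migration code might consume these register IDs, assuming an already created VCPU file descriptor (the vcpu_fd parameter below is a placeholder) and uapi headers that carry the new defines; it only reads the 64-bit counter, while KVM_REG_ARM_TIMER_CTL and KVM_REG_ARM_TIMER_CVAL are handled the same way with buffers sized according to KVM_REG_SIZE(id):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Read the guest's virtual timer counter through the one_reg interface so
 * it can later be written back with KVM_SET_ONE_REG on the target host.
 * Returns 0 on success, -1 on failure (errno is set by ioctl). */
static int save_vtimer_count(int vcpu_fd, uint64_t *cnt)
{
        struct kvm_one_reg reg = {
                .id   = KVM_REG_ARM_TIMER_CNT,
                .addr = (uintptr_t)cnt,
        };

        return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0 ? -1 : 0;
}

Restoring on the destination side is symmetric: write the saved value back with KVM_SET_ONE_REG before the VCPU first runs, so that the counter offset (cntvoff) is recomputed from it.
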
Acked-by: Marc Zynger Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a6f6db..098f7dd 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c498b60..835b867 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -119,6 +119,26 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +#define ARM_CP15_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) + +#define __ARM_CP15_REG(op1,crn,crm,op2) \ + (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \ + ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \ + ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \ + ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \ + ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2)) + +#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32) + +#define __ARM_CP15_REG64(op1,crm) \ + (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) +#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) + +#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) + /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 20f8d97..2786eae 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + return 0; +} + +static bool is_timer_reg(u64 index) +{ + return false; +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = 
kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + static unsigned long num_core_regs(void) { return sizeof(struct kvm_regs) / sizeof(u32); @@ -121,7 +198,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); + return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + NUM_TIMER_REGS; } /** @@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { if (put_user(core_reg | i, uindices)) @@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_coproc_indices(vcpu, uindices); } @@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_coproc_get_reg(vcpu, reg); } @@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_coproc_set_reg(vcpu, reg); } diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5031f42..7c25ca8 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -129,6 +129,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 +#define ARM64_SYS_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \ + KVM_REG_ARM64_SYSREG_ ## n ## _MASK) + +#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \ + ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ARM64_SYS_REG(...) 
(__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) + +#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) +#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index c2e1ef4..5081e80 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info) enable_percpu_irq(host_vtimer_irq, 0); } +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + timer->cntv_ctl = value; + break; + case KVM_REG_ARM_TIMER_CNT: + vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value; + break; + case KVM_REG_ARM_TIMER_CVAL: + timer->cntv_cval = value; + break; + default: + return -1; + } + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + return timer->cntv_ctl; + case KVM_REG_ARM_TIMER_CNT: + return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + case KVM_REG_ARM_TIMER_CVAL: + return timer->cntv_cval; + } + return (u64)-1; +} static int kvm_timer_cpu_notify(struct notifier_block *self, unsigned long action, void *cpu) -- cgit v0.10.2 From e1ba0207a1b3714bb3f000e506285ae5123cdfa7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:55 -0700 Subject: ARM: KVM: Allow creating the VGIC after VCPUs Rework the VGIC initialization slightly to allow initialization of the vgic cpu-specific state even if the irqchip (the VGIC) hasn't been created by user space yet. This is safe, because the vgic data structures are already allocated when the CPU is allocated if VGIC support is compiled into the kernel. Further, the init process does not depend on any other information and the sacrifice is a slight performance degradation for creating VMs in the no-VGIC case. The reason is that the new device control API doesn't mandate creating the VGIC before creating the VCPU and it is unreasonable to require user space to create the VGIC before creating the VCPUs. At the same time move the irqchip_in_kernel check out of kvm_vcpu_first_run_init and into the init function to make the per-vcpu and global init functions symmetric and add comments on the exported functions making it a bit easier to understand the init flow by only looking at vgic.c. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 13205bd..c9fe9d7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -464,6 +464,8 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + int ret; + if (likely(vcpu->arch.has_run_once)) return 0; @@ -473,9 +475,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Initialize the VGIC before running a vcpu the first time on * this VM. 
*/ - if (irqchip_in_kernel(vcpu->kvm) && - unlikely(!vgic_initialized(vcpu->kvm))) { - int ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_initialized(vcpu->kvm))) { + ret = kvm_vgic_init(vcpu->kvm); if (ret) return ret; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 81e9481..5e9df47 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1243,15 +1243,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data) return IRQ_HANDLED; } +/** + * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state + * @vcpu: pointer to the vcpu struct + * + * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to + * this vcpu and enable the VGIC for this VCPU + */ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; int i; - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - if (vcpu->vcpu_id >= VGIC_MAX_CPUS) return -EBUSY; @@ -1383,10 +1387,22 @@ out: return ret; } +/** + * kvm_vgic_init - Initialize global VGIC state before running any VCPUs + * @kvm: pointer to the kvm struct + * + * Map the virtual CPU interface into the VM before running any VCPUs. We + * can't do this at creation time, because user space must first set the + * virtual CPU interface address in the guest physical address space. Also + * initialize the ITARGETSRn regs to 0 on the emulated distributor. + */ int kvm_vgic_init(struct kvm *kvm) { int ret = 0, i; + if (!irqchip_in_kernel(kvm)) + return 0; + mutex_lock(&kvm->lock); if (vgic_initialized(kvm)) -- cgit v0.10.2 From 7330672befe6269e575f79b924a7068b26c144b4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 17:29:18 +0100 Subject: KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC Support creating the ARM VGIC device through the KVM_CREATE_DEVICE ioctl, which can then later be leveraged to use the KVM_{GET/SET}_DEVICE_ATTR, which is useful both for setting addresses in a more generic API than the ARM-specific one and is useful for save/restore of VGIC state. Adds KVM_CAP_DEVICE_CTRL to ARM capabilities. Note that we change the check for creating a VGIC from bailing out if any VCPUs were created, to bailing out if any VCPUs were ever run. This is an important distinction that shouldn't break anything, but allows creating the VGIC after the VCPUs have been created. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt new file mode 100644 index 0000000..38f27f7 --- /dev/null +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -0,0 +1,10 @@ +ARM Virtual Generic Interrupt Controller (VGIC) +=============================================== + +Device types supported: + KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 + +Only one VGIC instance may be instantiated through either this API or the +legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt +controller, requiring emulated user-space devices to inject interrupts to the +VGIC instead of directly to CPUs. 
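As a hedged illustration of the flow described above (not part of the patch), user space could
instantiate the VGIC through the device control API as follows; vm_fd and the error handling are
assumptions of the example:

  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /* Create the in-kernel VGIC via KVM_CREATE_DEVICE and return its fd. */
  static int create_vgic(int vm_fd)
  {
          struct kvm_create_device cd = {
                  .type = KVM_DEV_TYPE_ARM_VGIC_V2,
          };

          if (ioctl(vm_fd, KVM_CREATE_DEVICE, &cd) < 0)
                  return -1;      /* e.g. EEXIST if a VGIC already exists */

          return cd.fd;   /* used with KVM_{SET,GET,HAS}_DEVICE_ATTR later */
  }
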
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c9fe9d7..cc7c41a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -190,6 +190,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: r = vgic_present; break; + case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4ecf107..1f46f66 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1075,6 +1075,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp); extern struct kvm_device_ops kvm_mpic_ops; extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_vfio_ops; +extern struct kvm_device_ops kvm_arm_vgic_v2_ops; #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 902f124..b647c29 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -853,6 +853,7 @@ struct kvm_device_attr { #define KVM_DEV_VFIO_GROUP 1 #define KVM_DEV_VFIO_GROUP_ADD 1 #define KVM_DEV_VFIO_GROUP_DEL 2 +#define KVM_DEV_TYPE_ARM_VGIC_V2 5 /* * ioctls for VM fds diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 5e9df47..b15d6c1 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1433,20 +1433,45 @@ out: int kvm_vgic_create(struct kvm *kvm) { - int ret = 0; + int i, vcpu_lock_idx = -1, ret = 0; + struct kvm_vcpu *vcpu; mutex_lock(&kvm->lock); - if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) { + if (kvm->arch.vgic.vctrl_base) { ret = -EEXIST; goto out; } + /* + * Any time a vcpu is run, vcpu_load is called which tries to grab the + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure + * that no other VCPUs are run while we create the vgic. 
+ */ + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!mutex_trylock(&vcpu->mutex)) + goto out_unlock; + vcpu_lock_idx = i; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.has_run_once) { + ret = -EBUSY; + goto out_unlock; + } + } + spin_lock_init(&kvm->arch.vgic.lock); kvm->arch.vgic.vctrl_base = vgic_vctrl_base; kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; +out_unlock: + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&vcpu->mutex); + } + out: mutex_unlock(&kvm->lock); return ret; @@ -1510,3 +1535,37 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) mutex_unlock(&kvm->lock); return r; } + +static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static void vgic_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +static int vgic_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm); +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_set_attr, + .get_attr = vgic_get_attr, + .has_attr = vgic_has_attr, +}; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 03c97e7..3efba97 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2273,6 +2273,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm, ops = &kvm_vfio_ops; break; #endif +#ifdef CONFIG_KVM_ARM_VGIC + case KVM_DEV_TYPE_ARM_VGIC_V2: + ops = &kvm_arm_vgic_v2_ops; + break; +#endif default: return -ENODEV; } -- cgit v0.10.2 From ce01e4e8874d410738f4b4733b26642d6611a331 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: KVM: arm-vgic: Set base addr through device API Support setting the distributor and cpu interface base addresses in the VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API in addition to the ARM specific API. This has the added benefit of being able to share more code in user space and do things in a uniform manner. Also deprecate the older API at the same time, but backwards compatibility will be maintained. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a30035d..867112f 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2391,7 +2391,8 @@ struct kvm_reg_list { This ioctl returns the guest registers that are supported for the KVM_GET_ONE_REG/KVM_SET_ONE_REG calls. -4.85 KVM_ARM_SET_DEVICE_ADDR + +4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated) Capability: KVM_CAP_ARM_SET_DEVICE_ADDR Architectures: arm, arm64 @@ -2429,6 +2430,10 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. +Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API +should be used instead. 
+ + 4.86 KVM_PPC_RTAS_DEFINE_TOKEN Capability: KVM_CAP_PPC_RTAS diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 38f27f7..c9febb2 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -8,3 +8,14 @@ Only one VGIC instance may be instantiated through either this API or the legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt controller, requiring emulated user-space devices to inject interrupts to the VGIC instead of directly to CPUs. + +Groups: + KVM_DEV_ARM_VGIC_GRP_ADDR + Attributes: + KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit) + Base address in the guest physical address space of the GIC distributor + register mappings. + + KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) + Base address in the guest physical address space of the GIC virtual cpu + interface register mappings. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 835b867..76a7427 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -163,6 +163,8 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc7c41a..f290b22 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -776,7 +776,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, case KVM_ARM_DEVICE_VGIC_V2: if (!vgic_present) return -ENXIO; - return kvm_vgic_set_addr(kvm, type, dev_addr->addr); + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 7e2d158..be85127 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -144,7 +144,7 @@ struct kvm_run; struct kvm_exit_mmio; #ifdef CONFIG_KVM_ARM_VGIC -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr); +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); int kvm_vgic_hyp_init(void); int kvm_vgic_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm); diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index b15d6c1..45db48d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1495,6 +1495,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, { int ret; + if (addr & ~KVM_PHYS_MASK) + return -E2BIG; + + if (addr & (SZ_4K - 1)) + return -EINVAL; + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) return -EEXIST; if (addr + size < addr) @@ -1507,26 +1513,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, return ret; } -int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) +/** + * kvm_vgic_addr - set or get vgic VM base addresses + * @kvm: pointer to the vm struct + * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX + * @addr: pointer to address value + * @write: if true set the address in the VM address space, if false read the + * address + * + * Set or get the vgic base addresses for the distributor and the virtual CPU + * interface in the VM physical address space. These addresses are properties + * of the emulated core/SoC and therefore user space initially knows this + * information. 
+ */ +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - mutex_lock(&kvm->lock); switch (type) { case KVM_VGIC_V2_ADDR_TYPE_DIST: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, - addr, KVM_VGIC_V2_DIST_SIZE); + if (write) { + r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base, + *addr, KVM_VGIC_V2_DIST_SIZE); + } else { + *addr = vgic->vgic_dist_base; + } break; case KVM_VGIC_V2_ADDR_TYPE_CPU: - r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, - addr, KVM_VGIC_V2_CPU_SIZE); + if (write) { + r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base, + *addr, KVM_VGIC_V2_CPU_SIZE); + } else { + *addr = vgic->vgic_cpu_base; + } break; default: r = -ENODEV; @@ -1538,16 +1559,58 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr) static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + int r; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + r = kvm_vgic_addr(dev->kvm, type, &addr, true); + return (r == -ENODEV) ? -ENXIO : r; + } + } + return -ENXIO; } static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { - return -ENXIO; + int r = -ENXIO; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + r = kvm_vgic_addr(dev->kvm, type, &addr, false); + if (r) + return (r == -ENODEV) ? -ENXIO : r; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + } + } + + return r; } static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + } return -ENXIO; } -- cgit v0.10.2 From 0307e1770fdeff2732cf7a35d0f7f49db67c6621 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: irqchip: arm-gic: Define additional MMIO offsets and masks Define CPU interface offsets for the GICC_ABPR, GICC_APR, and GICC_IIDR registers. Define distributor registers for the GICD_SPENDSGIR and the GICD_CPENDSGIR. KVM/ARM needs to know about these definitions to fully support save/restore of the VGIC. Also define some masks and shifts for the various GICH_VMCR fields. 
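As an illustration only (these helpers are not part of the patch), the new shift/mask pairs are
meant to be used by extracting a field with (vmcr & MASK) >> SHIFT and inserting it with
(value << SHIFT) & MASK, e.g. for the priority mask field; kernel context with
<linux/irqchip/arm-gic.h> is assumed:

  static inline u32 vmcr_get_primask(u32 vmcr)
  {
          return (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT;
  }

  static inline u32 vmcr_set_primask(u32 vmcr, u32 primask)
  {
          vmcr &= ~GICH_VMCR_PRIMASK_MASK;
          return vmcr | ((primask << GICH_VMCR_PRIMASK_SHIFT) &
                         GICH_VMCR_PRIMASK_MASK);
  }
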
Cc: Thomas Gleixner Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index cac496b..0ceb389 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -17,6 +17,9 @@ #define GIC_CPU_EOI 0x10 #define GIC_CPU_RUNNINGPRI 0x14 #define GIC_CPU_HIGHPRI 0x18 +#define GIC_CPU_ALIAS_BINPOINT 0x1c +#define GIC_CPU_ACTIVEPRIO 0xd0 +#define GIC_CPU_IDENT 0xfc #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 @@ -56,6 +59,15 @@ #define GICH_LR_ACTIVE_BIT (1 << 29) #define GICH_LR_EOI (1 << 19) +#define GICH_VMCR_CTRL_SHIFT 0 +#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT) +#define GICH_VMCR_PRIMASK_SHIFT 27 +#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT) +#define GICH_VMCR_BINPOINT_SHIFT 21 +#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT) +#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18 +#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT) + #define GICH_MISR_EOI (1 << 0) #define GICH_MISR_U (1 << 1) -- cgit v0.10.2 From 1006e8cb22e861260688917ca4cfe6cde8ad69eb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: KVM: arm-vgic: Make vgic mmio functions more generic Rename the vgic_ranges array to vgic_dist_ranges to be more specific and to prepare for handling CPU interface register access as well (for save/restore of VGIC state). Pass offset from distributor or interface MMIO base to find_matching_range function instead of the physical address of the access in the VM memory map. This allows other callers unaware of the VM specifics, but with generic VGIC knowledge to reuse the function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 45db48d..e2596f6 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -602,7 +602,7 @@ struct mmio_range { phys_addr_t offset); }; -static const struct mmio_range vgic_ranges[] = { +static const struct mmio_range vgic_dist_ranges[] = { { .base = GIC_DIST_CTRL, .len = 12, @@ -669,14 +669,13 @@ static const struct mmio_range vgic_ranges[] = { static const struct mmio_range *find_matching_range(const struct mmio_range *ranges, struct kvm_exit_mmio *mmio, - phys_addr_t base) + phys_addr_t offset) { const struct mmio_range *r = ranges; - phys_addr_t addr = mmio->phys_addr - base; while (r->len) { - if (addr >= r->base && - (addr + mmio->len) <= (r->base + r->len)) + if (offset >= r->base && + (offset + mmio->len) <= (r->base + r->len)) return r; r++; } @@ -713,7 +712,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run, return true; } - range = find_matching_range(vgic_ranges, mmio, base); + offset = mmio->phys_addr - base; + range = find_matching_range(vgic_dist_ranges, mmio, offset); if (unlikely(!range || !range->handle_mmio)) { pr_warn("Unhandled access %d %08llx %d\n", mmio->is_write, mmio->phys_addr, mmio->len); -- cgit v0.10.2 From e9b152cb957cb194437f37e79f0f3c9d34fe53d6 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 11 Dec 2013 20:29:11 -0800 Subject: arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put The arch-generic KVM code expects the cpu field of a vcpu to be -1 if the vcpu is no longer assigned to a cpu. This is used for the optimized make_all_cpus_request path and will be used by the vgic code to check that no vcpus are running. 
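A sketch of how the vcpu->cpu == -1 convention can be consumed on the checking side (the VGIC
register access code added later in this series does essentially this under the vgic lock);
illustrative only, kernel context assumed:

  /* Returns true if any VCPU of this VM is currently loaded on a CPU. */
  static bool any_vcpu_running(struct kvm *kvm)
  {
          struct kvm_vcpu *vcpu;
          int i;

          kvm_for_each_vcpu(i, vcpu, kvm) {
                  if (vcpu->cpu != -1)    /* still loaded on a physical CPU */
                          return true;
          }
          return false;
  }
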
Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f290b22..b92ff6d3 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -342,6 +342,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. + */ + vcpu->cpu = -1; + kvm_arm_set_running_vcpu(NULL); } -- cgit v0.10.2 From c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:17:31 +0100 Subject: KVM: arm-vgic: Add vgic reg access from dev attr Add infrastructure to handle distributor and cpu interface register accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups and defining the semantics of the attr field to be the MMIO offset as specified in the GICv2 specs. Missing register accesses or other changes in individual register access functions to support save/restore of the VGIC state is added in subsequent patches. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index c9febb2..7f4e91b 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -19,3 +19,55 @@ Groups: KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit) Base address in the guest physical address space of the GIC virtual cpu interface register mappings. + + KVM_DEV_ARM_VGIC_GRP_DIST_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | + values: | reserved | cpu id | offset | + + All distributor regs are (rw, 32-bit) + + The offset is relative to the "Distributor base address" as defined in the + GICv2 specs. Getting or setting such a register has the same effect as + reading or writing the register on the actual hardware from the cpu + specified with cpu id field. Note that most distributor fields are not + banked, but return the same value regardless of the cpu id used to access + the register. + Limitations: + - Priorities are not implemented, and registers are RAZ/WI + Errors: + -ENODEV: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running + + KVM_DEV_ARM_VGIC_GRP_CPU_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 | + values: | reserved | cpu id | offset | + + All CPU interface regs are (rw, 32-bit) + + The offset specifies the offset from the "CPU interface base address" as + defined in the GICv2 specs. Getting or setting such a register has the + same effect as reading or writing the register on the actual hardware. + + The Active Priorities Registers APRn are implementation defined, so we set a + fixed format for our implementation that fits with the model of a "GICv2 + implementation without the security extensions" which we present to the + guest. This interface always exposes four register APR[0-3] describing the + maximum possible 128 preemption levels. The semantics of the register + indicate if any interrupts in a given preemption level are in the active + state by setting the corresponding bit. 
+ + Thus, preemption level X has one or more active interrupts if and only if: + + APRn[X mod 32] == 0b1, where n = X / 32 + + Bits for undefined preemption levels are RAZ/WI. + + Limitations: + - Priorities are not implemented, and registers are RAZ/WI + Errors: + -ENODEV: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 76a7427..ef0c878 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -165,6 +165,12 @@ struct kvm_arch_memory_slot { /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e2596f6..88599b5 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -589,6 +589,20 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return false; +} + +static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + return false; +} + /* * I would have liked to use the kvm_bus_io_*() API instead, but it * cannot cope with banked registers (only the VM pointer is passed @@ -663,6 +677,16 @@ static const struct mmio_range vgic_dist_ranges[] = { .len = 4, .handle_mmio = handle_mmio_sgi_reg, }, + { + .base = GIC_DIST_SGI_PENDING_CLEAR, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_clear, + }, + { + .base = GIC_DIST_SGI_PENDING_SET, + .len = VGIC_NR_SGIS, + .handle_mmio = handle_mmio_sgi_set, + }, {} }; @@ -1557,6 +1581,114 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) return r; } +static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return true; +} + +static const struct mmio_range vgic_cpu_ranges[] = { + { + .base = GIC_CPU_CTRL, + .len = 12, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ALIAS_BINPOINT, + .len = 4, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_ACTIVEPRIO, + .len = 16, + .handle_mmio = handle_cpu_mmio_misc, + }, + { + .base = GIC_CPU_IDENT, + .len = 4, + .handle_mmio = handle_cpu_mmio_misc, + }, +}; + +static int vgic_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + const struct mmio_range *r = NULL, *ranges; + phys_addr_t offset; + int ret, cpuid, c; + struct kvm_vcpu *vcpu, *tmp_vcpu; + struct vgic_dist *vgic; + struct kvm_exit_mmio mmio; + + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + mutex_lock(&dev->kvm->lock); + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { + ret = -EINVAL; + goto out; + } + + vcpu = kvm_get_vcpu(dev->kvm, cpuid); + vgic = &dev->kvm->arch.vgic; + + mmio.len = 4; + mmio.is_write = is_write; + if (is_write) + mmio_data_write(&mmio, ~0, *reg); + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + mmio.phys_addr = 
vgic->vgic_dist_base + offset; + ranges = vgic_dist_ranges; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + mmio.phys_addr = vgic->vgic_cpu_base + offset; + ranges = vgic_cpu_ranges; + break; + default: + BUG(); + } + r = find_matching_range(ranges, &mmio, offset); + + if (unlikely(!r || !r->handle_mmio)) { + ret = -ENXIO; + goto out; + } + + + spin_lock(&vgic->lock); + + /* + * Ensure that no other VCPU is running by checking the vcpu->cpu + * field. If no other VPCUs are running we can safely access the VGIC + * state, because even if another VPU is run after this point, that + * VCPU will not touch the vgic state, because it will block on + * getting the vgic->lock in kvm_vgic_sync_hwstate(). + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { + if (unlikely(tmp_vcpu->cpu != -1)) { + ret = -EBUSY; + goto out_vgic_unlock; + } + } + + offset -= r->base; + r->handle_mmio(vcpu, &mmio, offset); + + if (!is_write) + *reg = mmio_data_read(&mmio, ~0); + + ret = 0; +out_vgic_unlock: + spin_unlock(&vgic->lock); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { int r; @@ -1573,6 +1705,18 @@ static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) r = kvm_vgic_addr(dev->kvm, type, &addr, true); return (r == -ENODEV) ? -ENXIO : r; } + + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_attr_regs_access(dev, attr, ®, true); + } + } return -ENXIO; @@ -1594,14 +1738,42 @@ static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (copy_to_user(uaddr, &addr, sizeof(addr))) return -EFAULT; + break; + } + + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + r = vgic_attr_regs_access(dev, attr, ®, false); + if (r) + return r; + r = put_user(reg, uaddr); + break; } + } return r; } +static int vgic_has_attr_regs(const struct mmio_range *ranges, + phys_addr_t offset) +{ + struct kvm_exit_mmio dev_attr_mmio; + + dev_attr_mmio.len = 4; + if (find_matching_range(ranges, &dev_attr_mmio, offset)) + return 0; + else + return -ENXIO; +} + static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { + phys_addr_t offset; + switch (attr->group) { case KVM_DEV_ARM_VGIC_GRP_ADDR: switch (attr->attr) { @@ -1610,6 +1782,12 @@ static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr) return 0; } break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_dist_ranges, offset); + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + return vgic_has_attr_regs(vgic_cpu_ranges, offset); } return -ENXIO; } -- cgit v0.10.2 From cbd333a4bfd0d93bba36d46a0e4e7979228873a6 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 15 Nov 2013 20:51:31 -0800 Subject: KVM: arm-vgic: Support unqueueing of LRs to the dist To properly access the VGIC state from user space it is very unpractical to have to loop through all the LRs in all register access functions. Instead, support moving all pending state from LRs to the distributor, but leave active state LRs alone. 
Note that to accurately present the active and pending state to VCPUs reading these distributor registers from a live VM, we would have to stop all other VPUs than the calling VCPU and ask each CPU to unqueue their LR state onto the distributor and add fields to track active state on the distributor side as well. We don't have any users of such functionality yet and there are other inaccuracies of the GIC emulation, so don't provide accurate synchronized access to this state just yet. However, when the time comes, having this function should help. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 88599b5..d08ba28 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -589,6 +589,80 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, return false; } +#define LR_CPUID(lr) \ + (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) +#define LR_IRQID(lr) \ + ((lr) & GICH_LR_VIRTUALID) + +static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu) +{ + clear_bit(lr_nr, vgic_cpu->lr_used); + vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE; + vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; +} + +/** + * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor + * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs + * + * Move any pending IRQs that have already been assigned to LRs back to the + * emulated distributor state so that the complete emulated state can be read + * from the main emulation structures without investigating the LRs. + * + * Note that IRQs in the active state in the LRs get their pending state moved + * to the distributor but the active state stays in the LRs, because we don't + * track the active state on the distributor side. + */ +static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + int vcpu_id = vcpu->vcpu_id; + int i, irq, source_cpu; + u32 *lr; + + for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + lr = &vgic_cpu->vgic_lr[i]; + irq = LR_IRQID(*lr); + source_cpu = LR_CPUID(*lr); + + /* + * There are three options for the state bits: + * + * 01: pending + * 10: active + * 11: pending and active + * + * If the LR holds only an active interrupt (not pending) then + * just leave it alone. + */ + if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT) + continue; + + /* + * Reestablish the pending state on the distributor and the + * CPU interface. It may have already been pending, but that + * is fine, then we are only setting a few bits that were + * already set. + */ + vgic_dist_irq_set(vcpu, irq); + if (irq < VGIC_NR_SGIS) + dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu; + *lr &= ~GICH_LR_PENDING_BIT; + + /* + * If there's no state left on the LR (it could still be + * active), then the LR does not hold any useful info and can + * be marked as free for other use. + */ + if (!(*lr & GICH_LR_STATE)) + vgic_retire_lr(i, irq, vgic_cpu); + + /* Finally update the VGIC state. 
*/ + vgic_update_state(vcpu->kvm); + } +} + static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) @@ -848,8 +922,6 @@ static void vgic_update_state(struct kvm *kvm) } } -#define LR_CPUID(lr) \ - (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT) #define MK_LR_PEND(src, irq) \ (GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq)) @@ -871,9 +943,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID; if (!vgic_irq_is_enabled(vcpu, irq)) { - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; - clear_bit(lr, vgic_cpu->lr_used); - vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE; + vgic_retire_lr(lr, irq, vgic_cpu); if (vgic_irq_is_active(vcpu, irq)) vgic_irq_clear_active(vcpu, irq); } @@ -1675,6 +1745,14 @@ static int vgic_attr_regs_access(struct kvm_device *dev, } } + /* + * Move all pending IRQs from the LRs on all VCPUs so the pending + * state can be properly represented in the register state accessible + * through this API. + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) + vgic_unqueue_irqs(tmp_vcpu); + offset -= r->base; r->handle_mmio(vcpu, &mmio, offset); -- cgit v0.10.2 From 90a5355ee7639e92c0492ec592cba5c31bd80687 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:22:31 +0100 Subject: KVM: arm-vgic: Add GICD_SPENDSGIR and GICD_CPENDSGIR handlers Handle MMIO accesses to the two registers which should support both the case where the VMs want to read/write either of these registers and the case where user space reads/writes these registers to do save/restore of the VGIC state. Note that the added complexity compared to simple set/clear enable registers stems from the bookkeping of source cpu ids. It may be possible to change the underlying data structure to simplify the complexity, but since this is not in the critical path at all, this will do. Also note that reading this register from a live guest will not be accurate compared to on hardware, because some state may be living on the CPU LRs and the only way to give a consistent read would be to force stop all the VCPUs and request them to unqueu the LR state onto the distributor. Until we have an actual user of live reading this register, we can live with the difference. 
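For reference, the architectural layout the new handlers implement: each GICD_SPENDSGIRn /
GICD_CPENDSGIRn register covers four SGIs, one byte per SGI, and bit x of that byte marks CPU x
as a pending source. A purely illustrative helper (not part of the patch):

  /*
   * For SGI 0..15, which 32-bit pending register and which byte within it
   * hold that SGI's source-CPU bitmask.
   */
  static inline void sgi_pend_location(int sgi, int *reg_index, int *byte)
  {
          *reg_index = sgi / 4;   /* GICD_{S,C}PENDSGIR0..3 */
          *byte = sgi % 4;        /* bit n of this byte = pending from CPU n */
  }
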
Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d08ba28..e59aaa4 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -663,18 +663,80 @@ static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) } } -static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) +/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ +static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3) * 4; + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg = 0; + + /* Copy source SGIs from distributor side */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + int shift = 8 * (sgi - min_sgi); + reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift; + } + + mmio_data_write(mmio, ~0, reg); return false; } +static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset, bool set) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int sgi; + int min_sgi = (offset & ~0x3) * 4; + int max_sgi = min_sgi + 3; + int vcpu_id = vcpu->vcpu_id; + u32 reg; + bool updated = false; + + reg = mmio_data_read(mmio, ~0); + + /* Clear pending SGIs on the distributor */ + for (sgi = min_sgi; sgi <= max_sgi; sgi++) { + u8 mask = reg >> (8 * (sgi - min_sgi)); + if (set) { + if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask) + updated = true; + dist->irq_sgi_sources[vcpu_id][sgi] |= mask; + } else { + if (dist->irq_sgi_sources[vcpu_id][sgi] & mask) + updated = true; + dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask; + } + } + + if (updated) + vgic_update_state(vcpu->kvm); + + return updated; +} + static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - return false; + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); +} + +static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + if (!mmio->is_write) + return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); + else + return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); } /* -- cgit v0.10.2 From fa20f5aea56f271f83e91b9cde00f043a5a14990 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:57 -0700 Subject: KVM: arm-vgic: Support CPU interface reg access Implement support for the CPU interface register access driven by MMIO address offsets from the CPU interface base address. Useful for user space to support save/restore of the VGIC state. This commit adds support only for the same logic as the current VGIC support, and no more. For example, the active priority registers are handled as RAZ/WI, just like setting priorities on the emulated distributor. 
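A hedged user-space sketch of the access path this enables (not part of the patch); dev_fd is
assumed to be the fd returned by KVM_CREATE_DEVICE for the VGIC:

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /*
   * Read one 32-bit CPU interface register of VCPU 'cpuid' at 'offset' from
   * the GICC base, using the KVM_DEV_ARM_VGIC_GRP_CPU_REGS attr encoding
   * (cpu id in bits 39..32, offset in bits 31..0).
   */
  static int vgic_get_cpu_reg(int dev_fd, uint64_t cpuid, uint64_t offset,
                              uint32_t *val)
  {
          struct kvm_device_attr attr = {
                  .group = KVM_DEV_ARM_VGIC_GRP_CPU_REGS,
                  .attr  = (cpuid << KVM_DEV_ARM_VGIC_CPUID_SHIFT) |
                           (offset & KVM_DEV_ARM_VGIC_OFFSET_MASK),
                  .addr  = (uint64_t)(unsigned long)val,
          };

          return ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &attr);
  }
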
Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index e59aaa4..be456ce 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -71,6 +71,10 @@ #define VGIC_ADDR_UNDEF (-1) #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b +#define GICC_ARCH_VERSION_V2 0x2 + /* Physical address of vgic virtual cpu interface */ static phys_addr_t vgic_vcpu_base; @@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, u32 word_offset = offset & 3; switch (offset & ~3) { - case 0: /* CTLR */ + case 0: /* GICD_CTLR */ reg = vcpu->kvm->arch.vgic.enabled; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); @@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu, } break; - case 4: /* TYPER */ + case 4: /* GICD_TYPER */ reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; reg |= (VGIC_NR_IRQS >> 5) - 1; vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; - case 8: /* IIDR */ - reg = 0x4B00043B; + case 8: /* GICD_IIDR */ + reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); vgic_reg_access(mmio, ®, word_offset, ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); break; @@ -1716,9 +1720,70 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, phys_addr_t offset) { - return true; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u32 reg, mask = 0, shift = 0; + bool updated = false; + + switch (offset & ~0x3) { + case GIC_CPU_CTRL: + mask = GICH_VMCR_CTRL_MASK; + shift = GICH_VMCR_CTRL_SHIFT; + break; + case GIC_CPU_PRIMASK: + mask = GICH_VMCR_PRIMASK_MASK; + shift = GICH_VMCR_PRIMASK_SHIFT; + break; + case GIC_CPU_BINPOINT: + mask = GICH_VMCR_BINPOINT_MASK; + shift = GICH_VMCR_BINPOINT_SHIFT; + break; + case GIC_CPU_ALIAS_BINPOINT: + mask = GICH_VMCR_ALIAS_BINPOINT_MASK; + shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT; + break; + } + + if (!mmio->is_write) { + reg = (vgic_cpu->vgic_vmcr & mask) >> shift; + mmio_data_write(mmio, ~0, reg); + } else { + reg = mmio_data_read(mmio, ~0); + reg = (reg << shift) & mask; + if (reg != (vgic_cpu->vgic_vmcr & mask)) + updated = true; + vgic_cpu->vgic_vmcr &= ~mask; + vgic_cpu->vgic_vmcr |= reg; + } + return updated; +} + +static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, phys_addr_t offset) +{ + return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); } +static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, + struct kvm_exit_mmio *mmio, + phys_addr_t offset) +{ + u32 reg; + + if (mmio->is_write) + return false; + + /* GICC_IIDR */ + reg = (PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + (IMPLEMENTER_ARM << 0); + mmio_data_write(mmio, ~0, reg); + return false; +} + +/* + * CPU Interface Register accesses - these are not accessed by the VM, but by + * user space for saving and restoring VGIC state. 
+ */ static const struct mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_CTRL, @@ -1728,17 +1793,17 @@ static const struct mmio_range vgic_cpu_ranges[] = { { .base = GIC_CPU_ALIAS_BINPOINT, .len = 4, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_mmio_abpr, }, { .base = GIC_CPU_ACTIVEPRIO, .len = 16, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_mmio_raz_wi, }, { .base = GIC_CPU_IDENT, .len = 4, - .handle_mmio = handle_cpu_mmio_misc, + .handle_mmio = handle_cpu_mmio_ident, }, }; -- cgit v0.10.2 From da7814700a0c408bead58ce4714b7625ffbaade1 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:22 +0000 Subject: arm64: KVM: Add Kconfig option for max VCPUs per-Guest Current max VCPUs per-Guest is set to 4 which is preventing us from creating a Guest (or VM) with 8 VCPUs on Host (e.g. X-Gene Storm SOC) with 8 Host CPUs. The correct value of max VCPUs per-Guest should be same as the max CPUs supported by GICv2 which is 8 but, increasing value of max VCPUs per-Guest can make things slower hence we add Kconfig option to let KVM users select appropriate max VCPUs per-Guest. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5d85a02..0a1d697 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,7 +26,12 @@ #include #include -#define KVM_MAX_VCPUS 4 +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 4480ab3..8ba85e9 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,17 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + config KVM_ARM_VGIC bool depends on KVM_ARM_HOST && OF -- cgit v0.10.2 From e28100bd8ed9e37b7cd4578140a1e7f95bd40835 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 14 Nov 2013 15:20:08 +0000 Subject: arm64: KVM: Support X-Gene guest VCPU on APM X-Gene host This patch allows us to have X-Gene guest VCPU when using KVM arm64 on APM X-Gene host. We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu() when running on X-Gene host with Potenza core. 
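For illustration (not part of the patch), user space selects the new target through the usual
VCPU init call; vcpu_fd is assumed to come from KVM_CREATE_VCPU:

  #include <string.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  static int init_xgene_vcpu(int vcpu_fd)
  {
          struct kvm_vcpu_init init;

          memset(&init, 0, sizeof(init));
          init.target = KVM_ARM_TARGET_XGENE_POTENZA;

          /* Fails with EINVAL on hosts that do not support this target. */
          return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
  }
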
[maz: sanitized the commit log] Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5031f42..d9f026b 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -55,8 +55,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_AEM_V8 0 #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 +#define KVM_ARM_TARGET_XGENE_POTENZA 3 -#define KVM_ARM_NUM_TARGETS 3 +#define KVM_ARM_NUM_TARGETS 4 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f0731e..0874557 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void) unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; + switch (implementor) { + case ARM_CPU_IMP_ARM: + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + return KVM_ARM_TARGET_CORTEX_A57; + }; + break; + case ARM_CPU_IMP_APM: + switch (part_number) { + case APM_CPU_PART_POTENZA: + return KVM_ARM_TARGET_XGENE_POTENZA; + }; + break; + }; - switch (part_number) { - case ARM_CPU_PART_AEM_V8: - return KVM_ARM_TARGET_AEM_V8; - case ARM_CPU_PART_FOUNDATION: - return KVM_ARM_TARGET_FOUNDATION_V8; - case ARM_CPU_PART_CORTEX_A57: - /* Currently handled by the generic backend */ - return KVM_ARM_TARGET_CORTEX_A57; - default: - return -EINVAL; - } + return -EINVAL; } int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 4268ab9..8fe6f76 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, + &genericv8_target_table); + return 0; } late_initcall(sys_reg_genericv8_init); -- cgit v0.10.2 From e5cf9dcdbfd26cd4e1991db08755da900454efeb Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:23 +0000 Subject: arm64: KVM: Force undefined exception for Guest SMC intructions The SMC-based PSCI emulation for Guest is going to be very different from the in-kernel HVC-based PSCI emulation hence for now just inject undefined exception when Guest executes SMC instruction. 
Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: marc Zyngier diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8da5606..df84d7b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } -- cgit v0.10.2 From 171800328f6e2443e0e356de5b41fb7e0fff4448 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Sun, 22 Dec 2013 01:21:23 +0900 Subject: KVM: doc: Fix typo in doc/virtual/kvm Correct spelling typo in Documentations/virtual/kvm Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Marcelo Tosatti diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 867112f..a8cdc7b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2104,7 +2104,7 @@ Returns: 0 on success, -1 on error Allows setting an eventfd to directly trigger a guest interrupt. kvm_irqfd.fd specifies the file descriptor to use as the eventfd and kvm_irqfd.gsi specifies the irqchip pin toggled by this event. When -an event is tiggered on the eventfd, an interrupt is injected into +an event is triggered on the eventfd, an interrupt is injected into the guest using the specified gsi pin. The irqfd is removed using the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd and kvm_irqfd.gsi. @@ -2115,7 +2115,7 @@ interrupts. When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an additional eventfd in the kvm_irqfd.resamplefd field. When operating in resample mode, posting of an interrupt through kvm_irq.fd asserts the specified gsi in the irqchip. When the irqchip is resampled, such -as from an EOI, the gsi is de-asserted and the user is notifed via +as from an EOI, the gsi is de-asserted and the user is notified via kvm_irqfd.resamplefd. It is the user's responsibility to re-queue the interrupt if the device making use of it still requires service. Note that closing the resamplefd is not sufficient to disable the diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index d922d73..c8d040e 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -77,7 +77,7 @@ Usage example : A vcpu of a paravirtualized guest that is busywaiting in guest kernel mode for an event to occur (ex: a spinlock to become available) can execute HLT instruction once it has busy-waited for more than a threshold time-interval. Execution of HLT instruction would cause the hypervisor to put -the vcpu to sleep until occurence of an appropriate event. Another vcpu of the +the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) is used in the hypercall for future use. diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index f886941..d68af4d 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -112,7 +112,7 @@ The Dirty bit is lost in this case. 
In order to avoid this kind of issue, we always treat the spte as "volatile" if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means, -the spte is always atomicly updated in this case. +the spte is always atomically updated in this case. 3): flush tlbs due to spte updated If the spte is updated from writable to readonly, we should flush all TLBs, @@ -125,7 +125,7 @@ be flushed caused by this reason in mmu_spte_update() since this is a common function to update spte (present -> present). Since the spte is "volatile" if it can be updated out of mmu-lock, we always -atomicly update the spte, the race caused by fast page fault can be avoided, +atomically update the spte, the race caused by fast page fault can be avoided, See the comments in spte_has_volatile_bits() and mmu_spte_update(). 3. Reference diff --git a/Documentation/virtual/kvm/ppc-pv.txt b/Documentation/virtual/kvm/ppc-pv.txt index 4cd076f..4643cde 100644 --- a/Documentation/virtual/kvm/ppc-pv.txt +++ b/Documentation/virtual/kvm/ppc-pv.txt @@ -115,7 +115,7 @@ If any other bit changes in the MSR, please still use mtmsr(d). Patched instructions ==================== -The "ld" and "std" instructions are transormed to "lwz" and "stw" instructions +The "ld" and "std" instructions are transformed to "lwz" and "stw" instructions respectively on 32 bit systems with an added offset of 4 to accommodate for big endianness. diff --git a/Documentation/virtual/kvm/timekeeping.txt b/Documentation/virtual/kvm/timekeeping.txt index df894637..76808a1 100644 --- a/Documentation/virtual/kvm/timekeeping.txt +++ b/Documentation/virtual/kvm/timekeeping.txt @@ -467,7 +467,7 @@ at any time. This causes problems as the passage of real time, the injection of machine interrupts and the associated clock sources are no longer completely synchronized with real time. -This same problem can occur on native harware to a degree, as SMM mode may +This same problem can occur on native hardware to a degree, as SMM mode may steal cycles from the naturally on X86 systems when SMM mode is used by the BIOS, but not in such an extreme fashion. However, the fact that SMM mode may cause similar problems to virtualization makes it a good justification for -- cgit v0.10.2 From 2f0a6397dd3cac2fb05b46cad08c1d532c04d6b8 Mon Sep 17 00:00:00 2001 From: Zhihui Zhang Date: Mon, 30 Dec 2013 15:56:29 -0500 Subject: KVM: VMX: check use I/O bitmap first before unconditional I/O exit According to Table C-1 of Intel SDM 3C, a VM exit happens on an I/O instruction when "use I/O bitmaps" VM-execution control was 0 _and_ the "unconditional I/O exiting" VM-execution control was 1. So we can't just check "unconditional I/O exiting" alone. This patch was improved by suggestion from Jan Kiszka. 
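Restating the SDM rule that the fix implements (an illustrative pseudo-logic helper, not kernel
code): when the I/O bitmaps are in use they fully decide the exit, and only when they are not in
use does the unconditional control matter:

  static bool io_instruction_exits(bool use_io_bitmaps,
                                   bool uncond_io_exiting,
                                   bool bitmap_bit_for_port)
  {
          if (use_io_bitmaps)
                  return bitmap_bit_for_port;
          return uncond_io_exiting;
  }
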
Reviewed-by: Jan Kiszka Signed-off-by: Zhihui Zhang Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9cc5484..0abf8b7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6521,11 +6521,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, int size; u8 b; - if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) - return 1; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return 0; + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); exit_qualification = vmcs_readl(EXIT_QUALIFICATION); -- cgit v0.10.2 From 7940876e1330671708186ac3386aa521ffb5c182 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 Dec 2013 12:12:29 -0800 Subject: kvm: make local functions static Running 'make namespacecheck' found lots of functions that should be declared static, since only used in one file. Signed-off-by: Stephen Hemminger Signed-off-by: Marcelo Tosatti diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1f46f66..4306c56 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -463,8 +463,6 @@ void kvm_exit(void); void kvm_get_kvm(struct kvm *kvm); void kvm_put_kvm(struct kvm *kvm); -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation); static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm) { @@ -537,7 +535,6 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); -void kvm_set_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn); @@ -549,7 +546,6 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault, pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn); pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn); -void kvm_release_pfn_dirty(pfn_t pfn); void kvm_release_pfn_clean(pfn_t pfn); void kvm_set_pfn_dirty(pfn_t pfn); void kvm_set_pfn_accessed(pfn_t pfn); @@ -576,8 +572,6 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, - gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); @@ -604,8 +598,6 @@ int kvm_get_dirty_log(struct kvm *kvm, int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log); -int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem); int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, bool line_status); long kvm_arch_vm_ioctl(struct file *filp, @@ -653,8 +645,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void kvm_free_physmem(struct kvm *kvm); - void *kvm_kvzalloc(unsigned long size); void kvm_kvfree(const void *addr); @@ -1097,12 +1087,6 @@ static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val) static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } - -static inline bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) -{ - return true; -} - 
#endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 2d68297..ce9ed99 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -520,7 +520,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, return 0; } -void kvm_ioapic_reset(struct kvm_ioapic *ioapic) +static void kvm_ioapic_reset(struct kvm_ioapic *ioapic) { int i; diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h index 615d8c9..90d43e9 100644 --- a/virt/kvm/ioapic.h +++ b/virt/kvm/ioapic.h @@ -91,7 +91,6 @@ void kvm_ioapic_destroy(struct kvm *kvm); int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id, int level, bool line_status); void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id); -void kvm_ioapic_reset(struct kvm_ioapic *ioapic); int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3efba97..e7c6ddd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -95,6 +95,12 @@ static int hardware_enable_all(void); static void hardware_disable_all(void); static void kvm_io_bus_destroy(struct kvm_io_bus *bus); +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new, u64 last_generation); + +static void kvm_release_pfn_dirty(pfn_t pfn); +static void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, gfn_t gfn); bool kvm_rebooting; EXPORT_SYMBOL_GPL(kvm_rebooting); @@ -553,7 +559,7 @@ static void kvm_free_physmem_slot(struct kvm *kvm, struct kvm_memory_slot *free, free->npages = 0; } -void kvm_free_physmem(struct kvm *kvm) +static void kvm_free_physmem(struct kvm *kvm) { struct kvm_memslots *slots = kvm->memslots; struct kvm_memory_slot *memslot; @@ -675,8 +681,9 @@ static void sort_memslots(struct kvm_memslots *slots) slots->id_to_index[slots->memslots[i].id] = i; } -void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new, - u64 last_generation) +static void update_memslots(struct kvm_memslots *slots, + struct kvm_memory_slot *new, + u64 last_generation) { if (new) { int id = new->id; @@ -924,8 +931,8 @@ int kvm_set_memory_region(struct kvm *kvm, } EXPORT_SYMBOL_GPL(kvm_set_memory_region); -int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, - struct kvm_userspace_memory_region *mem) +static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem) { if (mem->slot >= KVM_USER_MEM_SLOTS) return -EINVAL; @@ -1047,7 +1054,7 @@ static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn, } unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, - gfn_t gfn) + gfn_t gfn) { return gfn_to_hva_many(slot, gfn, NULL); } @@ -1387,18 +1394,11 @@ void kvm_release_page_dirty(struct page *page) } EXPORT_SYMBOL_GPL(kvm_release_page_dirty); -void kvm_release_pfn_dirty(pfn_t pfn) +static void kvm_release_pfn_dirty(pfn_t pfn) { kvm_set_pfn_dirty(pfn); kvm_release_pfn_clean(pfn); } -EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); - -void kvm_set_page_dirty(struct page *page) -{ - kvm_set_pfn_dirty(page_to_pfn(page)); -} -EXPORT_SYMBOL_GPL(kvm_set_page_dirty); void kvm_set_pfn_dirty(pfn_t pfn) { @@ -1640,8 +1640,9 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len) } EXPORT_SYMBOL_GPL(kvm_clear_guest); -void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, - 
gfn_t gfn) +static void mark_page_dirty_in_slot(struct kvm *kvm, + struct kvm_memory_slot *memslot, + gfn_t gfn) { if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; @@ -1757,7 +1758,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); * locking does not harm. It may result in trying to yield to same VCPU, fail * and continue with next VCPU and so on. */ -bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) +static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { bool eligible; -- cgit v0.10.2 From ea0269bc34a7df6bda1ee862ad198dee0839f170 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 Dec 2013 12:13:08 -0800 Subject: kvm: remove dead code The function kvm_io_bus_read_cookie is defined but never used in current in-tree code. Signed-off-by: Stephen Hemminger Signed-off-by: Marcelo Tosatti diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4306c56..b8e9a43 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -172,8 +172,6 @@ int kvm_io_bus_write_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val, long cookie); int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); -int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, void *val, long cookie); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e7c6ddd..b28579e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2934,33 +2934,6 @@ int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, return r < 0 ? r : 0; } -/* kvm_io_bus_read_cookie - called under kvm->slots_lock */ -int kvm_io_bus_read_cookie(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, - int len, void *val, long cookie) -{ - struct kvm_io_bus *bus; - struct kvm_io_range range; - - range = (struct kvm_io_range) { - .addr = addr, - .len = len, - }; - - bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - - /* First try the device referenced by cookie. */ - if ((cookie >= 0) && (cookie < bus->dev_count) && - (kvm_io_bus_cmp(&range, &bus->range[cookie]) == 0)) - if (!kvm_iodevice_read(bus->range[cookie].dev, addr, len, - val)) - return cookie; - - /* - * cookie contained garbage; fall back to search and return the - * correct cookie value. - */ - return __kvm_io_bus_read(bus, &range, val); -} /* Caller must hold slots_lock. */ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, -- cgit v0.10.2 From 96893977b8f732493815e7a2b552c37e1bb967e5 Mon Sep 17 00:00:00 2001 From: Chen Fan Date: Thu, 2 Jan 2014 17:14:11 +0800 Subject: KVM: x86: Fix debug typo error in lapic fix the 'vcpi' typos when apic_debug is enabled. 
Signed-off-by: Chen Fan Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5439117..206715b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -432,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) u8 val; if (pv_eoi_get_user(vcpu, &val) < 0) apic_debug("Can't read EOI MSR value: 0x%llx\n", - (unsigned long long)vcpi->arch.pv_eoi.msr_val); + (unsigned long long)vcpu->arch.pv_eoi.msr_val); return val & 0x1; } @@ -440,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) { if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { apic_debug("Can't set EOI MSR value: 0x%llx\n", - (unsigned long long)vcpi->arch.pv_eoi.msr_val); + (unsigned long long)vcpu->arch.pv_eoi.msr_val); return; } __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); @@ -450,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) { if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { apic_debug("Can't clear EOI MSR value: 0x%llx\n", - (unsigned long long)vcpi->arch.pv_eoi.msr_val); + (unsigned long long)vcpu->arch.pv_eoi.msr_val); return; } __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); -- cgit v0.10.2 From 26a865f4aa8e66a6d94958de7656f7f1b03c6c56 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Jan 2014 17:00:51 -0200 Subject: KVM: VMX: fix use after free of vmx->loaded_vmcs After free_loaded_vmcs executes, the "loaded_vmcs" structure is kfreed, and now vmx->loaded_vmcs points to a kfreed area. Subsequent free_loaded_vmcs then attempts to manipulate vmx->loaded_vmcs. Switch the order to avoid the problem. https://bugzilla.redhat.com/show_bug.cgi?id=1047892 Reviewed-by: Jan Kiszka Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0abf8b7..7661eb1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7390,8 +7390,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); - free_nested(vmx); free_loaded_vmcs(vmx->loaded_vmcs); + free_nested(vmx); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vmx); -- cgit v0.10.2 From 136d737fd20102f1be9b02356590fd55e3a40d0e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Dec 2013 16:56:06 +0000 Subject: arm/arm64: KVM: relax the requirements of VMA alignment for THP The THP code in KVM/ARM is a bit restrictive in not allowing a THP to be used if the VMA is not 2MB aligned. Actually, it is not so much the VMA that matters, but the associated memslot: A process can perfectly mmap a region with no particular alignment restriction, and then pass a 2MB aligned address to KVM. In this case, KVM will only use this 2MB aligned region, and will ignore the range between vma->vm_start and memslot->userspace_addr. It can also choose to place this memslot at whatever alignment it wants in the IPA space. In the end, what matters is the relative alignment of the user space and IPA mappings with respect to a 2M page. They absolutely must be the same if you want to use THP. 
Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 659db0e..7789857 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { /* - * Pages belonging to VMAs not aligned to the PMD mapping - * granularity cannot be mapped using block descriptors even - * if the pages belong to a THP for the process, because the - * stage-2 block descriptor will cover more than a single THP - * and we loose atomicity for unmapping, updates, and splits - * of the THP or other pages in the stage-2 block range. + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. */ - if (vma->vm_start & ~PMD_MASK) + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) force_pte = true; } up_read(¤t->mm->mmap_sem); -- cgit v0.10.2 From 61466710de078c697106fa5b70ec7afc9feab520 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 7 Jan 2014 13:45:15 +0530 Subject: KVM: ARM: Remove duplicate include trace.h was included twice. Remove duplicate inclusion. Signed-off-by: Sachin Kamat Signed-off-by: Christoffer Dall diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index a920790..0de91fc 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -26,8 +26,6 @@ #include "trace.h" -#include "trace.h" - typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -- cgit v0.10.2 From e81d1ad32753cdeaef56b9bffe3b8ab7b5c776e5 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 10 Jan 2014 01:28:46 +0100 Subject: kvm: vfio: silence GCC warning Building vfio.o triggers a GCC warning (when building for 32 bits x86): arch/x86/kvm/../../../virt/kvm/vfio.c: In function 'kvm_vfio_set_group': arch/x86/kvm/../../../virt/kvm/vfio.c:104:22: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast] void __user *argp = (void __user *)arg; ^ Silence this warning by casting arg to unsigned long. argp's current type, "void __user *", is always casted to "int32_t __user *". So its type might as well be changed to "int32_t __user *". 
Signed-off-by: Paul Bolle Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index ca4260e..b4f9507 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -101,14 +101,14 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) struct kvm_vfio *kv = dev->private; struct vfio_group *vfio_group; struct kvm_vfio_group *kvg; - void __user *argp = (void __user *)arg; + int32_t __user *argp = (int32_t __user *)(unsigned long)arg; struct fd f; int32_t fd; int ret; switch (attr) { case KVM_DEV_VFIO_GROUP_ADD: - if (get_user(fd, (int32_t __user *)argp)) + if (get_user(fd, argp)) return -EFAULT; f = fdget(fd); @@ -148,7 +148,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) return 0; case KVM_DEV_VFIO_GROUP_DEL: - if (get_user(fd, (int32_t __user *)argp)) + if (get_user(fd, argp)) return -EFAULT; f = fdget(fd); -- cgit v0.10.2 From 4a55dd7273c95b4a19fbcf0ae1bbd1cfd09dfc36 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 9 Jan 2014 18:43:16 -0600 Subject: kvm: Provide kvm_vcpu_eligible_for_directed_yield() stub Commit 7940876e1330671708186ac3386aa521ffb5c182 ("kvm: make local functions static") broke KVM PPC builds due to removing (rather than moving) the stub version of kvm_vcpu_eligible_for_directed_yield(). This patch reintroduces it. Signed-off-by: Scott Wood Cc: Stephen Hemminger Cc: Alexander Graf [Move the #ifdef inside the function. - Paolo] Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b28579e..9ed9c8c 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1735,7 +1735,6 @@ bool kvm_vcpu_yield_to(struct kvm_vcpu *target) } EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); -#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT /* * Helper that checks whether a VCPU is eligible for directed yield. * Most eligible candidate to yield is decided by following heuristics: @@ -1760,6 +1759,7 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_yield_to); */ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) { +#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT bool eligible; eligible = !vcpu->spin_loop.in_spin_loop || @@ -1770,8 +1770,10 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu) kvm_vcpu_set_dy_eligible(vcpu, !vcpu->spin_loop.dy_eligible); return eligible; -} +#else + return true; #endif +} void kvm_vcpu_on_spin(struct kvm_vcpu *me) { -- cgit v0.10.2 From 37f6a4e237303549c8676dfe1fd1991ceab512eb Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Jan 2014 17:09:32 -0200 Subject: KVM: x86: handle invalid root_hpa everywhere Rom Freiman notes other code paths vulnerable to bug fixed by 989c6b34f6a9480e397b. 
Signed-off-by: Marcelo Tosatti diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 31a5702..e50425d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2832,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, bool ret = false; u64 spte = 0ull; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return false; + if (!page_fault_can_be_fast(error_code)) return false; @@ -3227,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) struct kvm_shadow_walk_iterator iterator; u64 spte = 0ull; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return spte; + walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) if (!is_shadow_present_pte(spte)) @@ -4513,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) u64 spte; int nr_sptes = 0; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return nr_sptes; + walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { sptes[iterator.level-1] = spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ad75d77..cba218a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + goto out_gpte_changed; + for (shadow_walk_init(&it, vcpu, addr); shadow_walk_okay(&it) && it.level > gw->level; shadow_walk_next(&it)) { @@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) */ mmu_topup_memory_caches(vcpu); + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { + WARN_ON(1); + return; + } + spin_lock(&vcpu->kvm->mmu_lock); for_each_shadow_entry(vcpu, gva, iterator) { level = iterator.level; -- cgit v0.10.2 From 9ed96e87c5748de4c2807ef17e81287c7304186c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 6 Jan 2014 12:00:02 -0200 Subject: KVM: x86: limit PIT timer frequency Limit PIT timer frequency similarly to the limit applied by LAPIC timer. Cc: stable@kernel.org Reviewed-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 412a5aa..518d864 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -37,6 +37,7 @@ #include "irq.h" #include "i8254.h" +#include "x86.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) atomic_set(&ps->pending, 0); ps->irq_ack = 1; + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. 
+ */ + if (ps->is_periodic) { + s64 min_period = min_timer_period_us * 1000LL; + + if (ps->period < min_period) { + pr_info_ratelimited( + "kvm: requested %lld ns " + "i8254 timer period limited to %lld ns\n", + ps->period, min_period); + ps->period = min_period; + } + } + hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 206715b..1ac0093 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -71,9 +71,6 @@ #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) -static unsigned int min_timer_period_us = 500; -module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); - static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { *((u32 *) (apic->regs + reg_off)) = val; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1dc0359..0fd2bd7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +unsigned int min_timer_period_us = 500; +module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); + bool kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); u32 kvm_max_guest_tsc_khz; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 587fb9e..8da5823 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) extern u64 host_xcr0; +extern unsigned int min_timer_period_us; + extern struct static_key kvm_no_apic_vcpu; #endif -- cgit v0.10.2 From f25e656d31ad112612839edaded18920cafea3b1 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 6 Jan 2014 12:18:59 -0200 Subject: KVM: x86: fix tsc catchup issue with tsc scaling To fix a problem related to different resolution of TSC and system clock, the offset in TSC units is approximated by delta = vcpu->hv_clock.tsc_timestamp - vcpu->last_guest_tsc (Guest TSC value at (Guest TSC value at last VM-exit) the last kvm_guest_time_update call) Delta is then later scaled using mult,shift pair found in hv_clock structure (which is correct against tsc_timestamp in that structure). However, if a frequency change is performed between these two points, this delta is measured using different TSC frequencies, but scaled using mult,shift pair for one frequency only. The end result is an incorrect delta. The bug which this code works around is not the only cause for clock backwards events. The global accumulator is still necessary, so remove the max_kernel_ns fix and rely on the global accumulator for no clock backwards events. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0fd2bd7..842abd3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1487,7 +1487,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long flags, this_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; - s64 kernel_ns, max_kernel_ns; + s64 kernel_ns; u64 tsc_timestamp, host_tsc; struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; @@ -1546,37 +1546,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) if (!vcpu->pv_time_enabled) return 0; - /* - * Time as measured by the TSC may go backwards when resetting the base - * tsc_timestamp. 
The reason for this is that the TSC resolution is - * higher than the resolution of the other clock scales. Thus, many - * possible measurments of the TSC correspond to one measurement of any - * other clock, and so a spread of values is possible. This is not a - * problem for the computation of the nanosecond clock; with TSC rates - * around 1GHZ, there can only be a few cycles which correspond to one - * nanosecond value, and any path through this code will inevitably - * take longer than that. However, with the kernel_ns value itself, - * the precision may be much lower, down to HZ granularity. If the - * first sampling of TSC against kernel_ns ends in the low part of the - * range, and the second in the high end of the range, we can get: - * - * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new - * - * As the sampling errors potentially range in the thousands of cycles, - * it is possible such a time value has already been observed by the - * guest. To protect against this, we must compute the system time as - * observed by the guest and ensure the new system time is greater. - */ - max_kernel_ns = 0; - if (vcpu->hv_clock.tsc_timestamp) { - max_kernel_ns = vcpu->last_guest_tsc - - vcpu->hv_clock.tsc_timestamp; - max_kernel_ns = pvclock_scale_delta(max_kernel_ns, - vcpu->hv_clock.tsc_to_system_mul, - vcpu->hv_clock.tsc_shift); - max_kernel_ns += vcpu->last_kernel_ns; - } - if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, &vcpu->hv_clock.tsc_shift, @@ -1584,14 +1553,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hw_tsc_khz = this_tsc_khz; } - /* with a master tuple, - * pvclock clock reads always increase at the (scaled) rate - * of guest TSC - no need to deal with sampling errors. - */ - if (!use_master_clock) { - if (max_kernel_ns > kernel_ns) - kernel_ns = max_kernel_ns; - } /* With all the info we got, fill in the values */ vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; -- cgit v0.10.2 From aab6d7ce37cf20753a336dc74473cf8a8aefa7c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 15 Jan 2014 18:07:31 +0100 Subject: KVM: remove useless write to vcpu->hv_clock.tsc_timestamp After the previous patch from Marcelo, the comment before this write became obsolete. In fact, the write is unnecessary. The calls to kvm_write_tsc ultimately result in a master clock update as soon as all TSCs agree and the master clock is re-enabled. This master clock update will rewrite tsc_timestamp. So, together with the comment, delete the dead write too. 
Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 842abd3..0fbdced 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1278,8 +1278,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm->arch.last_tsc_write = data; kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; - /* Reset of TSC must disable overshoot protection below */ - vcpu->arch.hv_clock.tsc_timestamp = 0; vcpu->arch.last_guest_tsc = data; /* Keep track of which generation this VCPU has synchronized to */ -- cgit v0.10.2 From e984097b553ed2d6551c805223e4057421370f00 Mon Sep 17 00:00:00 2001 From: Vadim Rozenfeld Date: Thu, 16 Jan 2014 20:18:37 +1100 Subject: add support for Hyper-V reference time counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off: Peter Lieven Signed-off: Gleb Natapov Signed-off: Vadim Rozenfeld After some consideration I decided to submit only Hyper-V reference counters support this time. I will submit iTSC support as a separate patch as soon as it is ready. v1 -> v2 1. mark TSC page dirty as suggested by Eric Northup and Gleb 2. disable local irq when calling get_kernel_ns, as it was done by Peter Lieven 3. move check for TSC page enable from second patch to this one. v3 -> v4     Get rid of ref counter offset. v4 -> v5 replace __copy_to_user with kvm_write_guest when updateing iTSC page. Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ae5d783..33fef07 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -605,6 +605,7 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + u64 hv_tsc_page; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b8f1c01..462efe7 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -28,6 +28,9 @@ /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +/* A partition's reference time stamp counter (TSC) page */ +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 + /* * There is a single feature flag that signifies the presence of the MSR * that can be used to retrieve both the local APIC Timer frequency as @@ -198,6 +201,9 @@ #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + #define HV_PROCESSOR_POWER_STATE_C0 0 #define HV_PROCESSOR_POWER_STATE_C1 1 #define HV_PROCESSOR_POWER_STATE_C2 2 @@ -210,4 +216,11 @@ #define HV_STATUS_INVALID_ALIGNMENT 4 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +typedef struct _HV_REFERENCE_TSC_PAGE { + __u32 tsc_sequence; + __u32 res1; + __u64 tsc_scale; + __s64 tsc_offset; +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0fbdced..0b3fd80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -839,11 +839,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * kvm-specific. Those are put in the beginning of the list. 
*/ -#define KVM_SAVE_MSRS_BEGIN 10 +#define KVM_SAVE_MSRS_BEGIN 12 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, @@ -1788,6 +1789,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) switch (msr) { case HV_X64_MSR_GUEST_OS_ID: case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: r = true; break; } @@ -1829,6 +1832,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) kvm->arch.hv_hypercall = data; break; } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + HV_REFERENCE_TSC_PAGE tsc_ref; + memset(&tsc_ref, 0, sizeof(tsc_ref)); + kvm->arch.hv_tsc_page = data; + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + break; + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + if (kvm_write_guest(kvm, data, + &tsc_ref, sizeof(tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + break; + } default: vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " "data 0x%llx\n", msr, data); @@ -2253,6 +2270,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = kvm->arch.hv_hypercall; break; + case HV_X64_MSR_TIME_REF_COUNT: { + data = + div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = kvm->arch.hv_tsc_page; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -2566,6 +2591,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: + case KVM_CAP_HYPERV_TIME: #endif r = 1; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b647c29..932d7f2 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -674,6 +674,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_EL1_32BIT 93 #define KVM_CAP_SPAPR_MULTITCE 94 #define KVM_CAP_EXT_EMUL_CPUID 95 +#define KVM_CAP_HYPERV_TIME 96 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 9926c9fdbdd54bb229fe6fdbd15ca3af2b8425ae Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:15 +0100 Subject: KVM: x86: Sync DR7 on KVM_SET_DEBUGREGS Whenever we change arch.dr7, we also have to call kvm_update_dr7. In case guest debugging is off, this will synchronize the new state into hardware. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b3fd80..59907c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2976,6 +2976,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = dbgregs->dr6; vcpu->arch.dr7 = dbgregs->dr7; + kvm_update_dr7(vcpu); return 0; } -- cgit v0.10.2 From 73aaf249ee2287b4686ff079dcbdbbb658156e64 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:16 +0100 Subject: KVM: SVM: Fix reading of DR6 In contrast to VMX, SVM dose not automatically transfer DR6 into the VCPU's arch.dr6. So if we face a DR6 read, we must consult a new vendor hook to obtain the current value. And as SVM now picks the DR6 state from its VMCB, we also need a set callback in order to write updates of DR6 back. 
Fixes a regression of 020df0794f. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 33fef07..fdf83af 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -700,6 +700,8 @@ struct kvm_x86_ops { void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + u64 (*get_dr6)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c7168a5..e81df8f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) mark_dirty(svm->vmcb, VMCB_ASID); } +static u64 svm_get_dr6(struct kvm_vcpu *vcpu) +{ + return to_svm(vcpu)->vmcb->save.dr6; +} + +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.dr6 = value; + mark_dirty(svm->vmcb, VMCB_DR); +} + static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .get_dr6 = svm_get_dr6, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7661eb1..79b360e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5149,6 +5149,15 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; } +static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.dr6; +} + +static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ +} + static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -8556,6 +8565,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .get_dr6 = vmx_get_dr6, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59907c9..59b95b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -722,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static void kvm_update_dr6(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); +} + static void kvm_update_dr7(struct kvm_vcpu *vcpu) { unsigned long dr7; @@ -750,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) if (val & 0xffffffff00000000ULL) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + kvm_update_dr6(vcpu); break; case 5: if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) @@ -791,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) return 1; /* fall through */ case 6: - *val = vcpu->arch.dr6; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + *val = vcpu->arch.dr6; + else + *val = kvm_x86_ops->get_dr6(vcpu); break; case 5: if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) @@ -2960,8 +2970,11 @@ static int 
kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, struct kvm_debugregs *dbgregs) { + unsigned long val; + memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); - dbgregs->dr6 = vcpu->arch.dr6; + _kvm_get_dr(vcpu, 6, &val); + dbgregs->dr6 = val; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); @@ -2975,6 +2988,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = dbgregs->dr6; + kvm_update_dr6(vcpu); vcpu->arch.dr7 = dbgregs->dr7; kvm_update_dr7(vcpu); @@ -6749,6 +6763,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = DR6_FIXED_1; + kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); -- cgit v0.10.2 From 8246bf52c75aa9b9b336a84f31ed2248754d0f71 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:17 +0100 Subject: KVM: VMX: Fix DR6 update on #DB exception According to the SDM, only bits 0-3 of DR6 "may" be cleared by "certain" debug exception. So do update them on #DB exception in KVM, but leave the rest alone, only setting BD and BS in addition to already set bits in DR6. This also aligns us with kvm_vcpu_check_singlestep. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 79b360e..c8eb27f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4869,7 +4869,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 = dr6 | DR6_FIXED_1; + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= dr6; kvm_queue_exception(vcpu, DB_VECTOR); return 1; } -- cgit v0.10.2 From 42124925c1f580068661bebd963d7c102175a8a9 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:19 +0100 Subject: KVM: nVMX: Leave VMX mode on clearing of feature control MSR When userspace sets MSR_IA32_FEATURE_CONTROL to 0, make sure we leave root and non-root mode, fully disabling VMX. The register state of the VCPU is undefined after this step, so userspace has to set it to a proper state afterward. This enables to reboot a VM while it is running some hypervisor code. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c8eb27f..bff5555 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2455,6 +2455,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 1; } +static void vmx_leave_nested(struct kvm_vcpu *vcpu); + static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr_index = msr_info->index; @@ -2470,6 +2472,8 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) & FEATURE_CONTROL_LOCKED) return 0; to_vmx(vcpu)->nested.msr_ia32_feature_control = data; + if (host_initialized && data == 0) + vmx_leave_nested(vcpu); return 1; } @@ -8504,6 +8508,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) } /* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. 
+ */ +static void vmx_leave_nested(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) + nested_vmx_vmexit(vcpu); + free_nested(to_vmx(vcpu)); +} + +/* * L1's failure to enter L2 is a subset of a normal exit, as explained in * 23.7 "VM-entry failures during or after loading guest state" (this also * lists the acceptable exit-reason and exit-qualification parameters). -- cgit v0.10.2 From 533558bcb69ef28aff81b6ae9acda8943575319f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:20 +0100 Subject: KVM: nVMX: Pass vmexit parameters to nested_vmx_vmexit Instead of fixing up the vmcs12 after the nested vmexit, pass key parameters already when calling nested_vmx_vmexit. This will help tracing those vmexits. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bff5555..e3578b3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1058,7 +1058,9 @@ static inline bool is_exception(u32 intr_info) == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); } -static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); +static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, + u32 exit_intr_info, + unsigned long exit_qualification); static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 reason, unsigned long qualification); @@ -1967,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) if (!(vmcs12->exception_bitmap & (1u << nr))) return 0; - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); return 1; } @@ -4649,15 +4653,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending) return 0; if (nested_exit_on_nmi(vcpu)) { - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; - vmcs12->vm_exit_intr_info = NMI_VECTOR | - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, + NMI_VECTOR | INTR_TYPE_NMI_INTR | + INTR_INFO_VALID_MASK, 0); /* * The NMI-triggered VM exit counts as injection: * clear this one and block further NMIs. 
@@ -4679,15 +4680,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending) return 0; if (nested_exit_on_intr(vcpu)) { - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = - EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + 0, 0); /* * fall through to normal code, but now in L1, not L2 */ @@ -6849,7 +6846,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) return handle_invalid_guest_state(vcpu); if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); return 1; } @@ -7590,15 +7589,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { - struct vmcs12 *vmcs12; - nested_vmx_vmexit(vcpu); - vmcs12 = get_vmcs12(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 exit_reason; if (fault->error_code & PFERR_RSVD_MASK) - vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + exit_reason = EXIT_REASON_EPT_MISCONFIG; else - vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; - vmcs12->exit_qualification = vcpu->arch.exit_qualification; + exit_reason = EXIT_REASON_EPT_VIOLATION; + nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); vmcs12->guest_physical_address = fault->address; } @@ -7636,7 +7634,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); else kvm_inject_page_fault(vcpu, fault); } @@ -8191,7 +8191,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, * exit-information fields only. Other fields are modified by L1 with VMWRITE, * which already writes to vmcs12 directly. */ -static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + u32 exit_reason, u32 exit_intr_info, + unsigned long exit_qualification) { /* update guest state fields: */ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); @@ -8282,10 +8284,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) /* update exit information fields: */ - vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; - vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + vmcs12->vm_exit_reason = exit_reason; + vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + vmcs12->vm_exit_intr_info = exit_intr_info; if ((vmcs12->vm_exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) @@ -8452,7 +8454,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * and modify vmcs12 to make it see what it would expect to see there if * L2 was its real guest. 
Must only be called when in L2 (is_guest_mode()) */ -static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) +static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, + u32 exit_intr_info, + unsigned long exit_qualification) { struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; @@ -8462,7 +8466,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vmx->nested.nested_run_pending); leave_guest_mode(vcpu); - prepare_vmcs12(vcpu, vmcs12); + prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, + exit_qualification); cpu = get_cpu(); vmx->loaded_vmcs = &vmx->vmcs01; @@ -8513,7 +8518,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) static void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, -1, 0, 0); free_nested(to_vmx(vcpu)); } -- cgit v0.10.2 From 542060ea79c861e100411a5a44df747b56a693df Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:21 +0100 Subject: KVM: nVMX: Add tracepoints for nested_vmexit and nested_vmexit_inject Already used by nested SVM for tracing nested vmexit: kvm_nested_vmexit marks exits from L2 to L0 while kvm_nested_vmexit_inject marks vmexits that are reflected to L1. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e3578b3..e539c45 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6697,6 +6697,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); u32 exit_reason = vmx->exit_reason; + trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, + vmcs_readl(EXIT_QUALIFICATION), + vmx->idt_vectoring_info, + intr_info, + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + KVM_ISA_VMX); + if (vmx->nested.nested_run_pending) return 0; @@ -8469,6 +8476,13 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, exit_qualification); + trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, + vmcs12->exit_qualification, + vmcs12->idt_vectoring_info_field, + vmcs12->vm_exit_intr_info, + vmcs12->vm_exit_intr_error_code, + KVM_ISA_VMX); + cpu = get_cpu(); vmx->loaded_vmcs = &vmx->vmcs01; vmx_vcpu_put(vcpu); -- cgit v0.10.2 From cae501397a25dc1e88375925c5e93a264d4a55ba Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:22 +0100 Subject: KVM: nVMX: Clean up handling of VMX-related MSRs This simplifies the code and also stops issuing warning about writing to unhandled MSRs when VMX is disabled or the Feature Control MSR is locked - we do handle them all according to the spec. 
Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 37813b5..2e4a42d 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -527,6 +527,7 @@ #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e539c45..fc4a255 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2361,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high) return low | ((u64)high << 32); } -/* - * If we allow our guest to use VMX instructions (i.e., nested VMX), we should - * also let it use VMX-specific MSRs. - * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a - * VMX-specific MSR, or 0 when we haven't (and the caller should handle it - * like all other MSRs). - */ +/* Returns 0 on success, non-0 otherwise. */ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { - if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC && - msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) { - /* - * According to the spec, processors which do not support VMX - * should throw a #GP(0) when VMX capability MSRs are read. - */ - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - switch (msr_index) { - case MSR_IA32_FEATURE_CONTROL: - if (nested_vmx_allowed(vcpu)) { - *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; - break; - } - return 0; case MSR_IA32_VMX_BASIC: /* * This MSR reports some information about VMX support. We @@ -2453,38 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = nested_vmx_ept_caps; break; default: - return 0; - } - - return 1; -} - -static void vmx_leave_nested(struct kvm_vcpu *vcpu); - -static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - u32 msr_index = msr_info->index; - u64 data = msr_info->data; - bool host_initialized = msr_info->host_initiated; - - if (!nested_vmx_allowed(vcpu)) - return 0; - - if (msr_index == MSR_IA32_FEATURE_CONTROL) { - if (!host_initialized && - to_vmx(vcpu)->nested.msr_ia32_feature_control - & FEATURE_CONTROL_LOCKED) - return 0; - to_vmx(vcpu)->nested.msr_ia32_feature_control = data; - if (host_initialized && data == 0) - vmx_leave_nested(vcpu); return 1; } - /* - * No need to treat VMX capability MSRs specially: If we don't handle - * them, handle_wrmsr will #GP(0), which is correct (they are readonly) - */ return 0; } @@ -2530,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) case MSR_IA32_SYSENTER_ESP: data = vmcs_readl(GUEST_SYSENTER_ESP); break; + case MSR_IA32_FEATURE_CONTROL: + if (!nested_vmx_allowed(vcpu)) + return 1; + data = to_vmx(vcpu)->nested.msr_ia32_feature_control; + break; + case MSR_IA32_VMX_BASIC ... 
MSR_IA32_VMX_VMFUNC: + if (!nested_vmx_allowed(vcpu)) + return 1; + return vmx_get_vmx_msr(vcpu, msr_index, pdata); case MSR_TSC_AUX: if (!to_vmx(vcpu)->rdtscp_enabled) return 1; /* Otherwise falls through */ default: - if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) - return 0; msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; @@ -2549,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 0; } +static void vmx_leave_nested(struct kvm_vcpu *vcpu); + /* * Writes msr value into into the appropriate "register". * Returns 0 on success, non-0 otherwise. @@ -2603,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: ret = kvm_set_msr_common(vcpu, msr_info); break; + case MSR_IA32_FEATURE_CONTROL: + if (!nested_vmx_allowed(vcpu) || + (to_vmx(vcpu)->nested.msr_ia32_feature_control & + FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + return 1; + vmx->nested.msr_ia32_feature_control = data; + if (msr_info->host_initiated && data == 0) + vmx_leave_nested(vcpu); + break; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + return 1; /* they are read-only */ case MSR_TSC_AUX: if (!vmx->rdtscp_enabled) return 1; @@ -2611,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; /* Otherwise falls through */ default: - if (vmx_set_vmx_msr(vcpu, msr_info)) - break; msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; -- cgit v0.10.2 From 7af40ad37b3f097f367cbe9c0198caccce6fd83b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:23 +0100 Subject: KVM: nVMX: Fix nested_run_pending on activity state HLT When we suspend the guest in HLT state, the nested run is no longer pending - we emulated it completely. So only set nested_run_pending after checking the activity state. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fc4a255..f9a5433 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8046,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); - vmx->nested.nested_run_pending = 1; - vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); cpu = get_cpu(); @@ -8066,6 +8064,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) return kvm_emulate_halt(vcpu); + vmx->nested.nested_run_pending = 1; + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet -- cgit v0.10.2 From 3edf1e698ff638c2ab095e8c60fd11c5d292fc5f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:24 +0100 Subject: KVM: nVMX: Update guest activity state field on L2 exits Set guest activity state in L1's VMCS according to the VCPUs mp_state. This ensures we report the correct state in case we L2 executed HLT or if we put L2 into HLT state and it was now woken up by an event. 
Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f9a5433..407b05c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8219,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) + vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; + else + vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) -- cgit v0.10.2 From 699bde3b6c95319749a8e1b7aa2b3f6bee84bff8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 20 Jan 2014 12:34:13 +0100 Subject: KVM: s390: Fix memory access error detection Seems that commit 210b1607012cc9034841a393e0591b2c86d9e26c (KVM: s390: Removed SIE_INTERCEPT_UCONTROL) lost a hunk when we reworked our patch queue to rework the async_fp code. We now ignore faults on the sie instruction (guest accesses non-existing memory) instead of sending a fault into the guest. This leads to hang situations with the old virtio transport that checks for descriptor memory after guest memory. Instead of bailing out this code now goes wild... Lets re-add the check. Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1bb1dda..7635c00 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -738,6 +738,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; rc = -EREMOTE; + } else { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); -- cgit v0.10.2 From 94491620e1362f6065ab821c13eb54b716ada19f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jan 2014 18:02:38 -0800 Subject: kvm: make KVM_MMU_AUDIT help text more readable Make KVM_MMU_AUDIT kconfig help text readable and collapse two spaces between words down to one space. Signed-off-by: Randy Dunlap Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db..287e4c8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -80,7 +80,7 @@ config KVM_MMU_AUDIT depends on KVM && TRACEPOINTS ---help--- This option adds a R/W kVM module parameter 'mmu_audit', which allows - audit KVM MMU at runtime. + auditing of KVM MMU events at runtime. config KVM_DEVICE_ASSIGNMENT bool "KVM legacy PCI device assignment support" -- cgit v0.10.2