From a06cdb5676272a12056820aeb49a1416ad2d0c6f Mon Sep 17 00:00:00 2001
From: Jean Delvare
Date: Tue, 18 May 2010 09:34:12 +0200
Subject: KVM: powerpc: fix init/exit annotation

kvmppc_e500_exit() is a module_exit function, so it should be tagged
with __exit, not __init. The incorrect annotation was added by commit
2986b8c72c272ea58edd37903b042c6da985627d.

Signed-off-by: Jean Delvare
Cc: stable@kernel.org
Signed-off-by: Alexander Graf
Signed-off-by: Avi Kivity

diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index bc2b400..e8a00b0 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -164,7 +164,7 @@ static int __init kvmppc_e500_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 }
 
-static void __init kvmppc_e500_exit(void)
+static void __exit kvmppc_e500_exit(void)
 {
 	kvmppc_booke_exit();
 }
--
cgit v0.10.2

From fe5913e4e1700cbfc337f4b1da9ddb26f6a55586 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Mon, 17 May 2010 14:43:34 +0200
Subject: KVM: SVM: Handle MCEs early in the vmexit process

This patch moves handling of the MC vmexits to an earlier point in the
vmexit path. The handle_exit function is too late because the vcpu
might already have changed its physical cpu.

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel
Signed-off-by: Avi Kivity

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 96dc232..5e1ed03 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1410,7 +1410,7 @@ static int nm_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm)
+static void svm_handle_mce(struct vcpu_svm *svm)
 {
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
@@ -1420,6 +1420,11 @@ static int mc_interception(struct vcpu_svm *svm)
 			"int $0x12\n");
 	/* not sure if we ever come back to this point */
 
+	return;
+}
+
+static int mc_interception(struct vcpu_svm *svm)
+{
 	return 1;
 }
 
@@ -3088,6 +3093,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
 		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
 	}
+
+	/*
+	 * We need to handle MC intercepts here before the vcpu has a chance to
+	 * change the physical cpu
+	 */
+	if (unlikely(svm->vmcb->control.exit_code ==
+		     SVM_EXIT_EXCP_BASE + MC_VECTOR))
+		svm_handle_mce(svm);
 }
 
 #undef R
--
cgit v0.10.2
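[Editor's note on the first patch above: __init code is freed once
initialization completes, so a module_exit handler wrongly tagged
__init can leave module unload jumping into freed memory. A minimal,
hypothetical module showing the intended pairing (a sketch, not the
e500 code) follows.]

    #include <linux/init.h>
    #include <linux/module.h>

    static int __init demo_init(void)	/* freed after the module loads */
    {
    	pr_info("demo: loaded\n");
    	return 0;
    }

    static void __exit demo_exit(void)	/* must stay resident for unload */
    {
    	pr_info("demo: unloaded\n");
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");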
From 67ec66077799f2fef84b21a643912b179c422281 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Mon, 17 May 2010 14:43:35 +0200
Subject: KVM: SVM: Implement workaround for Erratum 383

This patch implements a workaround for AMD erratum 383 in KVM. Without
this fix it is possible for a guest to kill the host machine. The
workaround implemented here is the one for hypervisors that will be
suggested by the next revision guide update.

[jan: fix overflow warning on i386]
[xiao: fix unused variable warning]

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel
Signed-off-by: Jan Kiszka
Signed-off-by: Xiao Guangrong
Signed-off-by: Avi Kivity

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b49d8ca..8c7ae43 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -110,6 +110,7 @@
 #define MSR_AMD64_PATCH_LOADER		0xc0010020
 #define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
 #define MSR_AMD64_OSVW_STATUS		0xc0010141
+#define MSR_AMD64_DC_CFG		0xc0011022
 #define MSR_AMD64_IBSFETCHCTL		0xc0011030
 #define MSR_AMD64_IBSFETCHLINAD		0xc0011031
 #define MSR_AMD64_IBSFETCHPHYSAD	0xc0011032
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5e1ed03..ce438e0 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/slab.h>
 
+#include <asm/tlbflush.h>
 #include <asm/desc.h>
 
 #include <asm/virtext.h>
@@ -56,6 +57,8 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
+static bool erratum_383_found __read_mostly;
+
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -374,6 +377,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	svm->vmcb->control.event_inj_err = error_code;
 }
 
+static void svm_init_erratum_383(void)
+{
+	u32 low, high;
+	int err;
+	u64 val;
+
+	/* Only Fam10h is affected */
+	if (boot_cpu_data.x86 != 0x10)
+		return;
+
+	/* Use _safe variants to not break nested virtualization */
+	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
+	if (err)
+		return;
+
+	val |= (1ULL << 47);
+
+	low  = lower_32_bits(val);
+	high = upper_32_bits(val);
+
+	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
+
+	erratum_383_found = true;
+}
+
 static int has_svm(void)
 {
 	const char *msg;
@@ -429,6 +457,8 @@ static int svm_hardware_enable(void *garbage)
 
 	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 
+	svm_init_erratum_383();
+
 	return 0;
 }
@@ -1410,8 +1440,59 @@ static int nm_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static bool is_erratum_383(void)
+{
+	int err, i;
+	u64 value;
+
+	if (!erratum_383_found)
+		return false;
+
+	value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
+	if (err)
+		return false;
+
+	/* Bit 62 may or may not be set for this mce */
+	value &= ~(1ULL << 62);
+
+	if (value != 0xb600000000010015ULL)
+		return false;
+
+	/* Clear MCi_STATUS registers */
+	for (i = 0; i < 6; ++i)
+		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
+
+	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
+	if (!err) {
+		u32 low, high;
+
+		value &= ~(1ULL << 2);
+		low    = lower_32_bits(value);
+		high   = upper_32_bits(value);
+
+		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
+	}
+
+	/* Flush tlb to evict multi-match entries */
+	__flush_tlb_all();
+
+	return true;
+}
+
 static void svm_handle_mce(struct vcpu_svm *svm)
 {
+	if (is_erratum_383()) {
+		/*
+		 * Erratum 383 triggered. Guest state is corrupt so kill the
+		 * guest.
+		 */
+		pr_err("KVM: Guest triggered AMD Erratum 383\n");
+
+		set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
+
+		return;
+	}
+
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
 	 * the host. So do it by hand here.
--
cgit v0.10.2
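[Editor's note: for readers who want to poke at the erratum-detection
logic in isolation, here is a minimal user-space sketch of the
MC0_STATUS signature test in is_erratum_383() above. The constant and
the bit-62 masking come from the patch; everything else (names, main)
is illustrative.]

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Bit 62 (overflow) may or may not be set for this MCE, so it is
     * masked off before comparing against the erratum's known value.
     */
    static int looks_like_erratum_383(uint64_t mc0_status)
    {
    	mc0_status &= ~(1ULL << 62);
    	return mc0_status == 0xb600000000010015ULL;
    }

    int main(void)
    {
    	printf("%d\n", looks_like_erratum_383(0xb600000000010015ULL)); /* 1 */
    	printf("%d\n", looks_like_erratum_383(0xf600000000010015ULL)); /* 1, bit 62 set */
    	printf("%d\n", looks_like_erratum_383(0x0000000000000001ULL)); /* 0 */
    	return 0;
    }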
From 3be2264be3c00865116f997dc53ebcc90fe7fc4b Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Fri, 28 May 2010 09:44:59 -0300
Subject: KVM: MMU: invalidate and flush on spte small->large page size change

Always invalidate spte and flush TLBs when changing page size, to make
sure different sized translations for the same address are never cached
in a CPU's TLB. Currently the only case where this occurs is when a
non-leaf spte pointer is overwritten by a leaf, large spte entry. This
can happen after dirty logging is disabled on a memslot, for example.

Noticed by Andrea.

KVM-Stable-Tag
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81563e7..6fbcb48 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1870,6 +1870,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, sptep);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
+			kvm_flush_remote_tlbs(vcpu->kvm);
 		} else if (pfn != spte_to_pfn(*sptep)) {
 			pgprintk("hfn old %lx new %lx\n",
 				 spte_to_pfn(*sptep), pfn);
--
cgit v0.10.2

From 69325a122580d3a7b26589e8efdd6663001c3297 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Thu, 27 May 2010 14:35:58 +0300
Subject: KVM: MMU: Remove user access when allowing kernel access to gpte.w=0 page

If cr0.wp=0, we have to allow the guest kernel access to a page with
pte.w=0. We do that by setting spte.w=1, since the host cr0.wp must
remain set so the host can write protect pages. Once we allow write
access, we must remove user access, otherwise we mistakenly allow the
user to write the page.

Reviewed-by: Xiao Guangrong
Signed-off-by: Avi Kivity

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6fbcb48..a6f695d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 		spte |= PT_WRITABLE_MASK;
 
+		if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK))
+			spte &= ~PT_USER_MASK;
+
 		/*
 		 * Optimization: for pte sync, if spte was writable the hash
 		 * lookup is unnecessary (and expensive). Write protection
--
cgit v0.10.2

From 05b782ab951a896d7da41775999821f692dc9e01 Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Wed, 26 May 2010 21:36:33 +0200
Subject: KVM: Fix order passed to iommu_unmap

This is obviously a left-over from the old interface taking the size.
Apparently this is a mostly harmless issue with the current iommu_unmap
implementation.

Signed-off-by: Jan Kiszka
Acked-by: Joerg Roedel
Signed-off-by: Avi Kivity

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d2f06be..96048ee 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -271,7 +271,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 		pfn  = phys >> PAGE_SHIFT;
 
 		/* Unmap address from IO address space */
-		order       = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+		order       = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
 		unmap_pages = 1ULL << order;
 
 		/* Unpin all pages we just unmapped to not leak any memory */
--
cgit v0.10.2
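[Editor's note on commit 69325a12 above: its permission reasoning is
compact enough to model in user space. A hedged sketch follows; the
bit positions match the x86 PTE layout, but the function and names are
illustrative, not the kernel's set_spte().]

    #include <stdint.h>
    #include <stdio.h>

    #define DEMO_PT_WRITABLE_MASK	(1u << 1)	/* PTE writable bit */
    #define DEMO_PT_USER_MASK	(1u << 2)	/* PTE user bit */

    /*
     * With guest cr0.wp=0, the shadow PTE must be writable so the guest
     * kernel can write through a read-only guest PTE; user access must
     * then be dropped, or guest user mode could write the page too.
     */
    static uint32_t shadow_pte_flags(int gpte_writable)
    {
    	uint32_t spte = DEMO_PT_USER_MASK | DEMO_PT_WRITABLE_MASK;

    	if (!gpte_writable)
    		spte &= ~DEMO_PT_USER_MASK;	/* the fix */

    	return spte;
    }

    int main(void)
    {
    	printf("gpte.w=0 -> spte flags 0x%x\n", shadow_pte_flags(0)); /* 0x2 */
    	printf("gpte.w=1 -> spte flags 0x%x\n", shadow_pte_flags(1)); /* 0x6 */
    	return 0;
    }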
From 3499f4d0d1159a21245d6071f8af6a71b86a78bc Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Wed, 26 May 2010 17:57:05 +0200
Subject: KVM: ia64: Add missing spin_unlock in kvm_arch_hardware_enable()

Add a spin_unlock missing on the error path.

The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@@
expression E1;
@@

* spin_lock(E1,...);
  <+... when != E1
  if (...) {
    ... when != E1
*   return ...;
  }
  ...+>
* spin_unlock(E1,...);
// </smpl>

Signed-off-by: Julia Lawall
Signed-off-by: Avi Kivity

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d5f4e91..21b7013 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -144,6 +144,7 @@ int kvm_arch_hardware_enable(void *garbage)
 			VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
+		spin_unlock(&vp_lock);
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
 		return -EINVAL;
 	}
--
cgit v0.10.2

From 07dc7263b99e4ddad2b4c69765a428ccb7d48938 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Wed, 2 Jun 2010 11:26:26 -0300
Subject: KVM: read apic->irr with ioapic lock held

Read ioapic->irr inside the ioapic->lock protected section.

KVM-Stable-Tag
Signed-off-by: Marcelo Tosatti

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7c79c1d..3500dee 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -192,12 +192,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
-	u32 old_irr = ioapic->irr;
+	u32 old_irr;
 	u32 mask = 1 << irq;
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
 	spin_lock(&ioapic->lock);
+	old_irr = ioapic->irr;
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
--
cgit v0.10.2
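[Editor's note on the last patch: the bug pattern it closes is generic.
A snapshot of shared state taken before acquiring the lock can be stale
by the time the lock is held. A minimal pthread sketch of the corrected
pattern follows; the names are illustrative, not the KVM structures.]

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    struct demo_ioapic {
    	pthread_mutex_t lock;
    	uint32_t irr;		/* shared interrupt request register */
    };

    static uint32_t set_pin(struct demo_ioapic *io, int pin)
    {
    	uint32_t old_irr;

    	pthread_mutex_lock(&io->lock);
    	old_irr = io->irr;	/* snapshot only while the lock is held */
    	io->irr |= 1u << pin;
    	pthread_mutex_unlock(&io->lock);

    	return old_irr;
    }

    int main(void)
    {
    	struct demo_ioapic io = { PTHREAD_MUTEX_INITIALIZER, 0 };

    	printf("old irr: 0x%x\n", set_pin(&io, 3)); /* 0x0 */
    	printf("old irr: 0x%x\n", set_pin(&io, 5)); /* 0x8 */
    	return 0;
    }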