From abdb080f7ac8a85547f5e0246362790043bbd3f2 Mon Sep 17 00:00:00 2001
From: Andrey Smetanin
Date: Tue, 10 Nov 2015 15:36:31 +0300
Subject: kvm/irqchip: kvm_arch_irq_routing_update renaming split

Actually kvm_arch_irq_routing_update() should be
kvm_arch_post_irq_routing_update(), as it's called at the end of an irq
routing update.  This renaming frees the kvm_arch_irq_routing_update()
name for a new weak function, which will be used to update mappings for
arch-specific irq routing entries (in particular, the upcoming Hyper-V
synthetic interrupts).

Signed-off-by: Andrey Smetanin
Reviewed-by: Roman Kagan
Signed-off-by: Denis V. Lunev
CC: Gleb Natapov
CC: Paolo Bonzini
CC: Roman Kagan
CC: Denis V. Lunev
CC: qemu-devel@nongnu.org
Signed-off-by: Paolo Bonzini

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..e39768c 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -332,7 +332,7 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
 
-void kvm_arch_irq_routing_update(struct kvm *kvm)
+void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 {
 	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
 		return;

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c923350..23555c0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -484,12 +484,12 @@ void vcpu_put(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
-void kvm_arch_irq_routing_update(struct kvm *kvm);
+void kvm_arch_post_irq_routing_update(struct kvm *kvm);
 #else
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 }
-static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
+static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
@@ -1091,6 +1091,7 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
+void kvm_arch_irq_routing_update(struct kvm *kvm);
 
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {

diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index f0b08a2..fe84e1a 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -166,6 +166,10 @@ out:
 	return r;
 }
 
+void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+}
+
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *ue,
 			unsigned nr,
@@ -219,9 +223,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	old = kvm->irq_routing;
 	rcu_assign_pointer(kvm->irq_routing, new);
 	kvm_irq_routing_update(kvm);
+	kvm_arch_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
-	kvm_arch_irq_routing_update(kvm);
+	kvm_arch_post_irq_routing_update(kvm);
 
 	synchronize_srcu_expedited(&kvm->irq_srcu);
-- cgit v0.10.2

From 6308630bd3dbb6a8a883c4c571ce5e5a759a8a0e Mon Sep 17 00:00:00 2001
From: Andrey Smetanin
Date: Tue, 10 Nov 2015 15:36:32 +0300
Subject: kvm/x86: split ioapic-handled and EOI exit bitmaps

The function to determine if the vector is handled by ioapic used to rely
on the fact that only ioapic-handled vectors were set up to cause vmexits
when virtual apic was in use.

We're going to break this assumption when introducing Hyper-V synthetic
interrupts: they may need to cause vmexits too.

To achieve that, introduce a new bitmap dedicated specifically for
ioapic-handled vectors, and populate EOI exit bitmap from it for now.

Signed-off-by: Andrey Smetanin
Reviewed-by: Roman Kagan
Signed-off-by: Denis V.
Lunev CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30cfd64..f6d8894 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -400,7 +400,7 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ - u64 eoi_exit_bitmap[4]; + DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; @@ -834,7 +834,7 @@ struct kvm_x86_ops { int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 88d0a92..1facfd6 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -233,7 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) } -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; @@ -250,7 +250,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) (e->fields.trig_mode == IOAPIC_EDGE_TRIG && kvm_apic_pending_eoi(vcpu, e->fields.vector))) __set_bit(e->fields.vector, - (unsigned long *)eoi_exit_bitmap); + ioapic_handled_vectors); } } spin_unlock(&ioapic->lock); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 084617d..2d16dc2 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -121,7 +121,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index e39768c..ece901c 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -339,7 +339,8 @@ void kvm_arch_post_irq_routing_update(struct kvm *kvm) kvm_make_scan_ioapic_request(kvm); } -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors) { struct kvm *kvm = vcpu->kvm; struct kvm_kernel_irq_routing_entry *entry; @@ -369,7 +370,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) u32 vector = entry->msi.data & 0xff; __set_bit(vector, - (unsigned long *) eoi_exit_bitmap); + ioapic_handled_vectors); } } } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4d30b86..9469d453 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -932,7 +932,7 @@ int 
kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) { - return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); + return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); } static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 83a1c64..ebb76e8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3564,7 +3564,7 @@ static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) return 0; } -static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { return; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af823a3..c8a87c9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8257,9 +8257,8 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { - u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; if (!vmx_cpu_uses_apicv(vcpu)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eed3228..9c69337 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6301,15 +6301,16 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); + bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); if (irqchip_split(vcpu->kvm)) - kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu); + kvm_x86_ops->load_eoi_exitmap(vcpu, + (u64 *)vcpu->arch.ioapic_handled_vectors); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -6417,7 +6418,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); if (test_bit(vcpu->arch.pending_ioapic_eoi, - (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->arch.ioapic_handled_vectors)) { vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; vcpu->run->eoi.vector = vcpu->arch.pending_ioapic_eoi; -- cgit v0.10.2 From d62caabb41f33d96333f9ef15e09cd26e1c12760 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Tue, 10 Nov 2015 15:36:33 +0300 Subject: kvm/x86: per-vcpu apicv deactivation support The decision on whether to use hardware APIC virtualization used to be taken globally, based on the availability of the feature in the CPU and the value of a module parameter. However, under certain circumstances we want to control it on per-vcpu basis. In particular, when the userspace activates HyperV synthetic interrupt controller (SynIC), APICv has to be disabled as it's incompatible with SynIC auto-EOI behavior. To achieve that, introduce 'apicv_active' flag on struct kvm_vcpu_arch, and kvm_vcpu_deactivate_apicv() function to turn APICv off. The flag is initialized based on the module parameter and CPU capability, and consulted whenever an APICv-specific action is performed. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f6d8894..bac0d54 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -400,6 +400,7 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ + bool apicv_active; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; @@ -831,7 +832,8 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); + bool (*get_enable_apicv)(void); + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); @@ -1086,6 +1088,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 097060e..3982b47 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (kvm_cpu_has_extint(v)) return 1; - if (kvm_vcpu_apic_vid_enabled(v)) + if (kvm_vcpu_apicv_active(v)) return 0; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9469d453..618a20d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -379,7 +379,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + if (apic->vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -392,7 +393,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { + if (unlikely(vcpu->arch.apicv_active)) { /* try to update RVI */ apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -418,7 +419,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * because the processor can modify ISR under the hood. Instead * just set SVI. */ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); else { ++apic->isr_count; @@ -466,7 +467,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * on the other hand isr_count and highest_isr_cache are unused * and must be left alone. 
*/ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); else { @@ -852,7 +853,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); else { apic_set_irr(vector, apic); @@ -1225,7 +1226,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR; - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) bitmap = apic->regs + APIC_IRR; if (apic_test_vector(vec, bitmap)) @@ -1693,8 +1694,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; + apic->irr_pending = vcpu->arch.apicv_active; + apic->isr_count = vcpu->arch.apicv_active ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1906,15 +1907,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? + apic->isr_count = vcpu->arch.apicv_active ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) { kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); - if (unlikely(kvm_x86_ops->hwapic_isr_update)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); + } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) kvm_rtc_eoi_tracking_restore_one(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index fde8e35d..5fc60e4 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } -static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) { - return kvm_x86_ops->cpu_uses_apicv(vcpu); + return vcpu->arch.apic && vcpu->arch.apicv_active; } static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ebb76e8..2401fc8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3559,9 +3559,13 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) return; } -static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool svm_get_enable_apicv(void) +{ + return false; +} + +static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { - return 0; } static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -4328,7 +4332,8 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, - .cpu_uses_apicv = svm_cpu_uses_apicv, + .get_enable_apicv = svm_get_enable_apicv, + .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .sync_pir_to_irr = svm_sync_pir_to_irr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 
c8a87c9..1a8bfaa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -19,6 +19,7 @@ #include "irq.h" #include "mmu.h" #include "cpuid.h" +#include "lapic.h" #include #include @@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); @@ -2498,7 +2497,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (vmx_cpu_uses_apicv(&vmx->vcpu)) + if (kvm_vcpu_apicv_active(&vmx->vcpu)) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_POSTED_INTR; @@ -4462,9 +4461,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool vmx_get_enable_apicv(void) { - return enable_apicv && lapic_in_kernel(vcpu); + return enable_apicv; } static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4586,11 +4585,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); } -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) -{ - return; -} - /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. 
@@ -4660,11 +4654,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; return pin_based_exec_ctrl; } +static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -4703,7 +4704,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -4767,7 +4768,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control(vmx)); - if (vmx_cpu_uses_apicv(&vmx->vcpu)) { + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); @@ -4919,7 +4920,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx_cpu_uses_apicv(vcpu)) + if (kvm_vcpu_apicv_active(vcpu)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); if (vmx->vpid != 0) @@ -6203,15 +6204,6 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->hwapic_isr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); @@ -8152,7 +8144,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) * apicv */ if (!cpu_has_vmx_virtualize_x2apic_mode() || - !vmx_cpu_uses_apicv(vcpu)) + !kvm_vcpu_apicv_active(vcpu)) return; if (!cpu_need_tpr_shadow(vcpu)) @@ -8259,7 +8251,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { - if (!vmx_cpu_uses_apicv(vcpu)) + if (!kvm_vcpu_apicv_active(vcpu)) return; vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); @@ -10803,7 +10795,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .cpu_uses_apicv = vmx_cpu_uses_apicv, + .get_enable_apicv = vmx_get_enable_apicv, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9c69337..f0250a0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2748,7 +2748,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - kvm_x86_ops->sync_pir_to_irr(vcpu); + if 
(vcpu->arch.apicv_active)
+		kvm_x86_ops->sync_pir_to_irr(vcpu);
+
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
 	return 0;
@@ -5867,6 +5869,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
+void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.apicv_active = false;
+	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -5960,6 +5968,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.apic)
 		return;
 
+	if (vcpu->arch.apicv_active)
+		return;
+
 	if (!vcpu->arch.apic->vapic_addr)
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
 	else
@@ -6306,7 +6317,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	if (irqchip_split(vcpu->kvm))
 		kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
 	else {
-		kvm_x86_ops->sync_pir_to_irr(vcpu);
+		if (vcpu->arch.apicv_active)
+			kvm_x86_ops->sync_pir_to_irr(vcpu);
 		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
 	kvm_x86_ops->load_eoi_exitmap(vcpu,
@@ -6453,7 +6465,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			 * Update architecture specific hints for APIC
 			 * virtual interrupt delivery.
 			 */
-			if (kvm_x86_ops->hwapic_irr_update)
+			if (vcpu->arch.apicv_active)
 				kvm_x86_ops->hwapic_irr_update(vcpu,
 					kvm_lapic_find_highest_irr(vcpu));
 		}
@@ -7524,6 +7536,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 
+	vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
-- cgit v0.10.2

From 5c919412fe61c35947816fdbd5f7bd09fe0dd073 Mon Sep 17 00:00:00 2001
From: Andrey Smetanin
Date: Tue, 10 Nov 2015 15:36:34 +0300
Subject: kvm/x86: Hyper-V synthetic interrupt controller

SynIC (synthetic interrupt controller) is a lapic extension, which is
controlled via MSRs and maintains for each vCPU

 - 16 synthetic interrupt "lines" (SINTs); each can be configured to
   trigger a specific interrupt vector, optionally with auto-EOI
   semantics

 - a message page in the guest memory with 16 256-byte per-SINT message
   slots

 - an event flag page in the guest memory with 16 2048-bit per-SINT
   event flag areas

The host triggers a SINT whenever it delivers a new message to the
corresponding slot or flips an event flag bit in the corresponding area.
The guest informs the host that it can try delivering a message by
explicitly asserting EOI in the lapic or writing to the End-Of-Message
(EOM) MSR.

The userspace (qemu) triggers interrupts and receives EOM notifications
via irqfd with resampler; for that, a GSI is allocated for each
configured SINT, and the irq_routing api is extended to support GSI-SINT
mapping.

Changes v4:
* added activation of SynIC by vcpu KVM_ENABLE_CAP
* added per SynIC active flag
* added deactivation of APICv upon SynIC activation

Changes v3:
* added KVM_CAP_HYPERV_SYNIC and KVM_IRQ_ROUTING_HV_SINT notes into docs

Changes v2:
* do not use posted interrupts for Hyper-V SynIC AutoEOI vectors
* add Hyper-V SynIC vectors into EOI exit bitmap
* Hyper-V SynIC SINT MSR write logic simplified

Signed-off-by: Andrey Smetanin
Reviewed-by: Roman Kagan
Signed-off-by: Denis V. Lunev
CC: Gleb Natapov
CC: Paolo Bonzini
CC: Roman Kagan
CC: Denis V.
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 092ee9f..88af846 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1451,6 +1451,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; @@ -1459,6 +1460,7 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 No flags are specified so far, the corresponding field must be set to zero. @@ -1482,6 +1484,10 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) @@ -3685,3 +3691,16 @@ available, means that that the kernel has an implementation of the H_RANDOM hypercall backed by a hardware random-number generator. If present, the kernel H_RANDOM handler can be enabled for guest use with the KVM_CAP_PPC_ENABLE_HCALL capability. + +8.2 KVM_CAP_HYPERV_SYNIC + +Architectures: x86 +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that that the kernel has an implementation of the +Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is +used to support Windows Hyper-V based guest paravirt drivers(VMBus). + +In order to use SynIC, it has to be activated by setting this +capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this +will disable the use of APIC hardware virtualization even if supported +by the CPU, as it's incompatible with SynIC auto-EOI behavior. 
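As a rough illustration of the uapi described above (not part of the patch
series; error handling is omitted, and the fds, GSI and SINT numbers are
placeholder assumptions), a VMM might activate SynIC on a vcpu and route a
GSI to one of its SINTs roughly like this:

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: vm_fd/vcpu_fd come from KVM_CREATE_VM/KVM_CREATE_VCPU. */
static int synic_setup(int vm_fd, int vcpu_fd)
{
	struct kvm_enable_cap cap;
	struct kvm_irq_routing *route;
	int ret;

	/* Activate SynIC; as noted above, this also deactivates APICv. */
	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_HYPERV_SYNIC;
	ret = ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	if (ret < 0)
		return ret;

	/*
	 * Route GSI 5 to SINT 2 of vcpu 0 (numbers chosen arbitrarily).
	 * KVM_SET_GSI_ROUTING replaces the whole routing table, so a real
	 * VMM would include its existing irqchip/MSI entries here as well.
	 */
	route = calloc(1, sizeof(*route) + sizeof(struct kvm_irq_routing_entry));
	if (!route)
		return -1;
	route->nr = 1;
	route->entries[0].gsi = 5;
	route->entries[0].type = KVM_IRQ_ROUTING_HV_SINT;
	route->entries[0].u.hv_sint.vcpu = 0;
	route->entries[0].u.hv_sint.sint = 2;
	ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, route);
	free(route);
	return ret;
}

An irqfd (optionally with a resample fd, for EOM notifications) can then be
attached to that GSI with KVM_IRQFD, just as for any other routed interrupt.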
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bac0d54..bab47b6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -374,10 +375,24 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V synthetic interrupt controller (SynIC)*/ +struct kvm_vcpu_hv_synic { + u64 version; + u64 control; + u64 msg_page; + u64 evt_page; + atomic64_t sint[HV_SYNIC_SINT_COUNT]; + atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; + DECLARE_BITMAP(auto_eoi_bitmap, 256); + DECLARE_BITMAP(vec_bitmap, 256); + bool active; +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { u64 hv_vapic; s64 runtime_offset; + struct kvm_vcpu_hv_synic synic; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 62cf8c9..83a3c0c 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,13 +23,314 @@ #include "x86.h" #include "lapic.h" +#include "ioapic.h" #include "hyperv.h" #include +#include #include #include "trace.h" +static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) +{ + return atomic64_read(&synic->sint[sint]); +} + +static inline int synic_get_sint_vector(u64 sint_value) +{ + if (sint_value & HV_SYNIC_SINT_MASKED) + return -1; + return sint_value & HV_SYNIC_SINT_VECTOR_MASK; +} + +static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + return true; + } + return false; +} + +static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + u64 sint_value; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + sint_value = synic_read_sint(synic, i); + if (synic_get_sint_vector(sint_value) == vector && + sint_value & HV_SYNIC_SINT_AUTO_EOI) + return true; + } + return false; +} + +static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data) +{ + int vector; + + vector = data & HV_SYNIC_SINT_VECTOR_MASK; + if (vector < 16) + return 1; + /* + * Guest may configure multiple SINTs to use the same vector, so + * we maintain a bitmap of vectors handled by synic, and a + * bitmap of vectors with auto-eoi behavior. The bitmaps are + * updated here, and atomically queried on fast paths. + */ + + atomic64_set(&synic->sint[sint], data); + + if (synic_has_vector_connected(synic, vector)) + __set_bit(vector, synic->vec_bitmap); + else + __clear_bit(vector, synic->vec_bitmap); + + if (synic_has_vector_auto_eoi(synic, vector)) + __set_bit(vector, synic->auto_eoi_bitmap); + else + __clear_bit(vector, synic->auto_eoi_bitmap); + + /* Load SynIC vectors into EOI exit bitmap */ + kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); + return 0; +} + +static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_hv_synic *synic; + + if (vcpu_id >= atomic_read(&kvm->online_vcpus)) + return NULL; + vcpu = kvm_get_vcpu(kvm, vcpu_id); + if (!vcpu) + return NULL; + synic = vcpu_to_synic(vcpu); + return (synic->active) ? 
synic : NULL; +} + +static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) +{ + struct kvm *kvm = vcpu->kvm; + int gsi, idx; + + vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); + if (gsi != -1) + kvm_notify_acked_gsi(kvm, gsi); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, + u32 msr, u64 data, bool host) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + int ret; + + if (!synic->active) + return 1; + + vcpu_debug(vcpu, "Hyper-V SynIC set msr 0x%x 0x%llx host %d\n", + msr, data, host); + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + synic->control = data; + break; + case HV_X64_MSR_SVERSION: + if (!host) { + ret = 1; + break; + } + synic->version = data; + break; + case HV_X64_MSR_SIEFP: + if (data & HV_SYNIC_SIEFP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->evt_page = data; + break; + case HV_X64_MSR_SIMP: + if (data & HV_SYNIC_SIMP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->msg_page = data; + break; + case HV_X64_MSR_EOM: { + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + kvm_hv_notify_acked_sint(vcpu, i); + break; + } + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data); + break; + default: + ret = 1; + break; + } + return ret; +} + +static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) +{ + int ret; + + if (!synic->active) + return 1; + + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + *pdata = synic->control; + break; + case HV_X64_MSR_SVERSION: + *pdata = synic->version; + break; + case HV_X64_MSR_SIEFP: + *pdata = synic->evt_page; + break; + case HV_X64_MSR_SIMP: + *pdata = synic->msg_page; + break; + case HV_X64_MSR_EOM: + *pdata = 0; + break; + case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: + *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); + break; + default: + ret = 1; + break; + } + return ret; +} + +int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_lapic_irq irq; + int ret, vector; + + if (sint >= ARRAY_SIZE(synic->sint)) + return -EINVAL; + + vector = synic_get_sint_vector(synic_read_sint(synic, sint)); + if (vector < 0) + return -ENOENT; + + memset(&irq, 0, sizeof(irq)); + irq.dest_id = kvm_apic_id(vcpu->arch.apic); + irq.dest_mode = APIC_DEST_PHYSICAL; + irq.delivery_mode = APIC_DM_FIXED; + irq.vector = vector; + irq.level = 1; + + ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); + vcpu_debug(vcpu, "Hyper-V SynIC set irq ret %d\n", ret); + return ret; +} + +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + return synic_set_irq(synic, sint); +} + +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + int i; + + vcpu_debug(vcpu, "Hyper-V SynIC send eoi vec %d\n", vector); + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + kvm_hv_notify_acked_sint(vcpu, i); +} + +static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) + return -EINVAL; + + atomic_set(&synic->sint_to_gsi[sint], gsi); + return 0; +} + +void kvm_hv_irq_routing_update(struct kvm *kvm) +{ + struct kvm_irq_routing_table *irq_rt; + struct kvm_kernel_irq_routing_entry *e; + u32 gsi; + + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); + + for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + if (e->type == KVM_IRQ_ROUTING_HV_SINT) + kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, + e->hv_sint.sint, gsi); + } + } +} + +static void synic_init(struct kvm_vcpu_hv_synic *synic) +{ + int i; + + memset(synic, 0, sizeof(*synic)); + synic->version = HV_SYNIC_VERSION_1; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); + atomic_set(&synic->sint_to_gsi[i], -1); + } +} + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) +{ + synic_init(vcpu_to_synic(vcpu)); +} + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) +{ + /* + * Hyper-V SynIC auto EOI SINT's are + * not compatible with APICV, so deactivate APICV + */ + kvm_vcpu_deactivate_apicv(vcpu); + vcpu_to_synic(vcpu)->active = true; + return 0; +} + static bool kvm_hv_msr_partition_wide(u32 msr) { bool r = false; @@ -226,6 +527,13 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv->runtime_offset = data - current_task_runtime_100ns(); break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: + return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -304,6 +612,13 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_VP_RUNTIME: data = current_task_runtime_100ns() + hv->runtime_offset; break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index c7bce55..315af4b 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -29,4 +29,27 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_hv_hypercall_enabled(struct kvm *kvm); int kvm_hv_hypercall(struct kvm_vcpu *vcpu); +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); + +static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv.synic; +} + +static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +{ + struct kvm_vcpu_hv *hv; + struct kvm_vcpu_arch *arch; + + hv = container_of(synic, struct kvm_vcpu_hv, synic); + arch = container_of(hv, struct kvm_vcpu_arch, hyperv); + return container_of(arch, struct kvm_vcpu, arch); +} +void kvm_hv_irq_routing_update(struct kvm *kvm); + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index ece901c..8fc89ef 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -33,6 +33,8 @@ #include "lapic.h" +#include "hyperv.h" + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -219,6 +221,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { @@ -257,6 +269,11 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; break; + case KVM_IRQ_ROUTING_HV_SINT: + e->set = kvm_hv_set_sint; + e->hv_sint.vcpu = ue->u.hv_sint.vcpu; + e->hv_sint.sint = ue->u.hv_sint.sint; + break; default: goto out; } @@ -376,3 +393,20 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, } srcu_read_unlock(&kvm->irq_srcu, idx); } + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + switch (irq->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(irq, kvm, irq_source_id, level, + line_status); + default: + return -EWOULDBLOCK; + } +} + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + kvm_hv_irq_routing_update(kvm); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 618a20d..36591fa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -41,6 +41,7 @@ 
#include "trace.h" #include "x86.h" #include "cpuid.h" +#include "hyperv.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -128,11 +129,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline int kvm_apic_id(struct kvm_lapic *apic) -{ - return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; -} - /* The logical map is definitely wrong if we have multiple * modes at the same time. (Physical map is always right.) */ @@ -975,6 +971,9 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); + if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) + kvm_hv_synic_send_eoi(apic->vcpu, vector); + kvm_ioapic_send_eoi(apic, vector); kvm_make_request(KVM_REQ_EVENT, apic->vcpu); return vector; @@ -1884,6 +1883,12 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) apic_set_isr(vector, apic); apic_update_ppr(apic); apic_clear_irr(vector, apic); + + if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { + apic_clear_isr(vector, apic); + apic_update_ppr(apic); + } + return vector; } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 5fc60e4..41bdb35 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -164,6 +164,11 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } +static inline int kvm_apic_id(struct kvm_lapic *apic) +{ + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f0250a0..eb64377ed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -966,6 +966,7 @@ static u32 emulated_msrs[] = { HV_X64_MSR_RESET, HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -2541,6 +2542,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -3193,6 +3195,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_HYPERV_SYNIC: + return kvm_hv_activate_synic(vcpu); + default: + return -EINVAL; + } +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3457,6 +3473,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_set_guest_paused(vcpu); goto out; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -EINVAL; } @@ -6309,6 +6334,8 @@ static void process_smi(struct kvm_vcpu *vcpu) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { + u64 eoi_exit_bitmap[4]; + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; @@ -6321,8 +6348,9 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu, - (u64 
*)vcpu->arch.ioapic_handled_vectors); + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, + vcpu_to_synic(vcpu)->vec_bitmap, 256); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -7594,6 +7622,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pending_external_vector = -1; + kvm_hv_vcpu_init(vcpu); + return 0; fail_free_mce_banks: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 23555c0..ebaf2f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -318,6 +318,11 @@ struct kvm_s390_adapter_int { u32 adapter_id; }; +struct kvm_hv_sint { + u32 vcpu; + u32 sint; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -331,6 +336,7 @@ struct kvm_kernel_irq_routing_entry { } irqchip; struct msi_msg msi; struct kvm_s390_adapter_int adapter; + struct kvm_hv_sint hv_sint; }; struct hlist_node link; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 03f3618..27ce460 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -831,6 +831,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #define KVM_CAP_SPLIT_IRQCHIP 121 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 +#define KVM_CAP_HYPERV_SYNIC 123 #ifdef KVM_CAP_IRQ_ROUTING @@ -854,10 +855,16 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 struct kvm_irq_routing_entry { __u32 gsi; @@ -868,6 +875,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; -- cgit v0.10.2 From db3975717ac5e2c2761bae7b90c4f2e0abb5ef22 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Tue, 10 Nov 2015 15:36:35 +0300 Subject: kvm/x86: Hyper-V kvm exit A new vcpu exit is introduced to notify the userspace of the changes in Hyper-V SynIC configuration triggered by guest writing to the corresponding MSRs. Changes v4: * exit into userspace only if guest writes into SynIC MSR's Changes v3: * added KVM_EXIT_HYPERV types and structs notes into docs Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 88af846..053f613 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3337,6 +3337,28 @@ the userspace IOAPIC should process the EOI and retrigger the interrupt if it is still asserted. Vector is the LAPIC interrupt vector for which the EOI was received. + struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; + }; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; +Indicates that the VCPU exits into userspace to process some tasks +related to Hyper-V emulation. +Valid values for 'type' are: + KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about +Hyper-V SynIC state change. 
Notification is used to remap SynIC +event/message pages and to enable/disable SynIC messages/events processing +in userspace. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bab47b6..f608e17 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -393,6 +393,7 @@ struct kvm_vcpu_hv { u64 hv_vapic; s64 runtime_offset; struct kvm_vcpu_hv_synic synic; + struct kvm_hyperv_exit exit; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 83a3c0c..41869a9 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -130,6 +130,20 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) srcu_read_unlock(&kvm->irq_srcu, idx); } +static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; + + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; + hv_vcpu->exit.u.synic.msr = msr; + hv_vcpu->exit.u.synic.control = synic->control; + hv_vcpu->exit.u.synic.evt_page = synic->evt_page; + hv_vcpu->exit.u.synic.msg_page = synic->msg_page; + + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); +} + static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 data, bool host) { @@ -145,6 +159,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, switch (msr) { case HV_X64_MSR_SCONTROL: synic->control = data; + if (!host) + synic_exit(synic, msr); break; case HV_X64_MSR_SVERSION: if (!host) { @@ -161,6 +177,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, break; } synic->evt_page = data; + if (!host) + synic_exit(synic, msr); break; case HV_X64_MSR_SIMP: if (data & HV_SYNIC_SIMP_ENABLE) @@ -170,6 +188,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, break; } synic->msg_page = data; + if (!host) + synic_exit(synic, msr); break; case HV_X64_MSR_EOM: { int i; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eb64377ed..036e4bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6482,6 +6482,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv = vcpu->arch.hyperv.exit; + r = 0; + goto out; + } } /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebaf2f8..14f9596 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -143,6 +143,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_REQ_HV_RESET 29 +#define KVM_REQ_HV_EXIT 30 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 27ce460..6e32f75 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -154,6 +154,20 @@ struct kvm_s390_skeys { __u32 flags; __u32 reserved[9]; }; + +struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -184,6 +198,7 @@ struct kvm_s390_skeys { #define KVM_EXIT_SYSTEM_EVENT 24 #define KVM_EXIT_S390_STSI 25 #define KVM_EXIT_IOAPIC_EOI 26 +#define KVM_EXIT_HYPERV 27 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. 
*/ @@ -338,6 +353,8 @@ struct kvm_run { struct { __u8 vector; } eoi; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; /* Fix the size of the union. */ char padding[256]; }; -- cgit v0.10.2 From 9dbe6cf941a6fe82933aef565e4095fb10f65023 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 14:49:17 +0100 Subject: KVM: x86: expose MSR_TSC_AUX to userspace If we do not do this, it is not properly saved and restored across migration. Windows notices due to its self-protection mechanisms, and is very upset about it (blue screen of death). Cc: Radim Krcmar Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 036e4bc..f1d6501 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -951,7 +951,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -4028,16 +4028,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } -- cgit v0.10.2 From 46896c73c1a4dde527c3a3cc43379deeb41985a1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 14:49:16 +0100 Subject: KVM: svm: add support for RDTSCP RDTSCP was never supported for AMD CPUs, which nobody noticed because Linux does not use it. But exactly the fact that Linux does not use it makes the implementation very simple; we can freely trash MSR_TSC_AUX while running the guest. Cc: Joerg Roedel Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2401fc8..af34215 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -86,6 +86,7 @@ static const u32 host_save_user_msrs[] = { MSR_FS_BASE, #endif MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_TSC_AUX, }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) @@ -135,6 +136,7 @@ struct vcpu_svm { uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; + uint64_t tsc_aux; u64 next_rip; @@ -1238,6 +1240,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); } } + /* This assumes that the kernel never uses MSR_TSC_AUX */ + if (static_cpu_has(X86_FEATURE_RDTSCP)) + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -3024,6 +3029,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = svm->sysenter_esp; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + msr_info->data = svm->tsc_aux; + break; /* * Nobody will change the following 5 values in the VMCB so we can * safely return them on rdmsr. 
They will always be 0 until LBRV is @@ -3145,6 +3155,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp = data; svm->vmcb->save.sysenter_esp = data; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + + /* + * This is rare, so we update the MSR here instead of using + * direct_access_msrs. Doing that would require a rdmsr in + * svm_vcpu_put. + */ + svm->tsc_aux = data; + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + break; case MSR_IA32_DEBUGCTLMSR: if (!boot_cpu_has(X86_FEATURE_LBRV)) { vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", @@ -4041,7 +4063,7 @@ static int svm_get_lpage_level(void) static bool svm_rdtscp_supported(void) { - return false; + return boot_cpu_has(X86_FEATURE_RDTSCP); } static bool svm_invpcid_supported(void) -- cgit v0.10.2 From aba2f06c070f604e388cf77b1dcc7f4cf4577eb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 16:42:18 +0100 Subject: KVM: x86: correctly print #AC in traces Poor #AC was so unimportant until a few days ago that we were not even tracing its name correctly. But now it's all over the place. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1203025..ab9ae67 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: -- cgit v0.10.2 From 0e3d0648bd903ff6cda7499f9349a2ce612bccb4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Nov 2015 11:52:45 +0100 Subject: KVM: x86: MMU: always set accessed bit in shadow PTEs Commit 7a1638ce4220 ("nEPT: Redefine EPT-specific link_shadow_page()", 2013-08-05) says: Since nEPT doesn't support A/D bit, we should not set those bit when building the shadow page table. but this is not necessary. Even though nEPT doesn't support A/D bits, and hence the vmcs12 EPT pointer will never enable them, we always use them for shadow page tables if available (see construct_eptp in vmx.c). So we can set the A/D bits freely in the shadow page table. This patch hence basically reverts commit 7a1638ce4220. 
Cc: Yang Zhang Cc: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e7c2c14..276d2f2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2198,7 +2198,7 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) +static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) { u64 spte; @@ -2206,10 +2206,7 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask; - - if (accessed) - spte |= shadow_accessed_mask; + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); } @@ -2740,7 +2737,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - link_shadow_page(iterator.sptep, sp, true); + link_shadow_page(iterator.sptep, sp); } } return emulate; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3058a22..d8fdc5c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + link_shadow_page(it.sptep, sp); } for (; @@ -618,7 +618,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + link_shadow_page(it.sptep, sp); } clear_sp_write_flooding_count(it.sptep); -- cgit v0.10.2 From 4f52696a6c4d9b1449c462546f1318935c6973db Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 10:40:36 +0100 Subject: KVM-async_pf: Delete an unnecessary check before the function call "kmem_cache_destroy" The kmem_cache_destroy() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 77d42be..3531599 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -57,8 +57,7 @@ int kvm_async_pf_init(void) void kvm_async_pf_deinit(void) { - if (async_pf_cache) - kmem_cache_destroy(async_pf_cache); + kmem_cache_destroy(async_pf_cache); async_pf_cache = NULL; } -- cgit v0.10.2 From 33e941547923283f7f1022f3c35359ea9403d9a4 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Sat, 14 Nov 2015 11:21:06 +0800 Subject: KVM: kvm_is_visible_gfn can be boolean This patch makes kvm_is_visible_gfn return bool due to this particular function only using either one or zero as its return value. No functional change. 
Signed-off-by: Yaowei Bai Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 14f9596..2911919 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -641,7 +641,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); -int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 484079e..73cbb41 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1164,15 +1164,15 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); } -int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) +bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || memslot->flags & KVM_MEMSLOT_INVALID) - return 0; + return false; - return 1; + return true; } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -- cgit v0.10.2 From 08ff0d5e63b4f360091dd171b0ede1a3361227a1 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Sat, 14 Nov 2015 11:21:07 +0800 Subject: KVM: kvm_para_has_feature can be boolean This patch makes kvm_para_has_feature return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 00a97bb..35e568f 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -4,10 +4,8 @@ #include -static inline int kvm_para_has_feature(unsigned int feature) +static inline bool kvm_para_has_feature(unsigned int feature) { - if (kvm_arch_para_features() & (1UL << feature)) - return 1; - return 0; + return !!(kvm_arch_para_features() & (1UL << feature)); } #endif /* __LINUX_KVM_PARA_H */ -- cgit v0.10.2 From 378b417d652c4ff20be3144b7064e3a4ecd2571d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 16 Nov 2015 11:10:24 +0800 Subject: KVM: powerpc: kvmppc_visible_gpa can be boolean In another patch kvm_is_visible_gfn is maken return bool due to this function only returns zero or one as its return value, let's also make kvmppc_visible_gpa return bool to keep consistent. No functional change. 
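The !!(...) normalization seen in kvm_para_has_feature() above is the usual idiom for collapsing a masked bit test into a bool; a minimal self-contained illustration (not kernel code):

	#include <stdbool.h>

	/* mirrors the kvm_para_has_feature() idiom */
	static bool has_feature(unsigned long features, unsigned int feature)
	{
		return !!(features & (1UL << feature));	/* any non-zero mask -> true */
	}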
Signed-off-by: Yaowei Bai Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 64891b0..70fb08d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -512,7 +512,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) +static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; @@ -521,7 +521,7 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) gpa &= ~0xFFFULL; if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { - return 1; + return true; } return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); -- cgit v0.10.2 From 018aabb56d6109c8f12397c24e59f67c58870ac1 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:41:28 +0900 Subject: KVM: x86: MMU: Encapsulate the type of rmap-chain head in a new struct New struct kvm_rmap_head makes the code type-safe to some extent. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f608e17..8140077 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -214,6 +214,10 @@ union kvm_mmu_page_role { }; }; +struct kvm_rmap_head { + unsigned long val; +}; + struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; @@ -231,7 +235,7 @@ struct kvm_mmu_page { bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; - unsigned long parent_ptes; /* Reverse mapping for parent_pte */ + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ unsigned long mmu_valid_gen; @@ -606,7 +610,7 @@ struct kvm_lpage_info { }; struct kvm_arch_memory_slot { - unsigned long *rmap[KVM_NR_PAGE_SIZES]; + struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 276d2f2..d9a6801 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -909,36 +909,35 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, } /* - * Pte mapping structures: + * About rmap_head encoding: * - * If pte_list bit zero is zero, then pte_list point to the spte. - * - * If pte_list bit zero is one, (then pte_list & ~1) points to a struct + * If the bit zero of rmap_head->val is clear, then it points to the only spte + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. - * - * Returns the number of pte entries before the spte was added or zero if - * the spte was not added. - * + */ + +/* + * Returns the number of pointers in the rmap chain, not counting the new one. 
*/ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, - unsigned long *pte_list) + struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int i, count = 0; - if (!*pte_list) { + if (!rmap_head->val) { rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); - *pte_list = (unsigned long)spte; - } else if (!(*pte_list & 1)) { + rmap_head->val = (unsigned long)spte; + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); desc = mmu_alloc_pte_list_desc(vcpu); - desc->sptes[0] = (u64 *)*pte_list; + desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; - *pte_list = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { desc = desc->more; count += PTE_LIST_EXT; @@ -955,8 +954,9 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } static void -pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, - int i, struct pte_list_desc *prev_desc) +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, + struct pte_list_desc *desc, int i, + struct pte_list_desc *prev_desc) { int j; @@ -967,43 +967,43 @@ pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, if (j != 0) return; if (!prev_desc && !desc->more) - *pte_list = (unsigned long)desc->sptes[0]; + rmap_head->val = (unsigned long)desc->sptes[0]; else if (prev_desc) prev_desc->more = desc->more; else - *pte_list = (unsigned long)desc->more | 1; + rmap_head->val = (unsigned long)desc->more | 1; mmu_free_pte_list_desc(desc); } -static void pte_list_remove(u64 *spte, unsigned long *pte_list) +static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; struct pte_list_desc *prev_desc; int i; - if (!*pte_list) { + if (!rmap_head->val) { printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); BUG(); - } else if (!(*pte_list & 1)) { + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_remove: %p 1->0\n", spte); - if ((u64 *)*pte_list != spte) { + if ((u64 *)rmap_head->val != spte) { printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); BUG(); } - *pte_list = 0; + rmap_head->val = 0; } else { rmap_printk("pte_list_remove: %p many->many\n", spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(pte_list, - desc, i, - prev_desc); + pte_list_desc_remove_entry(rmap_head, + desc, i, prev_desc); return; } + } prev_desc = desc; desc = desc->more; } @@ -1013,18 +1013,18 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) } typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) +static void pte_list_walk(struct kvm_rmap_head *rmap_head, pte_list_walk_fn fn) { struct pte_list_desc *desc; int i; - if (!*pte_list) + if (!rmap_head->val) return; - if (!(*pte_list & 1)) - return fn((u64 *)*pte_list); + if (!(rmap_head->val & 1)) + return fn((u64 *)rmap_head->val); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc) { for (i = 0; i 
< PTE_LIST_EXT && desc->sptes[i]; ++i) fn(desc->sptes[i]); @@ -1032,8 +1032,8 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) } } -static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) +static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { unsigned long idx; @@ -1041,10 +1041,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; } -/* - * Take gfn and return the reverse mapping to it. - */ -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp) +static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, + struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -1065,24 +1063,24 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { struct kvm_mmu_page *sp; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); - return pte_list_add(vcpu, spte, rmapp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + return pte_list_add(vcpu, spte, rmap_head); } static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_mmu_page *sp; gfn_t gfn; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmapp = gfn_to_rmap(kvm, gfn, sp); - pte_list_remove(spte, rmapp); + rmap_head = gfn_to_rmap(kvm, gfn, sp); + pte_list_remove(spte, rmap_head); } /* @@ -1102,17 +1100,18 @@ struct rmap_iterator { * * Returns sptep if found, NULL otherwise. */ -static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) +static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, + struct rmap_iterator *iter) { - if (!rmap) + if (!rmap_head->val) return NULL; - if (!(rmap & 1)) { + if (!(rmap_head->val & 1)) { iter->desc = NULL; - return (u64 *)rmap; + return (u64 *)rmap_head->val; } - iter->desc = (struct pte_list_desc *)(rmap & ~1ul); + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); iter->pos = 0; return iter->desc->sptes[iter->pos]; } @@ -1146,10 +1145,10 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) return NULL; } -#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ - for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ - _spte_ = rmap_get_next(_iter_)) +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ + _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ + _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { @@ -1207,14 +1206,15 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) return mmu_spte_update(sptep, spte); } -static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, +static bool __rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, bool pt_protect) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_write_protect(kvm, sptep, pt_protect); return flush; @@ -1231,13 +1231,13 @@ static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_clear_dirty(struct kvm *kvm, unsigned 
long *rmapp) +static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_clear_dirty(kvm, sptep); return flush; @@ -1254,13 +1254,13 @@ static bool spte_set_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) +static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_set_dirty(kvm, sptep); return flush; @@ -1280,12 +1280,12 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmapp, false); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ mask &= mask - 1; @@ -1305,12 +1305,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_clear_dirty(kvm, rmapp); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_clear_dirty(kvm, rmap_head); /* clear the first set bit */ mask &= mask - 1; @@ -1342,27 +1342,27 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - rmapp = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true); + rmap_head = __gfn_to_rmap(gfn, i, slot); + write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); } return write_protected; } -static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - while ((sptep = rmap_get_first(*rmapp, &iter))) { + while ((sptep = rmap_get_first(rmap_head, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); @@ -1373,14 +1373,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) return flush; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { - return kvm_zap_rmapp(kvm, rmapp); + return kvm_zap_rmapp(kvm, rmap_head); } -static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1395,7 +1395,7 
@@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, new_pfn = pte_pfn(*ptep); restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", sptep, *sptep, gfn, level); @@ -1433,11 +1433,11 @@ struct slot_rmap_walk_iterator { /* output fields. */ gfn_t gfn; - unsigned long *rmap; + struct kvm_rmap_head *rmap; int level; /* private field. */ - unsigned long *end_rmap; + struct kvm_rmap_head *end_rmap; }; static void @@ -1496,7 +1496,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long end, unsigned long data, int (*handler)(struct kvm *kvm, - unsigned long *rmapp, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, @@ -1540,7 +1540,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data)) @@ -1563,7 +1564,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1573,18 +1574,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, BUG_ON(!shadow_accessed_mask); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } + } trace_kvm_age_page(gfn, level, slot, young); return young; } -static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1600,11 +1602,12 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) goto out; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; break; } + } out: return young; } @@ -1613,14 +1616,14 @@ out: static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *sp; sp = page_header(__pa(spte)); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); + kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } @@ -1737,7 +1740,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, * this feature. See the comments in kvm_zap_obsolete_pages(). 
*/ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - sp->parent_ptes = 0; + sp->parent_ptes.val = 0; mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2277,7 +2280,7 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) u64 *sptep; struct rmap_iterator iter; - while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) + while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) drop_parent_pte(sp, sptep); } @@ -4492,7 +4495,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) } /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); +typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ static bool @@ -4586,9 +4589,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) spin_unlock(&kvm->mmu_lock); } -static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) +static bool slot_rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) { - return __rmap_write_protect(kvm, rmapp, false); + return __rmap_write_protect(kvm, rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, @@ -4624,7 +4628,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, } static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, - unsigned long *rmapp) + struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; @@ -4633,7 +4637,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_mmu_page *sp; restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { sp = page_header(__pa(sptep)); pfn = spte_to_pfn(*sptep); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 03d518e..f7b0488 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -129,7 +129,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *rev_sp; struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -150,8 +150,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) return; } - rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot); - if (!*rmapp) { + rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + if (!rmap_head->val) { if (!__ratelimit(&ratelimit_state)) return; audit_printk(kvm, "no rmap for writable spte %llx\n", @@ -192,7 +192,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; u64 *sptep; struct rmap_iterator iter; struct kvm_memslots *slots; @@ -203,13 +203,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); + rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) audit_printk(kvm, "shadow page has writable " "mappings: gfn %llx role %x\n", sp->gfn, sp->role.word); + } } static void 
audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) -- cgit v0.10.2 From 7ee0e5b29d275ac299cdf8ef67e60bf1648c8c6a Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:42:23 +0900 Subject: KVM: x86: MMU: Remove unused parameter of __direct_map() Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d9a6801..8a1593f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2708,9 +2708,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int map_writable, int level, gfn_t gfn, pfn_t pfn, - bool prefault) +static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, + int level, gfn_t gfn, pfn_t pfn, bool prefault) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -3018,11 +3017,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, - prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); - return r; out_unlock: @@ -3531,8 +3528,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, - level, gfn, pfn, prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); return r; -- cgit v0.10.2 From fd9514572f721acbabb0ff24f6b5294a2449d492 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:43:13 +0900 Subject: KVM: x86: MMU: Add helper function to clear a bit in unsync child bitmap Both __mmu_unsync_walk() and mmu_pages_clear_parents() have three line code which clears a bit in the unsync child bitmap; the former places it inside a loop block and uses a few goto statements to jump to it. A new helper function, clear_unsync_child_bit(), makes the code cleaner. 
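Condensed from the diff that follows, each call site turns from a goto into a plain call:

	/* before: jump to a shared label at the bottom of the loop */
	if (!is_shadow_present_pte(ent) || is_large_pte(ent))
		goto clear_child_bitmap;
	...
clear_child_bitmap:
	__clear_bit(i, sp->unsync_child_bitmap);
	sp->unsync_children--;
	WARN_ON((int)sp->unsync_children < 0);

	/* after: the shared three lines live in clear_unsync_child_bit() */
	if (!is_shadow_present_pte(ent) || is_large_pte(ent)) {
		clear_unsync_child_bit(sp, i);
		continue;
	}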
Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8a1593f..9832bc9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1809,6 +1809,13 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, return (pvec->nr == KVM_PAGE_ARRAY_NR); } +static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) +{ + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); +} + static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { @@ -1818,8 +1825,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_page *child; u64 ent = sp->spt[i]; - if (!is_shadow_present_pte(ent) || is_large_pte(ent)) - goto clear_child_bitmap; + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { + clear_unsync_child_bit(sp, i); + continue; + } child = page_header(ent & PT64_BASE_ADDR_MASK); @@ -1828,28 +1837,21 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); - if (!ret) - goto clear_child_bitmap; - else if (ret > 0) + if (!ret) { + clear_unsync_child_bit(sp, i); + continue; + } else if (ret > 0) { nr_unsync_leaf += ret; - else + } else return ret; } else if (child->unsync) { nr_unsync_leaf++; if (mmu_pages_add(pvec, child, i)) return -ENOSPC; } else - goto clear_child_bitmap; - - continue; - -clear_child_bitmap: - __clear_bit(i, sp->unsync_child_bitmap); - sp->unsync_children--; - WARN_ON((int)sp->unsync_children < 0); + clear_unsync_child_bit(sp, i); } - return nr_unsync_leaf; } @@ -2012,9 +2014,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) if (!sp) return; - --sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); + clear_unsync_child_bit(sp, idx); level++; } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); } -- cgit v0.10.2 From 029499b477389f7d6486c8c759a8498bcfecf322 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:44:05 +0900 Subject: KVM: x86: MMU: Make mmu_set_spte() return emulate value mmu_set_spte()'s code is based on the assumption that the emulate parameter has a valid pointer value if set_spte() returns true and write_fault is not zero. In other cases, emulate may be NULL, so a NULL-check is needed. Stop passing emulate pointer and make mmu_set_spte() return the emulate value instead to clean up this complex interface. Prefetch functions can just throw away the return value. 
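Condensed from the diff below, the call-site change is:

	/* before: out-parameter that may legitimately be NULL on prefetch paths */
	int emulate = 0;
	mmu_set_spte(vcpu, sptep, access, write_fault, &emulate, level, gfn, pfn,
		     speculative, host_writable);

	/* after: the result is the return value; prefetch callers simply ignore it */
	bool emulate = mmu_set_spte(vcpu, sptep, access, write_fault, level, gfn,
				    pfn, speculative, host_writable);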
Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9832bc9..74c120c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2564,13 +2564,13 @@ done: return ret; } -static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int write_fault, int *emulate, - int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool host_writable) +static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + int write_fault, int level, gfn_t gfn, pfn_t pfn, + bool speculative, bool host_writable) { int was_rmapped = 0; int rmap_count; + bool emulate = false; pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); @@ -2600,12 +2600,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, true, host_writable)) { if (write_fault) - *emulate = 1; + emulate = true; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } - if (unlikely(is_mmio_spte(*sptep) && emulate)) - *emulate = 1; + if (unlikely(is_mmio_spte(*sptep))) + emulate = true; pgprintk("%s: setting spte %llx\n", __func__, *sptep); pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", @@ -2624,6 +2624,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } kvm_release_pfn_clean(pfn); + + return emulate; } static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, @@ -2658,9 +2660,8 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, access, 0, NULL, - sp->role.level, gfn, page_to_pfn(pages[i]), - true, true); + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); return 0; } @@ -2721,9 +2722,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, &emulate, level, gfn, pfn, - prefault, map_writable); + emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, + write, level, gfn, pfn, prefault, + map_writable); direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu->stat.pf_fixed; break; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d8fdc5c..11650ea 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -475,8 +475,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. 
*/ - mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, - gfn, pfn, true, true); + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); return true; } @@ -556,7 +556,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; - int top_level, emulate = 0; + int top_level, emulate; direct_access = gw->pte_access; @@ -622,8 +622,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } clear_sp_write_flooding_count(it.sptep); - mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, - it.level, gw->gfn, pfn, prefault, map_writable); + emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, + it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); return emulate; -- cgit v0.10.2 From afd28fe1c901429eba8957f54bdb4a13cc15ae44 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:44:55 +0900 Subject: KVM: x86: MMU: Remove is_rmap_spte() and use is_shadow_present_pte() is_rmap_spte(), originally named is_rmap_pte(), was introduced when the simple reverse mapping was implemented by commit cd4a4e5374110444 ("[PATCH] KVM: MMU: Implement simple reverse mapping"). At that point, its role was clear and only rmap_add() and rmap_remove() were using it to select sptes that need to be reverse-mapped. Independently of that, is_shadow_present_pte() was first introduced by commit c7addb902054195b ("KVM: Allow not-present guest page faults to bypass kvm") to do bypass_guest_pf optimization, which does not exist any more. These two seem to have changed their roles somewhat, and is_rmap_spte() just calls is_shadow_present_pte() now. Since using both of them without clear distinction just makes the code confusing, remove is_rmap_spte(). Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 74c120c..3104748 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -311,11 +311,6 @@ static int is_large_pte(u64 pte) return pte & PT_PAGE_SIZE_MASK; } -static int is_rmap_spte(u64 pte) -{ - return is_shadow_present_pte(pte); -} - static int is_last_spte(u64 pte, int level) { if (level == PT_PAGE_TABLE_LEVEL) @@ -540,7 +535,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) u64 old_spte = *sptep; bool ret = false; - WARN_ON(!is_rmap_spte(new_spte)); + WARN_ON(!is_shadow_present_pte(new_spte)); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); @@ -595,7 +590,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) else old_spte = __update_clear_spte_slow(sptep, 0ull); - if (!is_rmap_spte(old_spte)) + if (!is_shadow_present_pte(old_spte)) return 0; pfn = spte_to_pfn(old_spte); @@ -2575,7 +2570,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); - if (is_rmap_spte(*sptep)) { + if (is_shadow_present_pte(*sptep)) { /* * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. @@ -2919,7 +2914,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * If the mapping has been changed, let the vcpu fault on the * same address again. 
*/ - if (!is_rmap_spte(spte)) { + if (!is_shadow_present_pte(spte)) { ret = true; goto exit; } diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index f7b0488..1cee3ec 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -183,7 +183,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) return; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - if (!is_rmap_spte(sp->spt[i])) + if (!is_shadow_present_pte(sp->spt[i])) continue; inspect_spte_has_rmap(kvm, sp->spt + i); -- cgit v0.10.2 From 77fbbbd2f09fae486190bb2bd7142647dc2a6e8b Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:45:44 +0900 Subject: KVM: x86: MMU: Consolidate BUG_ON checks for reverse-mapped sptes At some call sites of rmap_get_first() and rmap_get_next(), BUG_ON is placed right after the call to detect unrelated sptes which must not be found in the reverse-mapping list. Move this check in rmap_get_first/next() so that all call sites, not just the users of the for_each_rmap_spte() macro, will be checked the same way. One thing to keep in mind is that kvm_mmu_unlink_parents() also uses rmap_get_first() to handle parent sptes. The change will not break it because parent sptes are present, at least until drop_parent_pte() actually unlinks them, and not mmio-sptes. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 3a4d681..daf9c0f 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -203,10 +203,10 @@ Shadow pages contain the following information: page cannot be destroyed. See role.invalid. parent_ptes: The reverse mapping for the pte/ptes pointing at this page's spt. If - parent_ptes bit 0 is zero, only one spte points at this pages and + parent_ptes bit 0 is zero, only one spte points at this page and parent_ptes points at this single spte, otherwise, there exists multiple sptes pointing at this page and (parent_ptes & ~0x1) points at a data - structure with a list of parent_ptes. + structure with a list of parent sptes. unsync: If true, then the translations in this page may not match the guest's translation. 
This is equivalent to the state of the tlb when a pte is diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3104748..5b249d4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1098,17 +1098,23 @@ struct rmap_iterator { static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter) { + u64 *sptep; + if (!rmap_head->val) return NULL; if (!(rmap_head->val & 1)) { iter->desc = NULL; - return (u64 *)rmap_head->val; + sptep = (u64 *)rmap_head->val; + goto out; } iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); iter->pos = 0; - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } /* @@ -1118,14 +1124,14 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, */ static u64 *rmap_get_next(struct rmap_iterator *iter) { + u64 *sptep; + if (iter->desc) { if (iter->pos < PTE_LIST_EXT - 1) { - u64 *sptep; - ++iter->pos; sptep = iter->desc->sptes[iter->pos]; if (sptep) - return sptep; + goto out; } iter->desc = iter->desc->more; @@ -1133,17 +1139,20 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) if (iter->desc) { iter->pos = 0; /* desc->sptes[0] cannot be NULL */ - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; + goto out; } } return NULL; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } #define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ - _spte_ = rmap_get_next(_iter_)) + _spte_; _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { @@ -1358,7 +1367,6 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; while ((sptep = rmap_get_first(rmap_head, &iter))) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); drop_spte(kvm, sptep); -- cgit v0.10.2 From 4700579241d2d587765a58dddd1b2a89902767c0 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:46:29 +0900 Subject: KVM: x86: MMU: Move initialization of parent_ptes out from kvm_mmu_alloc_page() Make kvm_mmu_alloc_page() do just what its name tells to do, and remove the extra allocation error check and zero-initialization of parent_ptes: shadow page headers allocated by kmem_cache_zalloc() are always in the per-VCPU pools. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5b249d4..7f46e3e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1726,8 +1726,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, mmu_spte_clear_no_track(parent_pte); } -static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, - u64 *parent_pte, int direct) +static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) { struct kvm_mmu_page *sp; @@ -1743,8 +1742,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, * this feature. See the comments in kvm_zap_obsolete_pages(). 
*/ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - sp->parent_ptes.val = 0; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; } @@ -2133,10 +2130,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, trace_kvm_mmu_get_page(sp, false); return sp; } + ++vcpu->kvm->stat.mmu_cache_miss; - sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); - if (!sp) - return sp; + + sp = kvm_mmu_alloc_page(vcpu, direct); + + mmu_page_add_parent_pte(vcpu, sp, parent_pte); + sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, -- cgit v0.10.2 From 98bba238429e200521594ed30dd1edad7faa0081 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 26 Nov 2015 21:14:34 +0900 Subject: KVM: x86: MMU: Move parent_pte handling from kvm_mmu_get_page() to link_shadow_page() Every time kvm_mmu_get_page() is called with a non-NULL parent_pte argument, link_shadow_page() follows that to set the parent entry so that the new mapping will point to the returned page table. Moving parent_pte handling there allows to clean up the code because parent_pte is passed to kvm_mmu_get_page() just for mark_unsync() and mmu_page_add_parent_pte(). In addition, the patch avoids calling mark_unsync() for other parents in the sp->parent_ptes chain than the newly added parent_pte, because they have been there since before the current page fault handling started. Signed-off-by: Takuya Yoshikawa Cc: Xiao Guangrong Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7f46e3e..ec61b22 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2119,12 +2119,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) break; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); - if (sp->unsync_children) { + if (sp->unsync_children) kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_mmu_mark_parents_unsync(sp); - } else if (sp->unsync) - kvm_mmu_mark_parents_unsync(sp); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); @@ -2135,8 +2131,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp = kvm_mmu_alloc_page(vcpu, direct); - mmu_page_add_parent_pte(vcpu, sp, parent_pte); - sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, @@ -2204,7 +2198,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) +static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, + struct kvm_mmu_page *sp) { u64 spte; @@ -2215,6 +2210,11 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); + + mmu_page_add_parent_pte(vcpu, sp, sptep); + + if (sp->unsync_children || sp->unsync) + mark_unsync(sptep); } static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, @@ -2273,11 +2273,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, mmu_page_zap_pte(kvm, sp, sp->spt + i); } -static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) -{ - mmu_page_remove_parent_pte(sp, parent_pte); -} - static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; @@ -2743,7 +2738,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - link_shadow_page(iterator.sptep, sp); + 
link_shadow_page(vcpu, iterator.sptep, sp); } } return emulate; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 11650ea..0dcf9c8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); } for (; @@ -618,7 +618,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); } clear_sp_write_flooding_count(it.sptep); @@ -629,8 +629,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, return emulate; out_gpte_changed: - if (sp) - kvm_mmu_put_page(sp, it.sptep); kvm_release_pfn_clean(pfn); return 0; } -- cgit v0.10.2 From 74c4e63ab9b550b47ca2eb192e52d6c8971e2f31 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 26 Nov 2015 21:15:38 +0900 Subject: KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk() As kvm_mmu_get_page() was changed so that every parent pointer would not get into the sp->parent_ptes chain before the entry pointed to by it was set properly, we can use the for_each_rmap_spte macro instead of pte_list_walk(). Signed-off-by: Takuya Yoshikawa Cc: Xiao Guangrong Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ec61b22..204c7d4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) } } -typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(struct kvm_rmap_head *rmap_head, pte_list_walk_fn fn) -{ - struct pte_list_desc *desc; - int i; - - if (!rmap_head->val) - return; - - if (!(rmap_head->val & 1)) - return fn((u64 *)rmap_head->val); - - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) - fn(desc->sptes[i]); - desc = desc->more; - } -} - static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1749,7 +1729,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - pte_list_walk(&sp->parent_ptes, mark_unsync); + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { + mark_unsync(sptep); + } } static void mark_unsync(u64 *spte) -- cgit v0.10.2 From bb11c6c96544737aede6a2eb92e5c6bc8b46534b Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 26 Nov 2015 21:16:35 +0900 Subject: KVM: x86: MMU: Remove unused parameter parent_pte from kvm_mmu_get_page() Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 204c7d4..a1a3d19 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2071,8 +2071,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int direct, - unsigned access, - u64 *parent_pte) + unsigned access) { union kvm_mmu_page_role role; unsigned quadrant; @@ -2720,8 +2719,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, base_addr &= PT64_LVL_ADDR_MASK(iterator.level); pseudo_gfn = base_addr >> PAGE_SHIFT; sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - 
iterator.level - 1, - 1, ACC_ALL, iterator.sptep); + iterator.level - 1, 1, ACC_ALL); link_shadow_page(vcpu, iterator.sptep, sp); } @@ -3078,8 +3076,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, - 1, ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3091,9 +3088,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, - PT32_ROOT_LEVEL, 1, ACC_ALL, - NULL); + i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3130,7 +3125,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, - 0, ACC_ALL, NULL); + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3163,9 +3158,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, 0, - ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0dcf9c8..91e939b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -587,7 +587,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (!is_shadow_present_pte(*it.sptep)) { table_gfn = gw->table_gfn[it.level - 2]; sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, - false, access, it.sptep); + false, access); } /* @@ -617,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access, it.sptep); + true, direct_access); link_shadow_page(vcpu, it.sptep, sp); } -- cgit v0.10.2 From e09fefdeeb517ff653516dea8a882ce001e99237 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: KVM: Use common function for VCPU lookup by id Let's reuse the new common function for VPCU lookup by id. 
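The common helper referred to here, kvm_get_vcpu_by_id() (split out into a separate patch, as noted above), is essentially a linear scan over the VCPU array. Roughly (a sketch; the exact body lives in include/linux/kvm_host.h and is also visible as context in the follow-up patch below):

	static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
	{
		struct kvm_vcpu *vcpu;
		int i;

		kvm_for_each_vcpu(i, vcpu, kvm)
			if (vcpu->vcpu_id == id)
				return vcpu;
		return NULL;
	}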
Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split out the new function into a separate patch] diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 54b45b7..a29da44 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -308,16 +308,10 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - int r; - struct kvm_vcpu *v, *ret = NULL; + struct kvm_vcpu *ret; mutex_lock(&kvm->lock); - kvm_for_each_vcpu(r, v, kvm) { - if (v->vcpu_id == id) { - ret = v; - break; - } - } + ret = kvm_get_vcpu_by_id(kvm, id); mutex_unlock(&kvm->lock); return ret; } diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88..05f7de9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 73cbb41..9649a42 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2257,7 +2257,7 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) { int r; - struct kvm_vcpu *vcpu, *v; + struct kvm_vcpu *vcpu; if (id >= KVM_MAX_VCPUS) return -EINVAL; @@ -2281,12 +2281,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) r = -EINVAL; goto unlock_vcpu_destroy; } - - kvm_for_each_vcpu(r, v, kvm) - if (v->vcpu_id == id) { - r = -EEXIST; - goto unlock_vcpu_destroy; - } + if (kvm_get_vcpu_by_id(kvm, id)) { + r = -EEXIST; + goto unlock_vcpu_destroy; + } BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); -- cgit v0.10.2 From c896939f7cff767091b5d84587cd144e5d3613b7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:55:08 +0100 Subject: KVM: use heuristic for fast VCPU lookup by id Usually, VCPU ids match the array index. So let's try a fast lookup first before falling back to the slow iteration. Suggested-by: Christian Borntraeger Reviewed-by: Dominik Dingel Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2911919..a754fc0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -472,6 +472,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) struct kvm_vcpu *vcpu; int i; + if (id < 0 || id >= KVM_MAX_VCPUS) + return NULL; + vcpu = kvm_get_vcpu(kvm, id); + if (vcpu && vcpu->vcpu_id == id) + return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) if (vcpu->vcpu_id == id) return vcpu; -- cgit v0.10.2 From 4bd33b568855f5483a6c6d7e4706ef507ab8586b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 14 Oct 2015 12:37:35 +0200 Subject: KVM: Remove unnecessary debugfs dentry references KVM creates debugfs files to export VM statistics to userland. 
To be able to remove them on kvm exit it tracks the files' dentries. Since their parent directory is also tracked and since each parent direntry knows its children we can easily remove them by using debugfs_remove_recursive(kvm_debugfs_dir). Therefore we don't need the extra tracking in the kvm_stats_debugfs_item anymore. Signed-off-by: Janosch Frank Reviewed-By: Sascha Silbe Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a754fc0..590c46e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1016,7 +1016,6 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; - struct dentry *dentry; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9649a42..be3cef1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3447,10 +3447,9 @@ static int kvm_init_debug(void) goto out; for (p = debugfs_entries; p->name; ++p) { - p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, - (void *)(long)p->offset, - stat_fops[p->kind]); - if (p->dentry == NULL) + if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, + (void *)(long)p->offset, + stat_fops[p->kind])) goto out_dir; } @@ -3462,15 +3461,6 @@ out: return r; } -static void kvm_exit_debug(void) -{ - struct kvm_stats_debugfs_item *p; - - for (p = debugfs_entries; p->name; ++p) - debugfs_remove(p->dentry); - debugfs_remove(kvm_debugfs_dir); -} - static int kvm_suspend(void) { if (kvm_usage_count) @@ -3628,7 +3618,7 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { - kvm_exit_debug(); + debugfs_remove_recursive(kvm_debugfs_dir); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); kvm_async_pf_deinit(); -- cgit v0.10.2 From 71f116bfedfdd6763f2caf842bf40a6506759029 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 19 Oct 2015 16:24:28 +0200 Subject: KVM: s390: rewrite vcpu_post_run and drop out early Let's rewrite this function to better reflect how we actually handle exit_code. By dropping out early we can save a few cycles. This especially speeds up sie exits caused by host irqs. Also, let's move the special -EOPNOTSUPP for intercepts to the place where it belongs and convert it to -EREMOTE. 
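Condensed from the diff that follows, the rewritten vcpu_post_run() decides each case up front and returns immediately:

	if (vcpu->arch.sie_block->icptcode > 0) {
		/* handle the intercept in-kernel, or prepare a SIEIC exit and return -EREMOTE */
	} else if (exit_reason != -EFAULT) {
		return 0;			/* e.g. a host interrupt: nothing to do */
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		return -EREMOTE;		/* fault to be resolved by userspace */
	} else if (current->thread.gmap_pfault) {
		/* async page fault if possible, otherwise synchronous fault-in */
	}
	return vcpu_post_run_fault_in_sie(vcpu);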
Reviewed-by: Dominik Dingel Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa1..d53c107 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8465892..5c36c8e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2071,8 +2071,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2080,40 +2078,35 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } - } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? 
-EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2233,18 +2226,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } -- cgit v0.10.2 From f7ba1d34263e333e82aa8879028ddd06d6d5f9ac Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Thu, 9 Oct 2014 16:04:48 +0200 Subject: s390/sclp: introduce checks for ESCA and HVS Introduce sclp.has_hvs and sclp.has_esca to provide a way for kvm to check whether the extended-SCA and the home-virtual-SCA facilities are available. Signed-off-by: Eugene (jno) Dvurechenski Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 821dde5..8324abb 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -53,6 +53,8 @@ struct sclp_info { unsigned char has_sigpif : 1; unsigned char has_core_type : 1; unsigned char has_sprp : 1; + unsigned char has_hvs : 1; + unsigned char has_esca : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 7bc6df3..ff1e1bb 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -43,7 +43,10 @@ struct read_info_sccb { u8 _pad_92[100 - 92]; /* 92-99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ - u8 _pad_112[120 - 112]; /* 112-119 */ + u8 _pad_112[116 - 112]; /* 112-115 */ + u8 fac116; /* 116 */ + u8 _pad_117[119 - 117]; /* 117-118 */ + u8 fac119; /* 119 */ u16 hcpua; /* 120-121 */ u8 _pad_122[4096 - 122]; /* 122-4095 */ } __packed __aligned(PAGE_SIZE); @@ -108,6 +111,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_esca = !!(sccb->fac116 & 0x08); + sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; -- cgit v0.10.2 From 605145103abb21c555d5982073bee29269aaad51 Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Tue, 21 Apr 2015 14:44:54 +0200 Subject: KVM: s390: Generalize access to IPTE controls This patch generalizes access to the IPTE controls, which is a part of SCA. This is to prepare for upcoming introduction of Extended SCA support. 
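The change itself is mechanical; every caller moves from open-coding the SCA layout to a single accessor, roughly as sketched here (the helper is added to kvm-s390.h in the diff below):

/* before: callers hard-code that ipte_control sits in the basic SCA */
union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;

/* after: one helper hides where ipte_control lives, so the SCA
 * representation can change later without touching the IPTE lock paths */
union ipte_control *ic = kvm_s390_get_ipte_control(vcpu->kvm);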
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7..06f7edb 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,7 +259,7 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + union ipte_control *ic = kvm_s390_get_ipte_control(vcpu->kvm); if (vcpu->arch.sie_block->eca & 1) return ic->kh != 0; @@ -274,7 +274,7 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); while (old.k) { @@ -296,7 +296,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -311,7 +311,7 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); while (old.kg) { @@ -328,7 +328,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00..844f711 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -340,4 +340,9 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + return &kvm->arch.sca->ipte_control; +} #endif -- cgit v0.10.2 From a5bd764734838da64b37d771e5b7814eb1f61ffd Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Tue, 21 Apr 2015 15:10:10 +0200 Subject: KVM: s390: Generalize access to SIGP controls This patch generalizes access to the SIGP controls, which is a part of SCA. This is to prepare for upcoming introduction of Extended SCA support. 
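As with the IPTE controls, the point is to funnel all SIGP-control manipulation through a small set of helpers so that only they need to know the SCA entry layout. A rough sketch of the resulting call sites (fragments only; the full helpers are in the diff below):

/* injection: set the C bit and source CPU number atomically via cmpxchg */
if (sclp.has_sigpif)
        return sca_inject_ext_call(vcpu, src_id);

/* query: is an external call pending, and if so, from which CPU? */
ext_call_pending = sca_ext_call_pending(vcpu, &scn);

/* teardown: clear the C bit together with CPUSTAT_ECALL_PEND */
sca_clear_ext_call(vcpu);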
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6a75352..2a4718a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -34,6 +34,45 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + uint8_t sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + if (src_id) + *src_id = sigp_ctrl & SIGP_CTRL_SCN_MASK; + + return sigp_ctrl & SIGP_CTRL_C && + atomic_read(&vcpu->arch.sie_block->cpuflags) & + CPUSTAT_ECALL_PEND; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + uint8_t new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); + uint8_t old_val = *sigp_ctrl & ~SIGP_CTRL_C; + + if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + *sigp_ctrl = 0; +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -792,13 +831,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ -909,9 +946,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -1003,21 +1038,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1034,7 +1054,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return 
sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -2203,7 +2223,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2243,14 +2263,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); -- cgit v0.10.2 From a6e2f683e7691949d33ca9392e7807cfa9aca34e Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Tue, 21 Apr 2015 15:31:59 +0200 Subject: KVM: s390: Provide SCA-aware helpers for VCPU add/del This patch provides SCA-aware helpers to create/delete a VCPU. This is to prepare for upcoming introduction of Extended SCA support. Signed-off-by: Eugene (jno) Dvurechenski Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c36c8e..8ddd488 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -283,6 +283,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. 
*/ @@ -1189,11 +1191,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + sca_del_vcpu(vcpu); } smp_mb(); @@ -1249,6 +1247,32 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) + sca->cpu[vcpu->vcpu_id].sda = 0; +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, + unsigned int id) +{ + struct sca_block *sca = kvm->arch.sca; + + if (!sca->cpu[id].sda) + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(id, (unsigned long *) &sca->mcn); +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + return id < KVM_MAX_VCPUS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1465,7 +1489,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1487,13 +1511,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, WARN_ON_ONCE(1); goto out_free_cpu; } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + sca_add_vcpu(vcpu, kvm, id); } spin_lock_init(&vcpu->arch.local_int.lock); -- cgit v0.10.2 From bc784ccee5eb9ae1e737927eb9d8a0fbf7601abc Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Thu, 23 Apr 2015 16:09:06 +0200 Subject: KVM: s390: Introduce new structures This patch adds new structures and updates some existing ones to provide the base for Extended SCA functionality. The old sca_* structures were renamed to bsca_* to keep things uniform. The access to fields of SIGP controls were turned into bitfields instead of hardcoded bitmasks. 
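The bitfield conversion is easiest to see on the basic-format SIGP control byte. Assuming the big-endian bit ordering used on s390 (as in the patch), the union makes the old mask arithmetic explicit; an illustrative sketch:

union bsca_sigp_ctrl {
        __u8 value;
        struct {
                __u8 c : 1;     /* the old SIGP_CTRL_C bit (0x80) */
                __u8 r : 1;
                __u8 scn : 6;   /* the old (value & SIGP_CTRL_SCN_MASK) field */
        };
} __packed;

union bsca_sigp_ctrl ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl;
int pending, src_id;

/* old style, with hardcoded masks */
pending = ctrl.value & SIGP_CTRL_C;             /* 0x80 */
src_id  = ctrl.value & SIGP_CTRL_SCN_MASK;      /* 0x3f */

/* new style, with named bitfields */
pending = ctrl.c;
src_id  = ctrl.scn;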
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c..923b13d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include #include -#define KVM_MAX_VCPUS 64 +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 +#define KVM_MAX_VCPUS KVM_S390_BSCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* @@ -40,9 +42,34 @@ #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f -struct sca_entry { +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +} __packed; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +} __packed; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +} __packed; + +struct bsca_entry { __u8 reserved0; - __u8 sigp_ctrl; + union bsca_sigp_ctrl sigp_ctrl; __u16 reserved[3]; __u64 sda; __u64 reserved2[2]; @@ -57,14 +84,22 @@ union ipte_control { }; }; -struct sca_block { +struct bsca_block { union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; - struct sca_entry cpu[64]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; } __attribute__((packed)); +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[7]; + __u64 mcn[4]; + __u64 reserved2[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +} __packed; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -585,7 +620,7 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct sca_block *sca; + struct bsca_block *sca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2a4718a..aa221a4 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -37,25 +37,32 @@ /* handle external calls via sigp interpretation facility */ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { - struct sca_block *sca = vcpu->kvm->arch.sca; - uint8_t sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (src_id) - *src_id = sigp_ctrl & SIGP_CTRL_SCN_MASK; + *src_id = sigp_ctrl.scn; - return sigp_ctrl & SIGP_CTRL_C && + return sigp_ctrl.c && atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND; } static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) { - struct sca_block *sca = vcpu->kvm->arch.sca; - uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - uint8_t new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - uint8_t old_val = *sigp_ctrl & ~SIGP_CTRL_C; + int expect, rc; + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + + if (rc != expect) { /* another external call is pending */ return -EBUSY; } @@ -65,12 +72,12 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - struct sca_block *sca = vcpu->kvm->arch.sca; + 
struct bsca_block *sca = vcpu->kvm->arch.sca; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - *sigp_ctrl = 0; + sigp_ctrl->value = 0; } int psw_extint_disabled(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8ddd488..c268352 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1100,14 +1100,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1190,9 +1191,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { + if (!kvm_is_ucontrol(vcpu->kvm)) sca_del_vcpu(vcpu); - } smp_mb(); if (kvm_is_ucontrol(vcpu->kvm)) @@ -1249,7 +1249,7 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { - struct sca_block *sca = vcpu->kvm->arch.sca; + struct bsca_block *sca = vcpu->kvm->arch.sca; clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) @@ -1259,7 +1259,7 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned int id) { - struct sca_block *sca = kvm->arch.sca; + struct bsca_block *sca = kvm->arch.sca; if (!sca->cpu[id].sda) sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 844f711..df1abad 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -343,6 +343,8 @@ void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); /* support for Basic/Extended SCA handling */ static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) { - return &kvm->arch.sca->ipte_control; + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; } #endif -- cgit v0.10.2 From 7d43bafcff17c7fb07270999d3cf002f1ed6bd3f Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Wed, 22 Apr 2015 17:09:44 +0200 Subject: KVM: s390: Make provisions for ESCA utilization This patch updates the routines (sca_*) to provide transparent access to and manipulation on the data for both Basic and Extended SCA in use. The kvm.arch.sca is generalized to (void *) to handle BSCA/ESCA cases. Also the kvm.arch.use_esca flag is provided. The actual functionality is kept the same. 
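Concretely, each sca_* helper now selects the layout at run time; the pattern repeated throughout the diff below looks roughly like this (sketch):

if (vcpu->kvm->arch.use_esca) {
        struct esca_block *sca = vcpu->kvm->arch.sca;   /* extended format */

        /* operate on sca->cpu[id] via esca_entry / esca_sigp_ctrl */
} else {
        struct bsca_block *sca = vcpu->kvm->arch.sca;   /* basic format */

        /* operate on sca->cpu[id] via bsca_entry / bsca_sigp_ctrl */
}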
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 923b13d..25fdbf8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -620,7 +620,8 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct bsca_block *sca; + void *sca; + int use_esca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index aa221a4..60b36b0 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -37,30 +37,60 @@ /* handle external calls via sigp interpretation facility */ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { - struct bsca_block *sca = vcpu->kvm->arch.sca; - union bsca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int c, scn; + + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } if (src_id) - *src_id = sigp_ctrl.scn; + *src_id = scn; - return sigp_ctrl.c && - atomic_read(&vcpu->arch.sie_block->cpuflags) & + return c && atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND; } static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) { int expect, rc; - struct bsca_block *sca = vcpu->kvm->arch.sca; - union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; - new_val.scn = src_id; - new_val.c = 1; - old_val.c = 0; + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; - expect = old_val.value; - rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } if (rc != expect) { /* another external call is pending */ @@ -72,12 +102,28 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - struct bsca_block *sca = vcpu->kvm->arch.sca; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + int rc, expect; atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - sigp_ctrl->value = 0; + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl 
old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + WARN_ON(rc != expect); /* cannot clear? */ } int psw_extint_disabled(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c268352..41b3fed 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1077,6 +1077,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) return 0; } +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + BUG(); /* not implemented yet */ + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int i, rc; @@ -1100,6 +1109,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; + kvm->arch.use_esca = 0; /* start with basic SCA */ kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; @@ -1180,7 +1190,7 @@ out_err: kfree(kvm->arch.crypto.crycb); free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1226,7 +1236,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) @@ -1249,23 +1259,41 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { - struct bsca_block *sca = vcpu->kvm->arch.sca; + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; - clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); - if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) - sca->cpu[vcpu->vcpu_id].sda = 0; + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) + sca->cpu[vcpu->vcpu_id].sda = 0; + } } static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned int id) { - struct bsca_block *sca = kvm->arch.sca; + if (kvm->arch.use_esca) { + struct esca_block *sca = kvm->arch.sca; - if (!sca->cpu[id].sda) - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; - set_bit_inv(id, (unsigned long *) &sca->mcn); + if (!sca->cpu[id].sda) + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + set_bit_inv(id, (unsigned long *) sca->mcn); + } else { + struct bsca_block *sca = kvm->arch.sca; + + if (!sca->cpu[id].sda) + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(id, (unsigned long *) &sca->mcn); + } } static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) @@ -1458,6 +1486,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= 0x10; vcpu->arch.sie_block->ecb2 = 8; + if (vcpu->kvm->arch.use_esca) + vcpu->arch.sie_block->ecb2 |= 4; 
vcpu->arch.sie_block->eca = 0xC1002000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; -- cgit v0.10.2 From 5e0443152367ab9fef597a41a4e09a32df2bf887 Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Wed, 22 Apr 2015 18:08:39 +0200 Subject: KVM: s390: Introduce switching code This patch adds code that performs transparent switch to Extended SCA on addition of 65th VCPU in a VM. Disposal of ESCA is added too. The entier ESCA functionality, however, is still not enabled. The enablement will be provided in a separate patch. This patch also uses read/write lock protection of SCA and its subfields for possible disposal at the BSCA-to-ESCA transition. While only Basic SCA needs such a protection (for the swap), any SCA access is now guarded. Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 25fdbf8..86c3386 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -622,6 +622,7 @@ struct kvm_s390_crypto_cb { struct kvm_arch{ void *sca; int use_esca; + rwlock_t sca_lock; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 06f7edb..d30db40 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = kvm_s390_get_ipte_control(vcpu->kvm); + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if (old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,23 +321,28 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); 
do { old = READ_ONCE(*ic); @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 60b36b0..831c9ac 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -39,6 +39,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { int c, scn; + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl sigp_ctrl = @@ -54,6 +55,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) c = sigp_ctrl.c; scn = sigp_ctrl.scn; } + read_unlock(&vcpu->kvm->arch.sca_lock); if (src_id) *src_id = scn; @@ -66,6 +68,7 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) { int expect, rc; + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = @@ -91,6 +94,7 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) expect = old_val.value; rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); } + read_unlock(&vcpu->kvm->arch.sca_lock); if (rc != expect) { /* another external call is pending */ @@ -106,6 +110,7 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) int rc, expect; atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = @@ -123,6 +128,7 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } + read_unlock(&vcpu->kvm->arch.sca_lock); WARN_ON(rc != expect); /* cannot clear? 
*/ } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 41b3fed..5e884aa 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1080,7 +1080,7 @@ static int kvm_s390_crypto_init(struct kvm *kvm) static void sca_dispose(struct kvm *kvm) { if (kvm->arch.use_esca) - BUG(); /* not implemented yet */ + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); else free_page((unsigned long)(kvm->arch.sca)); kvm->arch.sca = NULL; @@ -1110,6 +1110,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; @@ -1259,6 +1260,7 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; @@ -1272,11 +1274,13 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) sca->cpu[vcpu->vcpu_id].sda = 0; } + read_unlock(&vcpu->kvm->arch.sca_lock); } static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned int id) { + read_lock(&kvm->arch.sca_lock); if (kvm->arch.use_esca) { struct esca_block *sca = kvm->arch.sca; @@ -1294,11 +1298,78 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; set_bit_inv(id, (unsigned long *) &sca->mcn); } + read_unlock(&kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA (%p -> %p)", old_sca, kvm->arch.sca); + return 0; } static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) { - return id < KVM_MAX_VCPUS; + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 
0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From fe0edcb7311e80a349ad5f2b15f2d91a5606ed7f Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Wed, 22 Apr 2015 18:37:40 +0200 Subject: KVM: s390: Enable up to 248 VCPUs per VM This patch allows s390 to have more than 64 VCPUs for a guest (up to 248 for memory usage considerations), if supported by the underlaying hardware (sclp.has_esca). Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 86c3386..12e9291 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -27,7 +27,7 @@ #define KVM_S390_BSCA_CPU_SLOTS 64 #define KVM_S390_ESCA_CPU_SLOTS 248 -#define KVM_MAX_VCPUS KVM_S390_BSCA_CPU_SLOTS +#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5e884aa..16c19fb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; -- cgit v0.10.2 From 2c1bb2be986c56848ce92ba41ba32fc62ab3165f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 23 Sep 2015 09:45:50 +0200 Subject: KVM: s390: fast path for sca_ext_call_pending If CPUSTAT_ECALL_PEND isn't set, we can't have an external call pending, so we can directly avoid taking the lock. Acked-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 831c9ac..62ec925 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -39,6 +39,9 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { int c, scn; + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; @@ -60,8 +63,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) if (src_id) *src_id = scn; - return c && atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND; + return c; } static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) -- cgit v0.10.2 From 5f3fe620a56f2f5c79e89522107f2476a45ed6ce Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 12:34:19 +0200 Subject: KVM: s390: we always have a SCA Having no sca can never happen, even when something goes wrong when switching to ESCA. Otherwise we would have a serious bug. Let's remove this superfluous check. 
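The invariant comes from VM creation earlier in this series: kvm_arch_init_vm() fails before any VCPU can exist if the SCA allocation fails, so a NULL kvm->arch.sca is unreachable by the time a VCPU is created:

/* from kvm_arch_init_vm(): a VM is never created without an SCA */
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
        goto out_err;   /* VM creation fails with -ENOMEM */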
Acked-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 16c19fb..5c58127 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1608,13 +1608,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } + if (!kvm_is_ucontrol(kvm)) sca_add_vcpu(vcpu, kvm, id); - } spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; -- cgit v0.10.2 From 2550882449299fd55c8214529cc0777b789db0f7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 16:27:23 +0200 Subject: KVM: s390: fix SCA related races and double use If something goes wrong in kvm_arch_vcpu_create, the VCPU has already been added to the sca but will never be removed. Trying to create VCPUs with duplicate ids (e.g. after a failed attempt) is problematic. Also, when creating multiple VCPUs in parallel, we could theoretically forget to set the correct SCA when the switch to ESCA happens just before the VCPU is registered. Let's add the VCPU to the SCA in kvm_arch_vcpu_postcreate, where we can be sure that no duplicate VCPU with the same id is around and the VCPU has already been registered at the VM. We also have to make sure to update ECB at that point. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c58127..2ba5978 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1289,6 +1289,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; set_bit_inv(id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = kvm->arch.sca; @@ -1493,8 +1494,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu, vcpu->kvm, vcpu->vcpu_id); + } + } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1558,8 +1562,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= 0x10; vcpu->arch.sie_block->ecb2 = 8; - if (vcpu->kvm->arch.use_esca) - vcpu->arch.sie_block->ecb2 |= 4; vcpu->arch.sie_block->eca = 0xC1002000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; @@ -1608,9 +1610,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) - sca_add_vcpu(vcpu, kvm, id); - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; -- cgit v0.10.2 From 10ce32d5b07470c5eb0fa821000c789073128b3f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 12:41:41 +0200 Subject: KVM: s390: always set/clear the SCA sda field Let's always set and clear the sda when enabling/disabling a VCPU. 
Dealing with sda being set to something else makes no sense anymore as we enable a VCPU in the SCA now after it has been registered at the VM. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2ba5978..7e0092b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1266,14 +1266,12 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); - if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) - sca->cpu[vcpu->vcpu_id].sda = 0; + sca->cpu[vcpu->vcpu_id].sda = 0; } else { struct bsca_block *sca = vcpu->kvm->arch.sca; clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); - if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) - sca->cpu[vcpu->vcpu_id].sda = 0; + sca->cpu[vcpu->vcpu_id].sda = 0; } read_unlock(&vcpu->kvm->arch.sca_lock); } @@ -1285,8 +1283,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, if (kvm->arch.use_esca) { struct esca_block *sca = kvm->arch.sca; - if (!sca->cpu[id].sda) - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; vcpu->arch.sie_block->ecb2 |= 0x04U; @@ -1294,8 +1291,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, } else { struct bsca_block *sca = kvm->arch.sca; - if (!sca->cpu[id].sda) - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; set_bit_inv(id, (unsigned long *) &sca->mcn); -- cgit v0.10.2 From eaa78f343255dabac963ab8c7644367844d18e00 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 16:29:01 +0200 Subject: KVM: s390: cleanup sca_add_vcpu Now that we already have kvm and the VCPU id set for the VCPU, we can convert sda_add_vcpu to look much more like sda_del_vcpu. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7e0092b..d9d71bb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1276,27 +1276,26 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) read_unlock(&vcpu->kvm->arch.sca_lock); } -static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, - unsigned int id) +static void sca_add_vcpu(struct kvm_vcpu *vcpu) { - read_lock(&kvm->arch.sca_lock); - if (kvm->arch.use_esca) { - struct esca_block *sca = kvm->arch.sca; + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; vcpu->arch.sie_block->ecb2 |= 0x04U; - set_bit_inv(id, (unsigned long *) sca->mcn); + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { - struct bsca_block *sca = kvm->arch.sca; + struct bsca_block *sca = vcpu->kvm->arch.sca; - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; - set_bit_inv(id, (unsigned long *) &sca->mcn); + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); } - read_unlock(&kvm->arch.sca_lock); + read_unlock(&vcpu->kvm->arch.sca_lock); } /* Basic SCA to Extended SCA data copy routines */ @@ -1492,7 +1491,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) mutex_unlock(&vcpu->kvm->lock); if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; - sca_add_vcpu(vcpu, vcpu->kvm, vcpu->vcpu_id); + sca_add_vcpu(vcpu); } } -- cgit v0.10.2 From 4215825eeb1f704d1bd327ca01fb753b53ea34d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 12:57:22 +0200 Subject: KVM: s390: don't switch to ESCA for ucontrol sca_add_vpcu is not called for ucontrol guests. We must also not apply the sca checking for sca_can_add_vcpu as ucontrol guests do not have to follow the sca limits. As common code already checks that id < KVM_MAX_VCPUS all other data structures are safe as well. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d9d71bb..539d385 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1588,7 +1588,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (!sca_can_add_vcpu(kvm, id)) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; -- cgit v0.10.2 From 8dfd523f8523779210038264259546299a8398e9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Feb 2015 15:51:55 +0100 Subject: s390/sclp: introduce check for SIE This patch adds a way to check if the SIE with zArchitecture support is available. 
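A consumer only has to test the new flag; for example, the kvm module load check added later in this series reduces to:

/* SIE support cannot be enabled dynamically, so bail out early when the
 * facility is not reported by SCLP */
if (!sclp.has_sief2) {
        pr_info("SIE not available\n");
        return -ENODEV;
}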
Acked-by: Martin Schwidefsky Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 8324abb..dea883f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -29,7 +29,10 @@ struct sclp_ipl_info { struct sclp_core_entry { u8 core_id; - u8 reserved0[2]; + u8 reserved0; + u8 : 4; + u8 sief2 : 1; + u8 : 3; u8 : 3; u8 siif : 1; u8 sigpif : 1; @@ -55,6 +58,7 @@ struct sclp_info { unsigned char has_sprp : 1; unsigned char has_hvs : 1; unsigned char has_esca : 1; + unsigned char has_sief2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index ff1e1bb..e0a1f4e 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -136,6 +136,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) continue; sclp.has_siif = cpue->siif; sclp.has_sigpif = cpue->sigpif; + sclp.has_sief2 = cpue->sief2; break; } -- cgit v0.10.2 From 7f16d7e787b731d9db273b822b4b8069102e57a6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Feb 2015 15:54:47 +0100 Subject: s390: show virtualization support in /proc/cpuinfo This patch exposes the SIE capability (aka virtualization support) via /proc/cpuinfo -> "features" as "sie". As we don't want to expose this hwcap via elf, let's add a second, "internal"/non-elf capability list. The content is simply concatenated to the existing features when printing /proc/cpuinfo. We also add the defines to elf.h to keep the hwcap stuff at a common place. Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739..08e34a5 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -104,6 +104,9 @@ #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +/* Internal bits, not exposed via elf */ +#define HWCAP_INT_SIE 1UL + /* * These are used to set parameters in the core dumps. */ @@ -169,6 +172,10 @@ extern unsigned int vdso_enabled; extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) +/* Internal hardware capabilities, not exposed via elf */ + +extern unsigned long int_hwcap; + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. 
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7ce00e7..647128d 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs", "te", "vx" }; + static const char * const int_hwcap_str[] = { + "sie" + }; unsigned long n = (unsigned long) v - 1; int i; @@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_cacheinfo(m); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c837bca..dc83ae6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap __read_mostly = 0; char elf_platform[ELF_PLATFORM_SIZE]; +unsigned long int_hwcap = 0; + int __initdata memory_end_set; unsigned long __initdata memory_end; unsigned long __initdata max_physmem_end; @@ -793,6 +795,13 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + + /* + * Virtualization support HWCAP_INT_SIE is bit 0. + */ + if (sclp.has_sief2) + int_hwcap |= HWCAP_INT_SIE; + return 0; } arch_initcall(setup_hwcaps); -- cgit v0.10.2 From 07197fd05fa3eb2e8a5aae92938ca5d07bcac9fc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 30 Jan 2015 16:01:38 +0100 Subject: KVM: s390: don't load kvm without virtualization support If we don't have support for virtualization (SIE), e.g. when running under a hypervisor not supporting execution of the SIE instruction, we should immediately abort loading the kvm module, as the SIE instruction cannot be enabled dynamically. Currently, the SIE instructions fails with an exception on a non-SIE host, resulting in the guest making no progress, instead of failing hard. Reviewed-by: Cornelia Huck Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 539d385..49d3319 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2859,6 +2859,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } -- cgit v0.10.2 From a6aacc3f87dfd44425fc17ea9875a5c2ad917227 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 24 Nov 2015 14:28:12 +0100 Subject: KVM: s390: remove pointless test_facility(2) check This evaluates always to 'true'. 
Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 49d3319..77724ce 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1027,7 +1027,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) -- cgit v0.10.2 From 2f8a43d45d14ad62b105ed99151b453c12df7149 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Nov 2015 11:00:54 +0100 Subject: KVM: s390: remove redudant assigment of error code rc already contains -ENOMEM, no need to assign it twice. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 77724ce..6857262 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1618,10 +1618,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, */ vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; + if (!vcpu->arch.guest_fpregs.fprs) goto out_free_sie_block; - } rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) -- cgit v0.10.2 From 8cdb654abe5730654d0385382c4e877a011bb8c8 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 12 Nov 2015 19:59:14 +0800 Subject: KVM: arm/arm64: vgic: make vgic_io_ops static vgic_io_ops is only referenced within vgic.c, so it can be declared static. Signed-off-by: Jisheng Zhang Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 65461f8..0c739a7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -878,7 +878,7 @@ static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, true); } -struct kvm_io_device_ops vgic_io_ops = { +static struct kvm_io_device_ops vgic_io_ops = { .read = vgic_handle_mmio_read, .write = vgic_handle_mmio_write, }; -- cgit v0.10.2 From b19e6892a90e7c9d15fde0a08516ec891a4e7d54 Mon Sep 17 00:00:00 2001 From: Amit Tomar Date: Thu, 26 Nov 2015 10:09:43 +0000 Subject: KVM: arm/arm64: Count guest exit due to various reasons It would add guest exit statistics to debugfs, this can be helpful while measuring KVM performance. 
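Each counter is exported by adding it to debugfs_entries[]; the VCPU_STAT() wrapper records the field's offset inside struct kvm_vcpu, and the generic code then creates one debugfs file per entry under the kvm directory (typically /sys/kernel/debug/kvm). A sketch of the mechanism used in the diff below:

#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }

struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT(exits),               /* becomes an "exits" file in kvm's debugfs dir */
        VCPU_STAT(mmio_exit_user),
        { NULL }
};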
[ Renamed some of the field names - Christoffer ] Signed-off-by: Amit Singh Tomar Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6692982..f9f2779 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,6 +150,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e06fd29..8a79a57 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -603,6 +603,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; /* * Back from guest *************************************************************/ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 96e935b..5fa69d7 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -33,6 +33,12 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(hvc_exit_stat), + VCPU_STAT(wfe_exit_stat), + VCPU_STAT(wfi_exit_stat), + VCPU_STAT(mmio_exit_user), + VCPU_STAT(mmio_exit_kernel), + VCPU_STAT(exits), { NULL } }; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 95f12b2..3ede90d 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -42,6 +42,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; ret = kvm_psci_call(vcpu); if (ret < 0) { @@ -89,9 +90,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); + vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); + vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); } diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 3a10c9f..7f33b20 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -210,8 +210,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, if (!ret) { /* We handled the access successfully in the kernel. 
*/ + vcpu->stat.mmio_exit_kernel++; kvm_handle_mmio_return(vcpu, run); return 1; + } else { + vcpu->stat.mmio_exit_user++; } run->exit_reason = KVM_EXIT_MMIO; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a35ce72..19504aa 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -197,6 +197,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d250160..115522b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -34,7 +34,16 @@ #include "trace.h" +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } + struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(hvc_exit_stat), + VCPU_STAT(wfe_exit_stat), + VCPU_STAT(wfi_exit_stat), + VCPU_STAT(mmio_exit_user), + VCPU_STAT(mmio_exit_kernel), + VCPU_STAT(exits), { NULL } }; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 15f0477..8bddae1 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -39,6 +39,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; ret = kvm_psci_call(vcpu); if (ret < 0) { @@ -71,9 +72,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); + vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); + vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); } -- cgit v0.10.2 From 3600c2fdc09a43a30909743569e35a29121602ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Nov 2015 15:09:17 +0000 Subject: arm64: Add macros to read/write system registers Rather than crafting custom macros for reading/writing each system register provide generics accessors, read_sysreg and write_sysreg, for this purpose. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d48ab5b..4aeebec 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,8 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include + #include /* @@ -208,6 +210,8 @@ #else +#include + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ __reg_num_x\\num, \\num\n" @@ -232,6 +236,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set) val |= set; asm volatile("msr sctlr_el1, %0" : : "r" (val)); } + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. 
+ */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ -- cgit v0.10.2 From c76a0a6695c61088c8d2e731e25305502666bf7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2015 10:09:49 +0100 Subject: arm64: KVM: Add a HYP-specific header file In order to expose the various EL2 services that are private to the hypervisor, add a new hyp.h file. So far, it only contains mundane things such as section annotation and VA manipulation. Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h new file mode 100644 index 0000000..057f483 --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_HYP_H__ +#define __ARM64_KVM_HYP_H__ + +#include +#include +#include +#include + +#define __hyp_text __section(.hyp.text) notrace + +#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) +#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + + PAGE_OFFSET) + +#endif /* __ARM64_KVM_HYP_H__ */ + -- cgit v0.10.2 From 06282fd2c2bf61619649a2b13e4a08556598a64c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 15:50:37 +0100 Subject: arm64: KVM: Implement vgic-v2 save/restore Implement the vgic-v2 save restore (mostly) as a direct translation of the assembly code version. 
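A minimal illustrative sketch of how these two helpers are intended to be used around a guest run; the wrapper below is hypothetical and only shows the call pattern (the real hook-up into the world switch happens later in this series), and it assumes the vcpu pointer has already been converted to a HYP VA:

#include "hyp.h"

/* Hypothetical wrapper, for illustration only */
static void __hyp_text __vgic_v2_sr_example(struct kvm_vcpu *vcpu)
{
	/* on guest exit: snapshot the GICH_* state into the vcpu */
	__vgic_v2_save_state(vcpu);

	/* ... host-side processing of the virtual interrupt state ... */

	/* before re-entering the guest: write the saved state back */
	__vgic_v2_restore_state(vcpu);
}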
Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1949fe5..d31e4e5 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,7 @@ KVM=../../../virt/kvm ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile new file mode 100644 index 0000000..d8d5968 --- /dev/null +++ b/arch/arm64/kvm/hyp/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Kernel-based Virtual Machine module, HYP part +# + +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 057f483..ac63553 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -29,5 +29,8 @@ #define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + PAGE_OFFSET) +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c new file mode 100644 index 0000000..e717612 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u32 eisr0, eisr1, elrsr0, elrsr1; + int i, nr_lr; + + if (!base) + return; + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); + cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); + eisr0 = readl_relaxed(base + GICH_EISR0); + elrsr0 = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(nr_lr > 32)) { + eisr1 = readl_relaxed(base + GICH_EISR1); + elrsr1 = readl_relaxed(base + GICH_ELRSR1); + } else { + eisr1 = elrsr1 = 0; + } +#ifdef CONFIG_CPU_BIG_ENDIAN + cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; + cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; +#else + cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; + cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; +#endif + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + + writel_relaxed(0, base + GICH_HCR); + + for (i = 0; i < nr_lr; i++) + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); +} + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + int i, nr_lr; + + if (!base) + return; + + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + for (i = 0; i < nr_lr; i++) + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); +} -- cgit v0.10.2 From 3c13b8f435acb452eac62d966148a8b6fa92151f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Dec 2015 13:48:56 +0000 Subject: KVM: arm/arm64: vgic-v3: Make the LR indexing macro public We store GICv3 LRs in reverse order so that the CPU can save/restore them in rever order as well (don't ask why, the design is crazy), and yet generate memory traffic that doesn't completely suck. We need this macro to be available to the C version of save/restore. Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d2f4147..13a3d53 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -279,6 +279,12 @@ struct vgic_v2_cpu_if { u32 vgic_lr[VGIC_V2_MAX_LRS]; }; +/* + * LRs are stored in reverse order in memory. make sure we index them + * correctly. + */ +#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + struct vgic_v3_cpu_if { #ifdef CONFIG_KVM_ARM_VGIC_V3 u32 vgic_hcr; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 487d635..3813d23 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -36,18 +36,12 @@ #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) #define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) -/* - * LRs are stored in reverse order in memory. make sure we index them - * correctly. 
- */ -#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) - static u32 ich_vtr_el2; static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) { struct vgic_lr lr_desc; - u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)]; if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; @@ -111,7 +105,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; } - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val; if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); -- cgit v0.10.2 From f68d2b1b73cc3d8f6eb189c11ce79a472ed27c42 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 15:50:58 +0100 Subject: arm64: KVM: Implement vgic-v3 save/restore Implement the vgic-v3 save restore as a direct translation of the assembly code version. Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index d8d5968..d1e38ce 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index ac63553..5759f9f 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -32,5 +32,8 @@ void __vgic_v2_save_state(struct kvm_vcpu *vcpu); void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 0000000..78d05f3 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "hyp.h" + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) + +#define read_gicreg(r) \ + ({ \ + u64 reg; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ + reg; \ + }) + +#define write_gicreg(v,r) \ + do { \ + u64 __val = (v); \ + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ + } while (0) + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface. 
+ */ + dsb(st); + + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + + write_gicreg(0, ICH_HCR_EL2); + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (max_lr_idx) { + case 15: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); + case 14: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); + case 13: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); + case 12: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); + case 11: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); + case 10: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); + case 9: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); + case 8: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); + case 7: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); + case 6: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); + case 5: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); + case 4: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); + case 3: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); + case 2: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); + case 1: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); + case 0: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + case 6: + cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + default: + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + case 6: + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + default: + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + write_gicreg(1, ICC_SRE_EL1); +} + +void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC. 
+ */ + write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); + isb(); + + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + } + + switch (max_lr_idx) { + case 15: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); + case 14: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); + case 13: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); + case 12: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); + case 11: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); + case 10: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); + case 9: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); + case 8: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); + case 7: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); + case 6: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); + case 5: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); + case 4: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); + case 3: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); + case 2: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); + case 1: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); + case 0: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); + } + + /* + * Ensures that the above will have reached the + * (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + isb(); + dsb(sy); + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + if (!cpu_if->vgic_sre) { + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } +} + +u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} -- cgit v0.10.2 From 1431af367e52b08038e78d346822966d968f1694 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 16:32:20 +0100 Subject: arm64: KVM: Implement timer save/restore Implement the timer save restore as a direct translation of the assembly code version. 
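The CNTHCTL_EL2 handling that follows can be summarised by this hedged sketch; the helper name is hypothetical, but the bit manipulation mirrors what the save/restore functions below actually do with the CNTHCTL_* bits this patch adds to arm_arch_timer.h:

/* Hypothetical helper, for illustration only */
static void __hyp_text __timer_phys_access_example(bool for_host)
{
	u64 val = read_sysreg(cnthctl_el2);

	if (for_host) {
		/* host: allow EL1/EL0 access to both physical timer and counter */
		val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
	} else {
		/* guest: keep physical counter reads, trap physical timer programming */
		val &= ~CNTHCTL_EL1PCEN;
		val |= CNTHCTL_EL1PCTEN;
	}
	write_sysreg(val, cnthctl_el2);
}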
Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index d1e38ce..455dc0a 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 5759f9f..f213e46 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -35,5 +35,8 @@ void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 0000000..1051e5d --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + if (kvm->arch.timer.enabled) { + timer->cntv_ctl = read_sysreg(cntv_ctl_el0); + timer->cntv_cval = read_sysreg(cntv_cval_el0); + } + + /* Disable the virtual timer */ + write_sysreg(0, cntv_ctl_el0); + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + + /* Clear cntvoff for the host */ + write_sysreg(0, cntvoff_el2); +} + +void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + + if (kvm->arch.timer.enabled) { + write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); + write_sysreg(timer->cntv_cval, cntv_cval_el0); + isb(); + write_sysreg(timer->cntv_ctl, cntv_ctl_el0); + } +} diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 9916d0e..25d0914 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -23,6 +23,12 @@ #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) +#define CNTHCTL_EL1PCTEN (1 << 0) +#define CNTHCTL_EL1PCEN (1 << 1) +#define CNTHCTL_EVNTEN (1 << 2) +#define CNTHCTL_EVNTDIR (1 << 3) +#define CNTHCTL_EVNTI (0xF << 4) + enum arch_timer_reg { ARCH_TIMER_REG_CTRL, ARCH_TIMER_REG_TVAL, -- cgit v0.10.2 From 
6d6ec20fcf2830ca10c1b7c8efd7e2592c40e3d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 18:02:48 +0100 Subject: arm64: KVM: Implement system register save/restore Implement the system register save/restore as a direct translation of the assembly code version. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 455dc0a..ec94200 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index f213e46..778d56d 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -38,5 +38,8 @@ void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); void __timer_save_state(struct kvm_vcpu *vcpu); void __timer_restore_state(struct kvm_vcpu *vcpu); +void __sysreg_save_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state(struct kvm_cpu_context *ctxt); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c new file mode 100644 index 0000000..add8fcb --- /dev/null +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include + +#include "hyp.h" + +/* ctxt is already in the HYP VA space */ +void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); + ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); + ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); + ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); + ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); + ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); + ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); + ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); + ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); + ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); +} + +void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); + write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); + write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); + write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); + write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); + write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); + write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); + write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); + write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); + write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); + write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); + write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); + write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); + write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); + write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); +} -- cgit v0.10.2 From c209ec85a2a7d2fd38bca0a44b7e70abd079c178 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 19:28:29 +0100 Subject: arm64: KVM: Implement 32bit system register save/restore Implement the 32bit system register save/restore as a direct translation of the assembly code version. 
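The save/restore below is gated on the guest's register width: HCR_EL2.RW set means the guest runs AArch64 at EL1, so the AArch32-only state (banked SPSRs, DACR32_EL2, IFSR32_EL2, FPEXC32_EL2, DBGVCR32_EL2) does not exist as live state and is skipped. A hedged one-liner capturing that check (the helper name is hypothetical; the patch open-codes the test):

/* Hypothetical helper, for illustration only */
static inline bool __guest_is_aarch32_example(void)
{
	return !(read_sysreg(hcr_el2) & HCR_RW);
}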
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 778d56d..bffd308 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -40,6 +40,8 @@ void __timer_restore_state(struct kvm_vcpu *vcpu); void __sysreg_save_state(struct kvm_cpu_context *ctxt); void __sysreg_restore_state(struct kvm_cpu_context *ctxt); +void __sysreg32_save_state(struct kvm_vcpu *vcpu); +void __sysreg32_restore_state(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index add8fcb..eb05afb 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -88,3 +88,50 @@ void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); } + +void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + + if (!(read_sysreg(cptr_el2) & CPTR_EL2_TFP)) + sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); +} + +void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); +} -- cgit v0.10.2 From 8eb992674c9e69d57af199f36b6455dbc00ac9f9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 21:02:46 +0100 Subject: arm64: KVM: Implement debug save/restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the debug save restore as a direct translation of the assembly code version. Signed-off-by: Marc Zyngier Tested-by: Alex Bennée Reviewed-by: Alex Bennée Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ec94200..ec14cac 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c new file mode 100644 index 0000000..7848322 --- /dev/null +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +#include "hyp.h" + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + case 14: ptr[14] = read_debug(reg, 14); \ + case 13: ptr[13] = read_debug(reg, 13); \ + case 12: ptr[12] = read_debug(reg, 12); \ + case 11: ptr[11] = read_debug(reg, 11); \ + case 10: ptr[10] = read_debug(reg, 10); \ + case 9: ptr[9] = read_debug(reg, 9); \ + case 8: ptr[8] = read_debug(reg, 8); \ + case 7: ptr[7] = read_debug(reg, 7); \ + case 6: ptr[6] = read_debug(reg, 6); \ + case 5: ptr[5] = read_debug(reg, 5); \ + case 4: ptr[4] = read_debug(reg, 4); \ + case 3: ptr[3] = read_debug(reg, 3); \ + case 2: ptr[2] = read_debug(reg, 2); \ + case 1: ptr[1] = read_debug(reg, 1); \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + case 14: write_debug(ptr[14], reg, 14); \ + case 13: write_debug(ptr[13], reg, 13); \ + case 12: write_debug(ptr[12], reg, 12); \ + case 11: write_debug(ptr[11], reg, 11); \ + case 10: write_debug(ptr[10], reg, 10); \ + case 9: write_debug(ptr[9], reg, 9); \ + case 8: write_debug(ptr[8], reg, 8); \ + case 7: write_debug(ptr[7], reg, 7); \ + case 6: write_debug(ptr[6], reg, 6); \ + case 5: write_debug(ptr[5], reg, 5); \ + case 4: write_debug(ptr[4], reg, 4); \ + case 3: write_debug(ptr[3], reg, 3); \ + case 2: write_debug(ptr[2], reg, 2); \ + case 1: write_debug(ptr[1], reg, 1); \ + default: write_debug(ptr[0], reg, 0); \ + } + +void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); +} + +void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); +} + +void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) +{ + /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform + * a full save/restore cycle. 
*/ + if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || + (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + + __debug_save_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); +} + +void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) +{ + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; +} + +u32 __hyp_text __debug_read_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index bffd308..454e46f 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -43,5 +43,14 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt); void __sysreg32_save_state(struct kvm_vcpu *vcpu); void __sysreg32_restore_state(struct kvm_vcpu *vcpu); +void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); +void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ -- cgit v0.10.2 From b97b66c14b96ab562e4fd516d804c5cd05c0529e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 22 Oct 2015 08:32:18 +0100 Subject: arm64: KVM: Implement guest entry Contrary to the previous patch, the guest entry is fairly different from its assembly counterpart, mostly because it is only concerned with saving/restoring the GP registers, and nothing else. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ec14cac..1e1ff06 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += entry.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S new file mode 100644 index 0000000..ff19695 --- /dev/null +++ b/arch/arm64/kvm/hyp/entry.S @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + +.macro save_callee_saved_regs ctxt + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +/* + * u64 __guest_enter(struct kvm_vcpu *vcpu, + * struct kvm_cpu_context *host_ctxt); + */ +ENTRY(__guest_enter) + // x0: vcpu + // x1: host/guest context + // x2-x18: clobbered by macros + + // Store the host regs + save_callee_saved_regs x1 + + // Preserve vcpu & host_ctxt for use at exit time + stp x0, x1, [sp, #-16]! + + add x1, x0, #VCPU_CONTEXT + + // Prepare x0-x1 for later restore by pushing them onto the stack + ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] + stp x2, x3, [sp, #-16]! + + // x2-x18 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] + ldr x18, [x1, #CPU_XREG_OFFSET(18)] + + // x19-x29, lr + restore_callee_saved_regs x1 + + // Last bits of the 64bit state + ldp x0, x1, [sp], #16 + + // Do not touch any register after this! 
+ eret +ENDPROC(__guest_enter) + +ENTRY(__guest_exit) + // x0: vcpu + // x1: return code + // x2-x3: free + // x4-x29,lr: vcpu regs + // vcpu x0-x3 on the stack + + add x2, x0, #VCPU_CONTEXT + + stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] + str x18, [x2, #CPU_XREG_OFFSET(18)] + + ldp x6, x7, [sp], #16 // x2, x3 + ldp x4, x5, [sp], #16 // x0, x1 + + stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] + + save_callee_saved_regs x2 + + // Restore vcpu & host_ctxt from the stack + // (preserving return code in x1) + ldp x0, x2, [sp], #16 + // Now restore the host regs + restore_callee_saved_regs x2 + + mov x0, x1 + ret +ENDPROC(__guest_exit) + + /* Insert fault handling here */ diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 454e46f..0809653 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -52,5 +52,7 @@ void __debug_restore_state(struct kvm_vcpu *vcpu, void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); +u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); + #endif /* __ARM64_KVM_HYP_H__ */ -- cgit v0.10.2 From c1bf6e18e97e7ead77371d4251f8ef1567455584 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 28 Oct 2015 08:45:37 +0000 Subject: arm64: KVM: Add patchable function selector KVM so far relies on code patching, and is likely to use it more in the future. The main issue is that our alternative system works at the instruction level, while we'd like to have alternatives at the function level. In order to cope with this, add the "hyp_alternate_select" macro that outputs a brief sequence of code that in turn can be patched, allowing an alternative function to be selected. Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 0809653..73419a7 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -29,6 +29,30 @@ #define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + PAGE_OFFSET) +/** + * hyp_alternate_select - Generates patchable code sequences that are + * used to switch between two implementations of a function, depending + * on the availability of a feature. + * + * @fname: a symbol name that will be defined as a function returning a + * function pointer whose type will match @orig and @alt + * @orig: A pointer to the default function, as returned by @fname when + * @cond doesn't hold + * @alt: A pointer to the alternate function, as returned by @fname + * when @cond holds + * @cond: a CPU feature (as described in asm/cpufeature.h) + */ +#define hyp_alternate_select(fname, orig, alt, cond) \ +typeof(orig) * __hyp_text fname(void) \ +{ \ + typeof(alt) *val = orig; \ + asm volatile(ALTERNATIVE("nop \n", \ + "mov %0, %1 \n", \ + cond) \ + : "+r" (val) : "r" (alt)); \ + return val; \ +} + void __vgic_v2_save_state(struct kvm_vcpu *vcpu); void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); -- cgit v0.10.2 From be901e9b15cd2c8e48dc089b4655ea4a076e66fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2015 09:57:10 +0100 Subject: arm64: KVM: Implement the core world switch Implement the core of the world switch in C. 
Not everything is there yet, and there is nothing to re-enter the world switch either. But this already outlines the code structure well enough. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 1e1ff06..9c11b0f 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o +obj-$(CONFIG_KVM_ARM_HOST) += switch.o diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c new file mode 100644 index 0000000..79f59c9 --- /dev/null +++ b/arch/arm64/kvm/hyp/switch.c @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "hyp.h" + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 
+ */ + val = vcpu->arch.hcr_el2; + if (!(val & HCR_RW)) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } + write_sysreg(val, hcr_el2); + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + write_sysreg(HCR_RW, hcr_el2); + write_sysreg(0, hstr_el2); + write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); + write_sysreg(0, cptr_el2); +} + +static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__vgic_call_save_state, + __vgic_v2_save_state, __vgic_v3_save_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static hyp_alternate_select(__vgic_call_restore_state, + __vgic_v2_restore_state, __vgic_v3_restore_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +{ + __vgic_call_save_state()(vcpu); + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); +} + +static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + val = read_sysreg(hcr_el2); + val |= HCR_INT_OVERRIDE; + val |= vcpu->arch.irq_lines; + write_sysreg(val, hcr_el2); + + __vgic_call_restore_state()(vcpu); +} + +int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + write_sysreg(vcpu, tpidr_el2); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state(host_ctxt); + __debug_cond_save_host_state(vcpu); + + __activate_traps(vcpu); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_restore_state(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to Cortex-A57 erratum #852523. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state(guest_ctxt); + __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + /* And we're baaack! */ + + __sysreg_save_state(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_save_state(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state(host_ctxt); + + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + __debug_cond_restore_host_state(vcpu); + + return exit_code; +} -- cgit v0.10.2 From c13d1683df16db16c91372177ca10c31677b5ed5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2015 08:34:09 +0000 Subject: arm64: KVM: Implement fpsimd save/restore Implement the fpsimd save restore, keeping the lazy part in assembler (as returning to C would be overkill). 
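For readers following the lazy scheme: guest FP/SIMD use traps to EL2 while CPTR_EL2.TFP is set; the __fpsimd_guest_restore handler below clears TFP, saves the host FP state and loads the guest's. On the way back out, the world switch only needs to know whether that trap ever fired, which is exactly what the __fpsimd_enabled() helper added to hyp.h by this patch expresses (reproduced here with an extra explanatory comment):

static inline bool __fpsimd_enabled(void)
{
	/* TFP clear means the guest really used FP/SIMD during this run */
	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}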
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 9c11b0f..56238d0 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o +obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index ff19695..90cbf0f 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -27,6 +27,7 @@ #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) .text .pushsection .hyp.text, "ax" @@ -127,4 +128,33 @@ ENTRY(__guest_exit) ret ENDPROC(__guest_exit) - /* Insert fault handling here */ +ENTRY(__fpsimd_guest_restore) + stp x4, lr, [sp, #-16]! + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x3, tpidr_el2 + + ldr x0, [x3, #VCPU_HOST_CONTEXT] + kern_hyp_va x0 + add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_save_state + + add x2, x3, #VCPU_CONTEXT + add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_restore_state + + mrs x1, hcr_el2 + tbnz x1, #HCR_RW_SHIFT, 1f + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + msr fpexc32_el2, x4 +1: + ldp x4, lr, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 + + eret +ENDPROC(__fpsimd_guest_restore) diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S new file mode 100644 index 0000000..da3f22c --- /dev/null +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__fpsimd_save_state) + fpsimd_save x0, 1 + ret +ENDPROC(__fpsimd_save_state) + +ENTRY(__fpsimd_restore_state) + fpsimd_restore x0, 1 + ret +ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 73419a7..70d4f69 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -76,6 +76,13 @@ void __debug_restore_state(struct kvm_vcpu *vcpu, void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); +void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); +void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +static inline bool __fpsimd_enabled(void) +{ + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); +} + u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 79f59c9..608155f 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -89,6 +89,7 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; u64 exit_code; vcpu = kern_hyp_va(vcpu); @@ -118,6 +119,8 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) exit_code = __guest_enter(vcpu, host_ctxt); /* And we're baaack! */ + fp_enabled = __fpsimd_enabled(); + __sysreg_save_state(guest_ctxt); __sysreg32_save_state(vcpu); __timer_save_state(vcpu); @@ -128,6 +131,11 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) __sysreg_restore_state(host_ctxt); + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + } + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); __debug_cond_restore_host_state(vcpu); diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index eb05afb..3603541 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -107,7 +107,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - if (!(read_sysreg(cptr_el2) & CPTR_EL2_TFP)) + if (__fpsimd_enabled()) sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) -- cgit v0.10.2 From 5eec0a91e32a2862e86265532ae773820e0afd77 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Oct 2015 08:26:37 +0100 Subject: arm64: KVM: Implement TLB handling Implement the TLB handling as a direct translation of the assembly code version. 
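These flush helpers run at EL2; on the host side they are reached through the usual HVC trampoline. A hypothetical call site for the per-IPA flush would follow the existing kvm_call_hyp() pattern (the wiring of these C versions to their callers is not part of this hunk, so treat this strictly as an illustration):

/* Hypothetical host-side caller: invalidate the stage-2 mapping for one IPA */
kvm_call_hyp(__tlb_flush_vmid_ipa, kvm, ipa);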
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 56238d0..1a529f5 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o +obj-$(CONFIG_KVM_ARM_HOST) += tlb.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90cbf0f..1050b2b 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -147,6 +147,7 @@ ENTRY(__fpsimd_guest_restore) add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) bl __fpsimd_restore_state + // Skip restoring fpexc32 for AArch64 guests mrs x1, hcr_el2 tbnz x1, #HCR_RW_SHIFT, 1f ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c new file mode 100644 index 0000000..6fcb93a --- /dev/null +++ b/arch/arm64/kvm/hyp/tlb.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "hyp.h" + +void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + asm volatile("tlbi vmalle1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + asm volatile("tlbi vmalls12e1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +void __hyp_text __tlb_flush_vm_context(void) +{ + dsb(ishst); + asm volatile("tlbi alle1is \n" + "ic ialluis ": : ); + dsb(ish); +} -- cgit v0.10.2 From 2b28162cf65a6fe1c93d172675e4f2792792f17e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 08:01:56 +0000 Subject: arm64: KVM: HYP mode entry points Add the entry points for HYP mode (both for hypercalls and exception handling). 
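The host-to-HYP calling convention implemented by el1_sync below can be read off the code: the host issues an HVC with the target EL2 function address in x0 (converted with kern_hyp_va) and up to three arguments in x1-x3; x0 == 0 is the special __hyp_get_vectors case, which simply returns VBAR_EL2. A hypothetical host-side use of that convention, with placeholder names:

/* Hypothetical: invoke an EL2 function with two arguments via the HVC path */
ret = kvm_call_hyp(some_hyp_function, arg0, arg1);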
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 1a529f5..826032b 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S new file mode 100644 index 0000000..818731a --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + + .text + .pushsection .hyp.text, "ax" + +.macro save_x0_to_x3 + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! +.endm + +.macro restore_x0_to_x3 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + +el1_sync: // Guest trapped into EL2 + save_x0_to_x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_ELx_EC_SHIFT + + cmp x2, #ESR_ELx_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + restore_x0_to_x3 + + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: stp lr, xzr, [sp, #-16]! + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + ldp lr, xzr, [sp], #16 +2: eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq __fpsimd_guest_restore + + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 + and x2, x1, #ESR_ELx_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + stp x3, xzr, [sp, #-16]! + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. 
+ */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack + msr par_el1, x0 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str w1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __guest_exit + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: restore_x0_to_x3 + + eret + +el1_irq: + save_x0_to_x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __guest_exit + +.macro invalid_vector label, target = __kvm_hyp_panic + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid + invalid_vector el2t_irq_invalid + invalid_vector el2t_fiq_invalid + invalid_vector el2t_error_invalid + invalid_vector el2h_sync_invalid + invalid_vector el2h_irq_invalid + invalid_vector el2h_fiq_invalid + invalid_vector el2h_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + + .ltorg + + .align 11 + +ENTRY(__hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__hyp_vector) -- cgit v0.10.2 From 53fd5b6487e4438049a5da5e36dfb8edcf1fd789 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 15:21:52 +0000 Subject: arm64: KVM: Add panic handling Add the panic handler, together with the small bits of assembly code to call the kernel's panic implementation. 
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 818731a..8e58a3b 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -155,7 +155,16 @@ el1_irq: mov x1, #ARM_EXCEPTION_IRQ b __guest_exit -.macro invalid_vector label, target = __kvm_hyp_panic +ENTRY(__hyp_do_panic) + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret +ENDPROC(__hyp_do_panic) + +.macro invalid_vector label, target = __hyp_panic .align 2 \label: b \target diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 70d4f69..fb27517 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -84,6 +84,7 @@ static inline bool __fpsimd_enabled(void) } u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __noreturn __hyp_do_panic(unsigned long, ...); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 608155f..b012870 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -141,3 +141,33 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) return exit_code; } + +static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +void __hyp_text __noreturn __hyp_panic(void) +{ + unsigned long str_va = (unsigned long)__hyp_panic_string; + u64 spsr = read_sysreg(spsr_el2); + u64 elr = read_sysreg(elr_el2); + u64 par = read_sysreg(par_el1); + + if (read_sysreg(vttbr_el2)) { + struct kvm_vcpu *vcpu; + struct kvm_cpu_context *host_ctxt; + + vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state(host_ctxt); + } + + /* Call panic for real */ + __hyp_do_panic(hyp_kern_va(str_va), + spsr, elr, + read_sysreg(esr_el2), read_sysreg(far_el2), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); + + unreachable(); +} -- cgit v0.10.2 From 044ac37d1281fc7b59d5dce4fe979a99369e95f2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 13:58:00 +0000 Subject: arm64: KVM: Add compatibility aliases So far, we've implemented the new world switch with a completely different namespace, so that we could have both implementation compiled in. Let's take things one step further by adding weak aliases that have the same names as the original implementation. The weak attributes allows the new implementation to be overriden by the old one, and everything still work. At a later point, we'll be able to simply drop the old code, and everything will hopefully keep working, thanks to the aliases we have just added. This also saves us repainting all the callers. 
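The alias trick itself is plain GCC: the alias attribute makes a second symbol refer to an existing definition in the same translation unit, and weak lets any remaining strong definition of that symbol win at link time. A minimal, self-contained sketch with made-up names (the kernel spells these attributes as __alias() and __weak):

	/* The new implementation, living under its new name. */
	int new_impl(int x)
	{
		return x + 1;
	}

	/*
	 * Publish it under the legacy name as a weak alias.  While the old,
	 * strong definition of legacy_name() is still compiled in, the linker
	 * keeps picking that one; once the old code is deleted, the alias is
	 * the only definition left and callers never notice the change.
	 */
	int legacy_name(int x) __attribute__((weak, alias("new_impl")));
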
Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 7848322..d071f45 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -135,3 +135,6 @@ u32 __hyp_text __debug_read_mdcr_el2(void) { return read_sysreg(mdcr_el2); } + +__alias(__debug_read_mdcr_el2) +u32 __weak __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 8e58a3b..10d6d2a 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -189,6 +189,8 @@ ENDPROC(\label) .align 11 + .weak __kvm_hyp_vector +ENTRY(__kvm_hyp_vector) ENTRY(__hyp_vector) ventry el2t_sync_invalid // Synchronous EL2t ventry el2t_irq_invalid // IRQ EL2t @@ -210,3 +212,4 @@ ENTRY(__hyp_vector) ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__hyp_vector) +ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b012870..7457ae4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -142,6 +142,9 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) return exit_code; } +__alias(__guest_run) +int __weak __kvm_vcpu_run(struct kvm_vcpu *vcpu); + static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; void __hyp_text __noreturn __hyp_panic(void) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 6fcb93a..5f815cf 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -48,6 +48,9 @@ void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) write_sysreg(0, vttbr_el2); } +__alias(__tlb_flush_vmid_ipa) +void __weak __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + void __hyp_text __tlb_flush_vmid(struct kvm *kvm) { dsb(ishst); @@ -64,6 +67,9 @@ void __hyp_text __tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, vttbr_el2); } +__alias(__tlb_flush_vmid) +void __weak __kvm_tlb_flush_vmid(struct kvm *kvm); + void __hyp_text __tlb_flush_vm_context(void) { dsb(ishst); @@ -71,3 +77,6 @@ void __hyp_text __tlb_flush_vm_context(void) "ic ialluis ": : ); dsb(ish); } + +__alias(__tlb_flush_vm_context) +void __weak __kvm_flush_vm_context(void); diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 78d05f3..a769458 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -224,3 +224,6 @@ u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } + +__alias(__vgic_v3_read_ich_vtr_el2) +u64 __weak __vgic_v3_get_ich_vtr_el2(void); -- cgit v0.10.2 From 910917bb7db070cc67557a6b3c8fcceaa5c398a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Oct 2015 12:18:48 +0000 Subject: arm64: KVM: Map the kernel RO section into HYP In order to run C code in HYP, we must make sure that the kernel's RO section is mapped into HYP (otherwise things break badly). 
Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 8a79a57..6e35d1d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); @@ -1068,6 +1069,12 @@ static int init_hyp_mode(void) goto out_free_mappings; } + err = create_hyp_mappings(__start_rodata, __end_rodata); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_free_mappings; + } + /* * Map the Hyp stack pages */ -- cgit v0.10.2 From 1ea66d27e7b01086669ff2abdc3ac89dc90eae51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 15:51:41 +0000 Subject: arm64: KVM: Move away from the assembly version of the world switch This is it. We remove all of the code that has now been rewritten. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index d31e4e5..caee9ee 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -23,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 86c2898..0ccdcbb 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,910 +17,7 @@ #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - .align PAGE_SHIFT - -.macro save_common_regs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_XREG_OFFSET(19) - stp x19, x20, [x3] - stp x21, x22, [x3, #16] - stp x23, x24, [x3, #32] - stp x25, x26, [x3, #48] - stp x27, x28, [x3, #64] - stp x29, lr, [x3, #80] - - mrs x19, sp_el0 - mrs x20, elr_el2 // pc before entering el2 - mrs x21, spsr_el2 // pstate before entering el2 - - stp x19, x20, [x3, #96] - str x21, [x3, #112] - - mrs x22, sp_el1 - mrs x23, elr_el1 - mrs x24, spsr_el1 - - str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] -.endm - -.macro restore_common_regs - // x2: base address for cpu context - // x3: tmp register - - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] - - msr sp_el1, x22 - msr elr_el1, x23 - msr spsr_el1, x24 - - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 - ldp x19, x20, [x3] - ldr x21, [x3, #16] - - msr sp_el0, x19 - msr elr_el2, x20 // pc on return from el2 - msr spsr_el2, x21 // pstate on return from el2 - - add x3, x2, #CPU_XREG_OFFSET(19) - ldp x19, x20, [x3] - ldp x21, x22, [x3, #16] - ldp x23, x24, [x3, #32] - ldp x25, x26, [x3, #48] - ldp x27, x28, [x3, #64] - ldp x29, lr, [x3, #80] -.endm - -.macro save_host_regs - save_common_regs -.endm - -.macro 
restore_host_regs - restore_common_regs -.endm - -.macro save_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_save x3, 4 -.endm - -.macro restore_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_restore x3, 4 -.endm - -.macro save_guest_regs - // x0 is the vcpu address - // x1 is the return code, do not corrupt! - // x2 is the cpu context - // x3 is a tmp register - // Guest's x0-x3 are on the stack - - // Compute base to save registers - add x3, x2, #CPU_XREG_OFFSET(4) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - str x18, [x3, #112] - - pop x6, x7 // x2, x3 - pop x4, x5 // x0, x1 - - add x3, x2, #CPU_XREG_OFFSET(0) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - save_common_regs -.endm - -.macro restore_guest_regs - // x0 is the vcpu address. - // x2 is the cpu context - // x3 is a tmp register - - // Prepare x0-x3 for later restore - add x3, x2, #CPU_XREG_OFFSET(0) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - push x4, x5 // Push x0-x3 on the stack - push x6, x7 - - // x4-x18 - ldp x4, x5, [x3, #32] - ldp x6, x7, [x3, #48] - ldp x8, x9, [x3, #64] - ldp x10, x11, [x3, #80] - ldp x12, x13, [x3, #96] - ldp x14, x15, [x3, #112] - ldp x16, x17, [x3, #128] - ldr x18, [x3, #144] - - // x19-x29, lr, sp*, elr*, spsr* - restore_common_regs - - // Last bits of the 64bit state - pop x2, x3 - pop x0, x1 - - // Do not touch any register after this! -.endm - -/* - * Macros to perform system register save/restore. - * - * Ordering here is absolutely critical, and must be kept consistent - * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, - * and in kvm_asm.h. - * - * In other words, don't touch any of these unless you know what - * you are doing. 
- */ -.macro save_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - mrs x4, vmpidr_el2 - mrs x5, csselr_el1 - mrs x6, sctlr_el1 - mrs x7, actlr_el1 - mrs x8, cpacr_el1 - mrs x9, ttbr0_el1 - mrs x10, ttbr1_el1 - mrs x11, tcr_el1 - mrs x12, esr_el1 - mrs x13, afsr0_el1 - mrs x14, afsr1_el1 - mrs x15, far_el1 - mrs x16, mair_el1 - mrs x17, vbar_el1 - mrs x18, contextidr_el1 - mrs x19, tpidr_el0 - mrs x20, tpidrro_el0 - mrs x21, tpidr_el1 - mrs x22, amair_el1 - mrs x23, cntkctl_el1 - mrs x24, par_el1 - mrs x25, mdscr_el1 - - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - stp x18, x19, [x3, #112] - stp x20, x21, [x3, #128] - stp x22, x23, [x3, #144] - stp x24, x25, [x3, #160] -.endm - -.macro save_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - mrs x21, \type\()15_el1 - mrs x20, \type\()14_el1 - mrs x19, \type\()13_el1 - mrs x18, \type\()12_el1 - mrs x17, \type\()11_el1 - mrs x16, \type\()10_el1 - mrs x15, \type\()9_el1 - mrs x14, \type\()8_el1 - mrs x13, \type\()7_el1 - mrs x12, \type\()6_el1 - mrs x11, \type\()5_el1 - mrs x10, \type\()4_el1 - mrs x9, \type\()3_el1 - mrs x8, \type\()2_el1 - mrs x7, \type\()1_el1 - mrs x6, \type\()0_el1 - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - str x21, [x4, #(15 * 8)] - str x20, [x4, #(14 * 8)] - str x19, [x4, #(13 * 8)] - str x18, [x4, #(12 * 8)] - str x17, [x4, #(11 * 8)] - str x16, [x4, #(10 * 8)] - str x15, [x4, #(9 * 8)] - str x14, [x4, #(8 * 8)] - str x13, [x4, #(7 * 8)] - str x12, [x4, #(6 * 8)] - str x11, [x4, #(5 * 8)] - str x10, [x4, #(4 * 8)] - str x9, [x4, #(3 * 8)] - str x8, [x4, #(2 * 8)] - str x7, [x4, #(1 * 8)] - str x6, [x4, #(0 * 8)] -.endm - -.macro restore_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - ldp x8, x9, [x3, #32] - ldp x10, x11, [x3, #48] - ldp x12, x13, [x3, #64] - ldp x14, x15, [x3, #80] - ldp x16, x17, [x3, #96] - ldp x18, x19, [x3, #112] - ldp x20, x21, [x3, #128] - ldp x22, x23, [x3, #144] - ldp x24, x25, [x3, #160] - - msr vmpidr_el2, x4 - msr csselr_el1, x5 - msr sctlr_el1, x6 - msr actlr_el1, x7 - msr cpacr_el1, x8 - msr ttbr0_el1, x9 - msr ttbr1_el1, x10 - msr tcr_el1, x11 - msr esr_el1, x12 - msr afsr0_el1, x13 - msr afsr1_el1, x14 - msr far_el1, x15 - msr mair_el1, x16 - msr vbar_el1, x17 - msr contextidr_el1, x18 - msr tpidr_el0, x19 - msr tpidrro_el0, x20 - msr tpidr_el1, x21 - msr amair_el1, x22 - msr cntkctl_el1, x23 - msr par_el1, x24 - msr mdscr_el1, x25 -.endm - -.macro restore_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - ldr x21, [x4, #(15 * 8)] - ldr x20, [x4, #(14 * 8)] - ldr x19, [x4, #(13 * 8)] - ldr x18, [x4, #(12 * 8)] - ldr x17, [x4, #(11 * 8)] - ldr x16, [x4, #(10 * 8)] - ldr x15, [x4, #(9 * 8)] - ldr x14, [x4, #(8 * 8)] - ldr x13, [x4, #(7 * 8)] - ldr x12, [x4, #(6 * 8)] - ldr x11, [x4, #(5 * 8)] - ldr x10, [x4, #(4 * 8)] - ldr x9, [x4, #(3 * 8)] - ldr x8, [x4, #(2 * 8)] - ldr x7, [x4, #(1 * 8)] - ldr x6, [x4, #(0 * 8)] - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - msr \type\()15_el1, x21 - msr \type\()14_el1, x20 - msr \type\()13_el1, x19 - msr 
\type\()12_el1, x18 - msr \type\()11_el1, x17 - msr \type\()10_el1, x16 - msr \type\()9_el1, x15 - msr \type\()8_el1, x14 - msr \type\()7_el1, x13 - msr \type\()6_el1, x12 - msr \type\()5_el1, x11 - msr \type\()4_el1, x10 - msr \type\()3_el1, x9 - msr \type\()2_el1, x8 - msr \type\()1_el1, x7 - msr \type\()0_el1, x6 -.endm - -.macro skip_32bit_state tmp, target - // Skip 32bit state if not needed - mrs \tmp, hcr_el2 - tbnz \tmp, #HCR_RW_SHIFT, \target -.endm - -.macro skip_tee_state tmp, target - // Skip ThumbEE state if not needed - mrs \tmp, id_pfr0_el1 - tbz \tmp, #12, \target -.endm - -.macro skip_debug_state tmp, target - ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] - tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target -.endm - -/* - * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) - */ -.macro skip_fpsimd_state tmp, target - mrs \tmp, cptr_el2 - tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target -.endm - -.macro compute_debug_state target - // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY - // is set, we do a full save/restore cycle and disable trapping. - add x25, x0, #VCPU_CONTEXT - - // Check the state of MDSCR_EL1 - ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] - and x26, x25, #DBG_MDSCR_KDE - and x25, x25, #DBG_MDSCR_MDE - adds xzr, x25, x26 - b.eq 9998f // Nothing to see there - - // If any interesting bits was set, we must set the flag - mov x26, #KVM_ARM64_DEBUG_DIRTY - str x26, [x0, #VCPU_DEBUG_FLAGS] - b 9999f // Don't skip restore - -9998: - // Otherwise load the flags from memory in case we recently - // trapped - skip_debug_state x25, \target -9999: -.endm - -.macro save_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - mrs x4, spsr_abt - mrs x5, spsr_und - mrs x6, spsr_irq - mrs x7, spsr_fiq - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - mrs x4, dacr32_el2 - mrs x5, ifsr32_el2 - stp x4, x5, [x3] - - skip_fpsimd_state x8, 2f - mrs x6, fpexc32_el2 - str x6, [x3, #16] -2: - skip_debug_state x8, 1f - mrs x7, dbgvcr32_el2 - str x7, [x3, #24] -1: -.endm - -.macro restore_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - msr spsr_abt, x4 - msr spsr_und, x5 - msr spsr_irq, x6 - msr spsr_fiq, x7 - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - ldp x4, x5, [x3] - msr dacr32_el2, x4 - msr ifsr32_el2, x5 - - skip_debug_state x8, 1f - ldr x7, [x3, #24] - msr dbgvcr32_el2, x7 -1: -.endm - -.macro activate_traps - ldr x2, [x0, #VCPU_HCR_EL2] - - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 
- */ - tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state - mov x3, #(1 << 30) - msr fpexc32_el2, x3 - isb -99: - msr hcr_el2, x2 - mov x2, #CPTR_EL2_TTA - orr x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - - mov x2, #(1 << 15) // Trap CP15 Cr=15 - msr hstr_el2, x2 - - // Monitor Debug Config - see kvm_arm_setup_debug() - ldr x2, [x0, #VCPU_MDCR_EL2] - msr mdcr_el2, x2 -.endm - -.macro deactivate_traps - mov x2, #HCR_RW - msr hcr_el2, x2 - msr hstr_el2, xzr - - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - msr mdcr_el2, x2 -.endm - -.macro activate_vm - ldr x1, [x0, #VCPU_KVM] - kern_hyp_va x1 - ldr x2, [x1, #KVM_VTTBR] - msr vttbr_el2, x2 -.endm - -.macro deactivate_vm - msr vttbr_el2, xzr -.endm - -/* - * Call into the vgic backend for state saving - */ -.macro save_vgic_state -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __save_vgic_v2_state -alternative_else - bl __save_vgic_v3_state -alternative_endif - mrs x24, hcr_el2 - mov x25, #HCR_INT_OVERRIDE - neg x25, x25 - and x24, x24, x25 - msr hcr_el2, x24 -.endm - -/* - * Call into the vgic backend for state restoring - */ -.macro restore_vgic_state - mrs x24, hcr_el2 - ldr x25, [x0, #VCPU_IRQ_LINES] - orr x24, x24, #HCR_INT_OVERRIDE - orr x24, x24, x25 - msr hcr_el2, x24 -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __restore_vgic_v2_state -alternative_else - bl __restore_vgic_v3_state -alternative_endif -.endm - -.macro save_timer_state - // x0: vcpu pointer - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - mrs x3, cntv_ctl_el0 - and x3, x3, #3 - str w3, [x0, #VCPU_TIMER_CNTV_CTL] - - isb - - mrs x3, cntv_cval_el0 - str x3, [x0, #VCPU_TIMER_CNTV_CVAL] - -1: - // Disable the virtual timer - msr cntv_ctl_el0, xzr - - // Allow physical timer/counter access for the host - mrs x2, cnthctl_el2 - orr x2, x2, #3 - msr cnthctl_el2, x2 - - // Clear cntvoff for the host - msr cntvoff_el2, xzr -.endm - -.macro restore_timer_state - // x0: vcpu pointer - // Disallow physical timer access for the guest - // Physical counter access is allowed - mrs x2, cnthctl_el2 - orr x2, x2, #1 - bic x2, x2, #2 - msr cnthctl_el2, x2 - - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - ldr x3, [x2, #KVM_TIMER_CNTVOFF] - msr cntvoff_el2, x3 - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] - msr cntv_cval_el0, x2 - isb - - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] - and x2, x2, #3 - msr cntv_ctl_el0, x2 -1: -.endm - -__save_sysregs: - save_sysregs - ret - -__restore_sysregs: - restore_sysregs - ret - -/* Save debug state */ -__save_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - mov x5, x24 - add x4, x3, #DEBUG_BCR - save_debug dbgbcr - add x4, x3, #DEBUG_BVR - save_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - save_debug dbgwcr - add x4, x3, #DEBUG_WVR - save_debug dbgwvr - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - ret - -/* Restore debug state */ -__restore_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - 
- mov x5, x24 - add x4, x3, #DEBUG_BCR - restore_debug dbgbcr - add x4, x3, #DEBUG_BVR - restore_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - restore_debug dbgwcr - add x4, x3, #DEBUG_WVR - restore_debug dbgwvr - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 - - ret - -__save_fpsimd: - skip_fpsimd_state x3, 1f - save_fpsimd -1: ret - -__restore_fpsimd: - skip_fpsimd_state x3, 1f - restore_fpsimd -1: ret - -switch_to_guest_fpsimd: - push x4, lr - - mrs x2, cptr_el2 - bic x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - isb - - mrs x0, tpidr_el2 - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - bl __save_fpsimd - - add x2, x0, #VCPU_CONTEXT - bl __restore_fpsimd - - skip_32bit_state x3, 1f - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] - msr fpexc32_el2, x4 -1: - pop x4, lr - pop x2, x3 - pop x0, x1 - - eret - -/* - * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); - * - * This is the world switch. The first half of the function - * deals with entering the guest, and anything from __kvm_vcpu_return - * to the end of the function deals with reentering the host. - * On the enter path, only x0 (vcpu pointer) must be preserved until - * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception - * code) must both be preserved until the epilogue. - * In both cases, x2 points to the CPU context we're saving/restoring from/to. - */ -ENTRY(__kvm_vcpu_run) - kern_hyp_va x0 - msr tpidr_el2, x0 // Save the vcpu register - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - save_host_regs - bl __save_sysregs - - compute_debug_state 1f - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __save_debug -1: - activate_traps - activate_vm - - restore_vgic_state - restore_timer_state - - // Guest context - add x2, x0, #VCPU_CONTEXT - - // We must restore the 32-bit state before the sysregs, thanks - // to Cortex-A57 erratum #852523. - restore_guest_32bit_state - bl __restore_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __restore_debug -1: - restore_guest_regs - - // That's it, no more messing around. - eret - -__kvm_vcpu_return: - // Assume x0 is the vcpu pointer, x1 the return code - // Guest's x0-x3 are on the stack - - // Guest context - add x2, x0, #VCPU_CONTEXT - - save_guest_regs - bl __save_fpsimd - bl __save_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __save_debug -1: - save_guest_32bit_state - - save_timer_state - save_vgic_state - - deactivate_traps - deactivate_vm - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - bl __restore_fpsimd - /* Clear FPSIMD and Trace trapping */ - msr cptr_el2, xzr - - skip_debug_state x3, 1f - // Clear the dirty flag for the next run, as all the state has - // already been saved. Note that we nuke the whole 64bit word. - // If we ever add more flags, we'll have to be more careful... - str xzr, [x0, #VCPU_DEBUG_FLAGS] - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __restore_debug -1: - restore_host_regs - - mov x0, x1 - ret -END(__kvm_vcpu_run) - -// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -ENTRY(__kvm_tlb_flush_vmid_ipa) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... 
- */ - lsr x1, x1, #12 - tlbi ipas2e1is, x1 - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb ish - tlbi vmalle1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid_ipa) - -/** - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs - * @struct kvm *kvm - pointer to kvm structure - * - * Invalidates all Stage 1 and 2 TLB entries for current VMID. - */ -ENTRY(__kvm_tlb_flush_vmid) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - tlbi vmalls12e1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid) - -ENTRY(__kvm_flush_vm_context) - dsb ishst - tlbi alle1is - ic ialluis - dsb ish - ret -ENDPROC(__kvm_flush_vm_context) - -__kvm_hyp_panic: - // Stash PAR_EL1 before corrupting it in __restore_sysregs - mrs x0, par_el1 - push x0, xzr - - // Guess the context by looking at VTTBR: - // If zero, then we're already a host. - // Otherwise restore a minimal host context before panicing. - mrs x0, vttbr_el2 - cbz x0, 1f - - mrs x0, tpidr_el2 - - deactivate_traps - deactivate_vm - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - - /* - * Make sure we have a valid host stack, and don't leave junk in the - * frame pointer that will give us a misleading host stack unwinding. - */ - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - msr sp_el1, x22 - mov x29, xzr - -1: adr x0, __hyp_panic_str - adr x1, 2f - ldp x2, x3, [x1] - sub x0, x0, x2 - add x0, x0, x3 - mrs x1, spsr_el2 - mrs x2, elr_el2 - mrs x3, esr_el2 - mrs x4, far_el2 - mrs x5, hpfar_el2 - pop x6, xzr // active context PAR_EL1 - mrs x7, tpidr_el2 - - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - - .align 3 -2: .quad HYP_PAGE_OFFSET - .quad PAGE_OFFSET -ENDPROC(__kvm_hyp_panic) - -__hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" - - .align 2 /* * u64 kvm_call_hyp(void *hypfn, ...); @@ -934,7 +31,7 @@ __hyp_panic_str: * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the * function pointer can be passed). The function being called must be mapped * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 and r1. + * passed in x0. 
* * A function pointer with a value of 0 has a special meaning, and is * used to implement __hyp_get_vectors in the same way as in @@ -944,179 +41,3 @@ ENTRY(kvm_call_hyp) hvc #0 ret ENDPROC(kvm_call_hyp) - -.macro invalid_vector label, target - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid, __kvm_hyp_panic - invalid_vector el2t_irq_invalid, __kvm_hyp_panic - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic - invalid_vector el2t_error_invalid, __kvm_hyp_panic - invalid_vector el2h_sync_invalid, __kvm_hyp_panic - invalid_vector el2h_irq_invalid, __kvm_hyp_panic - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic - invalid_vector el2h_error_invalid, __kvm_hyp_panic - invalid_vector el1_sync_invalid, __kvm_hyp_panic - invalid_vector el1_irq_invalid, __kvm_hyp_panic - invalid_vector el1_fiq_invalid, __kvm_hyp_panic - invalid_vector el1_error_invalid, __kvm_hyp_panic - -el1_sync: // Guest trapped into EL2 - push x0, x1 - push x2, x3 - - mrs x1, esr_el2 - lsr x2, x1, #ESR_ELx_EC_SHIFT - - cmp x2, #ESR_ELx_EC_HVC64 - b.ne el1_trap - - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x3, el1_trap // called HVC - - /* Here, we're pretty sure the host called HVC. */ - pop x2, x3 - pop x0, x1 - - /* Check for __hyp_get_vectors */ - cbnz x0, 1f - mrs x0, vbar_el2 - b 2f - -1: push lr, xzr - - /* - * Compute the function address in EL2, and shuffle the parameters. - */ - kern_hyp_va x0 - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - - pop lr, xzr -2: eret - -el1_trap: - /* - * x1: ESR - * x2: ESR_EC - */ - - /* Guest accessed VFP/SIMD registers, save host, restore Guest */ - cmp x2, #ESR_ELx_EC_FP_ASIMD - b.eq switch_to_guest_fpsimd - - cmp x2, #ESR_ELx_EC_DABT_LOW - mov x0, #ESR_ELx_EC_IABT_LOW - ccmp x2, x0, #4, ne - b.ne 1f // Not an abort we care about - - /* This is an abort. Check for permission fault */ -alternative_if_not ARM64_WORKAROUND_834220 - and x2, x1, #ESR_ELx_FSC_TYPE - cmp x2, #FSC_PERM - b.ne 1f // Not a permission fault -alternative_else - nop // Use the permission fault path to - nop // check for a valid S1 translation, - nop // regardless of the ESR value. -alternative_endif - - /* - * Check for Stage-1 page table walk, which is guaranteed - * to give a valid HPFAR_EL2. - */ - tbnz x1, #7, 1f // S1PTW is set - - /* Preserve PAR_EL1 */ - mrs x3, par_el1 - push x3, xzr - - /* - * Permission fault, HPFAR_EL2 is invalid. - * Resolve the IPA the hard way using the guest VA. - * Stage-1 translation already validated the memory access rights. - * As such, we can use the EL1 translation regime, and don't have - * to distinguish between EL0 and EL1 access. - */ - mrs x2, far_el2 - at s1e1r, x2 - isb - - /* Read result */ - mrs x3, par_el1 - pop x0, xzr // Restore PAR_EL1 from the stack - msr par_el1, x0 - tbnz x3, #0, 3f // Bail out if we failed the translation - ubfx x3, x3, #12, #36 // Extract IPA - lsl x3, x3, #4 // and present it like HPFAR - b 2f - -1: mrs x3, hpfar_el2 - mrs x2, far_el2 - -2: mrs x0, tpidr_el2 - str w1, [x0, #VCPU_ESR_EL2] - str x2, [x0, #VCPU_FAR_EL2] - str x3, [x0, #VCPU_HPFAR_EL2] - - mov x1, #ARM_EXCEPTION_TRAP - b __kvm_vcpu_return - - /* - * Translation failed. Just return to the guest and - * let it fault again. Another CPU is probably playing - * behind our back. 
- */ -3: pop x2, x3 - pop x0, x1 - - eret - -el1_irq: - push x0, x1 - push x2, x3 - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_IRQ - b __kvm_vcpu_return - - .ltorg - - .align 11 - -ENTRY(__kvm_hyp_vector) - ventry el2t_sync_invalid // Synchronous EL2t - ventry el2t_irq_invalid // IRQ EL2t - ventry el2t_fiq_invalid // FIQ EL2t - ventry el2t_error_invalid // Error EL2t - - ventry el2h_sync_invalid // Synchronous EL2h - ventry el2h_irq_invalid // IRQ EL2h - ventry el2h_fiq_invalid // FIQ EL2h - ventry el2h_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync // Synchronous 32-bit EL1 - ventry el1_irq // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - - -ENTRY(__kvm_get_mdcr_el2) - mrs x0, mdcr_el2 - ret -ENDPROC(__kvm_get_mdcr_el2) - - .popsection diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S deleted file mode 100644 index 3f00071..0000000 --- a/arch/arm64/kvm/vgic-v2-switch.S +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.text, "ax" - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! 
- */ -ENTRY(__save_vgic_v2_state) -__save_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] -CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) -CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) -CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) -CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__save_vgic_v2_state) - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -ENTRY(__restore_vgic_v2_state) -__restore_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__restore_vgic_v2_state) - - .popsection diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S deleted file mode 100644 index 3c20730..0000000 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.text, "ax" - -/* - * We store LRs in reverse order to let the CPU deal with streaming - * access. Use this macro to make it look saner... 
- */ -#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -.macro save_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Make sure stores to the GIC via the memory mapped interface - // are now visible to the system register interface - dsb st - - // Save all interesting registers - mrs_s x5, ICH_VMCR_EL2 - mrs_s x6, ICH_MISR_EL2 - mrs_s x7, ICH_EISR_EL2 - mrs_s x8, ICH_ELSR_EL2 - - str w5, [x3, #VGIC_V3_CPU_VMCR] - str w6, [x3, #VGIC_V3_CPU_MISR] - str w7, [x3, #VGIC_V3_CPU_EISR] - str w8, [x3, #VGIC_V3_CPU_ELRSR] - - msr_s ICH_HCR_EL2, xzr - - mrs_s x21, ICH_VTR_EL2 - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - mrs_s x20, ICH_LR15_EL2 - mrs_s x19, ICH_LR14_EL2 - mrs_s x18, ICH_LR13_EL2 - mrs_s x17, ICH_LR12_EL2 - mrs_s x16, ICH_LR11_EL2 - mrs_s x15, ICH_LR10_EL2 - mrs_s x14, ICH_LR9_EL2 - mrs_s x13, ICH_LR8_EL2 - mrs_s x12, ICH_LR7_EL2 - mrs_s x11, ICH_LR6_EL2 - mrs_s x10, ICH_LR5_EL2 - mrs_s x9, ICH_LR4_EL2 - mrs_s x8, ICH_LR3_EL2 - mrs_s x7, ICH_LR2_EL2 - mrs_s x6, ICH_LR1_EL2 - mrs_s x5, ICH_LR0_EL2 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - str x20, [x3, #LR_OFFSET(15)] - str x19, [x3, #LR_OFFSET(14)] - str x18, [x3, #LR_OFFSET(13)] - str x17, [x3, #LR_OFFSET(12)] - str x16, [x3, #LR_OFFSET(11)] - str x15, [x3, #LR_OFFSET(10)] - str x14, [x3, #LR_OFFSET(9)] - str x13, [x3, #LR_OFFSET(8)] - str x12, [x3, #LR_OFFSET(7)] - str x11, [x3, #LR_OFFSET(6)] - str x10, [x3, #LR_OFFSET(5)] - str x9, [x3, #LR_OFFSET(4)] - str x8, [x3, #LR_OFFSET(3)] - str x7, [x3, #LR_OFFSET(2)] - str x6, [x3, #LR_OFFSET(1)] - str x5, [x3, #LR_OFFSET(0)] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP0R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs_s x19, ICH_AP0R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs_s x18, ICH_AP0R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs_s x17, ICH_AP0R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP0R] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP1R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs_s x19, ICH_AP1R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs_s x18, ICH_AP1R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs_s x17, ICH_AP1R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP1R] - - // Restore SRE_EL1 access and re-enable SRE at EL1. 
- mrs_s x5, ICC_SRE_EL2 - orr x5, x5, #ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 - isb - mov x5, #1 - msr_s ICC_SRE_EL1, x5 -.endm - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -.macro restore_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Restore all interesting registers - ldr w4, [x3, #VGIC_V3_CPU_HCR] - ldr w5, [x3, #VGIC_V3_CPU_VMCR] - ldr w25, [x3, #VGIC_V3_CPU_SRE] - - msr_s ICC_SRE_EL1, x25 - - // make sure SRE is valid before writing the other registers - isb - - msr_s ICH_HCR_EL2, x4 - msr_s ICH_VMCR_EL2, x5 - - mrs_s x21, ICH_VTR_EL2 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr_s ICH_AP1R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr_s ICH_AP1R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr_s ICH_AP1R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr_s ICH_AP1R0_EL2, x17 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr_s ICH_AP0R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr_s ICH_AP0R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr_s ICH_AP0R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr_s ICH_AP0R0_EL2, x17 - - and w22, w21, #0xf - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - ldr x20, [x3, #LR_OFFSET(15)] - ldr x19, [x3, #LR_OFFSET(14)] - ldr x18, [x3, #LR_OFFSET(13)] - ldr x17, [x3, #LR_OFFSET(12)] - ldr x16, [x3, #LR_OFFSET(11)] - ldr x15, [x3, #LR_OFFSET(10)] - ldr x14, [x3, #LR_OFFSET(9)] - ldr x13, [x3, #LR_OFFSET(8)] - ldr x12, [x3, #LR_OFFSET(7)] - ldr x11, [x3, #LR_OFFSET(6)] - ldr x10, [x3, #LR_OFFSET(5)] - ldr x9, [x3, #LR_OFFSET(4)] - ldr x8, [x3, #LR_OFFSET(3)] - ldr x7, [x3, #LR_OFFSET(2)] - ldr x6, [x3, #LR_OFFSET(1)] - ldr x5, [x3, #LR_OFFSET(0)] - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - msr_s ICH_LR15_EL2, x20 - msr_s ICH_LR14_EL2, x19 - msr_s ICH_LR13_EL2, x18 - msr_s ICH_LR12_EL2, x17 - msr_s ICH_LR11_EL2, x16 - msr_s ICH_LR10_EL2, x15 - msr_s ICH_LR9_EL2, x14 - msr_s ICH_LR8_EL2, x13 - msr_s ICH_LR7_EL2, x12 - msr_s ICH_LR6_EL2, x11 - msr_s ICH_LR5_EL2, x10 - msr_s ICH_LR4_EL2, x9 - msr_s ICH_LR3_EL2, x8 - msr_s ICH_LR2_EL2, x7 - msr_s ICH_LR1_EL2, x6 - msr_s ICH_LR0_EL2, x5 - - // Ensure that the above will have reached the - // (re)distributors. This ensure the guest will read - // the correct values from the memory-mapped interface. - isb - dsb sy - - // Prevent the guest from touching the GIC system registers - // if SRE isn't enabled for GICv3 emulation - cbnz x25, 1f - mrs_s x5, ICC_SRE_EL2 - and x5, x5, #~ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 -1: -.endm - -ENTRY(__save_vgic_v3_state) - save_vgic_v3_state - ret -ENDPROC(__save_vgic_v3_state) - -ENTRY(__restore_vgic_v3_state) - restore_vgic_v3_state - ret -ENDPROC(__restore_vgic_v3_state) - -ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs_s x0, ICH_VTR_EL2 - ret -ENDPROC(__vgic_v3_get_ich_vtr_el2) - - .popsection -- cgit v0.10.2 From 9d8415d6c148a16b6d906a96f0596851d7e4d607 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 19:57:11 +0000 Subject: arm64: KVM: Turn system register numbers to an enum Having the system register numbers as #defines has been a pain since day one, as the ordering is pretty fragile, and moving things around leads to renumbering and epic conflict resolutions. 
Now that we're mostly acessing the sysreg file in C, an enum is a much better type to use, and we can clean things up a bit. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5e37710..52b777b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,82 +20,6 @@ #include -/* - * 0 is reserved as an invalid value. - * Order *must* be kept in sync with the hyp switch code. - */ -#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ -#define CSSELR_EL1 2 /* Cache Size Selection Register */ -#define SCTLR_EL1 3 /* System Control Register */ -#define ACTLR_EL1 4 /* Auxiliary Control Register */ -#define CPACR_EL1 5 /* Coprocessor Access Control */ -#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ -#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ -#define TCR_EL1 8 /* Translation Control Register */ -#define ESR_EL1 9 /* Exception Syndrome Register */ -#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ -#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ -#define FAR_EL1 12 /* Fault Address Register */ -#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ -#define VBAR_EL1 14 /* Vector Base Address Register */ -#define CONTEXTIDR_EL1 15 /* Context ID Register */ -#define TPIDR_EL0 16 /* Thread ID, User R/W */ -#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ -#define TPIDR_EL1 18 /* Thread ID, Privileged */ -#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ -#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#define PAR_EL1 21 /* Physical Address Register */ -#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ - -/* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 24 /* Domain Access Control Register */ -#define IFSR32_EL2 25 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ -#define NR_SYS_REGS 28 - -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. 
*/ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 25a4021..3066328 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,7 +26,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 19504aa..689d4c9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -85,6 +84,86 @@ struct kvm_vcpu_fault_info { u64 hpfar_el2; /* Hyp IPA Fault Address Register */ }; +/* + * 0 is reserved as an invalid value. + * Order should be kept in sync with the save/restore code. 
+ */ +enum vcpu_sysreg { + __INVALID_SYSREG__, + MPIDR_EL1, /* MultiProcessor Affinity Register */ + CSSELR_EL1, /* Cache Size Selection Register */ + SCTLR_EL1, /* System Control Register */ + ACTLR_EL1, /* Auxiliary Control Register */ + CPACR_EL1, /* Coprocessor Access Control */ + TTBR0_EL1, /* Translation Table Base Register 0 */ + TTBR1_EL1, /* Translation Table Base Register 1 */ + TCR_EL1, /* Translation Control Register */ + ESR_EL1, /* Exception Syndrome Register */ + AFSR0_EL1, /* Auxilary Fault Status Register 0 */ + AFSR1_EL1, /* Auxilary Fault Status Register 1 */ + FAR_EL1, /* Fault Address Register */ + MAIR_EL1, /* Memory Attribute Indirection Register */ + VBAR_EL1, /* Vector Base Address Register */ + CONTEXTIDR_EL1, /* Context ID Register */ + TPIDR_EL0, /* Thread ID, User R/W */ + TPIDRRO_EL0, /* Thread ID, User R/O */ + TPIDR_EL1, /* Thread ID, Privileged */ + AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ + CNTKCTL_EL1, /* Timer Control Register (EL1) */ + PAR_EL1, /* Physical Address Register */ + MDSCR_EL1, /* Monitor Debug System Control Register */ + MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ + + /* 32bit specific registers. Keep them at the end of the range */ + DACR32_EL2, /* Domain Access Control Register */ + IFSR32_EL2, /* Instruction Fault Status Register */ + FPEXC32_EL2, /* Floating-Point Exception Control Register */ + DBGVCR32_EL2, /* Debug Vector Catch Register */ + + NR_SYS_REGS /* Nothing after this line! */ +}; + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. 
*/ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + +#define NR_COPRO_REGS (NR_SYS_REGS * 2) + struct kvm_cpu_context { struct kvm_regs gp_regs; union { diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 889c908..fe612a9 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -19,7 +19,6 @@ #define __ARM64_KVM_MMIO_H__ #include -#include #include /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 25de8b2..4b72231 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -112,6 +112,7 @@ int main(void) DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 115522b..fcb7788 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8bddae1..eba89e4 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index d071f45..567a0d6 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "hyp.h" diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 1050b2b..fd0fbe9 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -27,7 +27,6 @@ #define 
CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) .text .pushsection .hyp.text, "ax" @@ -150,7 +149,7 @@ ENTRY(__fpsimd_guest_restore) // Skip restoring fpexc32 for AArch64 guests mrs x1, hcr_el2 tbnz x1, #HCR_RW_SHIFT, 1f - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + ldr x4, [x3, #VCPU_FPEXC32_EL2] msr fpexc32_el2, x4 1: ldp x4, lr, [sp], #16 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 3603541..42563098 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "hyp.h" diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d2650e8..88adebf 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 3813d23..453eafd 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -28,6 +28,7 @@ #include #include +#include #include /* These are for GICv2 emulation only */ -- cgit v0.10.2 From 23a13465c84c51ec4330863b59e9d50ee671f8b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 20:03:08 +0000 Subject: arm64: KVM: Cleanup asm-offset.c As we've now rewritten most of our code-base in C, most of the KVM-specific code in asm-offset.c is useless. Delete-time again! Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 4b72231..94090a6 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -108,50 +108,11 @@ int main(void) DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); - DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); - DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); - DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); - DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); - DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); - DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); - DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); - DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); - DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); - DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); - DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); - DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); - DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); - DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); - DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); - DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); - DEFINE(KVM_TIMER_ENABLED, 
offsetof(struct kvm, arch.timer.enabled)); - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); - DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); - DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); - DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); - DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); - DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); - DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); - DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); - DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); - DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); - DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); - DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); - DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); - DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); - DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); - DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); - DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); - DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); - DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); -- cgit v0.10.2 From 3ffa75cd18134a03f86f9d9b8b6e9128e0eda254 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2015 09:10:07 +0000 Subject: arm64: KVM: Remove weak attributes As we've now switched to the new world switch implementation, remove the weak attributes, as nobody is supposed to override it anymore. 
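For readers unfamiliar with the aliasing pattern used in the hunks below, here is a minimal, hedged sketch of the idea (the names example_impl and example_public and the local __alias macro are invented for illustration; in the kernel the macro comes from the compiler attribute headers): the implementation is defined static under its internal name, and the public KVM symbol is declared as a GCC alias to it, so there is exactly one definition and nothing left for a weak symbol to override.

/* Illustrative only -- not part of the patch. */
#define __alias(sym) __attribute__((__alias__(#sym)))

static int example_impl(int v)
{
        /* The real body stays static and keeps its internal name. */
        return v + 1;
}

/* The public symbol is emitted as an alias of the static definition above. */
__alias(example_impl) int example_public(int v);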
Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 567a0d6..c9c1e97 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -132,10 +132,9 @@ void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; } -u32 __hyp_text __debug_read_mdcr_el2(void) +static u32 __hyp_text __debug_read_mdcr_el2(void) { return read_sysreg(mdcr_el2); } -__alias(__debug_read_mdcr_el2) -u32 __weak __kvm_get_mdcr_el2(void); +__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 10d6d2a..93e8d983 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -189,9 +189,7 @@ ENDPROC(\label) .align 11 - .weak __kvm_hyp_vector ENTRY(__kvm_hyp_vector) -ENTRY(__hyp_vector) ventry el2t_sync_invalid // Synchronous EL2t ventry el2t_irq_invalid // IRQ EL2t ventry el2t_fiq_invalid // FIQ EL2t @@ -211,5 +209,4 @@ ENTRY(__hyp_vector) ventry el1_irq // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__hyp_vector) ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 7457ae4..ca8f5a5 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -85,7 +85,7 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) __vgic_call_restore_state()(vcpu); } -int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -142,8 +142,7 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) return exit_code; } -__alias(__guest_run) -int __weak __kvm_vcpu_run(struct kvm_vcpu *vcpu); +__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 5f815cf..2a7e0d8 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -17,7 +17,7 @@ #include "hyp.h" -void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); @@ -48,10 +48,10 @@ void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) write_sysreg(0, vttbr_el2); } -__alias(__tlb_flush_vmid_ipa) -void __weak __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, + phys_addr_t ipa); -void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) { dsb(ishst); @@ -67,10 +67,9 @@ void __hyp_text __tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, vttbr_el2); } -__alias(__tlb_flush_vmid) -void __weak __kvm_tlb_flush_vmid(struct kvm *kvm); +__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); -void __hyp_text __tlb_flush_vm_context(void) +static void __hyp_text __tlb_flush_vm_context(void) { dsb(ishst); asm volatile("tlbi alle1is \n" @@ -78,5 +77,4 @@ void __hyp_text __tlb_flush_vm_context(void) dsb(ish); } -__alias(__tlb_flush_vm_context) -void __weak __kvm_flush_vm_context(void); +__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git 
a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index a769458..9142e08 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -220,10 +220,9 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) } } -u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } -__alias(__vgic_v3_read_ich_vtr_el2) -u64 __weak __vgic_v3_get_ich_vtr_el2(void); +__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void); -- cgit v0.10.2 From 7ec7c8c70b26de90f61be7523a6ad14df911219f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 2 Dec 2015 14:27:03 +0100 Subject: KVM: s390: use assignment instead of memcpy Replace two memcpy with proper assignment. Suggested-by: Paolo Bonzini Reviewed-by: David Hildenbrand Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6857262..6dec01d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2120,7 +2120,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2185,7 +2186,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; if (vcpu->arch.sie_block->icptcode > 0) { int rc = kvm_handle_sie_intercept(vcpu); -- cgit v0.10.2 From 8335713ad08caf2c3dfcb5bc2c93d7e0276142d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 8 Dec 2015 16:55:27 +0100 Subject: KVM: s390: obey kptr_restrict in traces The s390dbf and trace events provide a debugfs interface. If kptr_restrict is active, we should not expose kernel pointers. We can fence the debugfs output by using %pK instead of %p. 
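As a quick, hedged illustration of the %pK convention relied on below (the function name example_log_object is invented; the rest is standard printk behaviour): with kptr_restrict set to 1 or 2, %pK prints zeroes to readers that lack the required capability, whereas plain %p at this point in time printed the raw kernel address.

#include <linux/kernel.h>

/* Illustrative only -- not from the patch. */
static void example_log_object(const void *obj)
{
        /* %pK honours kptr_restrict; %p here would leak the raw address. */
        pr_info("object at %pK\n", obj);
}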
Cc: Kees Cook Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6dec01d..c14845c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1185,7 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: @@ -1245,7 +1245,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1349,7 +1349,8 @@ static int sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (%p -> %p)", old_sca, kvm->arch.sca); + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); return 0; } @@ -1624,7 +1625,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c6..396485b 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); -- cgit v0.10.2 From a3a92c31bf0b57ad0ca7f092a6f3a57168ba9ae2 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 1 Dec 2014 17:24:42 +0100 Subject: KVM: s390: fix mismatch between user and in-kernel guest limit While the userspace interface requests the maximum size the gmap code expects to get a maximum address. This error resulted in bigger page tables than necessary for some guest sizes, e.g. a 2GB guest used 3 levels instead of 2. At the same time we introduce KVM_S390_NO_MEM_LIMIT, which allows in a bright future that a guest spans the complete 64 bit address space. We also switch to TASK_MAX_SIZE for the initial memory size, this is a cosmetic change as the previous size also resulted in a 4 level pagetable creation. Reported-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 2d09d1e..f083a16 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -37,7 +37,8 @@ Returns: -EFAULT if the given address is not accessible Allows userspace to query the actual limit and set a new limit for the maximum guest memory size. The limit will be rounded up to 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by -the number of page table levels. 
+the number of page table levels. In the case that there is no limit we will set +the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). 2. GROUP: KVM_S390_VM_CPU_MODEL Architectures: s390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 12e9291..c831441 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -627,6 +627,7 @@ struct kvm_arch{ struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; struct gmap *gmap; + unsigned long mem_limit; int css_support; int use_irqchip; int use_cmma; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ef1a5fc..d2aea31 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_MEM_CLR_CMMA 1 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 +#define KVM_S390_NO_MEM_LIMIT U64_MAX + /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c14845c..8aa5e55 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -378,8 +378,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -431,9 +431,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -450,7 +458,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -1172,8 +1182,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + kvm->arch.mem_limit = TASK_MAX_SIZE; + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -2829,6 +2841,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc..63b0398 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct - * @limit: maximum size of the gmap address space + * @limit: maximum address 
of the gmap address space * * Returns a guest address space structure. */ @@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; -- cgit v0.10.2 From 32e6b236d26946eb076d1450bfb8f9978f15d6b9 Mon Sep 17 00:00:00 2001 From: Guenther Hutzl Date: Mon, 1 Dec 2014 17:24:42 +0100 Subject: KVM: s390: consider system MHA for guest storage Verify that the guest maximum storage address is below the MHA (maximum host address) value allowed on the host. Acked-by: Michael Holzheu Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Guenther Hutzl Signed-off-by: Dominik Dingel [adopt to match recent limit,size changes] Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8aa5e55..940e9ff 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1184,7 +1184,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.mem_limit = TASK_MAX_SIZE; + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index e0a1f4e..6804354 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -40,7 +40,8 @@ struct read_info_sccb { u8 fac85; /* 85 */ u8 _pad_86[91 - 86]; /* 86-90 */ u8 flags; /* 91 */ - u8 _pad_92[100 - 92]; /* 92-99 */ + u8 _pad_92[99 - 92]; /* 92-98 */ + u8 hamaxpow; /* 99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ u8 _pad_112[116 - 112]; /* 112-115 */ @@ -120,6 +121,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.rzm <<= 20; sclp.ibc = sccb->ibc; + if (sccb->hamaxpow && sccb->hamaxpow < 64) + sclp.hamax = (1UL << sccb->hamaxpow) - 1; + else + sclp.hamax = U64_MAX; + if (!sccb->hcpua) { if (MACHINE_IS_VM) sclp.max_cores = 64; -- cgit v0.10.2 From 7797dcf63f11b6e1d34822daf2317223d0f4ad46 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:13 +0300 Subject: drivers/hv: replace enum hv_message_type by u32 enum hv_message_type inside struct hv_message, hv_post_message is not size portable. Replace enum by u32. Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 3782636..ab3be44 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -174,7 +174,7 @@ union hv_message_flags { /* Define synthetic interrupt controller message header. 
*/ struct hv_message_header { - enum hv_message_type message_type; + u32 message_type; u8 payload_size; union hv_message_flags message_flags; u8 reserved[2]; @@ -347,7 +347,7 @@ enum hv_call_code { struct hv_input_post_message { union hv_connection_id connectionid; u32 reserved; - enum hv_message_type message_type; + u32 message_type; u32 payload_size; u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; }; -- cgit v0.10.2 From 4f39bcfd1c132522380138a323f9af7902766301 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:14 +0300 Subject: drivers/hv: Move HV_SYNIC_STIMER_COUNT into Hyper-V UAPI x86 header This constant is required for Hyper-V SynIC timers MSR's support by userspace(QEMU). Signed-off-by: Andrey Smetanin Acked-by: K. Y. Srinivasan Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 040d408..07981f0 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -269,4 +269,6 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) +#define HV_SYNIC_STIMER_COUNT (4) + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index ab3be44..bf01b11 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -102,8 +102,6 @@ enum hv_message_type { HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -#define HV_SYNIC_STIMER_COUNT (4) - /* Define invalid partition identifier. */ #define HV_PARTITION_ID_INVALID ((u64)0x0) -- cgit v0.10.2 From 5b423efe11e822e092e8c911a6bad17eadf718eb Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:15 +0300 Subject: drivers/hv: Move struct hv_message into UAPI Hyper-V x86 header This struct is required for Hyper-V SynIC timers implementation inside KVM and for upcoming Hyper-V VMBus support by userspace(QEMU). So place it into Hyper-V UAPI header. Signed-off-by: Andrey Smetanin Acked-by: K. Y. Srinivasan Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 07981f0..a41cdee 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -271,4 +271,80 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_STIMER_COUNT (4) +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. 
*/ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define synthetic interrupt controller message flags. */ +union hv_message_flags { + __u8 asu8; + struct { + __u8 msg_pending:1; + __u8 reserved:7; + }; +}; + +/* Define port identifier type. */ +union hv_port_id { + __u32 asu32; + struct { + __u32 id:24; + __u32 reserved:8; + } u; +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + __u32 message_type; + __u8 payload_size; + union hv_message_flags message_flags; + __u8 reserved[2]; + union { + __u64 sender; + union hv_port_id port; + }; +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u; +}; + +/* Define the synthetic interrupt message page layout. */ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index bf01b11..d9d5063 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -63,10 +63,6 @@ enum hv_cpuid_function { /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* Define synthetic interrupt controller flag constants. */ @@ -74,46 +70,9 @@ enum hv_cpuid_function { #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - /* Define invalid partition identifier. */ #define HV_PARTITION_ID_INVALID ((u64)0x0) -/* Define port identifier type. */ -union hv_port_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u ; -}; - /* Define port type. */ enum hv_port_type { HVPORT_MSG = 1, @@ -161,27 +120,6 @@ struct hv_connection_info { }; }; -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - u8 asu8; - struct { - u8 msg_pending:1; - u8 reserved:7; - }; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - u32 message_type; - u8 payload_size; - union hv_message_flags message_flags; - u8 reserved[2]; - union { - u64 sender; - union hv_port_id port; - }; -}; - /* * Timer configuration register. 
*/ @@ -207,22 +145,9 @@ struct hv_timer_message_payload { u64 delivery_time; /* When the message was delivered */ }; -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u ; -}; - /* Define the number of message buffers associated with each port. */ #define HV_PORT_MESSAGE_BUFFER_COUNT (16) -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -}; - /* Define the synthetic interrupt controller event flags format. */ union hv_synic_event_flags { u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; -- cgit v0.10.2 From c71acc4c74dddebbbbeede69fdd4f0b1a124f9df Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:16 +0300 Subject: drivers/hv: Move struct hv_timer_message_payload into UAPI Hyper-V x86 header This struct is required for Hyper-V SynIC timers implementation inside KVM and for upcoming Hyper-V VMBus support by userspace(QEMU). So place it into Hyper-V UAPI header. Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index a41cdee..2a5629e 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -347,4 +347,12 @@ struct hv_message_page { struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; }; +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + __u32 timer_index; + __u32 reserved; + __u64 expiration_time; /* When the timer expired */ + __u64 delivery_time; /* When the message was delivered */ +}; + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index d9d5063..678663e 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -136,15 +136,6 @@ union hv_timer_config { }; }; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - u32 timer_index; - u32 reserved; - u64 expiration_time; /* When the timer expired */ - u64 delivery_time; /* When the message was delivered */ -}; - /* Define the number of message buffers associated with each port. */ #define HV_PORT_MESSAGE_BUFFER_COUNT (16) -- cgit v0.10.2 From e18eaeed2b056094a5626288d47ceefc740c90e5 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:17 +0300 Subject: kvm/x86: Rearrange func's declarations inside Hyper-V header This rearrangement places functions declarations together according to their functionality, so future additions will be simplier. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 315af4b..9483d49 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -24,14 +24,6 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); -int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); -bool kvm_hv_hypercall_enabled(struct kvm *kvm); -int kvm_hv_hypercall(struct kvm_vcpu *vcpu); - -int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); -void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); - static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) { return &vcpu->arch.hyperv.synic; @@ -46,10 +38,18 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) arch = container_of(hv, struct kvm_vcpu_arch, hyperv); return container_of(arch, struct kvm_vcpu, arch); } -void kvm_hv_irq_routing_update(struct kvm *kvm); -void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); + +bool kvm_hv_hypercall_enabled(struct kvm *kvm); +int kvm_hv_hypercall(struct kvm_vcpu *vcpu); +void kvm_hv_irq_routing_update(struct kvm *kvm); +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); + #endif -- cgit v0.10.2 From 0ae80384b257b0a1ffa4e5d9eab2eb559bb063c5 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:18 +0300 Subject: kvm/x86: Added Hyper-V vcpu_to_hv_vcpu()/hv_vcpu_to_vcpu() helpers Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 9483d49..d5d8217 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -24,21 +24,29 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ -static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) { - return &vcpu->arch.hyperv.synic; + return &vcpu->arch.hyperv; } -static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu) { - struct kvm_vcpu_hv *hv; struct kvm_vcpu_arch *arch; - hv = container_of(synic, struct kvm_vcpu_hv, synic); - arch = container_of(hv, struct kvm_vcpu_arch, hyperv); + arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv); return container_of(arch, struct kvm_vcpu, arch); } +static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv.synic; +} + +static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +{ + return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); +} + int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); -- cgit v0.10.2 From 93bf4172481c4b2a8544c83a687946252563edd0 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:19 +0300 Subject: kvm/x86: Hyper-V internal helper to read MSR HV_X64_MSR_TIME_REF_COUNT This helper will be used also in Hyper-V SynIC timers implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 41869a9..9958926 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -335,6 +335,11 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic) } } +static u64 get_time_ref_counter(struct kvm *kvm) +{ + return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); +} + void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) { synic_init(vcpu_to_synic(vcpu)); @@ -576,11 +581,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = hv->hv_hypercall; break; - case HV_X64_MSR_TIME_REF_COUNT: { - data = - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + case HV_X64_MSR_TIME_REF_COUNT: + data = get_time_ref_counter(kvm); break; - } case HV_X64_MSR_REFERENCE_TSC: data = hv->hv_tsc_page; break; -- cgit v0.10.2 From 765eaa0f70eaa274ec8b815d8c210c20cf7b6dbc Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:20 +0300 Subject: kvm/x86: Hyper-V SynIC message slot pending clearing at SINT ack The SynIC message protocol mandates that the message slot is claimed by atomically setting message type to something other than HVMSG_NONE. If another message is to be delivered while the slot is still busy, message pending flag is asserted to indicate to the guest that the hypervisor wants to be notified when the slot is released. To make sure the protocol works regardless of where the message sources are (kernel or userspace), clear the pending flag on SINT ACK notification, and let the message sources compete for the slot again. 
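A simplified, hedged sketch of the slot protocol described above (example_post_message and the slot/src parameter names are invented, and the snippet assumes the usual kernel cmpxchg/memcpy helpers; struct hv_message, HVMSG_NONE and the msg_pending flag come from the UAPI header introduced earlier in this series, and the corresponding kernel code appears in the hunks that follow). The sender claims the slot by atomically moving message_type away from HVMSG_NONE; when the slot is busy it only raises msg_pending, and clearing that flag on SINT ACK is what lets the message sources compete for the slot again.

/* Illustrative only -- a sketch of the claim step, not the patch code. */
static int example_post_message(struct hv_message *slot,
                                const struct hv_message *src)
{
        /* Claim the slot: HVMSG_NONE -> real message type, atomically. */
        if (cmpxchg(&slot->header.message_type, HVMSG_NONE,
                    src->header.message_type) != HVMSG_NONE) {
                /* Busy: ask to be notified when the guest frees the slot. */
                slot->header.message_flags.msg_pending = 1;
                return -EAGAIN;
        }
        slot->header.payload_size = src->header.payload_size;
        memcpy(slot->u.payload, src->u.payload, src->header.payload_size);
        return 0;
}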
Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 9958926..6412b6b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -27,6 +27,7 @@ #include "hyperv.h" #include +#include #include #include @@ -116,13 +117,43 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) return (synic->active) ? synic : NULL; } +static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic, + u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *msg; + struct hv_message_page *msg_page; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) { + vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n", + gpa); + return; + } + msg_page = kmap_atomic(page); + + msg = &msg_page->sint_message[sint]; + msg->header.message_flags.msg_pending = 0; + + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); +} + static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) { struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); int gsi, idx; vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) + synic_clear_sint_msg_pending(synic, sint); + idx = srcu_read_lock(&kvm->irq_srcu); gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); if (gsi != -1) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 590c46e..f44c24b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -450,6 +450,8 @@ struct kvm { #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +#define vcpu_err(vcpu, fmt, ...) \ + kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { -- cgit v0.10.2 From 1f4b34f825e8cef6f493d06b46605384785b3d16 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:21 +0300 Subject: kvm/x86: Hyper-V SynIC timers Per Hyper-V specification (and as required by Hyper-V-aware guests), SynIC provides 4 per-vCPU timers. Each timer is programmed via a pair of MSRs, and signals expiration by delivering a special format message to the configured SynIC message slot and triggering the corresponding synthetic interrupt. Note: as implemented by this patch, all periodic timers are "lazy" (i.e. if the vCPU wasn't scheduled for more than the timer period the timer events are lost), regardless of the corresponding configuration MSR. If deemed necessary, the "catch up" mode (the timer period is shortened until the timer catches up) will be implemented later. Changes v2: * Use remainder to calculate periodic timer expiration time Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8140077..a7c8987 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -379,6 +379,17 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V SynIC timer */ +struct kvm_vcpu_hv_stimer { + struct hrtimer timer; + int index; + u64 config; + u64 count; + u64 exp_time; + struct hv_message msg; + bool msg_pending; +}; + /* Hyper-V synthetic interrupt controller (SynIC)*/ struct kvm_vcpu_hv_synic { u64 version; @@ -398,6 +409,8 @@ struct kvm_vcpu_hv { s64 runtime_offset; struct kvm_vcpu_hv_synic synic; struct kvm_hyperv_exit exit; + struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; + DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); }; struct kvm_vcpu_arch { diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 2a5629e..7956412 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -355,4 +355,10 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ }; +#define HV_STIMER_ENABLE (1ULL << 0) +#define HV_STIMER_PERIODIC (1ULL << 1) +#define HV_STIMER_LAZY (1ULL << 2) +#define HV_STIMER_AUTOENABLE (1ULL << 3) +#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) + #endif diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6412b6b..8ff8829 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -147,15 +147,32 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) { struct kvm *kvm = vcpu->kvm; struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); - int gsi, idx; + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + int gsi, idx, stimers_pending; vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) synic_clear_sint_msg_pending(synic, sint); + /* Try to deliver pending Hyper-V SynIC timers messages */ + stimers_pending = 0; + for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { + stimer = &hv_vcpu->stimer[idx]; + if (stimer->msg_pending && + (stimer->config & HV_STIMER_ENABLE) && + HV_STIMER_SINT(stimer->config) == sint) { + set_bit(stimer->index, + hv_vcpu->stimer_pending_bitmap); + stimers_pending++; + } + } + if (stimers_pending) + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + idx = srcu_read_lock(&kvm->irq_srcu); - gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); + gsi = atomic_read(&synic->sint_to_gsi[sint]); if (gsi != -1) kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); @@ -371,9 +388,268 @@ static u64 get_time_ref_counter(struct kvm *kvm) return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); } +static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, + bool vcpu_kick) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + set_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + if (vcpu_kick) + kvm_vcpu_kick(vcpu); +} + +static void stimer_stop(struct kvm_vcpu_hv_stimer *stimer) +{ + hrtimer_cancel(&stimer->timer); +} + +static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + stimer_stop(stimer); + clear_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + stimer->msg_pending = false; +} + +static enum hrtimer_restart stimer_timer_callback(struct 
hrtimer *timer) +{ + struct kvm_vcpu_hv_stimer *stimer; + + stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); + stimer_mark_expired(stimer, true); + + return HRTIMER_NORESTART; +} + +static void stimer_restart(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + u64 remainder; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + div64_u64_rem(time_now - stimer->exp_time, stimer->count, &remainder); + stimer->exp_time = time_now + (stimer->count - remainder); + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, + 100 * (stimer->exp_time - time_now)), + HRTIMER_MODE_ABS); +} + +static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + if (stimer->config & HV_STIMER_PERIODIC) { + if (stimer->count == 0) + return -EINVAL; + + stimer->exp_time = time_now + stimer->count; + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * stimer->count), + HRTIMER_MODE_ABS); + return 0; + } + stimer->exp_time = stimer->count; + if (time_now >= stimer->count) { + /* + * Expire timer according to Hypervisor Top-Level Functional + * specification v4(15.3.1): + * "If a one shot is enabled and the specified count is in + * the past, it will expire immediately." + */ + stimer_mark_expired(stimer, false); + return 0; + } + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), + HRTIMER_MODE_ABS); + return 0; +} + +static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, + bool host) +{ + if (stimer->count == 0 || HV_STIMER_SINT(config) == 0) + config &= ~HV_STIMER_ENABLE; + stimer->config = config; + stimer_cleanup(stimer); + if (stimer->config & HV_STIMER_ENABLE) + if (stimer_start(stimer)) + return 1; + return 0; +} + +static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, + bool host) +{ + stimer->count = count; + + stimer_cleanup(stimer); + if (stimer->count == 0) + stimer->config &= ~HV_STIMER_ENABLE; + else if (stimer->config & HV_STIMER_AUTOENABLE) { + stimer->config |= HV_STIMER_ENABLE; + if (stimer_start(stimer)) + return 1; + } + + return 0; +} + +static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) +{ + *pconfig = stimer->config; + return 0; +} + +static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) +{ + *pcount = stimer->count; + return 0; +} + +static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, + struct hv_message *src_msg) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *dst_msg; + int r; + struct hv_message_page *msg_page; + + if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) + return -ENOENT; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; + + msg_page = kmap_atomic(page); + dst_msg = &msg_page->sint_message[sint]; + if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE, + src_msg->header.message_type) != HVMSG_NONE) { + dst_msg->header.message_flags.msg_pending = 1; + r = -EAGAIN; + } else { + memcpy(&dst_msg->u.payload, &src_msg->u.payload, + src_msg->header.payload_size); + dst_msg->header.message_type = src_msg->header.message_type; + dst_msg->header.payload_size = src_msg->header.payload_size; + r = synic_set_irq(synic, sint); + if (r >= 1) + r = 0; + else if (r == 0) + r 
= -EFAULT; + } + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); + return r; +} + +static void stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + int r; + + stimer->msg_pending = true; + payload->expiration_time = stimer->exp_time; + payload->delivery_time = get_time_ref_counter(vcpu->kvm); + r = synic_deliver_msg(vcpu_to_synic(vcpu), + HV_STIMER_SINT(stimer->config), msg); + if (!r) + stimer->msg_pending = false; +} + +static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) +{ + stimer_send_msg(stimer); + if (!(stimer->config & HV_STIMER_PERIODIC)) + stimer->config |= ~HV_STIMER_ENABLE; + else + stimer_restart(stimer); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + u64 time_now; + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { + stimer = &hv_vcpu->stimer[i]; + stimer_stop(stimer); + if (stimer->config & HV_STIMER_ENABLE) { + time_now = get_time_ref_counter(vcpu->kvm); + if (time_now >= stimer->exp_time) + stimer_expiration(stimer); + } + } +} + +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_cleanup(&hv_vcpu->stimer[i]); +} + +static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + + memset(&msg->header, 0, sizeof(msg->header)); + msg->header.message_type = HVMSG_TIMER_EXPIRED; + msg->header.payload_size = sizeof(*payload); + + payload->timer_index = stimer->index; + payload->expiration_time = 0; + payload->delivery_time = 0; +} + +static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) +{ + memset(stimer, 0, sizeof(*stimer)); + stimer->index = timer_index; + hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + stimer->timer.function = stimer_timer_callback; + stimer_prepare_msg(stimer); +} + void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) { - synic_init(vcpu_to_synic(vcpu)); + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + synic_init(&hv_vcpu->synic); + + bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_init(&hv_vcpu->stimer[i], i); } int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) @@ -590,6 +866,24 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), + data, host); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), + data, host); + } default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -673,6 +967,24 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), + pdata); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), + pdata); + } default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index d5d8217..60eccd4 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -59,5 +59,29 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); + +static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, + int timer_index) +{ + return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index]; +} + +static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu_hv *hv_vcpu; + + hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv, + stimer[0]); + return hv_vcpu_to_vcpu(hv_vcpu); +} + +static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) +{ + return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap, + HV_SYNIC_STIMER_COUNT); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1d6501..b6102c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,6 +967,7 @@ static u32 emulated_msrs[] = { HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -2199,6 +2200,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2403,6 +2405,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -6489,6 +6492,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) + kvm_hv_process_stimers(vcpu); } /* @@ -7649,6 +7654,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { int idx; + kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); @@ -8043,6 +8049,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) kvm_cpu_has_interrupt(vcpu)) return true; + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + return false; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f44c24b..2969c47 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -144,6 +144,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_REQ_HV_RESET 29 #define KVM_REQ_HV_EXIT 30 +#define KVM_REQ_HV_STIMER 31 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v0.10.2 From 0bcf261cc86d082923082f79febe2d13c055f217 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 3 Dec 2015 13:29:34 +0800 Subject: KVM: VMX: fix the writing POSTED_INTR_NV POSTED_INTR_NV is 16bit, should not use 64bit write function [ 5311.676074] vmwrite error: reg 3 value 0 (err 12) [ 5311.680001] CPU: 49 PID: 4240 Comm: qemu-system-i38 Tainted: G I 4.1.13-WR8.0.0.0_standard #1 [ 5311.689343] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015 [ 5311.699550] 00000000 00000000 e69a7e1c c1950de1 00000000 e69a7e38 fafcff45 fafebd24 [ 5311.706924] 00000003 00000000 0000000c b6a06dfa e69a7e40 fafcff79 e69a7eb0 fafd5f57 [ 5311.714296] e69a7ec0 c1080600 00000000 00000001 c0e18018 000001be 00000000 00000b43 [ 5311.721651] Call Trace: [ 5311.722942] [] dump_stack+0x4b/0x75 [ 5311.726467] [] vmwrite_error+0x35/0x40 [kvm_intel] [ 5311.731444] [] vmcs_writel+0x29/0x30 [kvm_intel] [ 5311.736228] [] vmx_create_vcpu+0x337/0xb90 [kvm_intel] [ 5311.741600] [] ? dequeue_task_fair+0x2e0/0xf60 [ 5311.746197] [] kvm_arch_vcpu_create+0x3a/0x70 [kvm] [ 5311.751278] [] kvm_vm_ioctl+0x14d/0x640 [kvm] [ 5311.755771] [] ? free_pages_prepare+0x1a4/0x2d0 [ 5311.760455] [] ? debug_smp_processor_id+0x12/0x20 [ 5311.765333] [] ? sched_move_task+0xbe/0x170 [ 5311.769621] [] ? kmem_cache_free+0x213/0x230 [ 5311.774016] [] ? kvm_set_memory_region+0x60/0x60 [kvm] [ 5311.779379] [] do_vfs_ioctl+0x2e2/0x500 [ 5311.783285] [] ? kmem_cache_free+0x213/0x230 [ 5311.787677] [] ? __mmdrop+0x63/0xd0 [ 5311.791196] [] ? __mmdrop+0x63/0xd0 [ 5311.794712] [] ? __mmdrop+0x63/0xd0 [ 5311.798234] [] ? __fget+0x57/0x90 [ 5311.801559] [] ? 
__fget_light+0x22/0x50 [ 5311.805464] [] SyS_ioctl+0x80/0x90 [ 5311.808885] [] sysenter_do_call+0x12/0x12 [ 5312.059280] kvm: zapping shadow pages for mmio generation wraparound [ 5313.678415] kvm [4231]: vcpu0 disabled perfctr wrmsr: 0xc2 data 0xffff [ 5313.726518] kvm [4231]: vcpu0 unhandled rdmsr: 0x570 Signed-off-by: Li RongQing Cc: Yang Zhang Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1a8bfaa..c39737f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4776,7 +4776,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(GUEST_INTR_STATUS, 0); - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } @@ -9498,7 +9498,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) */ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, page_to_phys(vmx->nested.pi_desc_page) + (unsigned long)(vmcs12->posted_intr_desc_addr & -- cgit v0.10.2 From f35310546399eb77f03d37e760320e021f9a8568 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2015 15:49:56 +0100 Subject: KVM: VMX: fix read/write sizes of VMCS fields In theory this should have broken EPT on 32-bit kernels (due to reading the high part of natural-width field GUEST_CR3). Not sure if no one noticed or the processor behaves differently from the documentation. Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c39737f..b1af1e4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4868,7 +4868,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) seg_setup(VCPU_SREG_CS); vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_write32(GUEST_CS_BASE, 0xffff0000); + vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4904,7 +4904,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); setup_msrs(vmx); @@ -7893,7 +7893,7 @@ static void dump_vmcs(void) u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); u32 secondary_exec_control = 0; unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_readl(GUEST_IA32_EFER); + u64 efer = vmcs_read64(GUEST_IA32_EFER); int i, n; if (cpu_has_secondary_exec_ctrls()) @@ -10159,7 +10159,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Additionally, restore L2's PDPTR to vmcs12. */ if (enable_ept) { - vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); + vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); -- cgit v0.10.2 From 845c5b4054635f98eb6f1c783c0cc14b28772cb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2015 15:51:00 +0100 Subject: KVM: VMX: fix read/write sizes of VMCS fields in dump_vmcs This was not printing the high parts of several 64-bit fields on 32-bit kernels. Separate from the previous one to make the patches easier to review. 
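To make the width issue concrete, here is a hedged sketch of why the accessor choice matters (example_vmcs_read64 is an invented name; vmcs_readl is the existing helper in this file): on a 32-bit kernel unsigned long is 32 bits, so reading a 64-bit VMCS field through the natural-width accessor silently drops the high half, while a proper 64-bit read also fetches the companion "+1" high field.

/* Illustrative only -- mirrors the shape of the existing vmx.c helpers. */
static __always_inline u64 example_vmcs_read64(unsigned long field)
{
        u64 value = vmcs_readl(field);               /* only the low half on 32-bit */
#ifdef CONFIG_X86_32
        value |= (u64)vmcs_readl(field + 1) << 32;   /* fetch the high field too */
#endif
        return value;
}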
Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1af1e4..b1a453d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7909,10 +7909,10 @@ static void dump_vmcs(void) if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) { - pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); + pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); } pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); @@ -7933,16 +7933,16 @@ static void dump_vmcs(void) vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", - efer, vmcs_readl(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", - vmcs_readl(GUEST_IA32_DEBUGCTL), + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + efer, vmcs_read64(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", + vmcs_read64(GUEST_IA32_DEBUGCTL), vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); + pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); pr_err("Interruptibility = %08x ActivityState = %08x\n", vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), vmcs_read32(GUEST_ACTIVITY_STATE)); @@ -7971,11 +7971,12 @@ static void dump_vmcs(void) vmcs_read32(HOST_IA32_SYSENTER_CS), vmcs_readl(HOST_IA32_SYSENTER_EIP)); if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", - vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + vmcs_read64(HOST_IA32_EFER), + vmcs_read64(HOST_IA32_PAT)); if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); pr_err("*** Control State ***\n"); pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", @@ -7998,16 +7999,16 @@ static void dump_vmcs(void) pr_err("IDTVectoring: info=%08x errcode=%08x\n", vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016lx\n", - vmcs_readl(TSC_MULTIPLIER)); + pr_err("TSC Multiplier = 0x%016llx\n", + vmcs_read64(TSC_MULTIPLIER)); if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016lx\n", 
vmcs_readl(EPT_POINTER)); + pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); n = vmcs_read32(CR3_TARGET_COUNT); for (i = 0; i + 1 < n; i += 4) pr_err("CR3 target%u=%016lx target%u=%016lx\n", -- cgit v0.10.2 From 8a86aea920f1bb1cf7050e48112227a1c42cafe5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2015 15:56:55 +0100 Subject: KVM: vmx: detect mismatched size in VMCS read/write Signed-off-by: Paolo Bonzini --- I am sending this as RFC because the error messages it produces are very ugly. Because of inlining, the original line is lost. The alternative is to change vmcs_read/write/checkXX into macros, but then you need to have a single huge BUILD_BUG_ON or BUILD_BUG_ON_MSG because multiple BUILD_BUG_ON* with the same __LINE__ are not supported well. diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1a453d..62d958a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1447,7 +1447,51 @@ static inline void ept_sync_context(u64 eptp) } } -static __always_inline unsigned long vmcs_readl(unsigned long field) +static __always_inline void vmcs_check16(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "16-bit accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "16-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "16-bit accessor invalid for 32-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "16-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check32(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "32-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "32-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check64(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "64-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "64-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "64-bit accessor invalid for 32-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "64-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_checkl(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "Natural width accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "Natural width accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "Natural width accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "Natural width accessor invalid for 32-bit field"); +} + +static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; @@ -1458,23 +1502,32 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) static __always_inline u16 vmcs_read16(unsigned long field) { - return vmcs_readl(field); + vmcs_check16(field); + return __vmcs_readl(field); } static __always_inline u32 
vmcs_read32(unsigned long field) { - return vmcs_readl(field); + vmcs_check32(field); + return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { + vmcs_check64(field); #ifdef CONFIG_X86_64 - return vmcs_readl(field); + return __vmcs_readl(field); #else - return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); + return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); #endif } +static __always_inline unsigned long vmcs_readl(unsigned long field) +{ + vmcs_checkl(field); + return __vmcs_readl(field); +} + static noinline void vmwrite_error(unsigned long field, unsigned long value) { printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", @@ -1482,7 +1535,7 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value) dump_stack(); } -static void vmcs_writel(unsigned long field, unsigned long value) +static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { u8 error; @@ -1492,33 +1545,46 @@ static void vmcs_writel(unsigned long field, unsigned long value) vmwrite_error(field, value); } -static void vmcs_write16(unsigned long field, u16 value) +static __always_inline void vmcs_write16(unsigned long field, u16 value) { - vmcs_writel(field, value); + vmcs_check16(field); + __vmcs_writel(field, value); } -static void vmcs_write32(unsigned long field, u32 value) +static __always_inline void vmcs_write32(unsigned long field, u32 value) { - vmcs_writel(field, value); + vmcs_check32(field); + __vmcs_writel(field, value); } -static void vmcs_write64(unsigned long field, u64 value) +static __always_inline void vmcs_write64(unsigned long field, u64 value) { - vmcs_writel(field, value); + vmcs_check64(field); + __vmcs_writel(field, value); #ifndef CONFIG_X86_64 asm volatile (""); - vmcs_writel(field+1, value >> 32); + __vmcs_writel(field+1, value >> 32); #endif } -static void vmcs_clear_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_writel(unsigned long field, unsigned long value) +{ + vmcs_checkl(field); + __vmcs_writel(field, value); +} + +static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { - vmcs_writel(field, vmcs_readl(field) & ~mask); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_clear_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) & ~mask); } -static void vmcs_set_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) { - vmcs_writel(field, vmcs_readl(field) | mask); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_set_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) | mask); } static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) -- cgit v0.10.2 From 671d9ab38097fae45ff4f24562789b98b51d37ec Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Nov 2015 19:52:12 +0100 Subject: kvm: Dump guest rIP when the guest tried something unsupported It looks like this in action: kvm [5197]: vcpu0, guest rIP: 0xffffffff810187ba unhandled rdmsr: 0xc001102 and helps to pinpoint quickly where in the guest we did the unsupported thing. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2969c47..61c3e6c6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -447,7 +447,8 @@ struct kvm { /* The guest did something we don't support. 
*/ #define vcpu_unimpl(vcpu, fmt, ...) \ - kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) + kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \ + (vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__) #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) -- cgit v0.10.2 From 481d2bcc8454a44811db2bb68ac216fc6c5a23db Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 14 Dec 2015 18:33:05 +0300 Subject: kvm/x86: Remove Hyper-V SynIC timer stopping It's possible that guest send us Hyper-V EOM at the middle of Hyper-V SynIC timer running, so we start processing of Hyper-V SynIC timers in vcpu context and stop the Hyper-V SynIC timer unconditionally: host guest ------------------------------------------------------------------------------ start periodic stimer start periodic timer timer expires after 15ms send expiration message into guest restart periodic timer timer expires again after 15 ms msg slot is still not cleared so setup ->msg_pending (1) restart periodic timer process timer msg and clear slot ->msg_pending was set: send EOM into host received EOM kvm_make_request(KVM_REQ_HV_STIMER) kvm_hv_process_stimers(): ... stimer_stop() if (time_now >= stimer->exp_time) stimer_expiration(stimer); Because the timer was rearmed at (1), time_now < stimer->exp_time and stimer_expiration is not called. The timer then never fires. The patch fixes such situation by not stopping Hyper-V SynIC timer at all, because it's safe to restart it without stop in vcpu context and timer callback always returns HRTIMER_NORESTART. Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 8ff8829..f34f666 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -598,7 +598,6 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { stimer = &hv_vcpu->stimer[i]; - stimer_stop(stimer); if (stimer->config & HV_STIMER_ENABLE) { time_now = get_time_ref_counter(vcpu->kvm); if (time_now >= stimer->exp_time) -- cgit v0.10.2 From e078ef81514222ffc10bf1767c15df16ca0b84db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 14 Dec 2015 17:58:33 +0000 Subject: ARM: KVM: Cleanup exception injection David Binderman reported that the exception injection code had a couple of unused variables lingering around. Upon examination, it looked like this code could do with an anticipated spring cleaning, which amounts to deduplicating the CPSR/SPSR update, and making it look a bit more like the architecture spec. The spurious variables are removed in the process. Reported-by: David Binderman Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d6c0052..dc99159 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -275,6 +275,40 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) return vbar; } +/* + * Switch to an exception mode, updating both CPSR and SPSR. Follow + * the logic described in AArch32.EnterMode() from the ARMv8 ARM. 
+ */ +static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; + + *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; + + switch (mode) { + case FIQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_F_BIT; + /* Fall through */ + case ABT_MODE: + case IRQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_A_BIT; + /* Fall through */ + default: + *vcpu_cpsr(vcpu) |= PSR_I_BIT; + } + + *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); + + if (sctlr & SCTLR_TE) + *vcpu_cpsr(vcpu) |= PSR_T_BIT; + if (sctlr & SCTLR_EE) + *vcpu_cpsr(vcpu) |= PSR_E_BIT; + + /* Note: These now point to the mode banked copies */ + *vcpu_spsr(vcpu) = cpsr; +} + /** * kvm_inject_undefined - inject an undefined exception into the guest * @vcpu: The VCPU to receive the undefined exception @@ -286,29 +320,13 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { - unsigned long new_lr_value; - unsigned long new_spsr_value; unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset = 4; u32 return_offset = (is_thumb) ? 2 : 4; - new_spsr_value = cpsr; - new_lr_value = *vcpu_pc(vcpu) - return_offset; - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE; - *vcpu_cpsr(vcpu) |= PSR_I_BIT; - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to UND banked copies */ - *vcpu_spsr(vcpu) = cpsr; - *vcpu_reg(vcpu, 14) = new_lr_value; + kvm_update_psr(vcpu, UND_MODE); + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset; /* Branch to exception vector */ *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; @@ -320,30 +338,14 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) */ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - unsigned long new_lr_value; - unsigned long new_spsr_value; unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset; u32 return_offset = (is_thumb) ? 4 : 0; bool is_lpae; - new_spsr_value = cpsr; - new_lr_value = *vcpu_pc(vcpu) + return_offset; - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE; - *vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT; - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to ABT banked copies */ - *vcpu_spsr(vcpu) = cpsr; - *vcpu_reg(vcpu, 14) = new_lr_value; + kvm_update_psr(vcpu, ABT_MODE); + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; if (is_pabt) vect_offset = 12; -- cgit v0.10.2 From 281243cbe075d27ab884858d6e0b15d8ed61bc25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Dec 2015 15:41:12 +0000 Subject: arm64: KVM: debug: Remove spurious inline attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The debug trapping code is pretty heavy on the "inline" attribute, but most functions are actually referenced in the sysreg tables, making the inlining imposible. Removing the useless inline qualifier seems the right thing to do, having verified that the output code is similar. 
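For context, the "inline" keyword is only a hint, and it becomes moot once a function's address is stored in a descriptor table: the compiler must emit an out-of-line body for the table entry, and calls made through the table are indirect. A small stand-alone sketch of that situation (hypothetical names, not the kernel's sys_reg_desc machinery):

/*
 * Sketch: a handler referenced from a table needs an out-of-line body,
 * so marking it "inline" does nothing useful for those call sites.
 */
#include <stdio.h>

struct desc {
	int (*handler)(int reg);
};

static inline int trap_example(int reg)		/* the inline hint is moot here */
{
	return reg + 1;
}

/* Taking the address forces an out-of-line copy of trap_example(). */
static const struct desc table[] = {
	{ .handler = trap_example },
};

int main(void)
{
	printf("%d\n", table[0].handler(41));	/* indirect call, prints 42 */
	return 0;
}
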
Cc: Alex Bennée Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 88adebf..eec3598 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -220,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the * hyp.S code switches between host and guest values in future. */ -static inline void reg_to_dbg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void reg_to_dbg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { u64 val = p->regval; @@ -235,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } -static inline void dbg_to_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void dbg_to_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { p->regval = *dbg_reg; if (p->is_32bit) p->regval &= 0xffffffffUL; } -static inline bool trap_bvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -280,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; } -static inline bool trap_bcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; @@ -323,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; } -static inline bool trap_wvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; @@ -366,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; } -static inline bool trap_wcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; @@ -408,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; } @@ -723,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu 
*vcpu, * system is in. */ -static inline bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_xvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; -- cgit v0.10.2 From 9d4dc688342a3cbda43a1789cd2c6c888658c60d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Nov 2015 11:28:16 +0000 Subject: arm/arm64: KVM: Remove unreferenced S2_PGD_ORDER Since commit a987370 ("arm64: KVM: Fix stage-2 PGD allocation to have per-page refcounting") there is no reference to S2_PGD_ORDER, so kill it for the good. Acked-by: Christoffer Dall Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index dc641dd..b05bb5a 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -135,7 +135,6 @@ #define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 61d96a6..22f7fa0 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -656,9 +656,9 @@ static void *kvm_alloc_hwpgd(void) * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * - * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can - * support either full 40-bit input addresses or limited to 32-bit input - * addresses). Clears the allocated pages. + * Allocates only the stage-2 HW PGD level table(s) (can support either full + * 40-bit input addresses or limited to 32-bit input addresses). Clears the + * allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6150567..54cba80 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -158,7 +158,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) #endif #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) -- cgit v0.10.2 From 8420dcd37ef34040c8fc5a27bf66887b3b2faf80 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Nov 2015 11:28:17 +0000 Subject: arm: KVM: Make kvm_arm.h friendly to assembly code kvm_arm.h is included from both C code and assembly code; however some definitions in this header supplied with U/UL/ULL suffixes which might confuse assembly once they got evaluated. We have _AC macro for such cases, so just wrap problem places with it. Acked-by: Christoffer Dall Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index b05bb5a..01d4d7a 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_ARM_H__ #define __ARM_KVM_ARM_H__ +#include #include /* Hyp Configuration Register (HCR) bits */ @@ -132,9 +133,9 @@ * space. 
*/ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) -#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) +#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) @@ -161,17 +162,17 @@ #define VTTBR_X (5 - KVM_T0SZ) #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT _AC(48, ULL) +#define VTTBR_VMID_MASK (_AC(0xff, ULL) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) -#define HSR_EC (0x3fU << HSR_EC_SHIFT) -#define HSR_IL (1U << 25) +#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT) +#define HSR_IL (_AC(1, UL) << 25) #define HSR_ISS (HSR_IL - 1) #define HSR_ISV_SHIFT (24) -#define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) #define HSR_FSC (0x3f) @@ -179,9 +180,9 @@ #define HSR_SSE (1 << 21) #define HSR_WNR (1 << 6) #define HSR_CV_SHIFT (24) -#define HSR_CV (1U << HSR_CV_SHIFT) +#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT) #define HSR_COND_SHIFT (20) -#define HSR_COND (0xfU << HSR_COND_SHIFT) +#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) @@ -209,13 +210,13 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) -#define HSR_WFI_IS_WFE (1U << 0) +#define HSR_WFI_IS_WFE (_AC(1, UL) << 0) -#define HSR_HVC_IMM_MASK ((1UL << 16) - 1) +#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1) -#define HSR_DABT_S1PTW (1U << 7) -#define HSR_DABT_CM (1U << 8) -#define HSR_DABT_EA (1U << 9) +#define HSR_DABT_S1PTW (_AC(1, UL) << 7) +#define HSR_DABT_CM (_AC(1, UL) << 8) +#define HSR_DABT_EA (_AC(1, UL) << 9) #define kvm_arm_exception_type \ {0, "RESET" }, \ -- cgit v0.10.2 From 20475f784d29991b3b843c80c38a36f2ebb35ac4 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Nov 2015 11:28:18 +0000 Subject: arm64: KVM: Add support for 16-bit VMID The ARMv8.1 architecture extension allows to choose between 8-bit and 16-bit of VMID, so use this capability for KVM. 
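As a quick illustration of the parameterised mask: the VMID occupies VTTBR bits 48 and up, so VTTBR_VMID_MASK(size) is just (2^size - 1) shifted to bit 48, and an over-wide VMID is silently truncated by the AND. A stand-alone sketch of the packing (the constants mirror the patch, but this is a model, not the kernel's update_vttbr()):

#include <stdint.h>
#include <stdio.h>

#define VTTBR_VMID_SHIFT	48
#define VTTBR_VMID_MASK(bits)	((((uint64_t)1 << (bits)) - 1) << VTTBR_VMID_SHIFT)

/* Pack a stage-2 PGD physical address and a VMID into a VTTBR value. */
static uint64_t make_vttbr(uint64_t pgd_phys, uint32_t vmid, unsigned bits)
{
	uint64_t v = ((uint64_t)vmid << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(bits);

	return pgd_phys | v;
}

int main(void)
{
	/* With 8-bit VMIDs, VMID 0x1ff is truncated to 0xff by the mask. */
	printf("8-bit : %#llx\n", (unsigned long long)make_vttbr(0x40000000, 0x1ff, 8));
	printf("16-bit: %#llx\n", (unsigned long long)make_vttbr(0x40000000, 0x1ff, 16));
	return 0;
}
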
Reviewed-by: Christoffer Dall Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 01d4d7a..e22089f 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -164,7 +164,7 @@ #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT _AC(48, ULL) -#define VTTBR_VMID_MASK (_AC(0xff, ULL) << VTTBR_VMID_SHIFT) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 405aa18..9203c21 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -279,6 +279,11 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *merged_hyp_pgd, unsigned long hyp_idmap_start) { } +static inline unsigned int kvm_get_vmid_bits(void) +{ + return 8; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 6e35d1d..f6bcc2e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -59,7 +59,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); -static u8 kvm_next_vmid; +static u32 kvm_next_vmid; +static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -434,11 +435,12 @@ static void update_vttbr(struct kvm *kvm) kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); kvm->arch.vmid = kvm_next_vmid; kvm_next_vmid++; + kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; + vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); @@ -1135,6 +1137,10 @@ static int init_hyp_mode(void) kvm_perf_init(); + /* set size of VMID supported by CPU */ + kvm_vmid_bits = kvm_get_vmid_bits(); + kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_info("Hyp mode initialized successfully\n"); return 0; diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5e6857b..738a95f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -125,6 +125,7 @@ #define VTCR_EL2_SL0_LVL1 (1 << 6) #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +#define VTCR_EL2_VS 19 /* * We configure the Stage-2 page tables to always restrict the IPA space to be @@ -169,7 +170,7 @@ #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (UL(48)) -#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ #define HSTR_EL2_T(x) (1 << x) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 54cba80..0bf8b43 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -20,6 +20,7 @@ #include #include +#include /* * As we only have the TTBR0_EL2 register, we cannot express @@ -301,5 +302,12 @@ static 
inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); } +static inline unsigned int kvm_get_vmid_bits(void) +{ + int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + + return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 178ba22..3e568dc 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -94,6 +94,15 @@ __do_hyp_init: */ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 + /* + * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR1_EL1 + ubfx x5, x5, #5, #1 + lsl x5, x5, #VTCR_EL2_VS + orr x4, x4, x5 + msr vtcr_el2, x4 mrs x4, mair_el1 -- cgit v0.10.2 From 1b1ebe820fcb446146dfb2d04a1f0b7905645f75 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Fri, 18 Dec 2015 15:51:44 +0800 Subject: MAINTAINERS: add git URL for KVM/ARM Acked-by: Christoffer Dall Signed-off-by: Fengguang Wu Signed-off-by: Marc Zyngier diff --git a/MAINTAINERS b/MAINTAINERS index 9bff63c..8e92b45 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6102,6 +6102,7 @@ M: Marc Zyngier L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu W: http://systems.cs.columbia.edu/projects/kvm-arm +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git S: Supported F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* -- cgit v0.10.2 From c7da6fa43cb1c5e649da0f478a491feb9208cae7 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Fri, 18 Dec 2015 14:38:43 +0300 Subject: arm/arm64: KVM: Detect vGIC presence at runtime Before commit 662d9715840aef44dcb573b0f9fab9e8319c868a ("arm/arm64: KVM: Kill CONFIG_KVM_ARM_{VGIC,TIMER}") is was possible to compile the kernel without vGIC and vTimer support. Commit message says about possibility to detect vGIC support in runtime, but this has never been implemented. This patch introduces runtime check, restoring the lost functionality. It again allows to use KVM on hardware without vGIC. Interrupt controller has to be emulated in userspace in this case. -ENODEV return code from probe function means there's no GIC at all. -ENXIO happens when, for example, there is GIC node in the device tree, but it does not specify vGIC resources. Any other error code is still treated as full stop because it might mean some really serious problems. Signed-off-by: Pavel Fedin Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f6bcc2e..dda1959 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -63,6 +63,8 @@ static u32 kvm_next_vmid; static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -134,7 +136,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); + kvm->arch.max_vcpus = vgic_present ? 
+ kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; return ret; out_free_stage2_pgd: @@ -174,6 +177,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: + r = vgic_present; + break; case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: @@ -917,6 +922,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -931,6 +938,8 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { + if (!vgic_present) + return -ENXIO; return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { @@ -1121,8 +1130,17 @@ static int init_hyp_mode(void) * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); - if (err) + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + break; + default: goto out_free_context; + } /* * Init HYP architected timer support -- cgit v0.10.2 From 774926641d1968a4839da3a6ac79d914742aac2f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 18 Dec 2015 18:54:49 +0900 Subject: KVM: x86: MMU: Use clear_page() instead of init_shadow_page_table() Not just in order to clean up the code, but to make it faster by using enhanced instructions: the initialization became 20-30% faster on our testing machine. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a1a3d19..7f5a82b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2041,14 +2041,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, } } -static void init_shadow_page_table(struct kvm_mmu_page *sp) -{ - int i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - sp->spt[i] = 0ull; -} - static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { sp->write_flooding_count = 0; @@ -2128,7 +2120,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, account_shadowed(vcpu->kvm, sp); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; - init_shadow_page_table(sp); + clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); return sp; } -- cgit v0.10.2 From 0af2593b2ad125880a78d1bba966e450cc2330df Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 30 Dec 2015 08:26:17 -0800 Subject: kvm: x86: fix comment about {mmu,nested_mmu}.gva_to_gpa The comment had the meaning of mmu.gva_to_gpa and nested_mmu.gva_to_gpa swapped. Fix that, and also add some details describing how each translation works. Signed-off-by: David Matlack Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7f5a82b..420a5ca 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4024,10 +4024,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->inject_page_fault = kvm_inject_page_fault; /* - * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The - * translation of l2_gpa to l1_gpa addresses is done using the - * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa - * functions between mmu and nested_mmu are swapped. + * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using + * L1's nested page tables (e.g. EPT12). The nested translation + * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using + * L2's page tables as the first level of translation and L1's + * nested page tables as the second level of translation. 
Basically + * the gva_to_gpa functions between mmu and nested_mmu are swapped. */ if (!is_paging(vcpu)) { g_context->nx = false; -- cgit v0.10.2 From c57ee5faf4503b0cd586c3af663262a3d5599fc1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 5 Jan 2016 18:20:24 +0200 Subject: kvm/s390: drop unpaired smp_mb smp_mb on vcpu destroy isn't paired with anything, violating pairing rules, and seems to be useless. Drop it. Signed-off-by: Michael S. Tsirkin Message-Id: <1452010811-25486-1-git-send-email-mst@redhat.com> Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 940e9ff..9f8eea3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1221,7 +1221,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_is_ucontrol(vcpu->kvm)) sca_del_vcpu(vcpu); - smp_mb(); if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); -- cgit v0.10.2 From c6e5f166373a20a433c38a34bd16f2b62bd0864c Mon Sep 17 00:00:00 2001 From: Fan Zhang Date: Thu, 7 Jan 2016 18:24:29 +0800 Subject: KVM: s390: implement the RI support of guest This patch adds runtime instrumentation support for KVM guest. We need to setup a save area for the runtime instrumentation-controls control block(RICCB) and implement the necessary interfaces to live migrate the guest settings. We setup the sie control block in a way, that the runtime instrumentation instructions of a guest are handled by hardware. We also add a capability KVM_CAP_S390_RI to make this feature opt-in as it needs migration support. Signed-off-by: Fan Zhang Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c831441..df0acff 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -217,7 +217,8 @@ struct kvm_s390_sie_block { __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ - __u8 reserved1f0[16]; /* 0x01f0 */ + __u64 riccbd; /* 0x01f0 */ + __u8 reserved1f8[8]; /* 0x01f8 */ } __attribute__((packed)); struct kvm_s390_itdb { diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index d2aea31..fe84bd5 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -153,6 +153,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) +#define KVM_SYNC_RICCB (1UL << 7) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -170,6 +171,8 @@ struct kvm_sync_regs { __u64 vrs[32][2]; /* vector registers */ __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* only valid with vector registers */ + __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 riccb[64]; /* runtime instrumentation controls block */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9f8eea3..5927c61 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -258,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VECTOR_REGISTERS: r = MACHINE_HAS_VX; break; + case KVM_CAP_S390_RI: + r = test_facility(64); + break; default: r = 0; } @@ -358,6 +361,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 
r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_RI: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(64)) { + set_kvm_facility(kvm->arch.model.fac->mask, 64); + set_kvm_facility(kvm->arch.model.fac->list, 64); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -1395,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; if (test_kvm_facility(vcpu->kvm, 129)) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; @@ -1578,10 +1597,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->arch.sie_block->ecb3 |= 0x01; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6e32f75..9da9051 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -849,6 +849,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_SPLIT_IRQCHIP 121 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 #define KVM_CAP_HYPERV_SYNIC 123 +#define KVM_CAP_S390_RI 124 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 1dab1345d885819a3a0d5d08ce0b5c2e12c65343 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Wed, 30 Dec 2015 13:08:46 -0500 Subject: kvm: x86: Check kvm_write_guest return value in kvm_write_wall_clock This makes sure the wall clock is updated only after an odd version value is successfully written to guest memory. Signed-off-by: Nicholas Krause Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b6102c1..102c302 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1169,7 +1169,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ++version; - kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) + return; /* * The guest calculates current wall clock time by adding -- cgit v0.10.2 From 6c71f8ae155422a030b4c382cb985dde006ccc3f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 14:53:46 +0100 Subject: KVM: Remove unused KVM_REQ_KICK to save a bit in vcpu->requests Suggested-by: Takuya Yoshikawa [Takuya moved all subsequent constants to fill the void, but that is useless in view of the following patches. So this change looks nothing like the original. 
- Paolo] Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 61c3e6c6..5ac775b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -122,7 +122,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_CLOCK_UPDATE 8 -#define KVM_REQ_KICK 9 +/* 9 is unused */ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 -- cgit v0.10.2 From 0cd310437255be81cd2413407c1d61eb70286fe2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 15:00:53 +0100 Subject: KVM: document which architecture uses each request bit Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5ac775b..48abf67 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -114,12 +114,14 @@ static inline bool is_error_page(struct page *page) * vcpu->requests bit members */ #define KVM_REQ_TLB_FLUSH 0 -#define KVM_REQ_MIGRATE_TIMER 1 -#define KVM_REQ_REPORT_TPR_ACCESS 2 #define KVM_REQ_MMU_RELOAD 3 -#define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 + +/* x86-specific requests */ +#define KVM_REQ_MIGRATE_TIMER 1 +#define KVM_REQ_REPORT_TPR_ACCESS 2 +#define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_CLOCK_UPDATE 8 /* 9 is unused */ @@ -130,14 +132,10 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_NMI 14 #define KVM_REQ_PMU 15 #define KVM_REQ_PMI 16 -#define KVM_REQ_WATCHDOG 17 #define KVM_REQ_MASTERCLOCK_UPDATE 18 #define KVM_REQ_MCLOCK_INPROGRESS 19 -#define KVM_REQ_EPR_EXIT 20 #define KVM_REQ_SCAN_IOAPIC 21 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 -#define KVM_REQ_ENABLE_IBS 23 -#define KVM_REQ_DISABLE_IBS 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 @@ -146,6 +144,14 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_HV_EXIT 30 #define KVM_REQ_HV_STIMER 31 +/* PPC-specific requests */ +#define KVM_REQ_WATCHDOG 17 +#define KVM_REQ_EPR_EXIT 20 + +/* s390-specific requests */ +#define KVM_REQ_ENABLE_IBS 23 +#define KVM_REQ_DISABLE_IBS 24 + #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v0.10.2 From 6662ba347b29b6df0756ffedb167fa4d89bab06f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 15:02:44 +0100 Subject: KVM: renumber vcpu->request bits Leave room for 4 more arch-independent requests. 
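Since each request is just a bit index into vcpu->requests, the renumbering only has to keep the architecture-independent bits below the range the architectures start using. A toy user-space model of the set / test-and-clear protocol (plain bit operations for clarity; the real kvm_make_request()/kvm_check_request() helpers operate on vcpu->requests with the kernel's bitops):

#include <stdio.h>

/* Shared bits stay low; arch-specific numbering may start at 8 on every arch. */
#define REQ_TLB_FLUSH	0
#define REQ_UNHALT	3
#define REQ_ARCH_FIRST	8	/* e.g. the first x86- or s390-only request */

static unsigned long requests;

static void make_request(int bit)
{
	requests |= 1UL << bit;
}

static int check_request(int bit)	/* test and clear, like the kernel helper */
{
	if (requests & (1UL << bit)) {
		requests &= ~(1UL << bit);
		return 1;
	}
	return 0;
}

int main(void)
{
	make_request(REQ_ARCH_FIRST);
	printf("pending: %d\n", check_request(REQ_ARCH_FIRST));	/* 1 */
	printf("cleared: %d\n", check_request(REQ_ARCH_FIRST));	/* 0 */
	return 0;
}
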
Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 48abf67..b0ec0f7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -114,43 +114,42 @@ static inline bool is_error_page(struct page *page) * vcpu->requests bit members */ #define KVM_REQ_TLB_FLUSH 0 -#define KVM_REQ_MMU_RELOAD 3 -#define KVM_REQ_PENDING_TIMER 5 -#define KVM_REQ_UNHALT 6 +#define KVM_REQ_MMU_RELOAD 1 +#define KVM_REQ_PENDING_TIMER 2 +#define KVM_REQ_UNHALT 3 /* x86-specific requests */ -#define KVM_REQ_MIGRATE_TIMER 1 -#define KVM_REQ_REPORT_TPR_ACCESS 2 -#define KVM_REQ_TRIPLE_FAULT 4 -#define KVM_REQ_MMU_SYNC 7 -#define KVM_REQ_CLOCK_UPDATE 8 -/* 9 is unused */ -#define KVM_REQ_DEACTIVATE_FPU 10 -#define KVM_REQ_EVENT 11 -#define KVM_REQ_APF_HALT 12 -#define KVM_REQ_STEAL_UPDATE 13 -#define KVM_REQ_NMI 14 -#define KVM_REQ_PMU 15 -#define KVM_REQ_PMI 16 -#define KVM_REQ_MASTERCLOCK_UPDATE 18 -#define KVM_REQ_MCLOCK_INPROGRESS 19 -#define KVM_REQ_SCAN_IOAPIC 21 -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 +#define KVM_REQ_MIGRATE_TIMER 8 +#define KVM_REQ_REPORT_TPR_ACCESS 9 +#define KVM_REQ_TRIPLE_FAULT 10 +#define KVM_REQ_MMU_SYNC 11 +#define KVM_REQ_CLOCK_UPDATE 12 +#define KVM_REQ_DEACTIVATE_FPU 13 +#define KVM_REQ_EVENT 14 +#define KVM_REQ_APF_HALT 15 +#define KVM_REQ_STEAL_UPDATE 16 +#define KVM_REQ_NMI 17 +#define KVM_REQ_PMU 18 +#define KVM_REQ_PMI 19 +#define KVM_REQ_SMI 20 +#define KVM_REQ_MASTERCLOCK_UPDATE 21 +#define KVM_REQ_MCLOCK_INPROGRESS 22 +#define KVM_REQ_SCAN_IOAPIC 23 +#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 -#define KVM_REQ_SMI 26 -#define KVM_REQ_HV_CRASH 27 -#define KVM_REQ_IOAPIC_EOI_EXIT 28 -#define KVM_REQ_HV_RESET 29 -#define KVM_REQ_HV_EXIT 30 -#define KVM_REQ_HV_STIMER 31 +#define KVM_REQ_HV_CRASH 26 +#define KVM_REQ_IOAPIC_EOI_EXIT 27 +#define KVM_REQ_HV_RESET 28 +#define KVM_REQ_HV_EXIT 29 +#define KVM_REQ_HV_STIMER 30 /* PPC-specific requests */ -#define KVM_REQ_WATCHDOG 17 -#define KVM_REQ_EPR_EXIT 20 +#define KVM_REQ_WATCHDOG 8 +#define KVM_REQ_EPR_EXIT 9 /* s390-specific requests */ -#define KVM_REQ_ENABLE_IBS 23 -#define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v0.10.2 From 2860c4b1678646c99f5f1d77d026cd12ffd8a3a9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 15:05:10 +0100 Subject: KVM: move architecture-dependent requests to arch/ Since the numbers now overlap, it makes sense to enumerate them in asm/kvm_host.h rather than linux/kvm_host.h. Functions that refer to architecture-specific requests are also moved to arch/. 
Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cfa758c..271fefb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -50,6 +50,10 @@ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 256 +/* PPC-specific vcpu->requests bit members */ +#define KVM_REQ_WATCHDOG 8 +#define KVM_REQ_EPR_EXIT 9 + #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index df0acff..6742414 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -39,6 +39,10 @@ #define KVM_IRQCHIP_NUM_PINS 4096 #define KVM_HALT_POLL_NS_DEFAULT 0 +/* s390-specific vcpu->requests bit members */ +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 + #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a7c8987..44adbb8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -46,6 +46,31 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +/* x86-specific vcpu->requests bit members */ +#define KVM_REQ_MIGRATE_TIMER 8 +#define KVM_REQ_REPORT_TPR_ACCESS 9 +#define KVM_REQ_TRIPLE_FAULT 10 +#define KVM_REQ_MMU_SYNC 11 +#define KVM_REQ_CLOCK_UPDATE 12 +#define KVM_REQ_DEACTIVATE_FPU 13 +#define KVM_REQ_EVENT 14 +#define KVM_REQ_APF_HALT 15 +#define KVM_REQ_STEAL_UPDATE 16 +#define KVM_REQ_NMI 17 +#define KVM_REQ_PMU 18 +#define KVM_REQ_PMI 19 +#define KVM_REQ_SMI 20 +#define KVM_REQ_MASTERCLOCK_UPDATE 21 +#define KVM_REQ_MCLOCK_INPROGRESS 22 +#define KVM_REQ_SCAN_IOAPIC 23 +#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 +#define KVM_REQ_HV_CRASH 26 +#define KVM_REQ_IOAPIC_EOI_EXIT 27 +#define KVM_REQ_HV_RESET 28 +#define KVM_REQ_HV_EXIT 29 +#define KVM_REQ_HV_STIMER 30 + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ @@ -1268,6 +1293,9 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); +void kvm_make_mclock_inprogress_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request(struct kvm *kvm); + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 102c302..107ceaf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1686,6 +1686,11 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +void kvm_make_mclock_inprogress_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); +} + static void kvm_gen_update_masterclock(struct kvm *kvm) { #ifdef CONFIG_X86_64 @@ -2699,6 +2704,11 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) return kvm_arch_has_noncoherent_dma(vcpu->kvm); } +static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) +{ + set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Address WBINVD may be executed by guest */ @@ -6337,6 +6347,11 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } +void kvm_make_scan_ioapic_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); +} + static void 
vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b0ec0f7..f707f74 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -111,46 +111,14 @@ static inline bool is_error_page(struct page *page) } /* - * vcpu->requests bit members + * Architecture-independent vcpu->requests bit members + * Bits 4-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH 0 #define KVM_REQ_MMU_RELOAD 1 #define KVM_REQ_PENDING_TIMER 2 #define KVM_REQ_UNHALT 3 -/* x86-specific requests */ -#define KVM_REQ_MIGRATE_TIMER 8 -#define KVM_REQ_REPORT_TPR_ACCESS 9 -#define KVM_REQ_TRIPLE_FAULT 10 -#define KVM_REQ_MMU_SYNC 11 -#define KVM_REQ_CLOCK_UPDATE 12 -#define KVM_REQ_DEACTIVATE_FPU 13 -#define KVM_REQ_EVENT 14 -#define KVM_REQ_APF_HALT 15 -#define KVM_REQ_STEAL_UPDATE 16 -#define KVM_REQ_NMI 17 -#define KVM_REQ_PMU 18 -#define KVM_REQ_PMI 19 -#define KVM_REQ_SMI 20 -#define KVM_REQ_MASTERCLOCK_UPDATE 21 -#define KVM_REQ_MCLOCK_INPROGRESS 22 -#define KVM_REQ_SCAN_IOAPIC 23 -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 -#define KVM_REQ_APIC_PAGE_RELOAD 25 -#define KVM_REQ_HV_CRASH 26 -#define KVM_REQ_IOAPIC_EOI_EXIT 27 -#define KVM_REQ_HV_RESET 28 -#define KVM_REQ_HV_EXIT 29 -#define KVM_REQ_HV_STIMER 30 - -/* PPC-specific requests */ -#define KVM_REQ_WATCHDOG 8 -#define KVM_REQ_EPR_EXIT 9 - -/* s390-specific requests */ -#define KVM_REQ_ENABLE_IBS 8 -#define KVM_REQ_DISABLE_IBS 9 - #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -689,8 +657,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); -void kvm_make_mclock_inprogress_request(struct kvm *kvm); -void kvm_make_scan_ioapic_request(struct kvm *kvm); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, @@ -1011,11 +977,6 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) return kvm_is_error_hva(hva); } -static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) -{ - set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); -} - enum kvm_stat_kind { KVM_STAT_VM, KVM_STAT_VCPU, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be3cef1..314c777 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -206,16 +206,6 @@ void kvm_reload_remote_mmus(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } -void kvm_make_mclock_inprogress_request(struct kvm *kvm) -{ - kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); -} - -void kvm_make_scan_ioapic_request(struct kvm *kvm) -{ - kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); -} - int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { struct page *page; -- cgit v0.10.2 From 1ac1b65ac199205724a8077d37ba7e64a1b7e77c Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:18 +0300 Subject: kvm/x86: Hyper-V timers fix incorrect logical operation Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f34f666..e4ef13a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -583,7 +583,7 @@ static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) { stimer_send_msg(stimer); if (!(stimer->config & HV_STIMER_PERIODIC)) - stimer->config |= ~HV_STIMER_ENABLE; + stimer->config &= ~HV_STIMER_ENABLE; else stimer_restart(stimer); } -- cgit v0.10.2 From 019b9781ccd667d4160f3636c8735e3baa085555 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:19 +0300 Subject: kvm/x86: Drop stimer_stop() function The function stimer_stop() is called in one place so remove the function and replace it's call by function content. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index e4ef13a..6b2ed93 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -400,16 +400,11 @@ static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, kvm_vcpu_kick(vcpu); } -static void stimer_stop(struct kvm_vcpu_hv_stimer *stimer) -{ - hrtimer_cancel(&stimer->timer); -} - static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); - stimer_stop(stimer); + hrtimer_cancel(&stimer->timer); clear_bit(stimer->index, vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); stimer->msg_pending = false; -- cgit v0.10.2 From f808495da56f28e94c6448125158f1175009fcfc Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:20 +0300 Subject: kvm/x86: Hyper-V unify stimer_start() and stimer_restart() This will be used in future to start Hyper-V SynIC timer in several places by one logic in one function. Changes v2: * drop stimer->count == 0 check inside stimer_start() * comment stimer_start() assumptions Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6b2ed93..0dd7d17 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -408,6 +408,7 @@ static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) clear_bit(stimer->index, vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); stimer->msg_pending = false; + stimer->exp_time = 0; } static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) @@ -420,24 +421,11 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void stimer_restart(struct kvm_vcpu_hv_stimer *stimer) -{ - u64 time_now; - ktime_t ktime_now; - u64 remainder; - - time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); - ktime_now = ktime_get(); - - div64_u64_rem(time_now - stimer->exp_time, stimer->count, &remainder); - stimer->exp_time = time_now + (stimer->count - remainder); - - hrtimer_start(&stimer->timer, - ktime_add_ns(ktime_now, - 100 * (stimer->exp_time - time_now)), - HRTIMER_MODE_ABS); -} - +/* + * stimer_start() assumptions: + * a) stimer->count is not equal to 0 + * b) stimer->config has HV_STIMER_ENABLE flag + */ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) { u64 time_now; @@ -447,12 +435,21 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) ktime_now = ktime_get(); if (stimer->config & HV_STIMER_PERIODIC) { - if (stimer->count == 0) - return -EINVAL; + if (stimer->exp_time) { + if (time_now >= stimer->exp_time) { + u64 remainder; + + div64_u64_rem(time_now - stimer->exp_time, + stimer->count, &remainder); + stimer->exp_time = + time_now + (stimer->count - remainder); + } + } else + stimer->exp_time = time_now + stimer->count; - stimer->exp_time = time_now + stimer->count; hrtimer_start(&stimer->timer, - ktime_add_ns(ktime_now, 100 * stimer->count), + ktime_add_ns(ktime_now, + 100 * (stimer->exp_time - time_now)), HRTIMER_MODE_ABS); return 0; } @@ -580,7 +577,7 @@ static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) if (!(stimer->config & HV_STIMER_PERIODIC)) stimer->config &= ~HV_STIMER_ENABLE; else - stimer_restart(stimer); + stimer_start(stimer); } void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 0cdeabb1186fc3a6c7854f05cec7c99e32935ebc Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:21 +0300 Subject: kvm/x86: Reorg stimer_expiration() to better control timer restart Split stimer_expiration() into two parts - timer expiration message sending and timer restart/cleanup based on timer state(config). This also fixes a bug where a one-shot timer message whose delivery failed once would get lost for good. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0dd7d17..5f85c12 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -554,30 +554,27 @@ static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, return r; } -static void stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) +static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); struct hv_message *msg = &stimer->msg; struct hv_timer_message_payload *payload = (struct hv_timer_message_payload *)&msg->u.payload; - int r; - stimer->msg_pending = true; payload->expiration_time = stimer->exp_time; payload->delivery_time = get_time_ref_counter(vcpu->kvm); - r = synic_deliver_msg(vcpu_to_synic(vcpu), - HV_STIMER_SINT(stimer->config), msg); - if (!r) - stimer->msg_pending = false; + return synic_deliver_msg(vcpu_to_synic(vcpu), + HV_STIMER_SINT(stimer->config), msg); } static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) { - stimer_send_msg(stimer); - if (!(stimer->config & HV_STIMER_PERIODIC)) - stimer->config &= ~HV_STIMER_ENABLE; - else - stimer_start(stimer); + stimer->msg_pending = true; + if (!stimer_send_msg(stimer)) { + stimer->msg_pending = false; + if (!(stimer->config & HV_STIMER_PERIODIC)) + stimer->config &= ~HV_STIMER_ENABLE; + } } void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) @@ -594,6 +591,11 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) time_now = get_time_ref_counter(vcpu->kvm); if (time_now >= stimer->exp_time) stimer_expiration(stimer); + + if (stimer->config & HV_STIMER_ENABLE) + stimer_start(stimer); + else + stimer_cleanup(stimer); } } } -- cgit v0.10.2 From 23a3b201fd187f1e7af573b3794c3c5ebf7d2c06 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:22 +0300 Subject: kvm/x86: Hyper-V fix SynIC timer disabling condition The Hypervisor Function Specification (HFS) does not require disabling a SynIC timer on a timer config write when timer->count == 0. So drop this check; this allows the timer MSRs to be loaded during migration restore, because on the QEMU side config is set before count. Also fix the condition according to the HFS doc (15.3.1): "It is not permitted to set the SINTx field to zero for an enabled timer. If attempted, the timer will be marked disabled (that is, bit 0 cleared) immediately." Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5f85c12..abfb920 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -474,7 +474,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, bool host) { - if (stimer->count == 0 || HV_STIMER_SINT(config) == 0) + if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) config &= ~HV_STIMER_ENABLE; stimer->config = config; stimer_cleanup(stimer); -- cgit v0.10.2 From 7be58a6488a9d36886d9423a1ed54fe104c7b182 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:23 +0300 Subject: kvm/x86: Skip SynIC vector check for QEMU side QEMU zero-initializes Hyper-V SynIC vectors. We should allow that and not reject zero values when they are set by the host. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V.
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index abfb920..ddae13e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -72,12 +72,13 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, return false; } -static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data) +static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, + u64 data, bool host) { int vector; vector = data & HV_SYNIC_SINT_VECTOR_MASK; - if (vector < 16) + if (vector < 16 && !host) return 1; /* * Guest may configure multiple SINTs to use the same vector, so @@ -247,7 +248,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, break; } case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: - ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data); + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); break; default: ret = 1; -- cgit v0.10.2 From f3b138c5d89a1f74a2b46adaa1067aea9a7e3cbb Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:24 +0300 Subject: kvm/x86: Update SynIC timers on guest entry only Consolidate updating the Hyper-V SynIC timers in a single place: on guest entry in processing KVM_REQ_HV_STIMER request. This simplifies the overall logic, and makes sure the most current state of msrs and guest clock is used for arming the timers (to achieve that, KVM_REQ_HV_STIMER has to be processed after KVM_REQ_CLOCK_UPDATE). Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ddae13e..101c2e4 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -389,7 +389,7 @@ static u64 get_time_ref_counter(struct kvm *kvm) return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); } -static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, +static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, bool vcpu_kick) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); @@ -417,7 +417,7 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) struct kvm_vcpu_hv_stimer *stimer; stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); - stimer_mark_expired(stimer, true); + stimer_mark_pending(stimer, true); return HRTIMER_NORESTART; } @@ -462,7 +462,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) * "If a one shot is enabled and the specified count is in * the past, it will expire immediately." 
*/ - stimer_mark_expired(stimer, false); + stimer_mark_pending(stimer, false); return 0; } @@ -475,30 +475,24 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, bool host) { + stimer_cleanup(stimer); if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) config &= ~HV_STIMER_ENABLE; stimer->config = config; - stimer_cleanup(stimer); - if (stimer->config & HV_STIMER_ENABLE) - if (stimer_start(stimer)) - return 1; + stimer_mark_pending(stimer, false); return 0; } static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, bool host) { - stimer->count = count; - stimer_cleanup(stimer); + stimer->count = count; if (stimer->count == 0) stimer->config &= ~HV_STIMER_ENABLE; - else if (stimer->config & HV_STIMER_AUTOENABLE) { + else if (stimer->config & HV_STIMER_AUTOENABLE) stimer->config |= HV_STIMER_ENABLE; - if (stimer_start(stimer)) - return 1; - } - + stimer_mark_pending(stimer, false); return 0; } @@ -582,18 +576,24 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); struct kvm_vcpu_hv_stimer *stimer; - u64 time_now; + u64 time_now, exp_time; int i; for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { stimer = &hv_vcpu->stimer[i]; if (stimer->config & HV_STIMER_ENABLE) { - time_now = get_time_ref_counter(vcpu->kvm); - if (time_now >= stimer->exp_time) - stimer_expiration(stimer); + exp_time = stimer->exp_time; + + if (exp_time) { + time_now = + get_time_ref_counter(vcpu->kvm); + if (time_now >= exp_time) + stimer_expiration(stimer); + } - if (stimer->config & HV_STIMER_ENABLE) + if ((stimer->config & HV_STIMER_ENABLE) && + stimer->count) stimer_start(stimer); else stimer_cleanup(stimer); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 107ceaf..fad1d096 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6508,6 +6508,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + + /* + * KVM_REQ_HV_STIMER has to be processed after + * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers + * depend on the guest clock being up-to-date + */ if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) kvm_hv_process_stimers(vcpu); } -- cgit v0.10.2 From 18659a9cb1885d00dd428f8857f7f628e54a45ee Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 23 Dec 2015 16:53:59 +0300 Subject: kvm/x86: Hyper-V SynIC tracepoints Trace the following Hyper SynIC events: * set msr * set sint irq * ack sint * sint irq eoi Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 101c2e4..2d83d459 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -152,7 +152,7 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) struct kvm_vcpu_hv_stimer *stimer; int gsi, idx, stimers_pending; - vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) synic_clear_sint_msg_pending(synic, sint); @@ -202,8 +202,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, if (!synic->active) return 1; - vcpu_debug(vcpu, "Hyper-V SynIC set msr 0x%x 0x%llx host %d\n", - msr, data, host); + trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); + ret = 0; switch (msr) { case HV_X64_MSR_SCONTROL: @@ -312,7 +312,7 @@ int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) irq.level = 1; ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); - vcpu_debug(vcpu, "Hyper-V SynIC set irq ret %d\n", ret); + trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); return ret; } @@ -332,7 +332,7 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); int i; - vcpu_debug(vcpu, "Hyper-V SynIC send eoi vec %d\n", vector); + trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); for (i = 0; i < ARRAY_SIZE(synic->sint); i++) if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ab9ae67..4be3500 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1025,6 +1025,99 @@ TRACE_EVENT(kvm_pi_irte_update, __entry->pi_desc_addr) ); +/* + * Tracepoint for kvm_hv_notify_acked_sint. + */ +TRACE_EVENT(kvm_hv_notify_acked_sint, + TP_PROTO(int vcpu_id, u32 sint), + TP_ARGS(vcpu_id, sint), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->sint = sint; + ), + + TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) +); + +/* + * Tracepoint for synic_set_irq. + */ +TRACE_EVENT(kvm_hv_synic_set_irq, + TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), + TP_ARGS(vcpu_id, sint, vector, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + __field(int, vector) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->sint = sint; + __entry->vector = vector; + __entry->ret = ret; + ), + + TP_printk("vcpu_id %d sint %u vector %d ret %d", + __entry->vcpu_id, __entry->sint, __entry->vector, + __entry->ret) +); + +/* + * Tracepoint for kvm_hv_synic_send_eoi. + */ +TRACE_EVENT(kvm_hv_synic_send_eoi, + TP_PROTO(int vcpu_id, int vector), + TP_ARGS(vcpu_id, vector), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + __field(int, vector) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vector = vector; + ), + + TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) +); + +/* + * Tracepoint for synic_set_msr. 
+ */ +TRACE_EVENT(kvm_hv_synic_set_msr, + TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), + TP_ARGS(vcpu_id, msr, data, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, msr) + __field(u64, data) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->msr = msr; + __entry->data = data; + __entry->host = host + ), + + TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", + __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From ac3e5fcae8ca658e7dcc3fdcd50af7e4779f58c1 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 23 Dec 2015 16:54:00 +0300 Subject: kvm/x86: Hyper-V SynIC timers tracepoints Trace the following Hyper SynIC timers events: * periodic timer start * one-shot timer start * timer callback * timer expiration and message delivery result * timer config setup * timer count setup * timer cleanup Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2d83d459..c58ba67 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -405,6 +405,9 @@ static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index); + hrtimer_cancel(&stimer->timer); clear_bit(stimer->index, vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); @@ -417,6 +420,8 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) struct kvm_vcpu_hv_stimer *stimer; stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); + trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index); stimer_mark_pending(stimer, true); return HRTIMER_NORESTART; @@ -448,6 +453,11 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) } else stimer->exp_time = time_now + stimer->count; + trace_kvm_hv_stimer_start_periodic( + stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, + time_now, stimer->exp_time); + hrtimer_start(&stimer->timer, ktime_add_ns(ktime_now, 100 * (stimer->exp_time - time_now)), @@ -466,6 +476,10 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) return 0; } + trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, + time_now, stimer->count); + hrtimer_start(&stimer->timer, ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), HRTIMER_MODE_ABS); @@ -475,6 +489,9 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, bool host) { + trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, config, host); + stimer_cleanup(stimer); if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) config &= ~HV_STIMER_ENABLE; @@ -486,6 +503,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, bool host) { + trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, count, host); + stimer_cleanup(stimer); stimer->count = count; if (stimer->count == 0) @@ -564,8 +584,13 @@ static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) { + int r; + stimer->msg_pending = true; - if (!stimer_send_msg(stimer)) { + r = 
stimer_send_msg(stimer); + trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, r); + if (!r) { stimer->msg_pending = false; if (!(stimer->config & HV_STIMER_PERIODIC)) stimer->config &= ~HV_STIMER_ENABLE; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4be3500..ad9f6a2 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1118,6 +1118,176 @@ TRACE_EVENT(kvm_hv_synic_set_msr, __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) ); +/* + * Tracepoint for stimer_set_config. + */ +TRACE_EVENT(kvm_hv_stimer_set_config, + TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), + TP_ARGS(vcpu_id, timer_index, config, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, config) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->config = config; + __entry->host = host; + ), + + TP_printk("vcpu_id %d timer %d config 0x%llx host %d", + __entry->vcpu_id, __entry->timer_index, __entry->config, + __entry->host) +); + +/* + * Tracepoint for stimer_set_count. + */ +TRACE_EVENT(kvm_hv_stimer_set_count, + TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), + TP_ARGS(vcpu_id, timer_index, count, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, count) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->count = count; + __entry->host = host; + ), + + TP_printk("vcpu_id %d timer %d count %llu host %d", + __entry->vcpu_id, __entry->timer_index, __entry->count, + __entry->host) +); + +/* + * Tracepoint for stimer_start(periodic timer case). + */ +TRACE_EVENT(kvm_hv_stimer_start_periodic, + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), + TP_ARGS(vcpu_id, timer_index, time_now, exp_time), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, time_now) + __field(u64, exp_time) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->time_now = time_now; + __entry->exp_time = exp_time; + ), + + TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", + __entry->vcpu_id, __entry->timer_index, __entry->time_now, + __entry->exp_time) +); + +/* + * Tracepoint for stimer_start(one-shot timer case). + */ +TRACE_EVENT(kvm_hv_stimer_start_one_shot, + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), + TP_ARGS(vcpu_id, timer_index, time_now, count), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, time_now) + __field(u64, count) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->time_now = time_now; + __entry->count = count; + ), + + TP_printk("vcpu_id %d timer %d time_now %llu count %llu", + __entry->vcpu_id, __entry->timer_index, __entry->time_now, + __entry->count) +); + +/* + * Tracepoint for stimer_timer_callback. + */ +TRACE_EVENT(kvm_hv_stimer_callback, + TP_PROTO(int vcpu_id, int timer_index), + TP_ARGS(vcpu_id, timer_index), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + ), + + TP_printk("vcpu_id %d timer %d", + __entry->vcpu_id, __entry->timer_index) +); + +/* + * Tracepoint for stimer_expiration. 
+ */ +TRACE_EVENT(kvm_hv_stimer_expiration, + TP_PROTO(int vcpu_id, int timer_index, int msg_send_result), + TP_ARGS(vcpu_id, timer_index, msg_send_result), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(int, msg_send_result) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->msg_send_result = msg_send_result; + ), + + TP_printk("vcpu_id %d timer %d msg send result %d", + __entry->vcpu_id, __entry->timer_index, + __entry->msg_send_result) +); + +/* + * Tracepoint for stimer_cleanup. + */ +TRACE_EVENT(kvm_hv_stimer_cleanup, + TP_PROTO(int vcpu_id, int timer_index), + TP_ARGS(vcpu_id, timer_index), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + ), + + TP_printk("vcpu_id %d timer %d", + __entry->vcpu_id, __entry->timer_index) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 45bdbcfdf241149642fb6c25ab0c209d59c371b7 Mon Sep 17 00:00:00 2001 From: Huaitong Han Date: Tue, 12 Jan 2016 16:04:20 +0800 Subject: kvm: x86: Fix vmwrite to SECONDARY_VM_EXEC_CONTROL vmx_cpuid_tries to update SECONDARY_VM_EXEC_CONTROL in the VMCS, but it will cause a vmwrite error on older CPUs because the code does not check for the presence of CPU_BASED_ACTIVATE_SECONDARY_CONTROLS. This will get rid of the following trace on e.g. Core2 6600: vmwrite error: reg 401e value 10 (err 12) Call Trace: [] dump_stack+0x40/0x57 [] vmx_cpuid_update+0x5d/0x150 [kvm_intel] [] kvm_vcpu_ioctl_set_cpuid2+0x4c/0x70 [kvm] [] kvm_arch_vcpu_ioctl+0x903/0xfa0 [kvm] Fixes: feda805fe7c4ed9cf78158e73b1218752e3b4314 Cc: stable@vger.kernel.org Reported-by: Zdenek Kaspar Signed-off-by: Huaitong Han Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 62d958a..be3f1735 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8989,7 +8989,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) best->ebx &= ~bit(X86_FEATURE_INVPCID); } - vmcs_set_secondary_exec_control(secondary_exec_ctl); + if (cpu_has_secondary_exec_ctrls()) + vmcs_set_secondary_exec_control(secondary_exec_ctl); if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { if (guest_cpuid_has_pcommit(vcpu)) -- cgit v0.10.2
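The periodic re-arm arithmetic introduced by the unified stimer_start() in the patches above (and refined by the "Update SynIC timers on guest entry only" change) can be illustrated outside the kernel. The sketch below is a minimal user-space model, not kernel code: the struct is a simplified stand-in for struct kvm_vcpu_hv_stimer, and the Hyper-V time reference counter (in 100 ns units) is passed in as a plain value instead of coming from get_time_ref_counter(). It shows how a missed periodic deadline is advanced to the next period boundary (the div64_u64_rem() step) and why the hrtimer delta is scaled by 100, since stimer counts are in 100 ns units while hrtimers take nanoseconds.

/*
 * Minimal user-space sketch of the periodic re-arm arithmetic used by
 * stimer_start() in the patches above.  Not kernel code: struct stimer
 * is a stand-in for struct kvm_vcpu_hv_stimer, and "now" replaces
 * get_time_ref_counter().
 */
#include <stdint.h>
#include <stdio.h>

struct stimer {
	uint64_t count;     /* period, in 100 ns units */
	uint64_t exp_time;  /* next expiration, in 100 ns units; 0 = not armed */
};

/*
 * Next expiration for a periodic timer.  If one or more periods were
 * missed (time_now >= exp_time), skip ahead to the next period
 * boundary -- the same computation the kernel does with
 * div64_u64_rem() -- instead of firing once per missed period.
 */
static uint64_t next_periodic_exp(const struct stimer *t, uint64_t time_now)
{
	if (!t->exp_time)                /* first arming after cleanup */
		return time_now + t->count;
	if (time_now < t->exp_time)      /* deadline still in the future */
		return t->exp_time;
	return time_now + (t->count - (time_now - t->exp_time) % t->count);
}

int main(void)
{
	struct stimer t = { .count = 100000 /* 10 ms */, .exp_time = 0 };
	uint64_t now = 5000000;

	t.exp_time = next_periodic_exp(&t, now);
	printf("armed: exp_time = %llu\n", (unsigned long long)t.exp_time);

	/* pretend the vcpu was away for 2.5 periods */
	now = t.exp_time + (5 * t.count) / 2;
	t.exp_time = next_periodic_exp(&t, now);
	printf("re-armed: exp_time = %llu\n", (unsigned long long)t.exp_time);

	/*
	 * hrtimers take nanoseconds while exp_time is in 100 ns units,
	 * hence the "100 * (stimer->exp_time - time_now)" in the patch.
	 */
	printf("hrtimer delta = %llu ns\n",
	       (unsigned long long)(100 * (t.exp_time - now)));
	return 0;
}

Skipping ahead to the next boundary, rather than queueing one expiration per missed period, mirrors what the kernel computation above does and keeps a vcpu that was descheduled for a long time from seeing a burst of stale timer messages on re-entry.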