From abdb080f7ac8a85547f5e0246362790043bbd3f2 Mon Sep 17 00:00:00 2001
From: Andrey Smetanin
Date: Tue, 10 Nov 2015 15:36:31 +0300
Subject: kvm/irqchip: kvm_arch_irq_routing_update renaming split

Actually kvm_arch_irq_routing_update() should be
kvm_arch_post_irq_routing_update(), as it's called at the end of an irq
routing update.  This renaming frees the kvm_arch_irq_routing_update()
name for a new weak function, which will be used to update mappings for
arch-specific irq routing entries (in particular, the upcoming Hyper-V
synthetic interrupts).

Signed-off-by: Andrey Smetanin
Reviewed-by: Roman Kagan
Signed-off-by: Denis V. Lunev
CC: Gleb Natapov
CC: Paolo Bonzini
CC: Roman Kagan
CC: Denis V. Lunev
CC: qemu-devel@nongnu.org
Signed-off-by: Paolo Bonzini

diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 84b96d3..e39768c 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -332,7 +332,7 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm)
 	return kvm_set_irq_routing(kvm, empty_routing, 0, 0);
 }
 
-void kvm_arch_irq_routing_update(struct kvm *kvm)
+void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 {
 	if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm))
 		return;

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index c923350..23555c0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -484,12 +484,12 @@ void vcpu_put(struct kvm_vcpu *vcpu);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
-void kvm_arch_irq_routing_update(struct kvm *kvm);
+void kvm_arch_post_irq_routing_update(struct kvm *kvm);
 #else
 static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 }
-static inline void kvm_arch_irq_routing_update(struct kvm *kvm)
+static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
@@ -1091,6 +1091,7 @@ static inline void kvm_irq_routing_update(struct kvm *kvm)
 {
 }
 #endif
+void kvm_arch_irq_routing_update(struct kvm *kvm);
 
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {

diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index f0b08a2..fe84e1a 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -166,6 +166,10 @@ out:
 	return r;
 }
 
+void __attribute__((weak)) kvm_arch_irq_routing_update(struct kvm *kvm)
+{
+}
+
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *ue,
 			unsigned nr,
@@ -219,9 +223,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
 	old = kvm->irq_routing;
 	rcu_assign_pointer(kvm->irq_routing, new);
 	kvm_irq_routing_update(kvm);
+	kvm_arch_irq_routing_update(kvm);
 	mutex_unlock(&kvm->irq_lock);
 
-	kvm_arch_irq_routing_update(kvm);
+	kvm_arch_post_irq_routing_update(kvm);
 
 	synchronize_srcu_expedited(&kvm->irq_srcu);
-- cgit v0.10.2

From 6308630bd3dbb6a8a883c4c571ce5e5a759a8a0e Mon Sep 17 00:00:00 2001
From: Andrey Smetanin
Date: Tue, 10 Nov 2015 15:36:32 +0300
Subject: kvm/x86: split ioapic-handled and EOI exit bitmaps

The function to determine if the vector is handled by ioapic used to rely
on the fact that only ioapic-handled vectors were set up to cause vmexits
when virtual apic was in use.

We're going to break this assumption when introducing Hyper-V synthetic
interrupts: they may need to cause vmexits too.

To achieve that, introduce a new bitmap dedicated specifically for
ioapic-handled vectors, and populate EOI exit bitmap from it for now.

Signed-off-by: Andrey Smetanin
Reviewed-by: Roman Kagan
Signed-off-by: Denis V.
Lunev CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 30cfd64..f6d8894 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -400,7 +400,7 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ - u64 eoi_exit_bitmap[4]; + DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; int mp_state; @@ -834,7 +834,7 @@ struct kvm_x86_ops { int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); - void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu); + void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 88d0a92..1facfd6 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -233,7 +233,7 @@ static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr) } -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors) { struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic; union kvm_ioapic_redirect_entry *e; @@ -250,7 +250,7 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) (e->fields.trig_mode == IOAPIC_EDGE_TRIG && kvm_apic_pending_eoi(vcpu, e->fields.vector))) __set_bit(e->fields.vector, - (unsigned long *)eoi_exit_bitmap); + ioapic_handled_vectors); } } spin_unlock(&ioapic->lock); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 084617d..2d16dc2 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -121,7 +121,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src, struct kvm_lapic_irq *irq, unsigned long *dest_map); int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state); -void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); - +void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors); #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index e39768c..ece901c 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -339,7 +339,8 @@ void kvm_arch_post_irq_routing_update(struct kvm *kvm) kvm_make_scan_ioapic_request(kvm); } -void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) +void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, + ulong *ioapic_handled_vectors) { struct kvm *kvm = vcpu->kvm; struct kvm_kernel_irq_routing_entry *entry; @@ -369,7 +370,7 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) u32 vector = entry->msi.data & 0xff; __set_bit(vector, - (unsigned long *) eoi_exit_bitmap); + ioapic_handled_vectors); } } } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 4d30b86..9469d453 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -932,7 +932,7 @@ int 
kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector) { - return test_bit(vector, (ulong *)apic->vcpu->arch.eoi_exit_bitmap); + return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors); } static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 83a1c64..ebb76e8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3564,7 +3564,7 @@ static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) return 0; } -static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { return; } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index af823a3..c8a87c9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8257,9 +8257,8 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu) +static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { - u64 *eoi_exit_bitmap = vcpu->arch.eoi_exit_bitmap; if (!vmx_cpu_uses_apicv(vcpu)) return; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eed3228..9c69337 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6301,15 +6301,16 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; - memset(vcpu->arch.eoi_exit_bitmap, 0, 256 / 8); + bitmap_zero(vcpu->arch.ioapic_handled_vectors, 256); if (irqchip_split(vcpu->kvm)) - kvm_scan_ioapic_routes(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.eoi_exit_bitmap); + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu); + kvm_x86_ops->load_eoi_exitmap(vcpu, + (u64 *)vcpu->arch.ioapic_handled_vectors); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -6417,7 +6418,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_IOAPIC_EOI_EXIT, vcpu)) { BUG_ON(vcpu->arch.pending_ioapic_eoi > 255); if (test_bit(vcpu->arch.pending_ioapic_eoi, - (void *) vcpu->arch.eoi_exit_bitmap)) { + vcpu->arch.ioapic_handled_vectors)) { vcpu->run->exit_reason = KVM_EXIT_IOAPIC_EOI; vcpu->run->eoi.vector = vcpu->arch.pending_ioapic_eoi; -- cgit v0.10.2 From d62caabb41f33d96333f9ef15e09cd26e1c12760 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Tue, 10 Nov 2015 15:36:33 +0300 Subject: kvm/x86: per-vcpu apicv deactivation support The decision on whether to use hardware APIC virtualization used to be taken globally, based on the availability of the feature in the CPU and the value of a module parameter. However, under certain circumstances we want to control it on per-vcpu basis. In particular, when the userspace activates HyperV synthetic interrupt controller (SynIC), APICv has to be disabled as it's incompatible with SynIC auto-EOI behavior. To achieve that, introduce 'apicv_active' flag on struct kvm_vcpu_arch, and kvm_vcpu_deactivate_apicv() function to turn APICv off. The flag is initialized based on the module parameter and CPU capability, and consulted whenever an APICv-specific action is performed. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f6d8894..bac0d54 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -400,6 +400,7 @@ struct kvm_vcpu_arch { u64 efer; u64 apic_base; struct kvm_lapic *apic; /* kernel irqchip context */ + bool apicv_active; DECLARE_BITMAP(ioapic_handled_vectors, 256); unsigned long apic_attention; int32_t apic_arb_prio; @@ -831,7 +832,8 @@ struct kvm_x86_ops { void (*enable_nmi_window)(struct kvm_vcpu *vcpu); void (*enable_irq_window)(struct kvm_vcpu *vcpu); void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); - int (*cpu_uses_apicv)(struct kvm_vcpu *vcpu); + bool (*get_enable_apicv)(void); + void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); void (*hwapic_isr_update)(struct kvm *kvm, int isr); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); @@ -1086,6 +1088,8 @@ gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); +void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu); + int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code, diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index 097060e..3982b47 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -76,7 +76,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) if (kvm_cpu_has_extint(v)) return 1; - if (kvm_vcpu_apic_vid_enabled(v)) + if (kvm_vcpu_apicv_active(v)) return 0; return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9469d453..618a20d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -379,7 +379,8 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + if (apic->vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -392,7 +393,7 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; - if (unlikely(kvm_vcpu_apic_vid_enabled(vcpu))) { + if (unlikely(vcpu->arch.apicv_active)) { /* try to update RVI */ apic_clear_vector(vec, apic->regs + APIC_IRR); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -418,7 +419,7 @@ static inline void apic_set_isr(int vec, struct kvm_lapic *apic) * because the processor can modify ISR under the hood. Instead * just set SVI. */ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, vec); else { ++apic->isr_count; @@ -466,7 +467,7 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic) * on the other hand isr_count and highest_isr_cache are unused * and must be left alone. 
*/ - if (unlikely(kvm_x86_ops->hwapic_isr_update)) + if (unlikely(vcpu->arch.apicv_active)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); else { @@ -852,7 +853,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, apic_clear_vector(vector, apic->regs + APIC_TMR); } - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) kvm_x86_ops->deliver_posted_interrupt(vcpu, vector); else { apic_set_irr(vector, apic); @@ -1225,7 +1226,7 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu) int vec = reg & APIC_VECTOR_MASK; void *bitmap = apic->regs + APIC_ISR; - if (kvm_x86_ops->deliver_posted_interrupt) + if (vcpu->arch.apicv_active) bitmap = apic->regs + APIC_IRR; if (apic_test_vector(vec, bitmap)) @@ -1693,8 +1694,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) apic_set_reg(apic, APIC_ISR + 0x10 * i, 0); apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); } - apic->irr_pending = kvm_vcpu_apic_vid_enabled(vcpu); - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0; + apic->irr_pending = vcpu->arch.apicv_active; + apic->isr_count = vcpu->arch.apicv_active ? 1 : 0; apic->highest_isr_cache = -1; update_divide_count(apic); atomic_set(&apic->lapic_timer.pending, 0); @@ -1906,15 +1907,15 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, update_divide_count(apic); start_apic_timer(apic); apic->irr_pending = true; - apic->isr_count = kvm_x86_ops->hwapic_isr_update ? + apic->isr_count = vcpu->arch.apicv_active ? 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; - if (kvm_x86_ops->hwapic_irr_update) + if (vcpu->arch.apicv_active) { kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); - if (unlikely(kvm_x86_ops->hwapic_isr_update)) kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic)); + } kvm_make_request(KVM_REQ_EVENT, vcpu); if (ioapic_in_kernel(vcpu->kvm)) kvm_rtc_eoi_tracking_restore_one(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index fde8e35d..5fc60e4 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -143,9 +143,9 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic) return apic->vcpu->arch.apic_base & X2APIC_ENABLE; } -static inline bool kvm_vcpu_apic_vid_enabled(struct kvm_vcpu *vcpu) +static inline bool kvm_vcpu_apicv_active(struct kvm_vcpu *vcpu) { - return kvm_x86_ops->cpu_uses_apicv(vcpu); + return vcpu->arch.apic && vcpu->arch.apicv_active; } static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ebb76e8..2401fc8 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3559,9 +3559,13 @@ static void svm_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) return; } -static int svm_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool svm_get_enable_apicv(void) +{ + return false; +} + +static void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) { - return 0; } static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) @@ -4328,7 +4332,8 @@ static struct kvm_x86_ops svm_x86_ops = { .enable_irq_window = enable_irq_window, .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode, - .cpu_uses_apicv = svm_cpu_uses_apicv, + .get_enable_apicv = svm_get_enable_apicv, + .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, .sync_pir_to_irr = svm_sync_pir_to_irr, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 
c8a87c9..1a8bfaa 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -19,6 +19,7 @@ #include "irq.h" #include "mmu.h" #include "cpuid.h" +#include "lapic.h" #include #include @@ -862,7 +863,6 @@ static void kvm_cpu_vmxon(u64 addr); static void kvm_cpu_vmxoff(void); static bool vmx_mpx_supported(void); static bool vmx_xsaves_supported(void); -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu); static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); static void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); @@ -870,7 +870,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); static bool guest_state_valid(struct kvm_vcpu *vcpu); static u32 vmx_segment_access_rights(struct kvm_segment *var); -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); static int alloc_identity_pagetable(struct kvm *kvm); @@ -2498,7 +2497,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | PIN_BASED_VMX_PREEMPTION_TIMER; - if (vmx_cpu_uses_apicv(&vmx->vcpu)) + if (kvm_vcpu_apicv_active(&vmx->vcpu)) vmx->nested.nested_vmx_pinbased_ctls_high |= PIN_BASED_POSTED_INTR; @@ -4462,9 +4461,9 @@ static void vmx_disable_intercept_msr_write_x2apic(u32 msr) msr, MSR_TYPE_W); } -static int vmx_cpu_uses_apicv(struct kvm_vcpu *vcpu) +static bool vmx_get_enable_apicv(void) { - return enable_apicv && lapic_in_kernel(vcpu); + return enable_apicv; } static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) @@ -4586,11 +4585,6 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); } -static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu) -{ - return; -} - /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. 
@@ -4660,11 +4654,18 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) { u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR; return pin_based_exec_ctrl; } +static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx)); +} + static u32 vmx_exec_control(struct vcpu_vmx *vmx) { u32 exec_control = vmcs_config.cpu_based_exec_ctrl; @@ -4703,7 +4704,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx) exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST; if (!ple_gap) exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING; - if (!vmx_cpu_uses_apicv(&vmx->vcpu)) + if (!kvm_vcpu_apicv_active(&vmx->vcpu)) exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY); exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; @@ -4767,7 +4768,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write32(SECONDARY_VM_EXEC_CONTROL, vmx_secondary_exec_control(vmx)); - if (vmx_cpu_uses_apicv(&vmx->vcpu)) { + if (kvm_vcpu_apicv_active(&vmx->vcpu)) { vmcs_write64(EOI_EXIT_BITMAP0, 0); vmcs_write64(EOI_EXIT_BITMAP1, 0); vmcs_write64(EOI_EXIT_BITMAP2, 0); @@ -4919,7 +4920,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); - if (vmx_cpu_uses_apicv(vcpu)) + if (kvm_vcpu_apicv_active(vcpu)) memset(&vmx->pi_desc, 0, sizeof(struct pi_desc)); if (vmx->vpid != 0) @@ -6203,15 +6204,6 @@ static __init int hardware_setup(void) kvm_tsc_scaling_ratio_frac_bits = 48; } - if (enable_apicv) - kvm_x86_ops->update_cr8_intercept = NULL; - else { - kvm_x86_ops->hwapic_irr_update = NULL; - kvm_x86_ops->hwapic_isr_update = NULL; - kvm_x86_ops->deliver_posted_interrupt = NULL; - kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy; - } - vmx_disable_intercept_for_msr(MSR_FS_BASE, false); vmx_disable_intercept_for_msr(MSR_GS_BASE, false); vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); @@ -8152,7 +8144,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) * apicv */ if (!cpu_has_vmx_virtualize_x2apic_mode() || - !vmx_cpu_uses_apicv(vcpu)) + !kvm_vcpu_apicv_active(vcpu)) return; if (!cpu_need_tpr_shadow(vcpu)) @@ -8259,7 +8251,7 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { - if (!vmx_cpu_uses_apicv(vcpu)) + if (!kvm_vcpu_apicv_active(vcpu)) return; vmcs_write64(EOI_EXIT_BITMAP0, eoi_exit_bitmap[0]); @@ -10803,7 +10795,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .update_cr8_intercept = update_cr8_intercept, .set_virtual_x2apic_mode = vmx_set_virtual_x2apic_mode, .set_apic_access_page_addr = vmx_set_apic_access_page_addr, - .cpu_uses_apicv = vmx_cpu_uses_apicv, + .get_enable_apicv = vmx_get_enable_apicv, + .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9c69337..f0250a0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2748,7 +2748,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - kvm_x86_ops->sync_pir_to_irr(vcpu); + if 
(vcpu->arch.apicv_active)
+		kvm_x86_ops->sync_pir_to_irr(vcpu);
+
 	memcpy(s->regs, vcpu->arch.apic->regs, sizeof *s);
 
 	return 0;
@@ -5867,6 +5869,12 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
 	kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
 }
 
+void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.apicv_active = false;
+	kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -5960,6 +5968,9 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.apic)
 		return;
 
+	if (vcpu->arch.apicv_active)
+		return;
+
 	if (!vcpu->arch.apic->vapic_addr)
 		max_irr = kvm_lapic_find_highest_irr(vcpu);
 	else
@@ -6306,7 +6317,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	if (irqchip_split(vcpu->kvm))
 		kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors);
 	else {
-		kvm_x86_ops->sync_pir_to_irr(vcpu);
+		if (vcpu->arch.apicv_active)
+			kvm_x86_ops->sync_pir_to_irr(vcpu);
 		kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors);
 	}
 	kvm_x86_ops->load_eoi_exitmap(vcpu,
@@ -6453,7 +6465,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			 * Update architecture specific hints for APIC
 			 * virtual interrupt delivery.
 			 */
-			if (kvm_x86_ops->hwapic_irr_update)
+			if (vcpu->arch.apicv_active)
 				kvm_x86_ops->hwapic_irr_update(vcpu,
 					kvm_lapic_find_highest_irr(vcpu));
 		}
@@ -7524,6 +7536,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 	BUG_ON(vcpu->kvm == NULL);
 	kvm = vcpu->kvm;
 
+	vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv();
 	vcpu->arch.pv.pv_unhalted = false;
 	vcpu->arch.emulate_ctxt.ops = &emulate_ops;
 	if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
-- cgit v0.10.2

From 5c919412fe61c35947816fdbd5f7bd09fe0dd073 Mon Sep 17 00:00:00 2001
From: Andrey Smetanin
Date: Tue, 10 Nov 2015 15:36:34 +0300
Subject: kvm/x86: Hyper-V synthetic interrupt controller

SynIC (synthetic interrupt controller) is a lapic extension, which is
controlled via MSRs and maintains for each vCPU

 - 16 synthetic interrupt "lines" (SINTs); each can be configured to
   trigger a specific interrupt vector, optionally with auto-EOI
   semantics

 - a message page in the guest memory with 16 256-byte per-SINT message
   slots

 - an event flag page in the guest memory with 16 2048-bit per-SINT
   event flag areas

The host triggers a SINT whenever it delivers a new message to the
corresponding slot or flips an event flag bit in the corresponding area.
The guest informs the host that it can try delivering a message by
explicitly asserting EOI in the lapic or writing to the End-Of-Message
(EOM) MSR.

The userspace (qemu) triggers interrupts and receives EOM notifications
via irqfd with resampler; for that, a GSI is allocated for each
configured SINT, and the irq_routing api is extended to support GSI-SINT
mapping.

Changes v4:
* added activation of SynIC by vcpu KVM_ENABLE_CAP
* added per SynIC active flag
* added deactivation of APICv upon SynIC activation

Changes v3:
* added KVM_CAP_HYPERV_SYNIC and KVM_IRQ_ROUTING_HV_SINT notes into docs

Changes v2:
* do not use posted interrupts for Hyper-V SynIC AutoEOI vectors
* add Hyper-V SynIC vectors into EOI exit bitmap
* Hyper-V SynIC SINT MSR write logic simplified

Signed-off-by: Andrey Smetanin
Reviewed-by: Roman Kagan
Signed-off-by: Denis V. Lunev
CC: Gleb Natapov
CC: Paolo Bonzini
CC: Roman Kagan
CC: Denis V.
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 092ee9f..88af846 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1451,6 +1451,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; @@ -1459,6 +1460,7 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 No flags are specified so far, the corresponding field must be set to zero. @@ -1482,6 +1484,10 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; 4.53 KVM_ASSIGN_SET_MSIX_NR (deprecated) @@ -3685,3 +3691,16 @@ available, means that that the kernel has an implementation of the H_RANDOM hypercall backed by a hardware random-number generator. If present, the kernel H_RANDOM handler can be enabled for guest use with the KVM_CAP_PPC_ENABLE_HCALL capability. + +8.2 KVM_CAP_HYPERV_SYNIC + +Architectures: x86 +This capability, if KVM_CHECK_EXTENSION indicates that it is +available, means that that the kernel has an implementation of the +Hyper-V Synthetic interrupt controller(SynIC). Hyper-V SynIC is +used to support Windows Hyper-V based guest paravirt drivers(VMBus). + +In order to use SynIC, it has to be activated by setting this +capability via KVM_ENABLE_CAP ioctl on the vcpu fd. Note that this +will disable the use of APIC hardware virtualization even if supported +by the CPU, as it's incompatible with SynIC auto-EOI behavior. 
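As a rough illustration of the uapi described above (not part of the patch
series; error handling is omitted, and the fds, GSI and SINT numbers are
placeholder assumptions), a VMM might activate SynIC on a vcpu and route a
GSI to one of its SINTs roughly like this:

#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Sketch only: vm_fd/vcpu_fd come from KVM_CREATE_VM/KVM_CREATE_VCPU. */
static int synic_setup(int vm_fd, int vcpu_fd)
{
	struct kvm_enable_cap cap;
	struct kvm_irq_routing *route;
	int ret;

	/* Activate SynIC; as noted above, this also deactivates APICv. */
	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_HYPERV_SYNIC;
	ret = ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
	if (ret < 0)
		return ret;

	/*
	 * Route GSI 5 to SINT 2 of vcpu 0 (numbers chosen arbitrarily).
	 * KVM_SET_GSI_ROUTING replaces the whole routing table, so a real
	 * VMM would include its existing irqchip/MSI entries here as well.
	 */
	route = calloc(1, sizeof(*route) + sizeof(struct kvm_irq_routing_entry));
	if (!route)
		return -1;
	route->nr = 1;
	route->entries[0].gsi = 5;
	route->entries[0].type = KVM_IRQ_ROUTING_HV_SINT;
	route->entries[0].u.hv_sint.vcpu = 0;
	route->entries[0].u.hv_sint.sint = 2;
	ret = ioctl(vm_fd, KVM_SET_GSI_ROUTING, route);
	free(route);
	return ret;
}

An irqfd (optionally with a resample fd, for EOM notifications) can then be
attached to that GSI with KVM_IRQFD, just as for any other routed interrupt.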
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bac0d54..bab47b6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -374,10 +375,24 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V synthetic interrupt controller (SynIC)*/ +struct kvm_vcpu_hv_synic { + u64 version; + u64 control; + u64 msg_page; + u64 evt_page; + atomic64_t sint[HV_SYNIC_SINT_COUNT]; + atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; + DECLARE_BITMAP(auto_eoi_bitmap, 256); + DECLARE_BITMAP(vec_bitmap, 256); + bool active; +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { u64 hv_vapic; s64 runtime_offset; + struct kvm_vcpu_hv_synic synic; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 62cf8c9..83a3c0c 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,13 +23,314 @@ #include "x86.h" #include "lapic.h" +#include "ioapic.h" #include "hyperv.h" #include +#include #include #include "trace.h" +static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint) +{ + return atomic64_read(&synic->sint[sint]); +} + +static inline int synic_get_sint_vector(u64 sint_value) +{ + if (sint_value & HV_SYNIC_SINT_MASKED) + return -1; + return sint_value & HV_SYNIC_SINT_VECTOR_MASK; +} + +static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + return true; + } + return false; +} + +static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, + int vector) +{ + int i; + u64 sint_value; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + sint_value = synic_read_sint(synic, i); + if (synic_get_sint_vector(sint_value) == vector && + sint_value & HV_SYNIC_SINT_AUTO_EOI) + return true; + } + return false; +} + +static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data) +{ + int vector; + + vector = data & HV_SYNIC_SINT_VECTOR_MASK; + if (vector < 16) + return 1; + /* + * Guest may configure multiple SINTs to use the same vector, so + * we maintain a bitmap of vectors handled by synic, and a + * bitmap of vectors with auto-eoi behavior. The bitmaps are + * updated here, and atomically queried on fast paths. + */ + + atomic64_set(&synic->sint[sint], data); + + if (synic_has_vector_connected(synic, vector)) + __set_bit(vector, synic->vec_bitmap); + else + __clear_bit(vector, synic->vec_bitmap); + + if (synic_has_vector_auto_eoi(synic, vector)) + __set_bit(vector, synic->auto_eoi_bitmap); + else + __clear_bit(vector, synic->auto_eoi_bitmap); + + /* Load SynIC vectors into EOI exit bitmap */ + kvm_make_request(KVM_REQ_SCAN_IOAPIC, synic_to_vcpu(synic)); + return 0; +} + +static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_hv_synic *synic; + + if (vcpu_id >= atomic_read(&kvm->online_vcpus)) + return NULL; + vcpu = kvm_get_vcpu(kvm, vcpu_id); + if (!vcpu) + return NULL; + synic = vcpu_to_synic(vcpu); + return (synic->active) ? 
synic : NULL; +} + +static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) +{ + struct kvm *kvm = vcpu->kvm; + int gsi, idx; + + vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + + idx = srcu_read_lock(&kvm->irq_srcu); + gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); + if (gsi != -1) + kvm_notify_acked_gsi(kvm, gsi); + srcu_read_unlock(&kvm->irq_srcu, idx); +} + +static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, + u32 msr, u64 data, bool host) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + int ret; + + if (!synic->active) + return 1; + + vcpu_debug(vcpu, "Hyper-V SynIC set msr 0x%x 0x%llx host %d\n", + msr, data, host); + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + synic->control = data; + break; + case HV_X64_MSR_SVERSION: + if (!host) { + ret = 1; + break; + } + synic->version = data; + break; + case HV_X64_MSR_SIEFP: + if (data & HV_SYNIC_SIEFP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->evt_page = data; + break; + case HV_X64_MSR_SIMP: + if (data & HV_SYNIC_SIMP_ENABLE) + if (kvm_clear_guest(vcpu->kvm, + data & PAGE_MASK, PAGE_SIZE)) { + ret = 1; + break; + } + synic->msg_page = data; + break; + case HV_X64_MSR_EOM: { + int i; + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + kvm_hv_notify_acked_sint(vcpu, i); + break; + } + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data); + break; + default: + ret = 1; + break; + } + return ret; +} + +static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata) +{ + int ret; + + if (!synic->active) + return 1; + + ret = 0; + switch (msr) { + case HV_X64_MSR_SCONTROL: + *pdata = synic->control; + break; + case HV_X64_MSR_SVERSION: + *pdata = synic->version; + break; + case HV_X64_MSR_SIEFP: + *pdata = synic->evt_page; + break; + case HV_X64_MSR_SIMP: + *pdata = synic->msg_page; + break; + case HV_X64_MSR_EOM: + *pdata = 0; + break; + case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: + *pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]); + break; + default: + ret = 1; + break; + } + return ret; +} + +int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_lapic_irq irq; + int ret, vector; + + if (sint >= ARRAY_SIZE(synic->sint)) + return -EINVAL; + + vector = synic_get_sint_vector(synic_read_sint(synic, sint)); + if (vector < 0) + return -ENOENT; + + memset(&irq, 0, sizeof(irq)); + irq.dest_id = kvm_apic_id(vcpu->arch.apic); + irq.dest_mode = APIC_DEST_PHYSICAL; + irq.delivery_mode = APIC_DM_FIXED; + irq.vector = vector; + irq.level = 1; + + ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); + vcpu_debug(vcpu, "Hyper-V SynIC set irq ret %d\n", ret); + return ret; +} + +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + return synic_set_irq(synic, sint); +} + +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) +{ + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); + int i; + + vcpu_debug(vcpu, "Hyper-V SynIC send eoi vec %d\n", vector); + + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) + if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) + kvm_hv_notify_acked_sint(vcpu, i); +} + +static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi) +{ + struct kvm_vcpu_hv_synic *synic; + + synic = synic_get(kvm, vcpu_id); + if (!synic) + return -EINVAL; + + if (sint >= ARRAY_SIZE(synic->sint_to_gsi)) + return -EINVAL; + + atomic_set(&synic->sint_to_gsi[sint], gsi); + return 0; +} + +void kvm_hv_irq_routing_update(struct kvm *kvm) +{ + struct kvm_irq_routing_table *irq_rt; + struct kvm_kernel_irq_routing_entry *e; + u32 gsi; + + irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu, + lockdep_is_held(&kvm->irq_lock)); + + for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) { + hlist_for_each_entry(e, &irq_rt->map[gsi], link) { + if (e->type == KVM_IRQ_ROUTING_HV_SINT) + kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu, + e->hv_sint.sint, gsi); + } + } +} + +static void synic_init(struct kvm_vcpu_hv_synic *synic) +{ + int i; + + memset(synic, 0, sizeof(*synic)); + synic->version = HV_SYNIC_VERSION_1; + for (i = 0; i < ARRAY_SIZE(synic->sint); i++) { + atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED); + atomic_set(&synic->sint_to_gsi[i], -1); + } +} + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) +{ + synic_init(vcpu_to_synic(vcpu)); +} + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) +{ + /* + * Hyper-V SynIC auto EOI SINT's are + * not compatible with APICV, so deactivate APICV + */ + kvm_vcpu_deactivate_apicv(vcpu); + vcpu_to_synic(vcpu)->active = true; + return 0; +} + static bool kvm_hv_msr_partition_wide(u32 msr) { bool r = false; @@ -226,6 +527,13 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 1; hv->runtime_offset = data - current_task_runtime_100ns(); break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: + return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -304,6 +612,13 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_VP_RUNTIME: data = current_task_runtime_100ns() + hv->runtime_offset; break; + case HV_X64_MSR_SCONTROL: + case HV_X64_MSR_SVERSION: + case HV_X64_MSR_SIEFP: + case HV_X64_MSR_SIMP: + case HV_X64_MSR_EOM: + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index c7bce55..315af4b 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -29,4 +29,27 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); bool kvm_hv_hypercall_enabled(struct kvm *kvm); int kvm_hv_hypercall(struct kvm_vcpu *vcpu); +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); + +static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv.synic; +} + +static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +{ + struct kvm_vcpu_hv *hv; + struct kvm_vcpu_arch *arch; + + hv = container_of(synic, struct kvm_vcpu_hv, synic); + arch = container_of(hv, struct kvm_vcpu_arch, hyperv); + return container_of(arch, struct kvm_vcpu, arch); +} +void kvm_hv_irq_routing_update(struct kvm *kvm); + +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); + +int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index ece901c..8fc89ef 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -33,6 +33,8 @@ #include "lapic.h" +#include "hyperv.h" + static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) @@ -219,6 +221,16 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, srcu_read_unlock(&kvm->irq_srcu, idx); } +static int kvm_hv_set_sint(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (!level) + return -1; + + return kvm_hv_synic_set_irq(kvm, e->hv_sint.vcpu, e->hv_sint.sint); +} + int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) { @@ -257,6 +269,11 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, e->msi.address_hi = ue->u.msi.address_hi; e->msi.data = ue->u.msi.data; break; + case KVM_IRQ_ROUTING_HV_SINT: + e->set = kvm_hv_set_sint; + e->hv_sint.vcpu = ue->u.hv_sint.vcpu; + e->hv_sint.sint = ue->u.hv_sint.sint; + break; default: goto out; } @@ -376,3 +393,20 @@ void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu, } srcu_read_unlock(&kvm->irq_srcu, idx); } + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status) +{ + switch (irq->type) { + case KVM_IRQ_ROUTING_HV_SINT: + return kvm_hv_set_sint(irq, kvm, irq_source_id, level, + line_status); + default: + return -EWOULDBLOCK; + } +} + +void kvm_arch_irq_routing_update(struct kvm *kvm) +{ + kvm_hv_irq_routing_update(kvm); +} diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 618a20d..36591fa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -41,6 +41,7 @@ 
#include "trace.h" #include "x86.h" #include "cpuid.h" +#include "hyperv.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -128,11 +129,6 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) -static inline int kvm_apic_id(struct kvm_lapic *apic) -{ - return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; -} - /* The logical map is definitely wrong if we have multiple * modes at the same time. (Physical map is always right.) */ @@ -975,6 +971,9 @@ static int apic_set_eoi(struct kvm_lapic *apic) apic_clear_isr(vector, apic); apic_update_ppr(apic); + if (test_bit(vector, vcpu_to_synic(apic->vcpu)->vec_bitmap)) + kvm_hv_synic_send_eoi(apic->vcpu, vector); + kvm_ioapic_send_eoi(apic, vector); kvm_make_request(KVM_REQ_EVENT, apic->vcpu); return vector; @@ -1884,6 +1883,12 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) apic_set_isr(vector, apic); apic_update_ppr(apic); apic_clear_irr(vector, apic); + + if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { + apic_clear_isr(vector, apic); + apic_update_ppr(apic); + } + return vector; } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 5fc60e4..41bdb35 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -164,6 +164,11 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } +static inline int kvm_apic_id(struct kvm_lapic *apic) +{ + return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff; +} + bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f0250a0..eb64377ed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -966,6 +966,7 @@ static u32 emulated_msrs[] = { HV_X64_MSR_RESET, HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, + HV_X64_MSR_SCONTROL, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -2541,6 +2542,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV: case KVM_CAP_HYPERV_VAPIC: case KVM_CAP_HYPERV_SPIN: + case KVM_CAP_HYPERV_SYNIC: case KVM_CAP_PCI_SEGMENT: case KVM_CAP_DEBUGREGS: case KVM_CAP_X86_ROBUST_SINGLESTEP: @@ -3193,6 +3195,20 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu) return 0; } +static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, + struct kvm_enable_cap *cap) +{ + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_HYPERV_SYNIC: + return kvm_hv_activate_synic(vcpu); + default: + return -EINVAL; + } +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -3457,6 +3473,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_set_guest_paused(vcpu); goto out; } + case KVM_ENABLE_CAP: { + struct kvm_enable_cap cap; + + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); + break; + } default: r = -EINVAL; } @@ -6309,6 +6334,8 @@ static void process_smi(struct kvm_vcpu *vcpu) static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { + u64 eoi_exit_bitmap[4]; + if (!kvm_apic_hw_enabled(vcpu->arch.apic)) return; @@ -6321,8 +6348,9 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } - kvm_x86_ops->load_eoi_exitmap(vcpu, - (u64 
*)vcpu->arch.ioapic_handled_vectors); + bitmap_or((ulong *)eoi_exit_bitmap, vcpu->arch.ioapic_handled_vectors, + vcpu_to_synic(vcpu)->vec_bitmap, 256); + kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap); } static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu) @@ -7594,6 +7622,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu->arch.pending_external_vector = -1; + kvm_hv_vcpu_init(vcpu); + return 0; fail_free_mce_banks: diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 23555c0..ebaf2f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -318,6 +318,11 @@ struct kvm_s390_adapter_int { u32 adapter_id; }; +struct kvm_hv_sint { + u32 vcpu; + u32 sint; +}; + struct kvm_kernel_irq_routing_entry { u32 gsi; u32 type; @@ -331,6 +336,7 @@ struct kvm_kernel_irq_routing_entry { } irqchip; struct msi_msg msi; struct kvm_s390_adapter_int adapter; + struct kvm_hv_sint hv_sint; }; struct hlist_node link; }; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 03f3618..27ce460 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -831,6 +831,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #define KVM_CAP_SPLIT_IRQCHIP 121 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 +#define KVM_CAP_HYPERV_SYNIC 123 #ifdef KVM_CAP_IRQ_ROUTING @@ -854,10 +855,16 @@ struct kvm_irq_routing_s390_adapter { __u32 adapter_id; }; +struct kvm_irq_routing_hv_sint { + __u32 vcpu; + __u32 sint; +}; + /* gsi routing entry types */ #define KVM_IRQ_ROUTING_IRQCHIP 1 #define KVM_IRQ_ROUTING_MSI 2 #define KVM_IRQ_ROUTING_S390_ADAPTER 3 +#define KVM_IRQ_ROUTING_HV_SINT 4 struct kvm_irq_routing_entry { __u32 gsi; @@ -868,6 +875,7 @@ struct kvm_irq_routing_entry { struct kvm_irq_routing_irqchip irqchip; struct kvm_irq_routing_msi msi; struct kvm_irq_routing_s390_adapter adapter; + struct kvm_irq_routing_hv_sint hv_sint; __u32 pad[8]; } u; }; -- cgit v0.10.2 From db3975717ac5e2c2761bae7b90c4f2e0abb5ef22 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Tue, 10 Nov 2015 15:36:35 +0300 Subject: kvm/x86: Hyper-V kvm exit A new vcpu exit is introduced to notify the userspace of the changes in Hyper-V SynIC configuration triggered by guest writing to the corresponding MSRs. Changes v4: * exit into userspace only if guest writes into SynIC MSR's Changes v3: * added KVM_EXIT_HYPERV types and structs notes into docs Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 88af846..053f613 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3337,6 +3337,28 @@ the userspace IOAPIC should process the EOI and retrigger the interrupt if it is still asserted. Vector is the LAPIC interrupt vector for which the EOI was received. + struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; + }; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; +Indicates that the VCPU exits into userspace to process some tasks +related to Hyper-V emulation. +Valid values for 'type' are: + KVM_EXIT_HYPERV_SYNIC -- synchronously notify user-space about +Hyper-V SynIC state change. 
Notification is used to remap SynIC +event/message pages and to enable/disable SynIC messages/events processing +in userspace. + /* Fix the size of the union. */ char padding[256]; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index bab47b6..f608e17 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -393,6 +393,7 @@ struct kvm_vcpu_hv { u64 hv_vapic; s64 runtime_offset; struct kvm_vcpu_hv_synic synic; + struct kvm_hyperv_exit exit; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 83a3c0c..41869a9 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -130,6 +130,20 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) srcu_read_unlock(&kvm->irq_srcu, idx); } +static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; + + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC; + hv_vcpu->exit.u.synic.msr = msr; + hv_vcpu->exit.u.synic.control = synic->control; + hv_vcpu->exit.u.synic.evt_page = synic->evt_page; + hv_vcpu->exit.u.synic.msg_page = synic->msg_page; + + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); +} + static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 data, bool host) { @@ -145,6 +159,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, switch (msr) { case HV_X64_MSR_SCONTROL: synic->control = data; + if (!host) + synic_exit(synic, msr); break; case HV_X64_MSR_SVERSION: if (!host) { @@ -161,6 +177,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, break; } synic->evt_page = data; + if (!host) + synic_exit(synic, msr); break; case HV_X64_MSR_SIMP: if (data & HV_SYNIC_SIMP_ENABLE) @@ -170,6 +188,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, break; } synic->msg_page = data; + if (!host) + synic_exit(synic, msr); break; case HV_X64_MSR_EOM: { int i; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eb64377ed..036e4bc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6482,6 +6482,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_EXIT, vcpu)) { + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv = vcpu->arch.hyperv.exit; + r = 0; + goto out; + } } /* diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebaf2f8..14f9596 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -143,6 +143,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_REQ_HV_RESET 29 +#define KVM_REQ_HV_EXIT 30 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 27ce460..6e32f75 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -154,6 +154,20 @@ struct kvm_s390_skeys { __u32 flags; __u32 reserved[9]; }; + +struct kvm_hyperv_exit { +#define KVM_EXIT_HYPERV_SYNIC 1 + __u32 type; + union { + struct { + __u32 msr; + __u64 control; + __u64 evt_page; + __u64 msg_page; + } synic; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -184,6 +198,7 @@ struct kvm_s390_skeys { #define KVM_EXIT_SYSTEM_EVENT 24 #define KVM_EXIT_S390_STSI 25 #define KVM_EXIT_IOAPIC_EOI 26 +#define KVM_EXIT_HYPERV 27 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. 
*/ @@ -338,6 +353,8 @@ struct kvm_run { struct { __u8 vector; } eoi; + /* KVM_EXIT_HYPERV */ + struct kvm_hyperv_exit hyperv; /* Fix the size of the union. */ char padding[256]; }; -- cgit v0.10.2 From 9dbe6cf941a6fe82933aef565e4095fb10f65023 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 14:49:17 +0100 Subject: KVM: x86: expose MSR_TSC_AUX to userspace If we do not do this, it is not properly saved and restored across migration. Windows notices due to its self-protection mechanisms, and is very upset about it (blue screen of death). Cc: Radim Krcmar Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 036e4bc..f1d6501 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -951,7 +951,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -4028,16 +4028,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } -- cgit v0.10.2 From 46896c73c1a4dde527c3a3cc43379deeb41985a1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 14:49:16 +0100 Subject: KVM: svm: add support for RDTSCP RDTSCP was never supported for AMD CPUs, which nobody noticed because Linux does not use it. But exactly the fact that Linux does not use it makes the implementation very simple; we can freely trash MSR_TSC_AUX while running the guest. Cc: Joerg Roedel Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2401fc8..af34215 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -86,6 +86,7 @@ static const u32 host_save_user_msrs[] = { MSR_FS_BASE, #endif MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_TSC_AUX, }; #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs) @@ -135,6 +136,7 @@ struct vcpu_svm { uint64_t asid_generation; uint64_t sysenter_esp; uint64_t sysenter_eip; + uint64_t tsc_aux; u64 next_rip; @@ -1238,6 +1240,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio); } } + /* This assumes that the kernel never uses MSR_TSC_AUX */ + if (static_cpu_has(X86_FEATURE_RDTSCP)) + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) @@ -3024,6 +3029,11 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_SYSENTER_ESP: msr_info->data = svm->sysenter_esp; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + msr_info->data = svm->tsc_aux; + break; /* * Nobody will change the following 5 values in the VMCB so we can * safely return them on rdmsr. 
They will always be 0 until LBRV is @@ -3145,6 +3155,18 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) svm->sysenter_esp = data; svm->vmcb->save.sysenter_esp = data; break; + case MSR_TSC_AUX: + if (!boot_cpu_has(X86_FEATURE_RDTSCP)) + return 1; + + /* + * This is rare, so we update the MSR here instead of using + * direct_access_msrs. Doing that would require a rdmsr in + * svm_vcpu_put. + */ + svm->tsc_aux = data; + wrmsrl(MSR_TSC_AUX, svm->tsc_aux); + break; case MSR_IA32_DEBUGCTLMSR: if (!boot_cpu_has(X86_FEATURE_LBRV)) { vcpu_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n", @@ -4041,7 +4063,7 @@ static int svm_get_lpage_level(void) static bool svm_rdtscp_supported(void) { - return false; + return boot_cpu_has(X86_FEATURE_RDTSCP); } static bool svm_invpcid_supported(void) -- cgit v0.10.2 From aba2f06c070f604e388cf77b1dcc7f4cf4577eb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 12 Nov 2015 16:42:18 +0100 Subject: KVM: x86: correctly print #AC in traces Poor #AC was so unimportant until a few days ago that we were not even tracing its name correctly. But now it's all over the place. Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 1203025..ab9ae67 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: -- cgit v0.10.2 From 0e3d0648bd903ff6cda7499f9349a2ce612bccb4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 13 Nov 2015 11:52:45 +0100 Subject: KVM: x86: MMU: always set accessed bit in shadow PTEs Commit 7a1638ce4220 ("nEPT: Redefine EPT-specific link_shadow_page()", 2013-08-05) says: Since nEPT doesn't support A/D bit, we should not set those bit when building the shadow page table. but this is not necessary. Even though nEPT doesn't support A/D bits, and hence the vmcs12 EPT pointer will never enable them, we always use them for shadow page tables if available (see construct_eptp in vmx.c). So we can set the A/D bits freely in the shadow page table. This patch hence basically reverts commit 7a1638ce4220. 
Cc: Yang Zhang Cc: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e7c2c14..276d2f2 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2198,7 +2198,7 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) +static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) { u64 spte; @@ -2206,10 +2206,7 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp, bool accessed) VMX_EPT_WRITABLE_MASK != PT_WRITABLE_MASK); spte = __pa(sp->spt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | - shadow_user_mask | shadow_x_mask; - - if (accessed) - spte |= shadow_accessed_mask; + shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); } @@ -2740,7 +2737,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - link_shadow_page(iterator.sptep, sp, true); + link_shadow_page(iterator.sptep, sp); } } return emulate; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 3058a22..d8fdc5c 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + link_shadow_page(it.sptep, sp); } for (; @@ -618,7 +618,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp, PT_GUEST_ACCESSED_MASK); + link_shadow_page(it.sptep, sp); } clear_sp_write_flooding_count(it.sptep); -- cgit v0.10.2 From 4f52696a6c4d9b1449c462546f1318935c6973db Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 15 Nov 2015 10:40:36 +0100 Subject: KVM-async_pf: Delete an unnecessary check before the function call "kmem_cache_destroy" The kmem_cache_destroy() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Paolo Bonzini diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 77d42be..3531599 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -57,8 +57,7 @@ int kvm_async_pf_init(void) void kvm_async_pf_deinit(void) { - if (async_pf_cache) - kmem_cache_destroy(async_pf_cache); + kmem_cache_destroy(async_pf_cache); async_pf_cache = NULL; } -- cgit v0.10.2 From 33e941547923283f7f1022f3c35359ea9403d9a4 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Sat, 14 Nov 2015 11:21:06 +0800 Subject: KVM: kvm_is_visible_gfn can be boolean This patch makes kvm_is_visible_gfn return bool due to this particular function only using either one or zero as its return value. No functional change. 
Signed-off-by: Yaowei Bai Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 14f9596..2911919 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -641,7 +641,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); -int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); +bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn); unsigned long kvm_host_page_size(struct kvm *kvm, gfn_t gfn); void mark_page_dirty(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 484079e..73cbb41 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1164,15 +1164,15 @@ struct kvm_memory_slot *kvm_vcpu_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn return __gfn_to_memslot(kvm_vcpu_memslots(vcpu), gfn); } -int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) +bool kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); if (!memslot || memslot->id >= KVM_USER_MEM_SLOTS || memslot->flags & KVM_MEMSLOT_INVALID) - return 0; + return false; - return 1; + return true; } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); -- cgit v0.10.2 From 08ff0d5e63b4f360091dd171b0ede1a3361227a1 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Sat, 14 Nov 2015 11:21:07 +0800 Subject: KVM: kvm_para_has_feature can be boolean This patch makes kvm_para_has_feature return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h index 00a97bb..35e568f 100644 --- a/include/linux/kvm_para.h +++ b/include/linux/kvm_para.h @@ -4,10 +4,8 @@ #include -static inline int kvm_para_has_feature(unsigned int feature) +static inline bool kvm_para_has_feature(unsigned int feature) { - if (kvm_arch_para_features() & (1UL << feature)) - return 1; - return 0; + return !!(kvm_arch_para_features() & (1UL << feature)); } #endif /* __LINUX_KVM_PARA_H */ -- cgit v0.10.2 From 378b417d652c4ff20be3144b7064e3a4ecd2571d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 16 Nov 2015 11:10:24 +0800 Subject: KVM: powerpc: kvmppc_visible_gpa can be boolean In another patch kvm_is_visible_gfn is maken return bool due to this function only returns zero or one as its return value, let's also make kvmppc_visible_gpa return bool to keep consistent. No functional change. 
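The !!(...) normalization seen in kvm_para_has_feature() above is the usual idiom for collapsing a masked bit test into a bool; a minimal self-contained illustration (not kernel code):

	#include <stdbool.h>

	/* mirrors the kvm_para_has_feature() idiom */
	static bool has_feature(unsigned long features, unsigned int feature)
	{
		return !!(features & (1UL << feature));	/* any non-zero mask -> true */
	}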
Signed-off-by: Yaowei Bai Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 64891b0..70fb08d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -512,7 +512,7 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) +static bool kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; @@ -521,7 +521,7 @@ static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) gpa &= ~0xFFFULL; if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { - return 1; + return true; } return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); -- cgit v0.10.2 From 018aabb56d6109c8f12397c24e59f67c58870ac1 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:41:28 +0900 Subject: KVM: x86: MMU: Encapsulate the type of rmap-chain head in a new struct New struct kvm_rmap_head makes the code type-safe to some extent. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f608e17..8140077 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -214,6 +214,10 @@ union kvm_mmu_page_role { }; }; +struct kvm_rmap_head { + unsigned long val; +}; + struct kvm_mmu_page { struct list_head link; struct hlist_node hash_link; @@ -231,7 +235,7 @@ struct kvm_mmu_page { bool unsync; int root_count; /* Currently serving as active root */ unsigned int unsync_children; - unsigned long parent_ptes; /* Reverse mapping for parent_pte */ + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ /* The page is obsolete if mmu_valid_gen != kvm->arch.mmu_valid_gen. */ unsigned long mmu_valid_gen; @@ -606,7 +610,7 @@ struct kvm_lpage_info { }; struct kvm_arch_memory_slot { - unsigned long *rmap[KVM_NR_PAGE_SIZES]; + struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 276d2f2..d9a6801 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -909,36 +909,35 @@ static int mapping_level(struct kvm_vcpu *vcpu, gfn_t large_gfn, } /* - * Pte mapping structures: + * About rmap_head encoding: * - * If pte_list bit zero is zero, then pte_list point to the spte. - * - * If pte_list bit zero is one, (then pte_list & ~1) points to a struct + * If the bit zero of rmap_head->val is clear, then it points to the only spte + * in this rmap chain. Otherwise, (rmap_head->val & ~1) points to a struct * pte_list_desc containing more mappings. - * - * Returns the number of pte entries before the spte was added or zero if - * the spte was not added. - * + */ + +/* + * Returns the number of pointers in the rmap chain, not counting the new one. 
*/ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, - unsigned long *pte_list) + struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; int i, count = 0; - if (!*pte_list) { + if (!rmap_head->val) { rmap_printk("pte_list_add: %p %llx 0->1\n", spte, *spte); - *pte_list = (unsigned long)spte; - } else if (!(*pte_list & 1)) { + rmap_head->val = (unsigned long)spte; + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_add: %p %llx 1->many\n", spte, *spte); desc = mmu_alloc_pte_list_desc(vcpu); - desc->sptes[0] = (u64 *)*pte_list; + desc->sptes[0] = (u64 *)rmap_head->val; desc->sptes[1] = spte; - *pte_list = (unsigned long)desc | 1; + rmap_head->val = (unsigned long)desc | 1; ++count; } else { rmap_printk("pte_list_add: %p %llx many->many\n", spte, *spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc->sptes[PTE_LIST_EXT-1] && desc->more) { desc = desc->more; count += PTE_LIST_EXT; @@ -955,8 +954,9 @@ static int pte_list_add(struct kvm_vcpu *vcpu, u64 *spte, } static void -pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, - int i, struct pte_list_desc *prev_desc) +pte_list_desc_remove_entry(struct kvm_rmap_head *rmap_head, + struct pte_list_desc *desc, int i, + struct pte_list_desc *prev_desc) { int j; @@ -967,43 +967,43 @@ pte_list_desc_remove_entry(unsigned long *pte_list, struct pte_list_desc *desc, if (j != 0) return; if (!prev_desc && !desc->more) - *pte_list = (unsigned long)desc->sptes[0]; + rmap_head->val = (unsigned long)desc->sptes[0]; else if (prev_desc) prev_desc->more = desc->more; else - *pte_list = (unsigned long)desc->more | 1; + rmap_head->val = (unsigned long)desc->more | 1; mmu_free_pte_list_desc(desc); } -static void pte_list_remove(u64 *spte, unsigned long *pte_list) +static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) { struct pte_list_desc *desc; struct pte_list_desc *prev_desc; int i; - if (!*pte_list) { + if (!rmap_head->val) { printk(KERN_ERR "pte_list_remove: %p 0->BUG\n", spte); BUG(); - } else if (!(*pte_list & 1)) { + } else if (!(rmap_head->val & 1)) { rmap_printk("pte_list_remove: %p 1->0\n", spte); - if ((u64 *)*pte_list != spte) { + if ((u64 *)rmap_head->val != spte) { printk(KERN_ERR "pte_list_remove: %p 1->BUG\n", spte); BUG(); } - *pte_list = 0; + rmap_head->val = 0; } else { rmap_printk("pte_list_remove: %p many->many\n", spte); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); prev_desc = NULL; while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) + for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) { if (desc->sptes[i] == spte) { - pte_list_desc_remove_entry(pte_list, - desc, i, - prev_desc); + pte_list_desc_remove_entry(rmap_head, + desc, i, prev_desc); return; } + } prev_desc = desc; desc = desc->more; } @@ -1013,18 +1013,18 @@ static void pte_list_remove(u64 *spte, unsigned long *pte_list) } typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) +static void pte_list_walk(struct kvm_rmap_head *rmap_head, pte_list_walk_fn fn) { struct pte_list_desc *desc; int i; - if (!*pte_list) + if (!rmap_head->val) return; - if (!(*pte_list & 1)) - return fn((u64 *)*pte_list); + if (!(rmap_head->val & 1)) + return fn((u64 *)rmap_head->val); - desc = (struct pte_list_desc *)(*pte_list & ~1ul); + desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); while (desc) { for (i = 0; i 
< PTE_LIST_EXT && desc->sptes[i]; ++i) fn(desc->sptes[i]); @@ -1032,8 +1032,8 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn) } } -static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, - struct kvm_memory_slot *slot) +static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, + struct kvm_memory_slot *slot) { unsigned long idx; @@ -1041,10 +1041,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level, return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; } -/* - * Take gfn and return the reverse mapping to it. - */ -static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, struct kvm_mmu_page *sp) +static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, + struct kvm_mmu_page *sp) { struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -1065,24 +1063,24 @@ static bool rmap_can_add(struct kvm_vcpu *vcpu) static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { struct kvm_mmu_page *sp; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); kvm_mmu_page_set_gfn(sp, spte - sp->spt, gfn); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); - return pte_list_add(vcpu, spte, rmapp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); + return pte_list_add(vcpu, spte, rmap_head); } static void rmap_remove(struct kvm *kvm, u64 *spte) { struct kvm_mmu_page *sp; gfn_t gfn; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; sp = page_header(__pa(spte)); gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt); - rmapp = gfn_to_rmap(kvm, gfn, sp); - pte_list_remove(spte, rmapp); + rmap_head = gfn_to_rmap(kvm, gfn, sp); + pte_list_remove(spte, rmap_head); } /* @@ -1102,17 +1100,18 @@ struct rmap_iterator { * * Returns sptep if found, NULL otherwise. */ -static u64 *rmap_get_first(unsigned long rmap, struct rmap_iterator *iter) +static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, + struct rmap_iterator *iter) { - if (!rmap) + if (!rmap_head->val) return NULL; - if (!(rmap & 1)) { + if (!(rmap_head->val & 1)) { iter->desc = NULL; - return (u64 *)rmap; + return (u64 *)rmap_head->val; } - iter->desc = (struct pte_list_desc *)(rmap & ~1ul); + iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); iter->pos = 0; return iter->desc->sptes[iter->pos]; } @@ -1146,10 +1145,10 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) return NULL; } -#define for_each_rmap_spte(_rmap_, _iter_, _spte_) \ - for (_spte_ = rmap_get_first(*_rmap_, _iter_); \ - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ - _spte_ = rmap_get_next(_iter_)) +#define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ + for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ + _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ + _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { @@ -1207,14 +1206,15 @@ static bool spte_write_protect(struct kvm *kvm, u64 *sptep, bool pt_protect) return mmu_spte_update(sptep, spte); } -static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp, +static bool __rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, bool pt_protect) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_write_protect(kvm, sptep, pt_protect); return flush; @@ -1231,13 +1231,13 @@ static bool spte_clear_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_clear_dirty(struct kvm *kvm, unsigned 
long *rmapp) +static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_clear_dirty(kvm, sptep); return flush; @@ -1254,13 +1254,13 @@ static bool spte_set_dirty(struct kvm *kvm, u64 *sptep) return mmu_spte_update(sptep, spte); } -static bool __rmap_set_dirty(struct kvm *kvm, unsigned long *rmapp) +static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) flush |= spte_set_dirty(kvm, sptep); return flush; @@ -1280,12 +1280,12 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_write_protect(kvm, rmapp, false); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ mask &= mask - 1; @@ -1305,12 +1305,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn_offset, unsigned long mask) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; while (mask) { - rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); - __rmap_clear_dirty(kvm, rmapp); + rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), + PT_PAGE_TABLE_LEVEL, slot); + __rmap_clear_dirty(kvm, rmap_head); /* clear the first set bit */ mask &= mask - 1; @@ -1342,27 +1342,27 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn) { struct kvm_memory_slot *slot; - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; int i; bool write_protected = false; slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { - rmapp = __gfn_to_rmap(gfn, i, slot); - write_protected |= __rmap_write_protect(vcpu->kvm, rmapp, true); + rmap_head = __gfn_to_rmap(gfn, i, slot); + write_protected |= __rmap_write_protect(vcpu->kvm, rmap_head, true); } return write_protected; } -static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) +static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; bool flush = false; - while ((sptep = rmap_get_first(*rmapp, &iter))) { + while ((sptep = rmap_get_first(rmap_head, &iter))) { BUG_ON(!(*sptep & PT_PRESENT_MASK)); rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); @@ -1373,14 +1373,14 @@ static bool kvm_zap_rmapp(struct kvm *kvm, unsigned long *rmapp) return flush; } -static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { - return kvm_zap_rmapp(kvm, rmapp); + return kvm_zap_rmapp(kvm, rmap_head); } -static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1395,7 +1395,7 
@@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp, new_pfn = pte_pfn(*ptep); restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", sptep, *sptep, gfn, level); @@ -1433,11 +1433,11 @@ struct slot_rmap_walk_iterator { /* output fields. */ gfn_t gfn; - unsigned long *rmap; + struct kvm_rmap_head *rmap; int level; /* private field. */ - unsigned long *end_rmap; + struct kvm_rmap_head *end_rmap; }; static void @@ -1496,7 +1496,7 @@ static int kvm_handle_hva_range(struct kvm *kvm, unsigned long end, unsigned long data, int (*handler)(struct kvm *kvm, - unsigned long *rmapp, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, @@ -1540,7 +1540,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, unsigned long data, - int (*handler)(struct kvm *kvm, unsigned long *rmapp, + int (*handler)(struct kvm *kvm, + struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data)) @@ -1563,7 +1564,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp); } -static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1573,18 +1574,19 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, BUG_ON(!shadow_accessed_mask); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; clear_bit((ffs(shadow_accessed_mask) - 1), (unsigned long *)sptep); } + } trace_kvm_age_page(gfn, level, slot, young); return young; } -static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, +static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct kvm_memory_slot *slot, gfn_t gfn, int level, unsigned long data) { @@ -1600,11 +1602,12 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, if (!shadow_accessed_mask) goto out; - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (*sptep & shadow_accessed_mask) { young = 1; break; } + } out: return young; } @@ -1613,14 +1616,14 @@ out: static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *sp; sp = page_header(__pa(spte)); - rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp); + rmap_head = gfn_to_rmap(vcpu->kvm, gfn, sp); - kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, gfn, sp->role.level, 0); + kvm_unmap_rmapp(vcpu->kvm, rmap_head, NULL, gfn, sp->role.level, 0); kvm_flush_remote_tlbs(vcpu->kvm); } @@ -1737,7 +1740,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, * this feature. See the comments in kvm_zap_obsolete_pages(). 
*/ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - sp->parent_ptes = 0; + sp->parent_ptes.val = 0; mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; @@ -2277,7 +2280,7 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) u64 *sptep; struct rmap_iterator iter; - while ((sptep = rmap_get_first(sp->parent_ptes, &iter))) + while ((sptep = rmap_get_first(&sp->parent_ptes, &iter))) drop_parent_pte(sp, sptep); } @@ -4492,7 +4495,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu) } /* The return value indicates if tlb flush on all vcpus is needed. */ -typedef bool (*slot_level_handler) (struct kvm *kvm, unsigned long *rmap); +typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head); /* The caller should hold mmu-lock before calling this function. */ static bool @@ -4586,9 +4589,10 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) spin_unlock(&kvm->mmu_lock); } -static bool slot_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp) +static bool slot_rmap_write_protect(struct kvm *kvm, + struct kvm_rmap_head *rmap_head) { - return __rmap_write_protect(kvm, rmapp, false); + return __rmap_write_protect(kvm, rmap_head, false); } void kvm_mmu_slot_remove_write_access(struct kvm *kvm, @@ -4624,7 +4628,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, } static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, - unsigned long *rmapp) + struct kvm_rmap_head *rmap_head) { u64 *sptep; struct rmap_iterator iter; @@ -4633,7 +4637,7 @@ static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm, struct kvm_mmu_page *sp; restart: - for_each_rmap_spte(rmapp, &iter, sptep) { + for_each_rmap_spte(rmap_head, &iter, sptep) { sp = page_header(__pa(sptep)); pfn = spte_to_pfn(*sptep); diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index 03d518e..f7b0488 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -129,7 +129,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) { static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 10); - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; struct kvm_mmu_page *rev_sp; struct kvm_memslots *slots; struct kvm_memory_slot *slot; @@ -150,8 +150,8 @@ static void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep) return; } - rmapp = __gfn_to_rmap(gfn, rev_sp->role.level, slot); - if (!*rmapp) { + rmap_head = __gfn_to_rmap(gfn, rev_sp->role.level, slot); + if (!rmap_head->val) { if (!__ratelimit(&ratelimit_state)) return; audit_printk(kvm, "no rmap for writable spte %llx\n", @@ -192,7 +192,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) { - unsigned long *rmapp; + struct kvm_rmap_head *rmap_head; u64 *sptep; struct rmap_iterator iter; struct kvm_memslots *slots; @@ -203,13 +203,14 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmapp = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); + rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); - for_each_rmap_spte(rmapp, &iter, sptep) + for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) audit_printk(kvm, "shadow page has writable " "mappings: gfn %llx role %x\n", sp->gfn, sp->role.word); + } } static void 
audit_sp(struct kvm *kvm, struct kvm_mmu_page *sp) -- cgit v0.10.2 From 7ee0e5b29d275ac299cdf8ef67e60bf1648c8c6a Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:42:23 +0900 Subject: KVM: x86: MMU: Remove unused parameter of __direct_map() Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d9a6801..8a1593f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2708,9 +2708,8 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) __direct_pte_prefetch(vcpu, sp, sptep); } -static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, - int map_writable, int level, gfn_t gfn, pfn_t pfn, - bool prefault) +static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, + int level, gfn_t gfn, pfn_t pfn, bool prefault) { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; @@ -3018,11 +3017,9 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, v, write, map_writable, level, gfn, pfn, - prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); - return r; out_unlock: @@ -3531,8 +3528,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code, make_mmu_pages_available(vcpu); if (likely(!force_pt_level)) transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); - r = __direct_map(vcpu, gpa, write, map_writable, - level, gfn, pfn, prefault); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); spin_unlock(&vcpu->kvm->mmu_lock); return r; -- cgit v0.10.2 From fd9514572f721acbabb0ff24f6b5294a2449d492 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:43:13 +0900 Subject: KVM: x86: MMU: Add helper function to clear a bit in unsync child bitmap Both __mmu_unsync_walk() and mmu_pages_clear_parents() have three line code which clears a bit in the unsync child bitmap; the former places it inside a loop block and uses a few goto statements to jump to it. A new helper function, clear_unsync_child_bit(), makes the code cleaner. 
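Condensed from the diff that follows, each call site turns from a goto into a plain call:

	/* before: jump to a shared label at the bottom of the loop */
	if (!is_shadow_present_pte(ent) || is_large_pte(ent))
		goto clear_child_bitmap;
	...
clear_child_bitmap:
	__clear_bit(i, sp->unsync_child_bitmap);
	sp->unsync_children--;
	WARN_ON((int)sp->unsync_children < 0);

	/* after: the shared three lines live in clear_unsync_child_bit() */
	if (!is_shadow_present_pte(ent) || is_large_pte(ent)) {
		clear_unsync_child_bit(sp, i);
		continue;
	}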
Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8a1593f..9832bc9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1809,6 +1809,13 @@ static int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, return (pvec->nr == KVM_PAGE_ARRAY_NR); } +static inline void clear_unsync_child_bit(struct kvm_mmu_page *sp, int idx) +{ + --sp->unsync_children; + WARN_ON((int)sp->unsync_children < 0); + __clear_bit(idx, sp->unsync_child_bitmap); +} + static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_pages *pvec) { @@ -1818,8 +1825,10 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, struct kvm_mmu_page *child; u64 ent = sp->spt[i]; - if (!is_shadow_present_pte(ent) || is_large_pte(ent)) - goto clear_child_bitmap; + if (!is_shadow_present_pte(ent) || is_large_pte(ent)) { + clear_unsync_child_bit(sp, i); + continue; + } child = page_header(ent & PT64_BASE_ADDR_MASK); @@ -1828,28 +1837,21 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, return -ENOSPC; ret = __mmu_unsync_walk(child, pvec); - if (!ret) - goto clear_child_bitmap; - else if (ret > 0) + if (!ret) { + clear_unsync_child_bit(sp, i); + continue; + } else if (ret > 0) { nr_unsync_leaf += ret; - else + } else return ret; } else if (child->unsync) { nr_unsync_leaf++; if (mmu_pages_add(pvec, child, i)) return -ENOSPC; } else - goto clear_child_bitmap; - - continue; - -clear_child_bitmap: - __clear_bit(i, sp->unsync_child_bitmap); - sp->unsync_children--; - WARN_ON((int)sp->unsync_children < 0); + clear_unsync_child_bit(sp, i); } - return nr_unsync_leaf; } @@ -2012,9 +2014,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents) if (!sp) return; - --sp->unsync_children; - WARN_ON((int)sp->unsync_children < 0); - __clear_bit(idx, sp->unsync_child_bitmap); + clear_unsync_child_bit(sp, idx); level++; } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); } -- cgit v0.10.2 From 029499b477389f7d6486c8c759a8498bcfecf322 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:44:05 +0900 Subject: KVM: x86: MMU: Make mmu_set_spte() return emulate value mmu_set_spte()'s code is based on the assumption that the emulate parameter has a valid pointer value if set_spte() returns true and write_fault is not zero. In other cases, emulate may be NULL, so a NULL-check is needed. Stop passing emulate pointer and make mmu_set_spte() return the emulate value instead to clean up this complex interface. Prefetch functions can just throw away the return value. 
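Condensed from the diff below, the call-site change is:

	/* before: out-parameter that may legitimately be NULL on prefetch paths */
	int emulate = 0;
	mmu_set_spte(vcpu, sptep, access, write_fault, &emulate, level, gfn, pfn,
		     speculative, host_writable);

	/* after: the result is the return value; prefetch callers simply ignore it */
	bool emulate = mmu_set_spte(vcpu, sptep, access, write_fault, level, gfn,
				    pfn, speculative, host_writable);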
Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9832bc9..74c120c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2564,13 +2564,13 @@ done: return ret; } -static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, - unsigned pte_access, int write_fault, int *emulate, - int level, gfn_t gfn, pfn_t pfn, bool speculative, - bool host_writable) +static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, + int write_fault, int level, gfn_t gfn, pfn_t pfn, + bool speculative, bool host_writable) { int was_rmapped = 0; int rmap_count; + bool emulate = false; pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); @@ -2600,12 +2600,12 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_spte(vcpu, sptep, pte_access, level, gfn, pfn, speculative, true, host_writable)) { if (write_fault) - *emulate = 1; + emulate = true; kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); } - if (unlikely(is_mmio_spte(*sptep) && emulate)) - *emulate = 1; + if (unlikely(is_mmio_spte(*sptep))) + emulate = true; pgprintk("%s: setting spte %llx\n", __func__, *sptep); pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", @@ -2624,6 +2624,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, } kvm_release_pfn_clean(pfn); + + return emulate; } static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, @@ -2658,9 +2660,8 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu, return -1; for (i = 0; i < ret; i++, gfn++, start++) - mmu_set_spte(vcpu, start, access, 0, NULL, - sp->role.level, gfn, page_to_pfn(pages[i]), - true, true); + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); return 0; } @@ -2721,9 +2722,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { - mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, - write, &emulate, level, gfn, pfn, - prefault, map_writable); + emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, + write, level, gfn, pfn, prefault, + map_writable); direct_pte_prefetch(vcpu, iterator.sptep); ++vcpu->stat.pf_fixed; break; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d8fdc5c..11650ea 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -475,8 +475,8 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. 
*/ - mmu_set_spte(vcpu, spte, pte_access, 0, NULL, PT_PAGE_TABLE_LEVEL, - gfn, pfn, true, true); + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); return true; } @@ -556,7 +556,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, struct kvm_mmu_page *sp = NULL; struct kvm_shadow_walk_iterator it; unsigned direct_access, access = gw->pt_access; - int top_level, emulate = 0; + int top_level, emulate; direct_access = gw->pte_access; @@ -622,8 +622,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, } clear_sp_write_flooding_count(it.sptep); - mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, &emulate, - it.level, gw->gfn, pfn, prefault, map_writable); + emulate = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, + it.level, gw->gfn, pfn, prefault, map_writable); FNAME(pte_prefetch)(vcpu, gw, it.sptep); return emulate; -- cgit v0.10.2 From afd28fe1c901429eba8957f54bdb4a13cc15ae44 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:44:55 +0900 Subject: KVM: x86: MMU: Remove is_rmap_spte() and use is_shadow_present_pte() is_rmap_spte(), originally named is_rmap_pte(), was introduced when the simple reverse mapping was implemented by commit cd4a4e5374110444 ("[PATCH] KVM: MMU: Implement simple reverse mapping"). At that point, its role was clear and only rmap_add() and rmap_remove() were using it to select sptes that need to be reverse-mapped. Independently of that, is_shadow_present_pte() was first introduced by commit c7addb902054195b ("KVM: Allow not-present guest page faults to bypass kvm") to do bypass_guest_pf optimization, which does not exist any more. These two seem to have changed their roles somewhat, and is_rmap_spte() just calls is_shadow_present_pte() now. Since using both of them without clear distinction just makes the code confusing, remove is_rmap_spte(). Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 74c120c..3104748 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -311,11 +311,6 @@ static int is_large_pte(u64 pte) return pte & PT_PAGE_SIZE_MASK; } -static int is_rmap_spte(u64 pte) -{ - return is_shadow_present_pte(pte); -} - static int is_last_spte(u64 pte, int level) { if (level == PT_PAGE_TABLE_LEVEL) @@ -540,7 +535,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) u64 old_spte = *sptep; bool ret = false; - WARN_ON(!is_rmap_spte(new_spte)); + WARN_ON(!is_shadow_present_pte(new_spte)); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); @@ -595,7 +590,7 @@ static int mmu_spte_clear_track_bits(u64 *sptep) else old_spte = __update_clear_spte_slow(sptep, 0ull); - if (!is_rmap_spte(old_spte)) + if (!is_shadow_present_pte(old_spte)) return 0; pfn = spte_to_pfn(old_spte); @@ -2575,7 +2570,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__, *sptep, write_fault, gfn); - if (is_rmap_spte(*sptep)) { + if (is_shadow_present_pte(*sptep)) { /* * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. @@ -2919,7 +2914,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * If the mapping has been changed, let the vcpu fault on the * same address again. 
*/ - if (!is_rmap_spte(spte)) { + if (!is_shadow_present_pte(spte)) { ret = true; goto exit; } diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index f7b0488..1cee3ec 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -183,7 +183,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) return; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { - if (!is_rmap_spte(sp->spt[i])) + if (!is_shadow_present_pte(sp->spt[i])) continue; inspect_spte_has_rmap(kvm, sp->spt + i); -- cgit v0.10.2 From 77fbbbd2f09fae486190bb2bd7142647dc2a6e8b Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:45:44 +0900 Subject: KVM: x86: MMU: Consolidate BUG_ON checks for reverse-mapped sptes At some call sites of rmap_get_first() and rmap_get_next(), BUG_ON is placed right after the call to detect unrelated sptes which must not be found in the reverse-mapping list. Move this check in rmap_get_first/next() so that all call sites, not just the users of the for_each_rmap_spte() macro, will be checked the same way. One thing to keep in mind is that kvm_mmu_unlink_parents() also uses rmap_get_first() to handle parent sptes. The change will not break it because parent sptes are present, at least until drop_parent_pte() actually unlinks them, and not mmio-sptes. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index 3a4d681..daf9c0f 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -203,10 +203,10 @@ Shadow pages contain the following information: page cannot be destroyed. See role.invalid. parent_ptes: The reverse mapping for the pte/ptes pointing at this page's spt. If - parent_ptes bit 0 is zero, only one spte points at this pages and + parent_ptes bit 0 is zero, only one spte points at this page and parent_ptes points at this single spte, otherwise, there exists multiple sptes pointing at this page and (parent_ptes & ~0x1) points at a data - structure with a list of parent_ptes. + structure with a list of parent sptes. unsync: If true, then the translations in this page may not match the guest's translation. 
This is equivalent to the state of the tlb when a pte is diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3104748..5b249d4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1098,17 +1098,23 @@ struct rmap_iterator { static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, struct rmap_iterator *iter) { + u64 *sptep; + if (!rmap_head->val) return NULL; if (!(rmap_head->val & 1)) { iter->desc = NULL; - return (u64 *)rmap_head->val; + sptep = (u64 *)rmap_head->val; + goto out; } iter->desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); iter->pos = 0; - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } /* @@ -1118,14 +1124,14 @@ static u64 *rmap_get_first(struct kvm_rmap_head *rmap_head, */ static u64 *rmap_get_next(struct rmap_iterator *iter) { + u64 *sptep; + if (iter->desc) { if (iter->pos < PTE_LIST_EXT - 1) { - u64 *sptep; - ++iter->pos; sptep = iter->desc->sptes[iter->pos]; if (sptep) - return sptep; + goto out; } iter->desc = iter->desc->more; @@ -1133,17 +1139,20 @@ static u64 *rmap_get_next(struct rmap_iterator *iter) if (iter->desc) { iter->pos = 0; /* desc->sptes[0] cannot be NULL */ - return iter->desc->sptes[iter->pos]; + sptep = iter->desc->sptes[iter->pos]; + goto out; } } return NULL; +out: + BUG_ON(!is_shadow_present_pte(*sptep)); + return sptep; } #define for_each_rmap_spte(_rmap_head_, _iter_, _spte_) \ for (_spte_ = rmap_get_first(_rmap_head_, _iter_); \ - _spte_ && ({BUG_ON(!is_shadow_present_pte(*_spte_)); 1;}); \ - _spte_ = rmap_get_next(_iter_)) + _spte_; _spte_ = rmap_get_next(_iter_)) static void drop_spte(struct kvm *kvm, u64 *sptep) { @@ -1358,7 +1367,6 @@ static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head) bool flush = false; while ((sptep = rmap_get_first(rmap_head, &iter))) { - BUG_ON(!(*sptep & PT_PRESENT_MASK)); rmap_printk("%s: spte %p %llx.\n", __func__, sptep, *sptep); drop_spte(kvm, sptep); -- cgit v0.10.2 From 4700579241d2d587765a58dddd1b2a89902767c0 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 20 Nov 2015 17:46:29 +0900 Subject: KVM: x86: MMU: Move initialization of parent_ptes out from kvm_mmu_alloc_page() Make kvm_mmu_alloc_page() do just what its name tells to do, and remove the extra allocation error check and zero-initialization of parent_ptes: shadow page headers allocated by kmem_cache_zalloc() are always in the per-VCPU pools. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5b249d4..7f46e3e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1726,8 +1726,7 @@ static void drop_parent_pte(struct kvm_mmu_page *sp, mmu_spte_clear_no_track(parent_pte); } -static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, - u64 *parent_pte, int direct) +static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct) { struct kvm_mmu_page *sp; @@ -1743,8 +1742,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, * this feature. See the comments in kvm_zap_obsolete_pages(). 
*/ list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); - sp->parent_ptes.val = 0; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); kvm_mod_used_mmu_pages(vcpu->kvm, +1); return sp; } @@ -2133,10 +2130,13 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, trace_kvm_mmu_get_page(sp, false); return sp; } + ++vcpu->kvm->stat.mmu_cache_miss; - sp = kvm_mmu_alloc_page(vcpu, parent_pte, direct); - if (!sp) - return sp; + + sp = kvm_mmu_alloc_page(vcpu, direct); + + mmu_page_add_parent_pte(vcpu, sp, parent_pte); + sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, -- cgit v0.10.2 From 98bba238429e200521594ed30dd1edad7faa0081 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 26 Nov 2015 21:14:34 +0900 Subject: KVM: x86: MMU: Move parent_pte handling from kvm_mmu_get_page() to link_shadow_page() Every time kvm_mmu_get_page() is called with a non-NULL parent_pte argument, link_shadow_page() follows that to set the parent entry so that the new mapping will point to the returned page table. Moving parent_pte handling there allows to clean up the code because parent_pte is passed to kvm_mmu_get_page() just for mark_unsync() and mmu_page_add_parent_pte(). In addition, the patch avoids calling mark_unsync() for other parents in the sp->parent_ptes chain than the newly added parent_pte, because they have been there since before the current page fault handling started. Signed-off-by: Takuya Yoshikawa Cc: Xiao Guangrong Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7f46e3e..ec61b22 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2119,12 +2119,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, if (sp->unsync && kvm_sync_page_transient(vcpu, sp)) break; - mmu_page_add_parent_pte(vcpu, sp, parent_pte); - if (sp->unsync_children) { + if (sp->unsync_children) kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_mmu_mark_parents_unsync(sp); - } else if (sp->unsync) - kvm_mmu_mark_parents_unsync(sp); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); @@ -2135,8 +2131,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, sp = kvm_mmu_alloc_page(vcpu, direct); - mmu_page_add_parent_pte(vcpu, sp, parent_pte); - sp->gfn = gfn; sp->role = role; hlist_add_head(&sp->hash_link, @@ -2204,7 +2198,8 @@ static void shadow_walk_next(struct kvm_shadow_walk_iterator *iterator) return __shadow_walk_next(iterator, *iterator->sptep); } -static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) +static void link_shadow_page(struct kvm_vcpu *vcpu, u64 *sptep, + struct kvm_mmu_page *sp) { u64 spte; @@ -2215,6 +2210,11 @@ static void link_shadow_page(u64 *sptep, struct kvm_mmu_page *sp) shadow_user_mask | shadow_x_mask | shadow_accessed_mask; mmu_spte_set(sptep, spte); + + mmu_page_add_parent_pte(vcpu, sp, sptep); + + if (sp->unsync_children || sp->unsync) + mark_unsync(sptep); } static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, @@ -2273,11 +2273,6 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm, mmu_page_zap_pte(kvm, sp, sp->spt + i); } -static void kvm_mmu_put_page(struct kvm_mmu_page *sp, u64 *parent_pte) -{ - mmu_page_remove_parent_pte(sp, parent_pte); -} - static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) { u64 *sptep; @@ -2743,7 +2738,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, iterator.level - 1, 1, ACC_ALL, iterator.sptep); - link_shadow_page(iterator.sptep, sp); + 
link_shadow_page(vcpu, iterator.sptep, sp); } } return emulate; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 11650ea..0dcf9c8 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -598,7 +598,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, goto out_gpte_changed; if (sp) - link_shadow_page(it.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); } for (; @@ -618,7 +618,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, true, direct_access, it.sptep); - link_shadow_page(it.sptep, sp); + link_shadow_page(vcpu, it.sptep, sp); } clear_sp_write_flooding_count(it.sptep); @@ -629,8 +629,6 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, return emulate; out_gpte_changed: - if (sp) - kvm_mmu_put_page(sp, it.sptep); kvm_release_pfn_clean(pfn); return 0; } -- cgit v0.10.2 From 74c4e63ab9b550b47ca2eb192e52d6c8971e2f31 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 26 Nov 2015 21:15:38 +0900 Subject: KVM: x86: MMU: Use for_each_rmap_spte macro instead of pte_list_walk() As kvm_mmu_get_page() was changed so that every parent pointer would not get into the sp->parent_ptes chain before the entry pointed to by it was set properly, we can use the for_each_rmap_spte macro instead of pte_list_walk(). Signed-off-by: Takuya Yoshikawa Cc: Xiao Guangrong Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ec61b22..204c7d4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1007,26 +1007,6 @@ static void pte_list_remove(u64 *spte, struct kvm_rmap_head *rmap_head) } } -typedef void (*pte_list_walk_fn) (u64 *spte); -static void pte_list_walk(struct kvm_rmap_head *rmap_head, pte_list_walk_fn fn) -{ - struct pte_list_desc *desc; - int i; - - if (!rmap_head->val) - return; - - if (!(rmap_head->val & 1)) - return fn((u64 *)rmap_head->val); - - desc = (struct pte_list_desc *)(rmap_head->val & ~1ul); - while (desc) { - for (i = 0; i < PTE_LIST_EXT && desc->sptes[i]; ++i) - fn(desc->sptes[i]); - desc = desc->more; - } -} - static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, struct kvm_memory_slot *slot) { @@ -1749,7 +1729,12 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, int direct static void mark_unsync(u64 *spte); static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp) { - pte_list_walk(&sp->parent_ptes, mark_unsync); + u64 *sptep; + struct rmap_iterator iter; + + for_each_rmap_spte(&sp->parent_ptes, &iter, sptep) { + mark_unsync(sptep); + } } static void mark_unsync(u64 *spte) -- cgit v0.10.2 From bb11c6c96544737aede6a2eb92e5c6bc8b46534b Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Thu, 26 Nov 2015 21:16:35 +0900 Subject: KVM: x86: MMU: Remove unused parameter parent_pte from kvm_mmu_get_page() Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 204c7d4..a1a3d19 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2071,8 +2071,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, gva_t gaddr, unsigned level, int direct, - unsigned access, - u64 *parent_pte) + unsigned access) { union kvm_mmu_page_role role; unsigned quadrant; @@ -2720,8 +2719,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, base_addr &= PT64_LVL_ADDR_MASK(iterator.level); pseudo_gfn = base_addr >> PAGE_SHIFT; sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, - 
iterator.level - 1, - 1, ACC_ALL, iterator.sptep); + iterator.level - 1, 1, ACC_ALL); link_shadow_page(vcpu, iterator.sptep, sp); } @@ -3078,8 +3076,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, - 1, ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, 1, ACC_ALL); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu.root_hpa = __pa(sp->spt); @@ -3091,9 +3088,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, - PT32_ROOT_LEVEL, 1, ACC_ALL, - NULL); + i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3130,7 +3125,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, - 0, ACC_ALL, NULL); + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); @@ -3163,9 +3158,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) } spin_lock(&vcpu->kvm->mmu_lock); make_mmu_pages_available(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, - PT32_ROOT_LEVEL, 0, - ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, + 0, ACC_ALL); root = __pa(sp->spt); ++sp->root_count; spin_unlock(&vcpu->kvm->mmu_lock); diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 0dcf9c8..91e939b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -587,7 +587,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (!is_shadow_present_pte(*it.sptep)) { table_gfn = gw->table_gfn[it.level - 2]; sp = kvm_mmu_get_page(vcpu, table_gfn, addr, it.level-1, - false, access, it.sptep); + false, access); } /* @@ -617,7 +617,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, - true, direct_access, it.sptep); + true, direct_access); link_shadow_page(vcpu, it.sptep, sp); } -- cgit v0.10.2 From e09fefdeeb517ff653516dea8a882ce001e99237 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: KVM: Use common function for VCPU lookup by id Let's reuse the new common function for VPCU lookup by id. 
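The common helper referred to here, kvm_get_vcpu_by_id() (split out into a separate patch, as noted above), is essentially a linear scan over the VCPU array. Roughly (a sketch; the exact body lives in include/linux/kvm_host.h and is also visible as context in the follow-up patch below):

	static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
	{
		struct kvm_vcpu *vcpu;
		int i;

		kvm_for_each_vcpu(i, vcpu, kvm)
			if (vcpu->vcpu_id == id)
				return vcpu;
		return NULL;
	}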
Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split out the new function into a separate patch] diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 54b45b7..a29da44 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -308,16 +308,10 @@ static void kvmppc_dump_regs(struct kvm_vcpu *vcpu) static struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) { - int r; - struct kvm_vcpu *v, *ret = NULL; + struct kvm_vcpu *ret; mutex_lock(&kvm->lock); - kvm_for_each_vcpu(r, v, kvm) { - if (v->vcpu_id == id) { - ret = v; - break; - } - } + ret = kvm_get_vcpu_by_id(kvm, id); mutex_unlock(&kvm->lock); return ret; } diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5fbfb88..05f7de9 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -155,10 +155,8 @@ static int __diag_time_slice_end(struct kvm_vcpu *vcpu) static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) { - struct kvm *kvm = vcpu->kvm; struct kvm_vcpu *tcpu; int tid; - int i; tid = vcpu->run->s.regs.gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4]; vcpu->stat.diagnose_9c++; @@ -167,12 +165,9 @@ static int __diag_time_slice_end_directed(struct kvm_vcpu *vcpu) if (tid == vcpu->vcpu_id) return 0; - kvm_for_each_vcpu(i, tcpu, kvm) - if (tcpu->vcpu_id == tid) { - kvm_vcpu_yield_to(tcpu); - break; - } - + tcpu = kvm_get_vcpu_by_id(vcpu->kvm, tid); + if (tcpu) + kvm_vcpu_yield_to(tcpu); return 0; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 73cbb41..9649a42 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2257,7 +2257,7 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) { int r; - struct kvm_vcpu *vcpu, *v; + struct kvm_vcpu *vcpu; if (id >= KVM_MAX_VCPUS) return -EINVAL; @@ -2281,12 +2281,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) r = -EINVAL; goto unlock_vcpu_destroy; } - - kvm_for_each_vcpu(r, v, kvm) - if (v->vcpu_id == id) { - r = -EEXIST; - goto unlock_vcpu_destroy; - } + if (kvm_get_vcpu_by_id(kvm, id)) { + r = -EEXIST; + goto unlock_vcpu_destroy; + } BUG_ON(kvm->vcpus[atomic_read(&kvm->online_vcpus)]); -- cgit v0.10.2 From c896939f7cff767091b5d84587cd144e5d3613b7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:55:08 +0100 Subject: KVM: use heuristic for fast VCPU lookup by id Usually, VCPU ids match the array index. So let's try a fast lookup first before falling back to the slow iteration. Suggested-by: Christian Borntraeger Reviewed-by: Dominik Dingel Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2911919..a754fc0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -472,6 +472,11 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) struct kvm_vcpu *vcpu; int i; + if (id < 0 || id >= KVM_MAX_VCPUS) + return NULL; + vcpu = kvm_get_vcpu(kvm, id); + if (vcpu && vcpu->vcpu_id == id) + return vcpu; kvm_for_each_vcpu(i, vcpu, kvm) if (vcpu->vcpu_id == id) return vcpu; -- cgit v0.10.2 From 4bd33b568855f5483a6c6d7e4706ef507ab8586b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 14 Oct 2015 12:37:35 +0200 Subject: KVM: Remove unnecessary debugfs dentry references KVM creates debugfs files to export VM statistics to userland. 
To be able to remove them on kvm exit it tracks the files' dentries. Since their parent directory is also tracked and since each parent direntry knows its children we can easily remove them by using debugfs_remove_recursive(kvm_debugfs_dir). Therefore we don't need the extra tracking in the kvm_stats_debugfs_item anymore. Signed-off-by: Janosch Frank Reviewed-By: Sascha Silbe Acked-by: Christian Borntraeger Signed-off-by: Christian Borntraeger diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a754fc0..590c46e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1016,7 +1016,6 @@ struct kvm_stats_debugfs_item { const char *name; int offset; enum kvm_stat_kind kind; - struct dentry *dentry; }; extern struct kvm_stats_debugfs_item debugfs_entries[]; extern struct dentry *kvm_debugfs_dir; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9649a42..be3cef1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3447,10 +3447,9 @@ static int kvm_init_debug(void) goto out; for (p = debugfs_entries; p->name; ++p) { - p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, - (void *)(long)p->offset, - stat_fops[p->kind]); - if (p->dentry == NULL) + if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir, + (void *)(long)p->offset, + stat_fops[p->kind])) goto out_dir; } @@ -3462,15 +3461,6 @@ out: return r; } -static void kvm_exit_debug(void) -{ - struct kvm_stats_debugfs_item *p; - - for (p = debugfs_entries; p->name; ++p) - debugfs_remove(p->dentry); - debugfs_remove(kvm_debugfs_dir); -} - static int kvm_suspend(void) { if (kvm_usage_count) @@ -3628,7 +3618,7 @@ EXPORT_SYMBOL_GPL(kvm_init); void kvm_exit(void) { - kvm_exit_debug(); + debugfs_remove_recursive(kvm_debugfs_dir); misc_deregister(&kvm_dev); kmem_cache_destroy(kvm_vcpu_cache); kvm_async_pf_deinit(); -- cgit v0.10.2 From 71f116bfedfdd6763f2caf842bf40a6506759029 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 19 Oct 2015 16:24:28 +0200 Subject: KVM: s390: rewrite vcpu_post_run and drop out early Let's rewrite this function to better reflect how we actually handle exit_code. By dropping out early we can save a few cycles. This especially speeds up sie exits caused by host irqs. Also, let's move the special -EOPNOTSUPP for intercepts to the place where it belongs and convert it to -EREMOTE. 
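Condensed from the diff that follows, the rewritten vcpu_post_run() decides each case up front and returns immediately:

	if (vcpu->arch.sie_block->icptcode > 0) {
		/* handle the intercept in-kernel, or prepare a SIEIC exit and return -EREMOTE */
	} else if (exit_reason != -EFAULT) {
		return 0;			/* e.g. a host interrupt: nothing to do */
	} else if (kvm_is_ucontrol(vcpu->kvm)) {
		return -EREMOTE;		/* fault to be resolved by userspace */
	} else if (current->thread.gmap_pfault) {
		/* async page fault if possible, otherwise synchronous fault-in */
	}
	return vcpu_post_run_fault_in_sie(vcpu);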
Reviewed-by: Dominik Dingel Reviewed-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b4a5aa1..d53c107 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -54,9 +54,6 @@ void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc) static int handle_noop(struct kvm_vcpu *vcpu) { switch (vcpu->arch.sie_block->icptcode) { - case 0x0: - vcpu->stat.exit_null++; - break; case 0x10: vcpu->stat.exit_external_request++; break; @@ -338,8 +335,10 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) { + if (kvm_is_ucontrol(vcpu->kvm)) + return -EOPNOTSUPP; + switch (vcpu->arch.sie_block->icptcode) { - case 0x00: case 0x10: case 0x18: return handle_noop(vcpu); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8465892..5c36c8e 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2071,8 +2071,6 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) { - int rc = -1; - VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", vcpu->arch.sie_block->icptcode); trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); @@ -2080,40 +2078,35 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - if (exit_reason >= 0) { - rc = 0; + memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + + if (vcpu->arch.sie_block->icptcode > 0) { + int rc = kvm_handle_sie_intercept(vcpu); + + if (rc != -EOPNOTSUPP) + return rc; + vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC; + vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + return -EREMOTE; + } else if (exit_reason != -EFAULT) { + vcpu->stat.exit_null++; + return 0; } else if (kvm_is_ucontrol(vcpu->kvm)) { vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; vcpu->run->s390_ucontrol.trans_exc_code = current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; - rc = -EREMOTE; - + return -EREMOTE; } else if (current->thread.gmap_pfault) { trace_kvm_s390_major_guest_pfault(vcpu); current->thread.gmap_pfault = 0; - if (kvm_arch_setup_async_pf(vcpu)) { - rc = 0; - } else { - gpa_t gpa = current->thread.gmap_addr; - rc = kvm_arch_fault_in_page(vcpu, gpa, 1); - } - } - - if (rc == -1) - rc = vcpu_post_run_fault_in_sie(vcpu); - - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); - - if (rc == 0) { - if (kvm_is_ucontrol(vcpu->kvm)) - /* Don't exit for host interrupts. */ - rc = vcpu->arch.sie_block->icptcode ? 
-EOPNOTSUPP : 0; - else - rc = kvm_handle_sie_intercept(vcpu); + if (kvm_arch_setup_async_pf(vcpu)) + return 0; + return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1); } - - return rc; + return vcpu_post_run_fault_in_sie(vcpu); } static int __vcpu_run(struct kvm_vcpu *vcpu) @@ -2233,18 +2226,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = 0; } - if (rc == -EOPNOTSUPP) { - /* intercept cannot be handled in-kernel, prepare kvm-run */ - kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; - kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; - kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; - kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; - rc = 0; - } - if (rc == -EREMOTE) { - /* intercept was handled, but userspace support is needed - * kvm_run has been prepared by the handler */ + /* userspace support is needed, kvm_run has been prepared */ rc = 0; } -- cgit v0.10.2 From f7ba1d34263e333e82aa8879028ddd06d6d5f9ac Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Thu, 9 Oct 2014 16:04:48 +0200 Subject: s390/sclp: introduce checks for ESCA and HVS Introduce sclp.has_hvs and sclp.has_esca to provide a way for kvm to check whether the extended-SCA and the home-virtual-SCA facilities are available. Signed-off-by: Eugene (jno) Dvurechenski Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 821dde5..8324abb 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -53,6 +53,8 @@ struct sclp_info { unsigned char has_sigpif : 1; unsigned char has_core_type : 1; unsigned char has_sprp : 1; + unsigned char has_hvs : 1; + unsigned char has_esca : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index 7bc6df3..ff1e1bb 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -43,7 +43,10 @@ struct read_info_sccb { u8 _pad_92[100 - 92]; /* 92-99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ - u8 _pad_112[120 - 112]; /* 112-119 */ + u8 _pad_112[116 - 112]; /* 112-115 */ + u8 fac116; /* 116 */ + u8 _pad_117[119 - 117]; /* 117-118 */ + u8 fac119; /* 119 */ u16 hcpua; /* 120-121 */ u8 _pad_122[4096 - 122]; /* 122-4095 */ } __packed __aligned(PAGE_SIZE); @@ -108,6 +111,8 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.facilities = sccb->facilities; sclp.has_sprp = !!(sccb->fac84 & 0x02); sclp.has_core_type = !!(sccb->fac84 & 0x01); + sclp.has_esca = !!(sccb->fac116 & 0x08); + sclp.has_hvs = !!(sccb->fac119 & 0x80); if (sccb->fac85 & 0x02) S390_lowcore.machine_flags |= MACHINE_FLAG_ESOP; sclp.rnmax = sccb->rnmax ? sccb->rnmax : sccb->rnmax2; -- cgit v0.10.2 From 605145103abb21c555d5982073bee29269aaad51 Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Tue, 21 Apr 2015 14:44:54 +0200 Subject: KVM: s390: Generalize access to IPTE controls This patch generalizes access to the IPTE controls, which is a part of SCA. This is to prepare for upcoming introduction of Extended SCA support. 
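The change itself is mechanical; every caller moves from open-coding the SCA layout to a single accessor, roughly as sketched here (the helper is added to kvm-s390.h in the diff below):

/* before: callers hard-code that ipte_control sits in the basic SCA */
union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control;

/* after: one helper hides where ipte_control lives, so the SCA
 * representation can change later without touching the IPTE lock paths */
union ipte_control *ic = kvm_s390_get_ipte_control(vcpu->kvm);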
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index a7559f7..06f7edb 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,7 +259,7 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = &vcpu->kvm->arch.sca->ipte_control; + union ipte_control *ic = kvm_s390_get_ipte_control(vcpu->kvm); if (vcpu->arch.sie_block->eca & 1) return ic->kh != 0; @@ -274,7 +274,7 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); while (old.k) { @@ -296,7 +296,7 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; @@ -311,7 +311,7 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); while (old.kg) { @@ -328,7 +328,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; - ic = &vcpu->kvm->arch.sca->ipte_control; + ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 1e70e00..844f711 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -340,4 +340,9 @@ void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +/* support for Basic/Extended SCA handling */ +static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) +{ + return &kvm->arch.sca->ipte_control; +} #endif -- cgit v0.10.2 From a5bd764734838da64b37d771e5b7814eb1f61ffd Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Tue, 21 Apr 2015 15:10:10 +0200 Subject: KVM: s390: Generalize access to SIGP controls This patch generalizes access to the SIGP controls, which is a part of SCA. This is to prepare for upcoming introduction of Extended SCA support. 
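As with the IPTE controls, the point is to funnel all SIGP-control manipulation through a small set of helpers so that only they need to know the SCA entry layout. A rough sketch of the resulting call sites (fragments only; the full helpers are in the diff below):

/* injection: set the C bit and source CPU number atomically via cmpxchg */
if (sclp.has_sigpif)
        return sca_inject_ext_call(vcpu, src_id);

/* query: is an external call pending, and if so, from which CPU? */
ext_call_pending = sca_ext_call_pending(vcpu, &scn);

/* teardown: clear the C bit together with CPUSTAT_ECALL_PEND */
sca_clear_ext_call(vcpu);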
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 6a75352..2a4718a 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -34,6 +34,45 @@ #define PFAULT_DONE 0x0680 #define VIRTIO_PARAM 0x0d00 +/* handle external calls via sigp interpretation facility */ +static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + uint8_t sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + if (src_id) + *src_id = sigp_ctrl & SIGP_CTRL_SCN_MASK; + + return sigp_ctrl & SIGP_CTRL_C && + atomic_read(&vcpu->arch.sie_block->cpuflags) & + CPUSTAT_ECALL_PEND; +} + +static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + uint8_t new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); + uint8_t old_val = *sigp_ctrl & ~SIGP_CTRL_C; + + if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { + /* another external call is pending */ + return -EBUSY; + } + atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); + return 0; +} + +static void sca_clear_ext_call(struct kvm_vcpu *vcpu) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; + uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + + atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + *sigp_ctrl = 0; +} + int psw_extint_disabled(struct kvm_vcpu *vcpu) { return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); @@ -792,13 +831,11 @@ static const deliver_irq_t deliver_irq_funcs[] = { int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (!sclp.has_sigpif) return test_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs); - return (sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND); + return sca_ext_call_pending(vcpu, NULL); } int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop) @@ -909,9 +946,7 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu) memset(&li->irq, 0, sizeof(li->irq)); spin_unlock(&li->lock); - /* clear pending external calls set by sigp interpretation facility */ - atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl = 0; + sca_clear_ext_call(vcpu); } int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) @@ -1003,21 +1038,6 @@ static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return 0; } -static int __inject_extcall_sigpif(struct kvm_vcpu *vcpu, uint16_t src_id) -{ - unsigned char new_val, old_val; - uint8_t *sigp_ctrl = &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; - - new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - old_val = *sigp_ctrl & ~SIGP_CTRL_C; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { - /* another external call is pending */ - return -EBUSY; - } - atomic_or(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags); - return 0; -} - static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; @@ -1034,7 +1054,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) return -EINVAL; if (sclp.has_sigpif) - return __inject_extcall_sigpif(vcpu, src_id); + return 
sca_inject_ext_call(vcpu, src_id); if (test_and_set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs)) return -EBUSY; @@ -2203,7 +2223,7 @@ static void store_local_irq(struct kvm_s390_local_interrupt *li, int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) { - uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int scn; unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)]; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; unsigned long pending_irqs; @@ -2243,14 +2263,12 @@ int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len) } } - if ((sigp_ctrl & SIGP_CTRL_C) && - (atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND)) { + if (sca_ext_call_pending(vcpu, &scn)) { if (n + sizeof(irq) > len) return -ENOBUFS; memset(&irq, 0, sizeof(irq)); irq.type = KVM_S390_INT_EXTERNAL_CALL; - irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK; + irq.u.extcall.code = scn; if (copy_to_user(&buf[n], &irq, sizeof(irq))) return -EFAULT; n += sizeof(irq); -- cgit v0.10.2 From a6e2f683e7691949d33ca9392e7807cfa9aca34e Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Tue, 21 Apr 2015 15:31:59 +0200 Subject: KVM: s390: Provide SCA-aware helpers for VCPU add/del This patch provides SCA-aware helpers to create/delete a VCPU. This is to prepare for upcoming introduction of Extended SCA support. Signed-off-by: Eugene (jno) Dvurechenski Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c36c8e..8ddd488 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -283,6 +283,8 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, } /* Section: vm related */ +static void sca_del_vcpu(struct kvm_vcpu *vcpu); + /* * Get (and clear) the dirty memory log for a memory slot. 
*/ @@ -1189,11 +1191,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_is_ucontrol(vcpu->kvm)) { - clear_bit(63 - vcpu->vcpu_id, - (unsigned long *) &vcpu->kvm->arch.sca->mcn); - if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda == - (__u64) vcpu->arch.sie_block) - vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0; + sca_del_vcpu(vcpu); } smp_mb(); @@ -1249,6 +1247,32 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) return 0; } +static void sca_del_vcpu(struct kvm_vcpu *vcpu) +{ + struct sca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) + sca->cpu[vcpu->vcpu_id].sda = 0; +} + +static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, + unsigned int id) +{ + struct sca_block *sca = kvm->arch.sca; + + if (!sca->cpu[id].sda) + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(id, (unsigned long *) &sca->mcn); +} + +static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) +{ + return id < KVM_MAX_VCPUS; +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID; @@ -1465,7 +1489,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (id >= KVM_MAX_VCPUS) + if (!sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; @@ -1487,13 +1511,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, WARN_ON_ONCE(1); goto out_free_cpu; } - if (!kvm->arch.sca->cpu[id].sda) - kvm->arch.sca->cpu[id].sda = - (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = - (__u32)(((__u64)kvm->arch.sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; - set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); + sca_add_vcpu(vcpu, kvm, id); } spin_lock_init(&vcpu->arch.local_int.lock); -- cgit v0.10.2 From bc784ccee5eb9ae1e737927eb9d8a0fbf7601abc Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Thu, 23 Apr 2015 16:09:06 +0200 Subject: KVM: s390: Introduce new structures This patch adds new structures and updates some existing ones to provide the base for Extended SCA functionality. The old sca_* structures were renamed to bsca_* to keep things uniform. The access to fields of SIGP controls were turned into bitfields instead of hardcoded bitmasks. 
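The bitfield conversion is easiest to see on the basic-format SIGP control byte. Assuming the big-endian bit ordering used on s390 (as in the patch), the union makes the old mask arithmetic explicit; an illustrative sketch:

union bsca_sigp_ctrl {
        __u8 value;
        struct {
                __u8 c : 1;     /* the old SIGP_CTRL_C bit (0x80) */
                __u8 r : 1;
                __u8 scn : 6;   /* the old (value & SIGP_CTRL_SCN_MASK) field */
        };
} __packed;

union bsca_sigp_ctrl ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl;
int pending, src_id;

/* old style, with hardcoded masks */
pending = ctrl.value & SIGP_CTRL_C;             /* 0x80 */
src_id  = ctrl.value & SIGP_CTRL_SCN_MASK;      /* 0x3f */

/* new style, with named bitfields */
pending = ctrl.c;
src_id  = ctrl.scn;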
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index efaac2c..923b13d 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -25,7 +25,9 @@ #include #include -#define KVM_MAX_VCPUS 64 +#define KVM_S390_BSCA_CPU_SLOTS 64 +#define KVM_S390_ESCA_CPU_SLOTS 248 +#define KVM_MAX_VCPUS KVM_S390_BSCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* @@ -40,9 +42,34 @@ #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f -struct sca_entry { +union bsca_sigp_ctrl { + __u8 value; + struct { + __u8 c : 1; + __u8 r : 1; + __u8 scn : 6; + }; +} __packed; + +union esca_sigp_ctrl { + __u16 value; + struct { + __u8 c : 1; + __u8 reserved: 7; + __u8 scn; + }; +} __packed; + +struct esca_entry { + union esca_sigp_ctrl sigp_ctrl; + __u16 reserved1[3]; + __u64 sda; + __u64 reserved2[6]; +} __packed; + +struct bsca_entry { __u8 reserved0; - __u8 sigp_ctrl; + union bsca_sigp_ctrl sigp_ctrl; __u16 reserved[3]; __u64 sda; __u64 reserved2[2]; @@ -57,14 +84,22 @@ union ipte_control { }; }; -struct sca_block { +struct bsca_block { union ipte_control ipte_control; __u64 reserved[5]; __u64 mcn; __u64 reserved2; - struct sca_entry cpu[64]; + struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS]; } __attribute__((packed)); +struct esca_block { + union ipte_control ipte_control; + __u64 reserved1[7]; + __u64 mcn[4]; + __u64 reserved2[20]; + struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS]; +} __packed; + #define CPUSTAT_STOPPED 0x80000000 #define CPUSTAT_WAIT 0x10000000 #define CPUSTAT_ECALL_PEND 0x08000000 @@ -585,7 +620,7 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct sca_block *sca; + struct bsca_block *sca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 2a4718a..aa221a4 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -37,25 +37,32 @@ /* handle external calls via sigp interpretation facility */ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { - struct sca_block *sca = vcpu->kvm->arch.sca; - uint8_t sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; if (src_id) - *src_id = sigp_ctrl & SIGP_CTRL_SCN_MASK; + *src_id = sigp_ctrl.scn; - return sigp_ctrl & SIGP_CTRL_C && + return sigp_ctrl.c && atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND; } static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) { - struct sca_block *sca = vcpu->kvm->arch.sca; - uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - uint8_t new_val = SIGP_CTRL_C | (src_id & SIGP_CTRL_SCN_MASK); - uint8_t old_val = *sigp_ctrl & ~SIGP_CTRL_C; + int expect, rc; + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; - if (cmpxchg(sigp_ctrl, old_val, new_val) != old_val) { + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + + if (rc != expect) { /* another external call is pending */ return -EBUSY; } @@ -65,12 +72,12 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - struct sca_block *sca = vcpu->kvm->arch.sca; + 
struct bsca_block *sca = vcpu->kvm->arch.sca; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - uint8_t *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - *sigp_ctrl = 0; + sigp_ctrl->value = 0; } int psw_extint_disabled(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8ddd488..c268352 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1100,14 +1100,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; - kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); sca_offset += 16; - if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) sca_offset = 0; - kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); sprintf(debug_name, "kvm-%u", current->pid); @@ -1190,9 +1191,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id); kvm_s390_clear_local_irqs(vcpu); kvm_clear_async_pf_completion_queue(vcpu); - if (!kvm_is_ucontrol(vcpu->kvm)) { + if (!kvm_is_ucontrol(vcpu->kvm)) sca_del_vcpu(vcpu); - } smp_mb(); if (kvm_is_ucontrol(vcpu->kvm)) @@ -1249,7 +1249,7 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { - struct sca_block *sca = vcpu->kvm->arch.sca; + struct bsca_block *sca = vcpu->kvm->arch.sca; clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) @@ -1259,7 +1259,7 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned int id) { - struct sca_block *sca = kvm->arch.sca; + struct bsca_block *sca = kvm->arch.sca; if (!sca->cpu[id].sda) sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 844f711..df1abad 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -343,6 +343,8 @@ void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); /* support for Basic/Extended SCA handling */ static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) { - return &kvm->arch.sca->ipte_control; + struct bsca_block *sca = kvm->arch.sca; /* SCA version doesn't matter */ + + return &sca->ipte_control; } #endif -- cgit v0.10.2 From 7d43bafcff17c7fb07270999d3cf002f1ed6bd3f Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Wed, 22 Apr 2015 17:09:44 +0200 Subject: KVM: s390: Make provisions for ESCA utilization This patch updates the routines (sca_*) to provide transparent access to and manipulation on the data for both Basic and Extended SCA in use. The kvm.arch.sca is generalized to (void *) to handle BSCA/ESCA cases. Also the kvm.arch.use_esca flag is provided. The actual functionality is kept the same. 
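Concretely, each sca_* helper now selects the layout at run time; the pattern repeated throughout the diff below looks roughly like this (sketch):

if (vcpu->kvm->arch.use_esca) {
        struct esca_block *sca = vcpu->kvm->arch.sca;   /* extended format */

        /* operate on sca->cpu[id] via esca_entry / esca_sigp_ctrl */
} else {
        struct bsca_block *sca = vcpu->kvm->arch.sca;   /* basic format */

        /* operate on sca->cpu[id] via bsca_entry / bsca_sigp_ctrl */
}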
Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 923b13d..25fdbf8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -620,7 +620,8 @@ struct kvm_s390_crypto_cb { }; struct kvm_arch{ - struct bsca_block *sca; + void *sca; + int use_esca; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index aa221a4..60b36b0 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -37,30 +37,60 @@ /* handle external calls via sigp interpretation facility */ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { - struct bsca_block *sca = vcpu->kvm->arch.sca; - union bsca_sigp_ctrl sigp_ctrl = sca->cpu[vcpu->vcpu_id].sigp_ctrl; + int c, scn; + + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl sigp_ctrl = + sca->cpu[vcpu->vcpu_id].sigp_ctrl; + + c = sigp_ctrl.c; + scn = sigp_ctrl.scn; + } if (src_id) - *src_id = sigp_ctrl.scn; + *src_id = scn; - return sigp_ctrl.c && - atomic_read(&vcpu->arch.sie_block->cpuflags) & + return c && atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND; } static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) { int expect, rc; - struct bsca_block *sca = vcpu->kvm->arch.sca; - union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); - union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; - new_val.scn = src_id; - new_val.c = 1; - old_val.c = 0; + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; - expect = old_val.value; - rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl new_val = {0}, old_val = *sigp_ctrl; + + new_val.scn = src_id; + new_val.c = 1; + old_val.c = 0; + + expect = old_val.value; + rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); + } if (rc != expect) { /* another external call is pending */ @@ -72,12 +102,28 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) static void sca_clear_ext_call(struct kvm_vcpu *vcpu) { - struct bsca_block *sca = vcpu->kvm->arch.sca; struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - union bsca_sigp_ctrl *sigp_ctrl = &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + int rc, expect; atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); - sigp_ctrl->value = 0; + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + union esca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union esca_sigp_ctrl old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; + union bsca_sigp_ctrl *sigp_ctrl = + &(sca->cpu[vcpu->vcpu_id].sigp_ctrl); + union bsca_sigp_ctrl 
old = *sigp_ctrl; + + expect = old.value; + rc = cmpxchg(&sigp_ctrl->value, old.value, 0); + } + WARN_ON(rc != expect); /* cannot clear? */ } int psw_extint_disabled(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c268352..41b3fed 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1077,6 +1077,15 @@ static int kvm_s390_crypto_init(struct kvm *kvm) return 0; } +static void sca_dispose(struct kvm *kvm) +{ + if (kvm->arch.use_esca) + BUG(); /* not implemented yet */ + else + free_page((unsigned long)(kvm->arch.sca)); + kvm->arch.sca = NULL; +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int i, rc; @@ -1100,6 +1109,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; + kvm->arch.use_esca = 0; /* start with basic SCA */ kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; @@ -1180,7 +1190,7 @@ out_err: kfree(kvm->arch.crypto.crycb); free_page((unsigned long)kvm->arch.model.fac); debug_unregister(kvm->arch.dbf); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); KVM_EVENT(3, "creation of vm failed: %d", rc); return rc; } @@ -1226,7 +1236,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_free_vcpus(kvm); free_page((unsigned long)kvm->arch.model.fac); - free_page((unsigned long)(kvm->arch.sca)); + sca_dispose(kvm); debug_unregister(kvm->arch.dbf); kfree(kvm->arch.crypto.crycb); if (!kvm_is_ucontrol(kvm)) @@ -1249,23 +1259,41 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { - struct bsca_block *sca = vcpu->kvm->arch.sca; + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; + + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); + if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) + sca->cpu[vcpu->vcpu_id].sda = 0; + } else { + struct bsca_block *sca = vcpu->kvm->arch.sca; - clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); - if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) - sca->cpu[vcpu->vcpu_id].sda = 0; + clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); + if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) + sca->cpu[vcpu->vcpu_id].sda = 0; + } } static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned int id) { - struct bsca_block *sca = kvm->arch.sca; + if (kvm->arch.use_esca) { + struct esca_block *sca = kvm->arch.sca; - if (!sca->cpu[id].sda) - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; - set_bit_inv(id, (unsigned long *) &sca->mcn); + if (!sca->cpu[id].sda) + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + set_bit_inv(id, (unsigned long *) sca->mcn); + } else { + struct bsca_block *sca = kvm->arch.sca; + + if (!sca->cpu[id].sda) + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + set_bit_inv(id, (unsigned long *) &sca->mcn); + } } static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) @@ -1458,6 +1486,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= 0x10; vcpu->arch.sie_block->ecb2 = 8; + if (vcpu->kvm->arch.use_esca) + vcpu->arch.sie_block->ecb2 |= 4; 
vcpu->arch.sie_block->eca = 0xC1002000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; -- cgit v0.10.2 From 5e0443152367ab9fef597a41a4e09a32df2bf887 Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Wed, 22 Apr 2015 18:08:39 +0200 Subject: KVM: s390: Introduce switching code This patch adds code that performs transparent switch to Extended SCA on addition of 65th VCPU in a VM. Disposal of ESCA is added too. The entier ESCA functionality, however, is still not enabled. The enablement will be provided in a separate patch. This patch also uses read/write lock protection of SCA and its subfields for possible disposal at the BSCA-to-ESCA transition. While only Basic SCA needs such a protection (for the swap), any SCA access is now guarded. Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 25fdbf8..86c3386 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -622,6 +622,7 @@ struct kvm_s390_crypto_cb { struct kvm_arch{ void *sca; int use_esca; + rwlock_t sca_lock; debug_info_t *dbf; struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 06f7edb..d30db40 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -259,10 +259,14 @@ struct aste { int ipte_lock_held(struct kvm_vcpu *vcpu) { - union ipte_control *ic = kvm_s390_get_ipte_control(vcpu->kvm); + if (vcpu->arch.sie_block->eca & 1) { + int rc; - if (vcpu->arch.sie_block->eca & 1) - return ic->kh != 0; + read_lock(&vcpu->kvm->arch.sca_lock); + rc = kvm_s390_get_ipte_control(vcpu->kvm)->kh != 0; + read_unlock(&vcpu->kvm->arch.sca_lock); + return rc; + } return vcpu->kvm->arch.ipte_lock_count != 0; } @@ -274,16 +278,20 @@ static void ipte_lock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count++; if (vcpu->kvm->arch.ipte_lock_count > 1) goto out; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.k) { + if (old.k) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); } @@ -296,12 +304,14 @@ static void ipte_unlock_simple(struct kvm_vcpu *vcpu) vcpu->kvm->arch.ipte_lock_count--; if (vcpu->kvm->arch.ipte_lock_count) goto out; + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); new = old; new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); wake_up(&vcpu->kvm->arch.ipte_wq); out: mutex_unlock(&vcpu->kvm->arch.ipte_mutex); @@ -311,23 +321,28 @@ static void ipte_lock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; +retry: + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); do { old = READ_ONCE(*ic); - while (old.kg) { + if (old.kg) { + read_unlock(&vcpu->kvm->arch.sca_lock); cond_resched(); - old = READ_ONCE(*ic); + goto retry; } new = old; new.k = 1; new.kh++; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); } static void ipte_unlock_siif(struct kvm_vcpu *vcpu) { union ipte_control old, new, *ic; + read_lock(&vcpu->kvm->arch.sca_lock); ic = kvm_s390_get_ipte_control(vcpu->kvm); 
do { old = READ_ONCE(*ic); @@ -336,6 +351,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu) if (!new.kh) new.k = 0; } while (cmpxchg(&ic->val, old.val, new.val) != old.val); + read_unlock(&vcpu->kvm->arch.sca_lock); if (!new.kh) wake_up(&vcpu->kvm->arch.ipte_wq); } diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 60b36b0..831c9ac 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -39,6 +39,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { int c, scn; + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl sigp_ctrl = @@ -54,6 +55,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) c = sigp_ctrl.c; scn = sigp_ctrl.scn; } + read_unlock(&vcpu->kvm->arch.sca_lock); if (src_id) *src_id = scn; @@ -66,6 +68,7 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) { int expect, rc; + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = @@ -91,6 +94,7 @@ static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) expect = old_val.value; rc = cmpxchg(&sigp_ctrl->value, old_val.value, new_val.value); } + read_unlock(&vcpu->kvm->arch.sca_lock); if (rc != expect) { /* another external call is pending */ @@ -106,6 +110,7 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) int rc, expect; atomic_andnot(CPUSTAT_ECALL_PEND, li->cpuflags); + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; union esca_sigp_ctrl *sigp_ctrl = @@ -123,6 +128,7 @@ static void sca_clear_ext_call(struct kvm_vcpu *vcpu) expect = old.value; rc = cmpxchg(&sigp_ctrl->value, old.value, 0); } + read_unlock(&vcpu->kvm->arch.sca_lock); WARN_ON(rc != expect); /* cannot clear? 
*/ } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 41b3fed..5e884aa 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1080,7 +1080,7 @@ static int kvm_s390_crypto_init(struct kvm *kvm) static void sca_dispose(struct kvm *kvm) { if (kvm->arch.use_esca) - BUG(); /* not implemented yet */ + free_pages_exact(kvm->arch.sca, sizeof(struct esca_block)); else free_page((unsigned long)(kvm->arch.sca)); kvm->arch.sca = NULL; @@ -1110,6 +1110,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) rc = -ENOMEM; kvm->arch.use_esca = 0; /* start with basic SCA */ + rwlock_init(&kvm->arch.sca_lock); kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL); if (!kvm->arch.sca) goto out_err; @@ -1259,6 +1260,7 @@ static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu) static void sca_del_vcpu(struct kvm_vcpu *vcpu) { + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; @@ -1272,11 +1274,13 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) sca->cpu[vcpu->vcpu_id].sda = 0; } + read_unlock(&vcpu->kvm->arch.sca_lock); } static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned int id) { + read_lock(&kvm->arch.sca_lock); if (kvm->arch.use_esca) { struct esca_block *sca = kvm->arch.sca; @@ -1294,11 +1298,78 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; set_bit_inv(id, (unsigned long *) &sca->mcn); } + read_unlock(&kvm->arch.sca_lock); +} + +/* Basic SCA to Extended SCA data copy routines */ +static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s) +{ + d->sda = s->sda; + d->sigp_ctrl.c = s->sigp_ctrl.c; + d->sigp_ctrl.scn = s->sigp_ctrl.scn; +} + +static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s) +{ + int i; + + d->ipte_control = s->ipte_control; + d->mcn[0] = s->mcn; + for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++) + sca_copy_entry(&d->cpu[i], &s->cpu[i]); +} + +static int sca_switch_to_extended(struct kvm *kvm) +{ + struct bsca_block *old_sca = kvm->arch.sca; + struct esca_block *new_sca; + struct kvm_vcpu *vcpu; + unsigned int vcpu_idx; + u32 scaol, scaoh; + + new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO); + if (!new_sca) + return -ENOMEM; + + scaoh = (u32)((u64)(new_sca) >> 32); + scaol = (u32)(u64)(new_sca) & ~0x3fU; + + kvm_s390_vcpu_block_all(kvm); + write_lock(&kvm->arch.sca_lock); + + sca_copy_b_to_e(new_sca, old_sca); + + kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) { + vcpu->arch.sie_block->scaoh = scaoh; + vcpu->arch.sie_block->scaol = scaol; + vcpu->arch.sie_block->ecb2 |= 0x04U; + } + kvm->arch.sca = new_sca; + kvm->arch.use_esca = 1; + + write_unlock(&kvm->arch.sca_lock); + kvm_s390_vcpu_unblock_all(kvm); + + free_page((unsigned long)old_sca); + + VM_EVENT(kvm, 2, "Switched to ESCA (%p -> %p)", old_sca, kvm->arch.sca); + return 0; } static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id) { - return id < KVM_MAX_VCPUS; + int rc; + + if (id < KVM_S390_BSCA_CPU_SLOTS) + return true; + if (!sclp.has_esca) + return false; + + mutex_lock(&kvm->lock); + rc = kvm->arch.use_esca ? 
0 : sca_switch_to_extended(kvm); + mutex_unlock(&kvm->lock); + + return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS; } int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From fe0edcb7311e80a349ad5f2b15f2d91a5606ed7f Mon Sep 17 00:00:00 2001 From: "Eugene (jno) Dvurechenski" Date: Wed, 22 Apr 2015 18:37:40 +0200 Subject: KVM: s390: Enable up to 248 VCPUs per VM This patch allows s390 to have more than 64 VCPUs for a guest (up to 248 for memory usage considerations), if supported by the underlaying hardware (sclp.has_esca). Signed-off-by: Eugene (jno) Dvurechenski Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 86c3386..12e9291 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -27,7 +27,7 @@ #define KVM_S390_BSCA_CPU_SLOTS 64 #define KVM_S390_ESCA_CPU_SLOTS 248 -#define KVM_MAX_VCPUS KVM_S390_BSCA_CPU_SLOTS +#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS #define KVM_USER_MEM_SLOTS 32 /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5e884aa..16c19fb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -246,7 +246,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: - r = KVM_MAX_VCPUS; + r = sclp.has_esca ? KVM_S390_ESCA_CPU_SLOTS + : KVM_S390_BSCA_CPU_SLOTS; break; case KVM_CAP_NR_MEMSLOTS: r = KVM_USER_MEM_SLOTS; -- cgit v0.10.2 From 2c1bb2be986c56848ce92ba41ba32fc62ab3165f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 23 Sep 2015 09:45:50 +0200 Subject: KVM: s390: fast path for sca_ext_call_pending If CPUSTAT_ECALL_PEND isn't set, we can't have an external call pending, so we can directly avoid taking the lock. Acked-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 831c9ac..62ec925 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -39,6 +39,9 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) { int c, scn; + if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_ECALL_PEND)) + return 0; + read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; @@ -60,8 +63,7 @@ static int sca_ext_call_pending(struct kvm_vcpu *vcpu, int *src_id) if (src_id) *src_id = scn; - return c && atomic_read(&vcpu->arch.sie_block->cpuflags) & - CPUSTAT_ECALL_PEND; + return c; } static int sca_inject_ext_call(struct kvm_vcpu *vcpu, int src_id) -- cgit v0.10.2 From 5f3fe620a56f2f5c79e89522107f2476a45ed6ce Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 12:34:19 +0200 Subject: KVM: s390: we always have a SCA Having no sca can never happen, even when something goes wrong when switching to ESCA. Otherwise we would have a serious bug. Let's remove this superfluous check. 
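The invariant comes from VM creation earlier in this series: kvm_arch_init_vm() fails before any VCPU can exist if the SCA allocation fails, so a NULL kvm->arch.sca is unreachable by the time a VCPU is created:

/* from kvm_arch_init_vm(): a VM is never created without an SCA */
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
        goto out_err;   /* VM creation fails with -ENOMEM */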
Acked-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 16c19fb..5c58127 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1608,13 +1608,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) { - if (!kvm->arch.sca) { - WARN_ON_ONCE(1); - goto out_free_cpu; - } + if (!kvm_is_ucontrol(kvm)) sca_add_vcpu(vcpu, kvm, id); - } spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; -- cgit v0.10.2 From 2550882449299fd55c8214529cc0777b789db0f7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 16:27:23 +0200 Subject: KVM: s390: fix SCA related races and double use If something goes wrong in kvm_arch_vcpu_create, the VCPU has already been added to the sca but will never be removed. Trying to create VCPUs with duplicate ids (e.g. after a failed attempt) is problematic. Also, when creating multiple VCPUs in parallel, we could theoretically forget to set the correct SCA when the switch to ESCA happens just before the VCPU is registered. Let's add the VCPU to the SCA in kvm_arch_vcpu_postcreate, where we can be sure that no duplicate VCPU with the same id is around and the VCPU has already been registered at the VM. We also have to make sure to update ECB at that point. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5c58127..2ba5978 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1289,6 +1289,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + vcpu->arch.sie_block->ecb2 |= 0x04U; set_bit_inv(id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = kvm->arch.sca; @@ -1493,8 +1494,11 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch; preempt_enable(); mutex_unlock(&vcpu->kvm->lock); - if (!kvm_is_ucontrol(vcpu->kvm)) + if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; + sca_add_vcpu(vcpu, vcpu->kvm, vcpu->vcpu_id); + } + } static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) @@ -1558,8 +1562,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ecb |= 0x10; vcpu->arch.sie_block->ecb2 = 8; - if (vcpu->kvm->arch.use_esca) - vcpu->arch.sie_block->ecb2 |= 4; vcpu->arch.sie_block->eca = 0xC1002000U; if (sclp.has_siif) vcpu->arch.sie_block->eca |= 1; @@ -1608,9 +1610,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; vcpu->arch.sie_block->icpua = id; - if (!kvm_is_ucontrol(kvm)) - sca_add_vcpu(vcpu, kvm, id); - spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; vcpu->arch.local_int.wq = &vcpu->wq; -- cgit v0.10.2 From 10ce32d5b07470c5eb0fa821000c789073128b3f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 12:41:41 +0200 Subject: KVM: s390: always set/clear the SCA sda field Let's always set and clear the sda when enabling/disabling a VCPU. 
Dealing with sda being set to something else makes no sense anymore as we enable a VCPU in the SCA now after it has been registered at the VM. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2ba5978..7e0092b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1266,14 +1266,12 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) struct esca_block *sca = vcpu->kvm->arch.sca; clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); - if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) - sca->cpu[vcpu->vcpu_id].sda = 0; + sca->cpu[vcpu->vcpu_id].sda = 0; } else { struct bsca_block *sca = vcpu->kvm->arch.sca; clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); - if (sca->cpu[vcpu->vcpu_id].sda == (__u64) vcpu->arch.sie_block) - sca->cpu[vcpu->vcpu_id].sda = 0; + sca->cpu[vcpu->vcpu_id].sda = 0; } read_unlock(&vcpu->kvm->arch.sca_lock); } @@ -1285,8 +1283,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, if (kvm->arch.use_esca) { struct esca_block *sca = kvm->arch.sca; - if (!sca->cpu[id].sda) - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; vcpu->arch.sie_block->ecb2 |= 0x04U; @@ -1294,8 +1291,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, } else { struct bsca_block *sca = kvm->arch.sca; - if (!sca->cpu[id].sda) - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; set_bit_inv(id, (unsigned long *) &sca->mcn); -- cgit v0.10.2 From eaa78f343255dabac963ab8c7644367844d18e00 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 16:29:01 +0200 Subject: KVM: s390: cleanup sca_add_vcpu Now that we already have kvm and the VCPU id set for the VCPU, we can convert sda_add_vcpu to look much more like sda_del_vcpu. 
Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7e0092b..d9d71bb 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1276,27 +1276,26 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) read_unlock(&vcpu->kvm->arch.sca_lock); } -static void sca_add_vcpu(struct kvm_vcpu *vcpu, struct kvm *kvm, - unsigned int id) +static void sca_add_vcpu(struct kvm_vcpu *vcpu) { - read_lock(&kvm->arch.sca_lock); - if (kvm->arch.use_esca) { - struct esca_block *sca = kvm->arch.sca; + read_lock(&vcpu->kvm->arch.sca_lock); + if (vcpu->kvm->arch.use_esca) { + struct esca_block *sca = vcpu->kvm->arch.sca; - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; vcpu->arch.sie_block->ecb2 |= 0x04U; - set_bit_inv(id, (unsigned long *) sca->mcn); + set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { - struct bsca_block *sca = kvm->arch.sca; + struct bsca_block *sca = vcpu->kvm->arch.sca; - sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; - set_bit_inv(id, (unsigned long *) &sca->mcn); + set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); } - read_unlock(&kvm->arch.sca_lock); + read_unlock(&vcpu->kvm->arch.sca_lock); } /* Basic SCA to Extended SCA data copy routines */ @@ -1492,7 +1491,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) mutex_unlock(&vcpu->kvm->lock); if (!kvm_is_ucontrol(vcpu->kvm)) { vcpu->arch.gmap = vcpu->kvm->arch.gmap; - sca_add_vcpu(vcpu, vcpu->kvm, vcpu->vcpu_id); + sca_add_vcpu(vcpu); } } -- cgit v0.10.2 From 4215825eeb1f704d1bd327ca01fb753b53ea34d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 12 Oct 2015 12:57:22 +0200 Subject: KVM: s390: don't switch to ESCA for ucontrol sca_add_vpcu is not called for ucontrol guests. We must also not apply the sca checking for sca_can_add_vcpu as ucontrol guests do not have to follow the sca limits. As common code already checks that id < KVM_MAX_VCPUS all other data structures are safe as well. Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d9d71bb..539d385 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1588,7 +1588,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, struct sie_page *sie_page; int rc = -EINVAL; - if (!sca_can_add_vcpu(kvm, id)) + if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id)) goto out; rc = -ENOMEM; -- cgit v0.10.2 From 8dfd523f8523779210038264259546299a8398e9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Feb 2015 15:51:55 +0100 Subject: s390/sclp: introduce check for SIE This patch adds a way to check if the SIE with zArchitecture support is available. 
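A consumer only has to test the new flag; for example, the kvm module load check added later in this series reduces to:

/* SIE support cannot be enabled dynamically, so bail out early when the
 * facility is not reported by SCLP */
if (!sclp.has_sief2) {
        pr_info("SIE not available\n");
        return -ENODEV;
}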
Acked-by: Martin Schwidefsky Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index 8324abb..dea883f 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -29,7 +29,10 @@ struct sclp_ipl_info { struct sclp_core_entry { u8 core_id; - u8 reserved0[2]; + u8 reserved0; + u8 : 4; + u8 sief2 : 1; + u8 : 3; u8 : 3; u8 siif : 1; u8 sigpif : 1; @@ -55,6 +58,7 @@ struct sclp_info { unsigned char has_sprp : 1; unsigned char has_hvs : 1; unsigned char has_esca : 1; + unsigned char has_sief2 : 1; unsigned int ibc; unsigned int mtid; unsigned int mtid_cp; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index ff1e1bb..e0a1f4e 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -136,6 +136,7 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) continue; sclp.has_siif = cpue->siif; sclp.has_sigpif = cpue->sigpif; + sclp.has_sief2 = cpue->sief2; break; } -- cgit v0.10.2 From 7f16d7e787b731d9db273b822b4b8069102e57a6 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Feb 2015 15:54:47 +0100 Subject: s390: show virtualization support in /proc/cpuinfo This patch exposes the SIE capability (aka virtualization support) via /proc/cpuinfo -> "features" as "sie". As we don't want to expose this hwcap via elf, let's add a second, "internal"/non-elf capability list. The content is simply concatenated to the existing features when printing /proc/cpuinfo. We also add the defines to elf.h to keep the hwcap stuff at a common place. Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index bab6739..08e34a5 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -104,6 +104,9 @@ #define HWCAP_S390_TE 1024 #define HWCAP_S390_VXRS 2048 +/* Internal bits, not exposed via elf */ +#define HWCAP_INT_SIE 1UL + /* * These are used to set parameters in the core dumps. */ @@ -169,6 +172,10 @@ extern unsigned int vdso_enabled; extern unsigned long elf_hwcap; #define ELF_HWCAP (elf_hwcap) +/* Internal hardware capabilities, not exposed via elf */ + +extern unsigned long int_hwcap; + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. 
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 7ce00e7..647128d 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -61,6 +61,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) "esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp", "edat", "etf3eh", "highgprs", "te", "vx" }; + static const char * const int_hwcap_str[] = { + "sie" + }; unsigned long n = (unsigned long) v - 1; int i; @@ -75,6 +78,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) for (i = 0; i < ARRAY_SIZE(hwcap_str); i++) if (hwcap_str[i] && (elf_hwcap & (1UL << i))) seq_printf(m, "%s ", hwcap_str[i]); + for (i = 0; i < ARRAY_SIZE(int_hwcap_str); i++) + if (int_hwcap_str[i] && (int_hwcap & (1UL << i))) + seq_printf(m, "%s ", int_hwcap_str[i]); seq_puts(m, "\n"); show_cacheinfo(m); } diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c837bca..dc83ae6 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(console_irq); unsigned long elf_hwcap __read_mostly = 0; char elf_platform[ELF_PLATFORM_SIZE]; +unsigned long int_hwcap = 0; + int __initdata memory_end_set; unsigned long __initdata memory_end; unsigned long __initdata max_physmem_end; @@ -793,6 +795,13 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "z13"); break; } + + /* + * Virtualization support HWCAP_INT_SIE is bit 0. + */ + if (sclp.has_sief2) + int_hwcap |= HWCAP_INT_SIE; + return 0; } arch_initcall(setup_hwcaps); -- cgit v0.10.2 From 07197fd05fa3eb2e8a5aae92938ca5d07bcac9fc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 30 Jan 2015 16:01:38 +0100 Subject: KVM: s390: don't load kvm without virtualization support If we don't have support for virtualization (SIE), e.g. when running under a hypervisor not supporting execution of the SIE instruction, we should immediately abort loading the kvm module, as the SIE instruction cannot be enabled dynamically. Currently, the SIE instructions fails with an exception on a non-SIE host, resulting in the guest making no progress, instead of failing hard. Reviewed-by: Cornelia Huck Acked-by: Martin Schwidefsky Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 539d385..49d3319 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2859,6 +2859,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, static int __init kvm_s390_init(void) { + if (!sclp.has_sief2) { + pr_info("SIE not available\n"); + return -ENODEV; + } + return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); } -- cgit v0.10.2 From a6aacc3f87dfd44425fc17ea9875a5c2ad917227 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 24 Nov 2015 14:28:12 +0100 Subject: KVM: s390: remove pointless test_facility(2) check This evaluates always to 'true'. 
Signed-off-by: Heiko Carstens Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 49d3319..77724ce 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1027,7 +1027,7 @@ static int kvm_s390_apxa_installed(void) u8 config[128]; int cc; - if (test_facility(2) && test_facility(12)) { + if (test_facility(12)) { cc = kvm_s390_query_ap_config(config); if (cc) -- cgit v0.10.2 From 2f8a43d45d14ad62b105ed99151b453c12df7149 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 27 Nov 2015 11:00:54 +0100 Subject: KVM: s390: remove redudant assigment of error code rc already contains -ENOMEM, no need to assign it twice. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck Reviewed-by: David Hildenbrand diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 77724ce..6857262 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1618,10 +1618,8 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, */ vcpu->arch.guest_fpregs.fprs = kzalloc(sizeof(freg_t) * __NUM_FPRS, GFP_KERNEL); - if (!vcpu->arch.guest_fpregs.fprs) { - rc = -ENOMEM; + if (!vcpu->arch.guest_fpregs.fprs) goto out_free_sie_block; - } rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) -- cgit v0.10.2 From 8cdb654abe5730654d0385382c4e877a011bb8c8 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Thu, 12 Nov 2015 19:59:14 +0800 Subject: KVM: arm/arm64: vgic: make vgic_io_ops static vgic_io_ops is only referenced within vgic.c, so it can be declared static. Signed-off-by: Jisheng Zhang Signed-off-by: Christoffer Dall diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 65461f8..0c739a7 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -878,7 +878,7 @@ static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, true); } -struct kvm_io_device_ops vgic_io_ops = { +static struct kvm_io_device_ops vgic_io_ops = { .read = vgic_handle_mmio_read, .write = vgic_handle_mmio_write, }; -- cgit v0.10.2 From b19e6892a90e7c9d15fde0a08516ec891a4e7d54 Mon Sep 17 00:00:00 2001 From: Amit Tomar Date: Thu, 26 Nov 2015 10:09:43 +0000 Subject: KVM: arm/arm64: Count guest exit due to various reasons It would add guest exit statistics to debugfs, this can be helpful while measuring KVM performance. 
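Each counter is exported by adding it to debugfs_entries[]; the VCPU_STAT() wrapper records the field's offset inside struct kvm_vcpu, and the generic code then creates one debugfs file per entry under the kvm directory (typically /sys/kernel/debug/kvm). A sketch of the mechanism used in the diff below:

#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }

struct kvm_stats_debugfs_item debugfs_entries[] = {
        VCPU_STAT(exits),               /* becomes an "exits" file in kvm's debugfs dir */
        VCPU_STAT(mmio_exit_user),
        { NULL }
};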
[ Renamed some of the field names - Christoffer ] Signed-off-by: Amit Singh Tomar Signed-off-by: Christoffer Dall diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6692982..f9f2779 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -150,6 +150,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index e06fd29..8a79a57 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -603,6 +603,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; /* * Back from guest *************************************************************/ diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 96e935b..5fa69d7 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -33,6 +33,12 @@ #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(hvc_exit_stat), + VCPU_STAT(wfe_exit_stat), + VCPU_STAT(wfi_exit_stat), + VCPU_STAT(mmio_exit_user), + VCPU_STAT(mmio_exit_kernel), + VCPU_STAT(exits), { NULL } }; diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 95f12b2..3ede90d 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -42,6 +42,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc(*vcpu_pc(vcpu), *vcpu_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; ret = kvm_psci_call(vcpu); if (ret < 0) { @@ -89,9 +90,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { if (kvm_vcpu_get_hsr(vcpu) & HSR_WFI_IS_WFE) { trace_kvm_wfx(*vcpu_pc(vcpu), true); + vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu); } else { trace_kvm_wfx(*vcpu_pc(vcpu), false); + vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); } diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 3a10c9f..7f33b20 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -210,8 +210,11 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, if (!ret) { /* We handled the access successfully in the kernel. 
*/ + vcpu->stat.mmio_exit_kernel++; kvm_handle_mmio_return(vcpu, run); return 1; + } else { + vcpu->stat.mmio_exit_user++; } run->exit_reason = KVM_EXIT_MMIO; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index a35ce72..19504aa 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -197,6 +197,12 @@ struct kvm_vcpu_stat { u32 halt_successful_poll; u32 halt_attempted_poll; u32 halt_wakeup; + u32 hvc_exit_stat; + u64 wfe_exit_stat; + u64 wfi_exit_stat; + u64 mmio_exit_user; + u64 mmio_exit_kernel; + u64 exits; }; int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index d250160..115522b 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -34,7 +34,16 @@ #include "trace.h" +#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } +#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } + struct kvm_stats_debugfs_item debugfs_entries[] = { + VCPU_STAT(hvc_exit_stat), + VCPU_STAT(wfe_exit_stat), + VCPU_STAT(wfi_exit_stat), + VCPU_STAT(mmio_exit_user), + VCPU_STAT(mmio_exit_kernel), + VCPU_STAT(exits), { NULL } }; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 15f0477..8bddae1 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -39,6 +39,7 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_hvc_arm64(*vcpu_pc(vcpu), vcpu_get_reg(vcpu, 0), kvm_vcpu_hvc_get_imm(vcpu)); + vcpu->stat.hvc_exit_stat++; ret = kvm_psci_call(vcpu); if (ret < 0) { @@ -71,9 +72,11 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) { if (kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WFx_ISS_WFE) { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true); + vcpu->stat.wfe_exit_stat++; kvm_vcpu_on_spin(vcpu); } else { trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); + vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); } -- cgit v0.10.2 From 3600c2fdc09a43a30909743569e35a29121602ed Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Nov 2015 15:09:17 +0000 Subject: arm64: Add macros to read/write system registers Rather than crafting custom macros for reading/writing each system register provide generics accessors, read_sysreg and write_sysreg, for this purpose. Signed-off-by: Mark Rutland Acked-by: Catalin Marinas Cc: Suzuki Poulose Cc: Will Deacon Signed-off-by: Marc Zyngier diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index d48ab5b..4aeebec 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -20,6 +20,8 @@ #ifndef __ASM_SYSREG_H #define __ASM_SYSREG_H +#include + #include /* @@ -208,6 +210,8 @@ #else +#include + asm( " .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" " .equ __reg_num_x\\num, \\num\n" @@ -232,6 +236,23 @@ static inline void config_sctlr_el1(u32 clear, u32 set) val |= set; asm volatile("msr sctlr_el1, %0" : : "r" (val)); } + +/* + * Unlike read_cpuid, calls to read_sysreg are never expected to be + * optimized away or replaced with synthetic values. 
+ */ +#define read_sysreg(r) ({ \ + u64 __val; \ + asm volatile("mrs %0, " __stringify(r) : "=r" (__val)); \ + __val; \ +}) + +#define write_sysreg(v, r) do { \ + u64 __val = (u64)v; \ + asm volatile("msr " __stringify(r) ", %0" \ + : : "r" (__val)); \ +} while (0) + #endif #endif /* __ASM_SYSREG_H */ -- cgit v0.10.2 From c76a0a6695c61088c8d2e731e25305502666bf7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2015 10:09:49 +0100 Subject: arm64: KVM: Add a HYP-specific header file In order to expose the various EL2 services that are private to the hypervisor, add a new hyp.h file. So far, it only contains mundane things such as section annotation and VA manipulation. Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h new file mode 100644 index 0000000..057f483 --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp.h @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ARM64_KVM_HYP_H__ +#define __ARM64_KVM_HYP_H__ + +#include +#include +#include +#include + +#define __hyp_text __section(.hyp.text) notrace + +#define kern_hyp_va(v) (typeof(v))((unsigned long)(v) & HYP_PAGE_OFFSET_MASK) +#define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + + PAGE_OFFSET) + +#endif /* __ARM64_KVM_HYP_H__ */ + -- cgit v0.10.2 From 06282fd2c2bf61619649a2b13e4a08556598a64c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 15:50:37 +0100 Subject: arm64: KVM: Implement vgic-v2 save/restore Implement the vgic-v2 save restore (mostly) as a direct translation of the assembly code version. 
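A minimal illustrative sketch of how these two helpers are intended to be used around a guest run; the wrapper below is hypothetical and only shows the call pattern (the real hook-up into the world switch happens later in this series), and it assumes the vcpu pointer has already been converted to a HYP VA:

#include "hyp.h"

/* Hypothetical wrapper, for illustration only */
static void __hyp_text __vgic_v2_sr_example(struct kvm_vcpu *vcpu)
{
	/* on guest exit: snapshot the GICH_* state into the vcpu */
	__vgic_v2_save_state(vcpu);

	/* ... host-side processing of the virtual interrupt state ... */

	/* before re-entering the guest: write the saved state back */
	__vgic_v2_restore_state(vcpu);
}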
Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 1949fe5..d31e4e5 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,7 @@ KVM=../../../virt/kvm ARM=../../../arch/arm/kvm obj-$(CONFIG_KVM_ARM_HOST) += kvm.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp/ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile new file mode 100644 index 0000000..d8d5968 --- /dev/null +++ b/arch/arm64/kvm/hyp/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for Kernel-based Virtual Machine module, HYP part +# + +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 057f483..ac63553 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -29,5 +29,8 @@ #define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + PAGE_OFFSET) +void __vgic_v2_save_state(struct kvm_vcpu *vcpu); +void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/vgic-v2-sr.c b/arch/arm64/kvm/hyp/vgic-v2-sr.c new file mode 100644 index 0000000..e717612 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v2-sr.c @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include +#include + +#include + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + u32 eisr0, eisr1, elrsr0, elrsr1; + int i, nr_lr; + + if (!base) + return; + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + cpu_if->vgic_vmcr = readl_relaxed(base + GICH_VMCR); + cpu_if->vgic_misr = readl_relaxed(base + GICH_MISR); + eisr0 = readl_relaxed(base + GICH_EISR0); + elrsr0 = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(nr_lr > 32)) { + eisr1 = readl_relaxed(base + GICH_EISR1); + elrsr1 = readl_relaxed(base + GICH_ELRSR1); + } else { + eisr1 = elrsr1 = 0; + } +#ifdef CONFIG_CPU_BIG_ENDIAN + cpu_if->vgic_eisr = ((u64)eisr0 << 32) | eisr1; + cpu_if->vgic_elrsr = ((u64)elrsr0 << 32) | elrsr1; +#else + cpu_if->vgic_eisr = ((u64)eisr1 << 32) | eisr0; + cpu_if->vgic_elrsr = ((u64)elrsr1 << 32) | elrsr0; +#endif + cpu_if->vgic_apr = readl_relaxed(base + GICH_APR); + + writel_relaxed(0, base + GICH_HCR); + + for (i = 0; i < nr_lr; i++) + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); +} + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + struct vgic_dist *vgic = &kvm->arch.vgic; + void __iomem *base = kern_hyp_va(vgic->vctrl_base); + int i, nr_lr; + + if (!base) + return; + + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, base + GICH_APR); + + nr_lr = vcpu->arch.vgic_cpu.nr_lr; + for (i = 0; i < nr_lr; i++) + writel_relaxed(cpu_if->vgic_lr[i], base + GICH_LR0 + (i * 4)); +} -- cgit v0.10.2 From 3c13b8f435acb452eac62d966148a8b6fa92151f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Dec 2015 13:48:56 +0000 Subject: KVM: arm/arm64: vgic-v3: Make the LR indexing macro public We store GICv3 LRs in reverse order so that the CPU can save/restore them in rever order as well (don't ask why, the design is crazy), and yet generate memory traffic that doesn't completely suck. We need this macro to be available to the C version of save/restore. Signed-off-by: Marc Zyngier diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d2f4147..13a3d53 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -279,6 +279,12 @@ struct vgic_v2_cpu_if { u32 vgic_lr[VGIC_V2_MAX_LRS]; }; +/* + * LRs are stored in reverse order in memory. make sure we index them + * correctly. + */ +#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) + struct vgic_v3_cpu_if { #ifdef CONFIG_KVM_ARM_VGIC_V3 u32 vgic_hcr; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 487d635..3813d23 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -36,18 +36,12 @@ #define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) #define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1) -/* - * LRs are stored in reverse order in memory. make sure we index them - * correctly. 
- */ -#define LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) - static u32 ich_vtr_el2; static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) { struct vgic_lr lr_desc; - u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)]; + u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)]; if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) lr_desc.irq = val & ICH_LR_VIRTUALID_MASK; @@ -111,7 +105,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; } - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val; if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); -- cgit v0.10.2 From f68d2b1b73cc3d8f6eb189c11ce79a472ed27c42 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 15:50:58 +0100 Subject: arm64: KVM: Implement vgic-v3 save/restore Implement the vgic-v3 save restore as a direct translation of the assembly code version. Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index d8d5968..d1e38ce 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -3,3 +3,4 @@ # obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index ac63553..5759f9f 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -32,5 +32,8 @@ void __vgic_v2_save_state(struct kvm_vcpu *vcpu); void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 0000000..78d05f3 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "hyp.h" + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) + +#define read_gicreg(r) \ + ({ \ + u64 reg; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ + reg; \ + }) + +#define write_gicreg(v,r) \ + do { \ + u64 __val = (v); \ + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ + } while (0) + +/* vcpu is already in the HYP VA space */ +void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface. 
+ */ + dsb(st); + + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2); + cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2); + cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2); + + write_gicreg(0, ICH_HCR_EL2); + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (max_lr_idx) { + case 15: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2); + case 14: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2); + case 13: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2); + case 12: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2); + case 11: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2); + case 10: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2); + case 9: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2); + case 8: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2); + case 7: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2); + case 6: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2); + case 5: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2); + case 4: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2); + case 3: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2); + case 2: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2); + case 1: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2); + case 0: + cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); + cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + case 6: + cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + default: + cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); + cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + case 6: + cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + default: + cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + isb(); /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + write_gicreg(1, ICC_SRE_EL1); +} + +void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val; + u32 max_lr_idx, nr_pri_bits; + + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC. 
+ */ + write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1); + isb(); + + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + val = read_gicreg(ICH_VTR_EL2); + max_lr_idx = vtr_to_max_lr_idx(val); + nr_pri_bits = vtr_to_nr_pri_bits(val); + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); + write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + } + + switch (nr_pri_bits) { + case 7: + write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); + write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + case 6: + write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + default: + write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + } + + switch (max_lr_idx) { + case 15: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2); + case 14: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2); + case 13: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2); + case 12: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2); + case 11: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2); + case 10: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2); + case 9: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2); + case 8: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2); + case 7: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2); + case 6: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2); + case 5: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2); + case 4: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2); + case 3: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2); + case 2: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2); + case 1: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2); + case 0: + write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2); + } + + /* + * Ensures that the above will have reached the + * (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + isb(); + dsb(sy); + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + if (!cpu_if->vgic_sre) { + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + } +} + +u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} -- cgit v0.10.2 From 1431af367e52b08038e78d346822966d968f1694 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 16:32:20 +0100 Subject: arm64: KVM: Implement timer save/restore Implement the timer save restore as a direct translation of the assembly code version. 
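The CNTHCTL_EL2 handling that follows can be summarised by this hedged sketch; the helper name is hypothetical, but the bit manipulation mirrors what the save/restore functions below actually do with the CNTHCTL_* bits this patch adds to arm_arch_timer.h:

/* Hypothetical helper, for illustration only */
static void __hyp_text __timer_phys_access_example(bool for_host)
{
	u64 val = read_sysreg(cnthctl_el2);

	if (for_host) {
		/* host: allow EL1/EL0 access to both physical timer and counter */
		val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN;
	} else {
		/* guest: keep physical counter reads, trap physical timer programming */
		val &= ~CNTHCTL_EL1PCEN;
		val |= CNTHCTL_EL1PCTEN;
	}
	write_sysreg(val, cnthctl_el2);
}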
Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index d1e38ce..455dc0a 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 5759f9f..f213e46 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -35,5 +35,8 @@ void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +void __timer_save_state(struct kvm_vcpu *vcpu); +void __timer_restore_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 0000000..1051e5d --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +#include + +#include "hyp.h" + +/* vcpu is already in the HYP VA space */ +void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + if (kvm->arch.timer.enabled) { + timer->cntv_ctl = read_sysreg(cntv_ctl_el0); + timer->cntv_cval = read_sysreg(cntv_cval_el0); + } + + /* Disable the virtual timer */ + write_sysreg(0, cntv_ctl_el0); + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); + + /* Clear cntvoff for the host */ + write_sysreg(0, cntvoff_el2); +} + +void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); + + if (kvm->arch.timer.enabled) { + write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2); + write_sysreg(timer->cntv_cval, cntv_cval_el0); + isb(); + write_sysreg(timer->cntv_ctl, cntv_ctl_el0); + } +} diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 9916d0e..25d0914 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -23,6 +23,12 @@ #define ARCH_TIMER_CTRL_IT_MASK (1 << 1) #define ARCH_TIMER_CTRL_IT_STAT (1 << 2) +#define CNTHCTL_EL1PCTEN (1 << 0) +#define CNTHCTL_EL1PCEN (1 << 1) +#define CNTHCTL_EVNTEN (1 << 2) +#define CNTHCTL_EVNTDIR (1 << 3) +#define CNTHCTL_EVNTI (0xF << 4) + enum arch_timer_reg { ARCH_TIMER_REG_CTRL, ARCH_TIMER_REG_TVAL, -- cgit v0.10.2 From 
6d6ec20fcf2830ca10c1b7c8efd7e2592c40e3d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 18:02:48 +0100 Subject: arm64: KVM: Implement system register save/restore Implement the system register save/restore as a direct translation of the assembly code version. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 455dc0a..ec94200 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index f213e46..778d56d 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -38,5 +38,8 @@ void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); void __timer_save_state(struct kvm_vcpu *vcpu); void __timer_restore_state(struct kvm_vcpu *vcpu); +void __sysreg_save_state(struct kvm_cpu_context *ctxt); +void __sysreg_restore_state(struct kvm_cpu_context *ctxt); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c new file mode 100644 index 0000000..add8fcb --- /dev/null +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include +#include + +#include + +#include "hyp.h" + +/* ctxt is already in the HYP VA space */ +void __hyp_text __sysreg_save_state(struct kvm_cpu_context *ctxt) +{ + ctxt->sys_regs[MPIDR_EL1] = read_sysreg(vmpidr_el2); + ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1); + ctxt->sys_regs[SCTLR_EL1] = read_sysreg(sctlr_el1); + ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1); + ctxt->sys_regs[CPACR_EL1] = read_sysreg(cpacr_el1); + ctxt->sys_regs[TTBR0_EL1] = read_sysreg(ttbr0_el1); + ctxt->sys_regs[TTBR1_EL1] = read_sysreg(ttbr1_el1); + ctxt->sys_regs[TCR_EL1] = read_sysreg(tcr_el1); + ctxt->sys_regs[ESR_EL1] = read_sysreg(esr_el1); + ctxt->sys_regs[AFSR0_EL1] = read_sysreg(afsr0_el1); + ctxt->sys_regs[AFSR1_EL1] = read_sysreg(afsr1_el1); + ctxt->sys_regs[FAR_EL1] = read_sysreg(far_el1); + ctxt->sys_regs[MAIR_EL1] = read_sysreg(mair_el1); + ctxt->sys_regs[VBAR_EL1] = read_sysreg(vbar_el1); + ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg(contextidr_el1); + ctxt->sys_regs[TPIDR_EL0] = read_sysreg(tpidr_el0); + ctxt->sys_regs[TPIDRRO_EL0] = read_sysreg(tpidrro_el0); + ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1); + ctxt->sys_regs[AMAIR_EL1] = read_sysreg(amair_el1); + ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg(cntkctl_el1); + ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1); + ctxt->sys_regs[MDSCR_EL1] = read_sysreg(mdscr_el1); + + ctxt->gp_regs.regs.sp = read_sysreg(sp_el0); + ctxt->gp_regs.regs.pc = read_sysreg(elr_el2); + ctxt->gp_regs.regs.pstate = read_sysreg(spsr_el2); + ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1); + ctxt->gp_regs.elr_el1 = read_sysreg(elr_el1); + ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg(spsr_el1); +} + +void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) +{ + write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); + write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); + write_sysreg(ctxt->sys_regs[SCTLR_EL1], sctlr_el1); + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); + write_sysreg(ctxt->sys_regs[CPACR_EL1], cpacr_el1); + write_sysreg(ctxt->sys_regs[TTBR0_EL1], ttbr0_el1); + write_sysreg(ctxt->sys_regs[TTBR1_EL1], ttbr1_el1); + write_sysreg(ctxt->sys_regs[TCR_EL1], tcr_el1); + write_sysreg(ctxt->sys_regs[ESR_EL1], esr_el1); + write_sysreg(ctxt->sys_regs[AFSR0_EL1], afsr0_el1); + write_sysreg(ctxt->sys_regs[AFSR1_EL1], afsr1_el1); + write_sysreg(ctxt->sys_regs[FAR_EL1], far_el1); + write_sysreg(ctxt->sys_regs[MAIR_EL1], mair_el1); + write_sysreg(ctxt->sys_regs[VBAR_EL1], vbar_el1); + write_sysreg(ctxt->sys_regs[CONTEXTIDR_EL1], contextidr_el1); + write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0); + write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0); + write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + write_sysreg(ctxt->sys_regs[AMAIR_EL1], amair_el1); + write_sysreg(ctxt->sys_regs[CNTKCTL_EL1], cntkctl_el1); + write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); + write_sysreg(ctxt->sys_regs[MDSCR_EL1], mdscr_el1); + + write_sysreg(ctxt->gp_regs.regs.sp, sp_el0); + write_sysreg(ctxt->gp_regs.regs.pc, elr_el2); + write_sysreg(ctxt->gp_regs.regs.pstate, spsr_el2); + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); + write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); + write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); +} -- cgit v0.10.2 From c209ec85a2a7d2fd38bca0a44b7e70abd079c178 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 19:28:29 +0100 Subject: arm64: KVM: Implement 32bit system register save/restore Implement the 32bit system register save/restore as a direct translation of the assembly code version. 
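The save/restore below is gated on the guest's register width: HCR_EL2.RW set means the guest runs AArch64 at EL1, so the AArch32-only state (banked SPSRs, DACR32_EL2, IFSR32_EL2, FPEXC32_EL2, DBGVCR32_EL2) does not exist as live state and is skipped. A hedged one-liner capturing that check (the helper name is hypothetical; the patch open-codes the test):

/* Hypothetical helper, for illustration only */
static inline bool __guest_is_aarch32_example(void)
{
	return !(read_sysreg(hcr_el2) & HCR_RW);
}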
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 778d56d..bffd308 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -40,6 +40,8 @@ void __timer_restore_state(struct kvm_vcpu *vcpu); void __sysreg_save_state(struct kvm_cpu_context *ctxt); void __sysreg_restore_state(struct kvm_cpu_context *ctxt); +void __sysreg32_save_state(struct kvm_vcpu *vcpu); +void __sysreg32_restore_state(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index add8fcb..eb05afb 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -88,3 +88,50 @@ void __hyp_text __sysreg_restore_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->gp_regs.elr_el1, elr_el1); write_sysreg(ctxt->gp_regs.spsr[KVM_SPSR_EL1], spsr_el1); } + +void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + spsr[KVM_SPSR_ABT] = read_sysreg(spsr_abt); + spsr[KVM_SPSR_UND] = read_sysreg(spsr_und); + spsr[KVM_SPSR_IRQ] = read_sysreg(spsr_irq); + spsr[KVM_SPSR_FIQ] = read_sysreg(spsr_fiq); + + sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); + sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); + + if (!(read_sysreg(cptr_el2) & CPTR_EL2_TFP)) + sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + sysreg[DBGVCR32_EL2] = read_sysreg(dbgvcr32_el2); +} + +void __hyp_text __sysreg32_restore_state(struct kvm_vcpu *vcpu) +{ + u64 *spsr, *sysreg; + + if (read_sysreg(hcr_el2) & HCR_RW) + return; + + spsr = vcpu->arch.ctxt.gp_regs.spsr; + sysreg = vcpu->arch.ctxt.sys_regs; + + write_sysreg(spsr[KVM_SPSR_ABT], spsr_abt); + write_sysreg(spsr[KVM_SPSR_UND], spsr_und); + write_sysreg(spsr[KVM_SPSR_IRQ], spsr_irq); + write_sysreg(spsr[KVM_SPSR_FIQ], spsr_fiq); + + write_sysreg(sysreg[DACR32_EL2], dacr32_el2); + write_sysreg(sysreg[IFSR32_EL2], ifsr32_el2); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + write_sysreg(sysreg[DBGVCR32_EL2], dbgvcr32_el2); +} -- cgit v0.10.2 From 8eb992674c9e69d57af199f36b6455dbc00ac9f9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 19 Oct 2015 21:02:46 +0100 Subject: arm64: KVM: Implement debug save/restore MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the debug save restore as a direct translation of the assembly code version. Signed-off-by: Marc Zyngier Tested-by: Alex Bennée Reviewed-by: Alex Bennée Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ec94200..ec14cac 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,3 +6,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-sr.o obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c new file mode 100644 index 0000000..7848322 --- /dev/null +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -0,0 +1,137 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include + +#include + +#include "hyp.h" + +#define read_debug(r,n) read_sysreg(r##n##_el1) +#define write_debug(v,r,n) write_sysreg(v, r##n##_el1) + +#define save_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: ptr[15] = read_debug(reg, 15); \ + case 14: ptr[14] = read_debug(reg, 14); \ + case 13: ptr[13] = read_debug(reg, 13); \ + case 12: ptr[12] = read_debug(reg, 12); \ + case 11: ptr[11] = read_debug(reg, 11); \ + case 10: ptr[10] = read_debug(reg, 10); \ + case 9: ptr[9] = read_debug(reg, 9); \ + case 8: ptr[8] = read_debug(reg, 8); \ + case 7: ptr[7] = read_debug(reg, 7); \ + case 6: ptr[6] = read_debug(reg, 6); \ + case 5: ptr[5] = read_debug(reg, 5); \ + case 4: ptr[4] = read_debug(reg, 4); \ + case 3: ptr[3] = read_debug(reg, 3); \ + case 2: ptr[2] = read_debug(reg, 2); \ + case 1: ptr[1] = read_debug(reg, 1); \ + default: ptr[0] = read_debug(reg, 0); \ + } + +#define restore_debug(ptr,reg,nr) \ + switch (nr) { \ + case 15: write_debug(ptr[15], reg, 15); \ + case 14: write_debug(ptr[14], reg, 14); \ + case 13: write_debug(ptr[13], reg, 13); \ + case 12: write_debug(ptr[12], reg, 12); \ + case 11: write_debug(ptr[11], reg, 11); \ + case 10: write_debug(ptr[10], reg, 10); \ + case 9: write_debug(ptr[9], reg, 9); \ + case 8: write_debug(ptr[8], reg, 8); \ + case 7: write_debug(ptr[7], reg, 7); \ + case 6: write_debug(ptr[6], reg, 6); \ + case 5: write_debug(ptr[5], reg, 5); \ + case 4: write_debug(ptr[4], reg, 4); \ + case 3: write_debug(ptr[3], reg, 3); \ + case 2: write_debug(ptr[2], reg, 2); \ + case 1: write_debug(ptr[1], reg, 1); \ + default: write_debug(ptr[0], reg, 0); \ + } + +void __hyp_text __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + save_debug(dbg->dbg_bcr, dbgbcr, brps); + save_debug(dbg->dbg_bvr, dbgbvr, brps); + save_debug(dbg->dbg_wcr, dbgwcr, wrps); + save_debug(dbg->dbg_wvr, dbgwvr, wrps); + + ctxt->sys_regs[MDCCINT_EL1] = read_sysreg(mdccint_el1); +} + +void __hyp_text __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt) +{ + u64 aa64dfr0; + int brps, wrps; + + if (!(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)) + return; + + aa64dfr0 = read_sysreg(id_aa64dfr0_el1); + + brps = (aa64dfr0 >> 12) & 0xf; + wrps = (aa64dfr0 >> 20) & 0xf; + + restore_debug(dbg->dbg_bcr, dbgbcr, brps); + restore_debug(dbg->dbg_bvr, dbgbvr, brps); + restore_debug(dbg->dbg_wcr, dbgwcr, wrps); + restore_debug(dbg->dbg_wvr, dbgwvr, wrps); + + write_sysreg(ctxt->sys_regs[MDCCINT_EL1], mdccint_el1); +} + +void __hyp_text __debug_cond_save_host_state(struct kvm_vcpu *vcpu) +{ + /* If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY is set, perform + * a full save/restore cycle. 
*/ + if ((vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_KDE) || + (vcpu->arch.ctxt.sys_regs[MDSCR_EL1] & DBG_MDSCR_MDE)) + vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; + + __debug_save_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); +} + +void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) +{ + __debug_restore_state(vcpu, &vcpu->arch.host_debug_state, + kern_hyp_va(vcpu->arch.host_cpu_context)); + + if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) + vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; +} + +u32 __hyp_text __debug_read_mdcr_el2(void) +{ + return read_sysreg(mdcr_el2); +} diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index bffd308..454e46f 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -43,5 +43,14 @@ void __sysreg_restore_state(struct kvm_cpu_context *ctxt); void __sysreg32_save_state(struct kvm_vcpu *vcpu); void __sysreg32_restore_state(struct kvm_vcpu *vcpu); +void __debug_save_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_restore_state(struct kvm_vcpu *vcpu, + struct kvm_guest_debug_arch *dbg, + struct kvm_cpu_context *ctxt); +void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); +void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_HYP_H__ */ -- cgit v0.10.2 From b97b66c14b96ab562e4fd516d804c5cd05c0529e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 22 Oct 2015 08:32:18 +0100 Subject: arm64: KVM: Implement guest entry Contrary to the previous patch, the guest entry is fairly different from its assembly counterpart, mostly because it is only concerned with saving/restoring the GP registers, and nothing else. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ec14cac..1e1ff06 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -7,3 +7,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += entry.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S new file mode 100644 index 0000000..ff19695 --- /dev/null +++ b/arch/arm64/kvm/hyp/entry.S @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) +#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) + + .text + .pushsection .hyp.text, "ax" + +.macro save_callee_saved_regs ctxt + stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + stp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + stp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + stp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +.macro restore_callee_saved_regs ctxt + ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)] + ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)] + ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)] + ldp x25, x26, [\ctxt, #CPU_XREG_OFFSET(25)] + ldp x27, x28, [\ctxt, #CPU_XREG_OFFSET(27)] + ldp x29, lr, [\ctxt, #CPU_XREG_OFFSET(29)] +.endm + +/* + * u64 __guest_enter(struct kvm_vcpu *vcpu, + * struct kvm_cpu_context *host_ctxt); + */ +ENTRY(__guest_enter) + // x0: vcpu + // x1: host/guest context + // x2-x18: clobbered by macros + + // Store the host regs + save_callee_saved_regs x1 + + // Preserve vcpu & host_ctxt for use at exit time + stp x0, x1, [sp, #-16]! + + add x1, x0, #VCPU_CONTEXT + + // Prepare x0-x1 for later restore by pushing them onto the stack + ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] + stp x2, x3, [sp, #-16]! + + // x2-x18 + ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] + ldr x18, [x1, #CPU_XREG_OFFSET(18)] + + // x19-x29, lr + restore_callee_saved_regs x1 + + // Last bits of the 64bit state + ldp x0, x1, [sp], #16 + + // Do not touch any register after this! 
+ eret +ENDPROC(__guest_enter) + +ENTRY(__guest_exit) + // x0: vcpu + // x1: return code + // x2-x3: free + // x4-x29,lr: vcpu regs + // vcpu x0-x3 on the stack + + add x2, x0, #VCPU_CONTEXT + + stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] + str x18, [x2, #CPU_XREG_OFFSET(18)] + + ldp x6, x7, [sp], #16 // x2, x3 + ldp x4, x5, [sp], #16 // x0, x1 + + stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] + stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] + + save_callee_saved_regs x2 + + // Restore vcpu & host_ctxt from the stack + // (preserving return code in x1) + ldp x0, x2, [sp], #16 + // Now restore the host regs + restore_callee_saved_regs x2 + + mov x0, x1 + ret +ENDPROC(__guest_exit) + + /* Insert fault handling here */ diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 454e46f..0809653 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -52,5 +52,7 @@ void __debug_restore_state(struct kvm_vcpu *vcpu, void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); +u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); + #endif /* __ARM64_KVM_HYP_H__ */ -- cgit v0.10.2 From c1bf6e18e97e7ead77371d4251f8ef1567455584 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 28 Oct 2015 08:45:37 +0000 Subject: arm64: KVM: Add patchable function selector KVM so far relies on code patching, and is likely to use it more in the future. The main issue is that our alternative system works at the instruction level, while we'd like to have alternatives at the function level. In order to cope with this, add the "hyp_alternate_select" macro that outputs a brief sequence of code that in turn can be patched, allowing an alternative function to be selected. Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 0809653..73419a7 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -29,6 +29,30 @@ #define hyp_kern_va(v) (typeof(v))((unsigned long)(v) - HYP_PAGE_OFFSET \ + PAGE_OFFSET) +/** + * hyp_alternate_select - Generates patchable code sequences that are + * used to switch between two implementations of a function, depending + * on the availability of a feature. + * + * @fname: a symbol name that will be defined as a function returning a + * function pointer whose type will match @orig and @alt + * @orig: A pointer to the default function, as returned by @fname when + * @cond doesn't hold + * @alt: A pointer to the alternate function, as returned by @fname + * when @cond holds + * @cond: a CPU feature (as described in asm/cpufeature.h) + */ +#define hyp_alternate_select(fname, orig, alt, cond) \ +typeof(orig) * __hyp_text fname(void) \ +{ \ + typeof(alt) *val = orig; \ + asm volatile(ALTERNATIVE("nop \n", \ + "mov %0, %1 \n", \ + cond) \ + : "+r" (val) : "r" (alt)); \ + return val; \ +} + void __vgic_v2_save_state(struct kvm_vcpu *vcpu); void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); -- cgit v0.10.2 From be901e9b15cd2c8e48dc089b4655ea4a076e66fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 21 Oct 2015 09:57:10 +0100 Subject: arm64: KVM: Implement the core world switch Implement the core of the world switch in C. 
Not everything is there yet, and there is nothing to re-enter the world switch either. But this already outlines the code structure well enough. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 1e1ff06..9c11b0f 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -8,3 +8,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o +obj-$(CONFIG_KVM_ARM_HOST) += switch.o diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c new file mode 100644 index 0000000..79f59c9 --- /dev/null +++ b/arch/arm64/kvm/hyp/switch.c @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "hyp.h" + +static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * We are about to set CPTR_EL2.TFP to trap all floating point + * register accesses to EL2, however, the ARM ARM clearly states that + * traps are only taken to EL2 if the operation would not otherwise + * trap to EL1. Therefore, always make sure that for 32-bit guests, + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 
+ */ + val = vcpu->arch.hcr_el2; + if (!(val & HCR_RW)) { + write_sysreg(1 << 30, fpexc32_el2); + isb(); + } + write_sysreg(val, hcr_el2); + /* Trap on AArch32 cp15 c15 accesses (EL1 or EL0) */ + write_sysreg(1 << 15, hstr_el2); + write_sysreg(CPTR_EL2_TTA | CPTR_EL2_TFP, cptr_el2); + write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); +} + +static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) +{ + write_sysreg(HCR_RW, hcr_el2); + write_sysreg(0, hstr_el2); + write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); + write_sysreg(0, cptr_el2); +} + +static void __hyp_text __activate_vm(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); +} + +static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) +{ + write_sysreg(0, vttbr_el2); +} + +static hyp_alternate_select(__vgic_call_save_state, + __vgic_v2_save_state, __vgic_v3_save_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static hyp_alternate_select(__vgic_call_restore_state, + __vgic_v2_restore_state, __vgic_v3_restore_state, + ARM64_HAS_SYSREG_GIC_CPUIF); + +static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) +{ + __vgic_call_save_state()(vcpu); + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); +} + +static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) +{ + u64 val; + + val = read_sysreg(hcr_el2); + val |= HCR_INT_OVERRIDE; + val |= vcpu->arch.irq_lines; + write_sysreg(val, hcr_el2); + + __vgic_call_restore_state()(vcpu); +} + +int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +{ + struct kvm_cpu_context *host_ctxt; + struct kvm_cpu_context *guest_ctxt; + u64 exit_code; + + vcpu = kern_hyp_va(vcpu); + write_sysreg(vcpu, tpidr_el2); + + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + guest_ctxt = &vcpu->arch.ctxt; + + __sysreg_save_state(host_ctxt); + __debug_cond_save_host_state(vcpu); + + __activate_traps(vcpu); + __activate_vm(vcpu); + + __vgic_restore_state(vcpu); + __timer_restore_state(vcpu); + + /* + * We must restore the 32-bit state before the sysregs, thanks + * to Cortex-A57 erratum #852523. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state(guest_ctxt); + __debug_restore_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + + /* Jump in the fire! */ + exit_code = __guest_enter(vcpu, host_ctxt); + /* And we're baaack! */ + + __sysreg_save_state(guest_ctxt); + __sysreg32_save_state(vcpu); + __timer_save_state(vcpu); + __vgic_save_state(vcpu); + + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + + __sysreg_restore_state(host_ctxt); + + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); + __debug_cond_restore_host_state(vcpu); + + return exit_code; +} -- cgit v0.10.2 From c13d1683df16db16c91372177ca10c31677b5ed5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2015 08:34:09 +0000 Subject: arm64: KVM: Implement fpsimd save/restore Implement the fpsimd save restore, keeping the lazy part in assembler (as returning to C would be overkill). 
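For readers following the lazy scheme: guest FP/SIMD use traps to EL2 while CPTR_EL2.TFP is set; the __fpsimd_guest_restore handler below clears TFP, saves the host FP state and loads the guest's. On the way back out, the world switch only needs to know whether that trap ever fired, which is exactly what the __fpsimd_enabled() helper added to hyp.h by this patch expresses (reproduced here with an extra explanatory comment):

static inline bool __fpsimd_enabled(void)
{
	/* TFP clear means the guest really used FP/SIMD during this run */
	return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP);
}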
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 9c11b0f..56238d0 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o +obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index ff19695..90cbf0f 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -27,6 +27,7 @@ #define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) +#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) .text .pushsection .hyp.text, "ax" @@ -127,4 +128,33 @@ ENTRY(__guest_exit) ret ENDPROC(__guest_exit) - /* Insert fault handling here */ +ENTRY(__fpsimd_guest_restore) + stp x4, lr, [sp, #-16]! + + mrs x2, cptr_el2 + bic x2, x2, #CPTR_EL2_TFP + msr cptr_el2, x2 + isb + + mrs x3, tpidr_el2 + + ldr x0, [x3, #VCPU_HOST_CONTEXT] + kern_hyp_va x0 + add x0, x0, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_save_state + + add x2, x3, #VCPU_CONTEXT + add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) + bl __fpsimd_restore_state + + mrs x1, hcr_el2 + tbnz x1, #HCR_RW_SHIFT, 1f + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + msr fpexc32_el2, x4 +1: + ldp x4, lr, [sp], #16 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 + + eret +ENDPROC(__fpsimd_guest_restore) diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S new file mode 100644 index 0000000..da3f22c --- /dev/null +++ b/arch/arm64/kvm/hyp/fpsimd.S @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#include + +#include + + .text + .pushsection .hyp.text, "ax" + +ENTRY(__fpsimd_save_state) + fpsimd_save x0, 1 + ret +ENDPROC(__fpsimd_save_state) + +ENTRY(__fpsimd_restore_state) + fpsimd_restore x0, 1 + ret +ENDPROC(__fpsimd_restore_state) diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 73419a7..70d4f69 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -76,6 +76,13 @@ void __debug_restore_state(struct kvm_vcpu *vcpu, void __debug_cond_save_host_state(struct kvm_vcpu *vcpu); void __debug_cond_restore_host_state(struct kvm_vcpu *vcpu); +void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); +void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); +static inline bool __fpsimd_enabled(void) +{ + return !(read_sysreg(cptr_el2) & CPTR_EL2_TFP); +} + u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 79f59c9..608155f 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -89,6 +89,7 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; + bool fp_enabled; u64 exit_code; vcpu = kern_hyp_va(vcpu); @@ -118,6 +119,8 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) exit_code = __guest_enter(vcpu, host_ctxt); /* And we're baaack! */ + fp_enabled = __fpsimd_enabled(); + __sysreg_save_state(guest_ctxt); __sysreg32_save_state(vcpu); __timer_save_state(vcpu); @@ -128,6 +131,11 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) __sysreg_restore_state(host_ctxt); + if (fp_enabled) { + __fpsimd_save_state(&guest_ctxt->gp_regs.fp_regs); + __fpsimd_restore_state(&host_ctxt->gp_regs.fp_regs); + } + __debug_save_state(vcpu, kern_hyp_va(vcpu->arch.debug_ptr), guest_ctxt); __debug_cond_restore_host_state(vcpu); diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index eb05afb..3603541 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -107,7 +107,7 @@ void __hyp_text __sysreg32_save_state(struct kvm_vcpu *vcpu) sysreg[DACR32_EL2] = read_sysreg(dacr32_el2); sysreg[IFSR32_EL2] = read_sysreg(ifsr32_el2); - if (!(read_sysreg(cptr_el2) & CPTR_EL2_TFP)) + if (__fpsimd_enabled()) sysreg[FPEXC32_EL2] = read_sysreg(fpexc32_el2); if (vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY) -- cgit v0.10.2 From 5eec0a91e32a2862e86265532ae773820e0afd77 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Oct 2015 08:26:37 +0100 Subject: arm64: KVM: Implement TLB handling Implement the TLB handling as a direct translation of the assembly code version. 
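These flush helpers run at EL2; on the host side they are reached through the usual HVC trampoline. A hypothetical call site for the per-IPA flush would follow the existing kvm_call_hyp() pattern (the wiring of these C versions to their callers is not part of this hunk, so treat this strictly as an illustration):

/* Hypothetical host-side caller: invalidate the stage-2 mapping for one IPA */
kvm_call_hyp(__tlb_flush_vmid_ipa, kvm, ipa);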
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 56238d0..1a529f5 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o +obj-$(CONFIG_KVM_ARM_HOST) += tlb.o diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 90cbf0f..1050b2b 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -147,6 +147,7 @@ ENTRY(__fpsimd_guest_restore) add x0, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) bl __fpsimd_restore_state + // Skip restoring fpexc32 for AArch64 guests mrs x1, hcr_el2 tbnz x1, #HCR_RW_SHIFT, 1f ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c new file mode 100644 index 0000000..6fcb93a --- /dev/null +++ b/arch/arm64/kvm/hyp/tlb.c @@ -0,0 +1,73 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include "hyp.h" + +void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + /* + * We could do so much better if we had the VA as well. + * Instead, we invalidate Stage-2 for this IPA, and the + * whole of Stage-1. Weep... + */ + ipa >>= 12; + asm volatile("tlbi ipas2e1is, %0" : : "r" (ipa)); + + /* + * We have to ensure completion of the invalidation at Stage-2, + * since a table walk on another CPU could refill a TLB with a + * complete (S1 + S2) walk based on the old Stage-2 mapping if + * the Stage-1 invalidation happened first. + */ + dsb(ish); + asm volatile("tlbi vmalle1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +{ + dsb(ishst); + + /* Switch to requested VMID */ + kvm = kern_hyp_va(kvm); + write_sysreg(kvm->arch.vttbr, vttbr_el2); + isb(); + + asm volatile("tlbi vmalls12e1is" : : ); + dsb(ish); + isb(); + + write_sysreg(0, vttbr_el2); +} + +void __hyp_text __tlb_flush_vm_context(void) +{ + dsb(ishst); + asm volatile("tlbi alle1is \n" + "ic ialluis ": : ); + dsb(ish); +} -- cgit v0.10.2 From 2b28162cf65a6fe1c93d172675e4f2792792f17e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 08:01:56 +0000 Subject: arm64: KVM: HYP mode entry points Add the entry points for HYP mode (both for hypercalls and exception handling). 
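The host-to-HYP calling convention implemented by el1_sync below can be read off the code: the host issues an HVC with the target EL2 function address in x0 (converted with kern_hyp_va) and up to three arguments in x1-x3; x0 == 0 is the special __hyp_get_vectors case, which simply returns VBAR_EL2. A hypothetical host-side use of that convention, with placeholder names:

/* Hypothetical: invoke an EL2 function with two arguments via the HVC path */
ret = kvm_call_hyp(some_hyp_function, arg0, arg1);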
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 1a529f5..826032b 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -11,3 +11,4 @@ obj-$(CONFIG_KVM_ARM_HOST) += entry.o obj-$(CONFIG_KVM_ARM_HOST) += switch.o obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o +obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S new file mode 100644 index 0000000..818731a --- /dev/null +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -0,0 +1,203 @@ +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + + .text + .pushsection .hyp.text, "ax" + +.macro save_x0_to_x3 + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! +.endm + +.macro restore_x0_to_x3 + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +.endm + +el1_sync: // Guest trapped into EL2 + save_x0_to_x3 + + mrs x1, esr_el2 + lsr x2, x1, #ESR_ELx_EC_SHIFT + + cmp x2, #ESR_ELx_EC_HVC64 + b.ne el1_trap + + mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x3, el1_trap // called HVC + + /* Here, we're pretty sure the host called HVC. */ + restore_x0_to_x3 + + /* Check for __hyp_get_vectors */ + cbnz x0, 1f + mrs x0, vbar_el2 + b 2f + +1: stp lr, xzr, [sp, #-16]! + + /* + * Compute the function address in EL2, and shuffle the parameters. + */ + kern_hyp_va x0 + mov lr, x0 + mov x0, x1 + mov x1, x2 + mov x2, x3 + blr lr + + ldp lr, xzr, [sp], #16 +2: eret + +el1_trap: + /* + * x1: ESR + * x2: ESR_EC + */ + + /* Guest accessed VFP/SIMD registers, save host, restore Guest */ + cmp x2, #ESR_ELx_EC_FP_ASIMD + b.eq __fpsimd_guest_restore + + cmp x2, #ESR_ELx_EC_DABT_LOW + mov x0, #ESR_ELx_EC_IABT_LOW + ccmp x2, x0, #4, ne + b.ne 1f // Not an abort we care about + + /* This is an abort. Check for permission fault */ +alternative_if_not ARM64_WORKAROUND_834220 + and x2, x1, #ESR_ELx_FSC_TYPE + cmp x2, #FSC_PERM + b.ne 1f // Not a permission fault +alternative_else + nop // Use the permission fault path to + nop // check for a valid S1 translation, + nop // regardless of the ESR value. +alternative_endif + + /* + * Check for Stage-1 page table walk, which is guaranteed + * to give a valid HPFAR_EL2. + */ + tbnz x1, #7, 1f // S1PTW is set + + /* Preserve PAR_EL1 */ + mrs x3, par_el1 + stp x3, xzr, [sp, #-16]! + + /* + * Permission fault, HPFAR_EL2 is invalid. + * Resolve the IPA the hard way using the guest VA. + * Stage-1 translation already validated the memory access rights. + * As such, we can use the EL1 translation regime, and don't have + * to distinguish between EL0 and EL1 access. 
+ */ + mrs x2, far_el2 + at s1e1r, x2 + isb + + /* Read result */ + mrs x3, par_el1 + ldp x0, xzr, [sp], #16 // Restore PAR_EL1 from the stack + msr par_el1, x0 + tbnz x3, #0, 3f // Bail out if we failed the translation + ubfx x3, x3, #12, #36 // Extract IPA + lsl x3, x3, #4 // and present it like HPFAR + b 2f + +1: mrs x3, hpfar_el2 + mrs x2, far_el2 + +2: mrs x0, tpidr_el2 + str w1, [x0, #VCPU_ESR_EL2] + str x2, [x0, #VCPU_FAR_EL2] + str x3, [x0, #VCPU_HPFAR_EL2] + + mov x1, #ARM_EXCEPTION_TRAP + b __guest_exit + + /* + * Translation failed. Just return to the guest and + * let it fault again. Another CPU is probably playing + * behind our back. + */ +3: restore_x0_to_x3 + + eret + +el1_irq: + save_x0_to_x3 + mrs x0, tpidr_el2 + mov x1, #ARM_EXCEPTION_IRQ + b __guest_exit + +.macro invalid_vector label, target = __kvm_hyp_panic + .align 2 +\label: + b \target +ENDPROC(\label) +.endm + + /* None of these should ever happen */ + invalid_vector el2t_sync_invalid + invalid_vector el2t_irq_invalid + invalid_vector el2t_fiq_invalid + invalid_vector el2t_error_invalid + invalid_vector el2h_sync_invalid + invalid_vector el2h_irq_invalid + invalid_vector el2h_fiq_invalid + invalid_vector el2h_error_invalid + invalid_vector el1_sync_invalid + invalid_vector el1_irq_invalid + invalid_vector el1_fiq_invalid + invalid_vector el1_error_invalid + + .ltorg + + .align 11 + +ENTRY(__hyp_vector) + ventry el2t_sync_invalid // Synchronous EL2t + ventry el2t_irq_invalid // IRQ EL2t + ventry el2t_fiq_invalid // FIQ EL2t + ventry el2t_error_invalid // Error EL2t + + ventry el2h_sync_invalid // Synchronous EL2h + ventry el2h_irq_invalid // IRQ EL2h + ventry el2h_fiq_invalid // FIQ EL2h + ventry el2h_error_invalid // Error EL2h + + ventry el1_sync // Synchronous 64-bit EL1 + ventry el1_irq // IRQ 64-bit EL1 + ventry el1_fiq_invalid // FIQ 64-bit EL1 + ventry el1_error_invalid // Error 64-bit EL1 + + ventry el1_sync // Synchronous 32-bit EL1 + ventry el1_irq // IRQ 32-bit EL1 + ventry el1_fiq_invalid // FIQ 32-bit EL1 + ventry el1_error_invalid // Error 32-bit EL1 +ENDPROC(__hyp_vector) -- cgit v0.10.2 From 53fd5b6487e4438049a5da5e36dfb8edcf1fd789 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 15:21:52 +0000 Subject: arm64: KVM: Add panic handling Add the panic handler, together with the small bits of assembly code to call the kernel's panic implementation. 
Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 818731a..8e58a3b 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -155,7 +155,16 @@ el1_irq: mov x1, #ARM_EXCEPTION_IRQ b __guest_exit -.macro invalid_vector label, target = __kvm_hyp_panic +ENTRY(__hyp_do_panic) + mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ + PSR_MODE_EL1h) + msr spsr_el2, lr + ldr lr, =panic + msr elr_el2, lr + eret +ENDPROC(__hyp_do_panic) + +.macro invalid_vector label, target = __hyp_panic .align 2 \label: b \target diff --git a/arch/arm64/kvm/hyp/hyp.h b/arch/arm64/kvm/hyp/hyp.h index 70d4f69..fb27517 100644 --- a/arch/arm64/kvm/hyp/hyp.h +++ b/arch/arm64/kvm/hyp/hyp.h @@ -84,6 +84,7 @@ static inline bool __fpsimd_enabled(void) } u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); +void __noreturn __hyp_do_panic(unsigned long, ...); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 608155f..b012870 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -141,3 +141,33 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) return exit_code; } + +static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; + +void __hyp_text __noreturn __hyp_panic(void) +{ + unsigned long str_va = (unsigned long)__hyp_panic_string; + u64 spsr = read_sysreg(spsr_el2); + u64 elr = read_sysreg(elr_el2); + u64 par = read_sysreg(par_el1); + + if (read_sysreg(vttbr_el2)) { + struct kvm_vcpu *vcpu; + struct kvm_cpu_context *host_ctxt; + + vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); + host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __deactivate_traps(vcpu); + __deactivate_vm(vcpu); + __sysreg_restore_state(host_ctxt); + } + + /* Call panic for real */ + __hyp_do_panic(hyp_kern_va(str_va), + spsr, elr, + read_sysreg(esr_el2), read_sysreg(far_el2), + read_sysreg(hpfar_el2), par, + (void *)read_sysreg(tpidr_el2)); + + unreachable(); +} -- cgit v0.10.2 From 044ac37d1281fc7b59d5dce4fe979a99369e95f2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 13:58:00 +0000 Subject: arm64: KVM: Add compatibility aliases So far, we've implemented the new world switch with a completely different namespace, so that we could have both implementation compiled in. Let's take things one step further by adding weak aliases that have the same names as the original implementation. The weak attributes allows the new implementation to be overriden by the old one, and everything still work. At a later point, we'll be able to simply drop the old code, and everything will hopefully keep working, thanks to the aliases we have just added. This also saves us repainting all the callers. 
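The alias trick itself is plain GCC: the alias attribute makes a second symbol refer to an existing definition in the same translation unit, and weak lets any remaining strong definition of that symbol win at link time. A minimal, self-contained sketch with made-up names (the kernel spells these attributes as __alias() and __weak):

	/* The new implementation, living under its new name. */
	int new_impl(int x)
	{
		return x + 1;
	}

	/*
	 * Publish it under the legacy name as a weak alias.  While the old,
	 * strong definition of legacy_name() is still compiled in, the linker
	 * keeps picking that one; once the old code is deleted, the alias is
	 * the only definition left and callers never notice the change.
	 */
	int legacy_name(int x) __attribute__((weak, alias("new_impl")));
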
Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 7848322..d071f45 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -135,3 +135,6 @@ u32 __hyp_text __debug_read_mdcr_el2(void) { return read_sysreg(mdcr_el2); } + +__alias(__debug_read_mdcr_el2) +u32 __weak __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 8e58a3b..10d6d2a 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -189,6 +189,8 @@ ENDPROC(\label) .align 11 + .weak __kvm_hyp_vector +ENTRY(__kvm_hyp_vector) ENTRY(__hyp_vector) ventry el2t_sync_invalid // Synchronous EL2t ventry el2t_irq_invalid // IRQ EL2t @@ -210,3 +212,4 @@ ENTRY(__hyp_vector) ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 ENDPROC(__hyp_vector) +ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index b012870..7457ae4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -142,6 +142,9 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) return exit_code; } +__alias(__guest_run) +int __weak __kvm_vcpu_run(struct kvm_vcpu *vcpu); + static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; void __hyp_text __noreturn __hyp_panic(void) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 6fcb93a..5f815cf 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -48,6 +48,9 @@ void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) write_sysreg(0, vttbr_el2); } +__alias(__tlb_flush_vmid_ipa) +void __weak __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); + void __hyp_text __tlb_flush_vmid(struct kvm *kvm) { dsb(ishst); @@ -64,6 +67,9 @@ void __hyp_text __tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, vttbr_el2); } +__alias(__tlb_flush_vmid) +void __weak __kvm_tlb_flush_vmid(struct kvm *kvm); + void __hyp_text __tlb_flush_vm_context(void) { dsb(ishst); @@ -71,3 +77,6 @@ void __hyp_text __tlb_flush_vm_context(void) "ic ialluis ": : ); dsb(ish); } + +__alias(__tlb_flush_vm_context) +void __weak __kvm_flush_vm_context(void); diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 78d05f3..a769458 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -224,3 +224,6 @@ u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } + +__alias(__vgic_v3_read_ich_vtr_el2) +u64 __weak __vgic_v3_get_ich_vtr_el2(void); -- cgit v0.10.2 From 910917bb7db070cc67557a6b3c8fcceaa5c398a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 27 Oct 2015 12:18:48 +0000 Subject: arm64: KVM: Map the kernel RO section into HYP In order to run C code in HYP, we must make sure that the kernel's RO section is mapped into HYP (otherwise things break badly). 
Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 8a79a57..6e35d1d 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef REQUIRES_VIRT __asm__(".arch_extension virt"); @@ -1068,6 +1069,12 @@ static int init_hyp_mode(void) goto out_free_mappings; } + err = create_hyp_mappings(__start_rodata, __end_rodata); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_free_mappings; + } + /* * Map the Hyp stack pages */ -- cgit v0.10.2 From 1ea66d27e7b01086669ff2abdc3ac89dc90eae51 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 15:51:41 +0000 Subject: arm64: KVM: Move away from the assembly version of the world switch This is it. We remove all of the code that has now been rewritten. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index d31e4e5..caee9ee 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -23,8 +23,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generi kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index 86c2898..0ccdcbb 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -17,910 +17,7 @@ #include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) -#define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SPSR_OFFSET(x) CPU_GP_REG_OFFSET(CPU_SPSR + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) - - .text - .pushsection .hyp.text, "ax" - .align PAGE_SHIFT - -.macro save_common_regs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_XREG_OFFSET(19) - stp x19, x20, [x3] - stp x21, x22, [x3, #16] - stp x23, x24, [x3, #32] - stp x25, x26, [x3, #48] - stp x27, x28, [x3, #64] - stp x29, lr, [x3, #80] - - mrs x19, sp_el0 - mrs x20, elr_el2 // pc before entering el2 - mrs x21, spsr_el2 // pstate before entering el2 - - stp x19, x20, [x3, #96] - str x21, [x3, #112] - - mrs x22, sp_el1 - mrs x23, elr_el1 - mrs x24, spsr_el1 - - str x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - str x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - str x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] -.endm - -.macro restore_common_regs - // x2: base address for cpu context - // x3: tmp register - - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - ldr x23, [x2, #CPU_GP_REG_OFFSET(CPU_ELR_EL1)] - ldr x24, [x2, #CPU_SPSR_OFFSET(KVM_SPSR_EL1)] - - msr sp_el1, x22 - msr elr_el1, x23 - msr spsr_el1, x24 - - add x3, x2, #CPU_XREG_OFFSET(31) // SP_EL0 - ldp x19, x20, [x3] - ldr x21, [x3, #16] - - msr sp_el0, x19 - msr elr_el2, x20 // pc on return from el2 - msr spsr_el2, x21 // pstate on return from el2 - - add x3, x2, #CPU_XREG_OFFSET(19) - ldp x19, x20, [x3] - ldp x21, x22, [x3, #16] - ldp x23, x24, [x3, #32] - ldp x25, x26, [x3, #48] - ldp x27, x28, [x3, #64] - ldp x29, lr, [x3, #80] -.endm - -.macro save_host_regs - save_common_regs -.endm - -.macro 
restore_host_regs - restore_common_regs -.endm - -.macro save_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_save x3, 4 -.endm - -.macro restore_fpsimd - // x2: cpu context address - // x3, x4: tmp regs - add x3, x2, #CPU_GP_REG_OFFSET(CPU_FP_REGS) - fpsimd_restore x3, 4 -.endm - -.macro save_guest_regs - // x0 is the vcpu address - // x1 is the return code, do not corrupt! - // x2 is the cpu context - // x3 is a tmp register - // Guest's x0-x3 are on the stack - - // Compute base to save registers - add x3, x2, #CPU_XREG_OFFSET(4) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - str x18, [x3, #112] - - pop x6, x7 // x2, x3 - pop x4, x5 // x0, x1 - - add x3, x2, #CPU_XREG_OFFSET(0) - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - save_common_regs -.endm - -.macro restore_guest_regs - // x0 is the vcpu address. - // x2 is the cpu context - // x3 is a tmp register - - // Prepare x0-x3 for later restore - add x3, x2, #CPU_XREG_OFFSET(0) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - push x4, x5 // Push x0-x3 on the stack - push x6, x7 - - // x4-x18 - ldp x4, x5, [x3, #32] - ldp x6, x7, [x3, #48] - ldp x8, x9, [x3, #64] - ldp x10, x11, [x3, #80] - ldp x12, x13, [x3, #96] - ldp x14, x15, [x3, #112] - ldp x16, x17, [x3, #128] - ldr x18, [x3, #144] - - // x19-x29, lr, sp*, elr*, spsr* - restore_common_regs - - // Last bits of the 64bit state - pop x2, x3 - pop x0, x1 - - // Do not touch any register after this! -.endm - -/* - * Macros to perform system register save/restore. - * - * Ordering here is absolutely critical, and must be kept consistent - * in {save,restore}_sysregs, {save,restore}_guest_32bit_state, - * and in kvm_asm.h. - * - * In other words, don't touch any of these unless you know what - * you are doing. 
- */ -.macro save_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - mrs x4, vmpidr_el2 - mrs x5, csselr_el1 - mrs x6, sctlr_el1 - mrs x7, actlr_el1 - mrs x8, cpacr_el1 - mrs x9, ttbr0_el1 - mrs x10, ttbr1_el1 - mrs x11, tcr_el1 - mrs x12, esr_el1 - mrs x13, afsr0_el1 - mrs x14, afsr1_el1 - mrs x15, far_el1 - mrs x16, mair_el1 - mrs x17, vbar_el1 - mrs x18, contextidr_el1 - mrs x19, tpidr_el0 - mrs x20, tpidrro_el0 - mrs x21, tpidr_el1 - mrs x22, amair_el1 - mrs x23, cntkctl_el1 - mrs x24, par_el1 - mrs x25, mdscr_el1 - - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - stp x8, x9, [x3, #32] - stp x10, x11, [x3, #48] - stp x12, x13, [x3, #64] - stp x14, x15, [x3, #80] - stp x16, x17, [x3, #96] - stp x18, x19, [x3, #112] - stp x20, x21, [x3, #128] - stp x22, x23, [x3, #144] - stp x24, x25, [x3, #160] -.endm - -.macro save_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - mrs x21, \type\()15_el1 - mrs x20, \type\()14_el1 - mrs x19, \type\()13_el1 - mrs x18, \type\()12_el1 - mrs x17, \type\()11_el1 - mrs x16, \type\()10_el1 - mrs x15, \type\()9_el1 - mrs x14, \type\()8_el1 - mrs x13, \type\()7_el1 - mrs x12, \type\()6_el1 - mrs x11, \type\()5_el1 - mrs x10, \type\()4_el1 - mrs x9, \type\()3_el1 - mrs x8, \type\()2_el1 - mrs x7, \type\()1_el1 - mrs x6, \type\()0_el1 - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - str x21, [x4, #(15 * 8)] - str x20, [x4, #(14 * 8)] - str x19, [x4, #(13 * 8)] - str x18, [x4, #(12 * 8)] - str x17, [x4, #(11 * 8)] - str x16, [x4, #(10 * 8)] - str x15, [x4, #(9 * 8)] - str x14, [x4, #(8 * 8)] - str x13, [x4, #(7 * 8)] - str x12, [x4, #(6 * 8)] - str x11, [x4, #(5 * 8)] - str x10, [x4, #(4 * 8)] - str x9, [x4, #(3 * 8)] - str x8, [x4, #(2 * 8)] - str x7, [x4, #(1 * 8)] - str x6, [x4, #(0 * 8)] -.endm - -.macro restore_sysregs - // x2: base address for cpu context - // x3: tmp register - - add x3, x2, #CPU_SYSREG_OFFSET(MPIDR_EL1) - - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - ldp x8, x9, [x3, #32] - ldp x10, x11, [x3, #48] - ldp x12, x13, [x3, #64] - ldp x14, x15, [x3, #80] - ldp x16, x17, [x3, #96] - ldp x18, x19, [x3, #112] - ldp x20, x21, [x3, #128] - ldp x22, x23, [x3, #144] - ldp x24, x25, [x3, #160] - - msr vmpidr_el2, x4 - msr csselr_el1, x5 - msr sctlr_el1, x6 - msr actlr_el1, x7 - msr cpacr_el1, x8 - msr ttbr0_el1, x9 - msr ttbr1_el1, x10 - msr tcr_el1, x11 - msr esr_el1, x12 - msr afsr0_el1, x13 - msr afsr1_el1, x14 - msr far_el1, x15 - msr mair_el1, x16 - msr vbar_el1, x17 - msr contextidr_el1, x18 - msr tpidr_el0, x19 - msr tpidrro_el0, x20 - msr tpidr_el1, x21 - msr amair_el1, x22 - msr cntkctl_el1, x23 - msr par_el1, x24 - msr mdscr_el1, x25 -.endm - -.macro restore_debug type - // x4: pointer to register set - // x5: number of registers to skip - // x6..x22 trashed - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - ldr x21, [x4, #(15 * 8)] - ldr x20, [x4, #(14 * 8)] - ldr x19, [x4, #(13 * 8)] - ldr x18, [x4, #(12 * 8)] - ldr x17, [x4, #(11 * 8)] - ldr x16, [x4, #(10 * 8)] - ldr x15, [x4, #(9 * 8)] - ldr x14, [x4, #(8 * 8)] - ldr x13, [x4, #(7 * 8)] - ldr x12, [x4, #(6 * 8)] - ldr x11, [x4, #(5 * 8)] - ldr x10, [x4, #(4 * 8)] - ldr x9, [x4, #(3 * 8)] - ldr x8, [x4, #(2 * 8)] - ldr x7, [x4, #(1 * 8)] - ldr x6, [x4, #(0 * 8)] - - adr x22, 1f - add x22, x22, x5, lsl #2 - br x22 -1: - msr \type\()15_el1, x21 - msr \type\()14_el1, x20 - msr \type\()13_el1, x19 - msr 
\type\()12_el1, x18 - msr \type\()11_el1, x17 - msr \type\()10_el1, x16 - msr \type\()9_el1, x15 - msr \type\()8_el1, x14 - msr \type\()7_el1, x13 - msr \type\()6_el1, x12 - msr \type\()5_el1, x11 - msr \type\()4_el1, x10 - msr \type\()3_el1, x9 - msr \type\()2_el1, x8 - msr \type\()1_el1, x7 - msr \type\()0_el1, x6 -.endm - -.macro skip_32bit_state tmp, target - // Skip 32bit state if not needed - mrs \tmp, hcr_el2 - tbnz \tmp, #HCR_RW_SHIFT, \target -.endm - -.macro skip_tee_state tmp, target - // Skip ThumbEE state if not needed - mrs \tmp, id_pfr0_el1 - tbz \tmp, #12, \target -.endm - -.macro skip_debug_state tmp, target - ldr \tmp, [x0, #VCPU_DEBUG_FLAGS] - tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target -.endm - -/* - * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled) - */ -.macro skip_fpsimd_state tmp, target - mrs \tmp, cptr_el2 - tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target -.endm - -.macro compute_debug_state target - // Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY - // is set, we do a full save/restore cycle and disable trapping. - add x25, x0, #VCPU_CONTEXT - - // Check the state of MDSCR_EL1 - ldr x25, [x25, #CPU_SYSREG_OFFSET(MDSCR_EL1)] - and x26, x25, #DBG_MDSCR_KDE - and x25, x25, #DBG_MDSCR_MDE - adds xzr, x25, x26 - b.eq 9998f // Nothing to see there - - // If any interesting bits was set, we must set the flag - mov x26, #KVM_ARM64_DEBUG_DIRTY - str x26, [x0, #VCPU_DEBUG_FLAGS] - b 9999f // Don't skip restore - -9998: - // Otherwise load the flags from memory in case we recently - // trapped - skip_debug_state x25, \target -9999: -.endm - -.macro save_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - mrs x4, spsr_abt - mrs x5, spsr_und - mrs x6, spsr_irq - mrs x7, spsr_fiq - stp x4, x5, [x3] - stp x6, x7, [x3, #16] - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - mrs x4, dacr32_el2 - mrs x5, ifsr32_el2 - stp x4, x5, [x3] - - skip_fpsimd_state x8, 2f - mrs x6, fpexc32_el2 - str x6, [x3, #16] -2: - skip_debug_state x8, 1f - mrs x7, dbgvcr32_el2 - str x7, [x3, #24] -1: -.endm - -.macro restore_guest_32bit_state - skip_32bit_state x3, 1f - - add x3, x2, #CPU_SPSR_OFFSET(KVM_SPSR_ABT) - ldp x4, x5, [x3] - ldp x6, x7, [x3, #16] - msr spsr_abt, x4 - msr spsr_und, x5 - msr spsr_irq, x6 - msr spsr_fiq, x7 - - add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2) - ldp x4, x5, [x3] - msr dacr32_el2, x4 - msr ifsr32_el2, x5 - - skip_debug_state x8, 1f - ldr x7, [x3, #24] - msr dbgvcr32_el2, x7 -1: -.endm - -.macro activate_traps - ldr x2, [x0, #VCPU_HCR_EL2] - - /* - * We are about to set CPTR_EL2.TFP to trap all floating point - * register accesses to EL2, however, the ARM ARM clearly states that - * traps are only taken to EL2 if the operation would not otherwise - * trap to EL1. Therefore, always make sure that for 32-bit guests, - * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit. 
- */ - tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state - mov x3, #(1 << 30) - msr fpexc32_el2, x3 - isb -99: - msr hcr_el2, x2 - mov x2, #CPTR_EL2_TTA - orr x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - - mov x2, #(1 << 15) // Trap CP15 Cr=15 - msr hstr_el2, x2 - - // Monitor Debug Config - see kvm_arm_setup_debug() - ldr x2, [x0, #VCPU_MDCR_EL2] - msr mdcr_el2, x2 -.endm - -.macro deactivate_traps - mov x2, #HCR_RW - msr hcr_el2, x2 - msr hstr_el2, xzr - - mrs x2, mdcr_el2 - and x2, x2, #MDCR_EL2_HPMN_MASK - msr mdcr_el2, x2 -.endm - -.macro activate_vm - ldr x1, [x0, #VCPU_KVM] - kern_hyp_va x1 - ldr x2, [x1, #KVM_VTTBR] - msr vttbr_el2, x2 -.endm - -.macro deactivate_vm - msr vttbr_el2, xzr -.endm - -/* - * Call into the vgic backend for state saving - */ -.macro save_vgic_state -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __save_vgic_v2_state -alternative_else - bl __save_vgic_v3_state -alternative_endif - mrs x24, hcr_el2 - mov x25, #HCR_INT_OVERRIDE - neg x25, x25 - and x24, x24, x25 - msr hcr_el2, x24 -.endm - -/* - * Call into the vgic backend for state restoring - */ -.macro restore_vgic_state - mrs x24, hcr_el2 - ldr x25, [x0, #VCPU_IRQ_LINES] - orr x24, x24, #HCR_INT_OVERRIDE - orr x24, x24, x25 - msr hcr_el2, x24 -alternative_if_not ARM64_HAS_SYSREG_GIC_CPUIF - bl __restore_vgic_v2_state -alternative_else - bl __restore_vgic_v3_state -alternative_endif -.endm - -.macro save_timer_state - // x0: vcpu pointer - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - mrs x3, cntv_ctl_el0 - and x3, x3, #3 - str w3, [x0, #VCPU_TIMER_CNTV_CTL] - - isb - - mrs x3, cntv_cval_el0 - str x3, [x0, #VCPU_TIMER_CNTV_CVAL] - -1: - // Disable the virtual timer - msr cntv_ctl_el0, xzr - - // Allow physical timer/counter access for the host - mrs x2, cnthctl_el2 - orr x2, x2, #3 - msr cnthctl_el2, x2 - - // Clear cntvoff for the host - msr cntvoff_el2, xzr -.endm - -.macro restore_timer_state - // x0: vcpu pointer - // Disallow physical timer access for the guest - // Physical counter access is allowed - mrs x2, cnthctl_el2 - orr x2, x2, #1 - bic x2, x2, #2 - msr cnthctl_el2, x2 - - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr w3, [x2, #KVM_TIMER_ENABLED] - cbz w3, 1f - - ldr x3, [x2, #KVM_TIMER_CNTVOFF] - msr cntvoff_el2, x3 - ldr x2, [x0, #VCPU_TIMER_CNTV_CVAL] - msr cntv_cval_el0, x2 - isb - - ldr w2, [x0, #VCPU_TIMER_CNTV_CTL] - and x2, x2, #3 - msr cntv_ctl_el0, x2 -1: -.endm - -__save_sysregs: - save_sysregs - ret - -__restore_sysregs: - restore_sysregs - ret - -/* Save debug state */ -__save_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - - mov x5, x24 - add x4, x3, #DEBUG_BCR - save_debug dbgbcr - add x4, x3, #DEBUG_BVR - save_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - save_debug dbgwcr - add x4, x3, #DEBUG_WVR - save_debug dbgwvr - - mrs x21, mdccint_el1 - str x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - ret - -/* Restore debug state */ -__restore_debug: - // x2: ptr to CPU context - // x3: ptr to debug reg struct - // x4/x5/x6-22/x24-26: trashed - - mrs x26, id_aa64dfr0_el1 - ubfx x24, x26, #12, #4 // Extract BRPs - ubfx x25, x26, #20, #4 // Extract WRPs - mov w26, #15 - sub w24, w26, w24 // How many BPs to skip - sub w25, w26, w25 // How many WPs to skip - 
- mov x5, x24 - add x4, x3, #DEBUG_BCR - restore_debug dbgbcr - add x4, x3, #DEBUG_BVR - restore_debug dbgbvr - - mov x5, x25 - add x4, x3, #DEBUG_WCR - restore_debug dbgwcr - add x4, x3, #DEBUG_WVR - restore_debug dbgwvr - - ldr x21, [x2, #CPU_SYSREG_OFFSET(MDCCINT_EL1)] - msr mdccint_el1, x21 - - ret - -__save_fpsimd: - skip_fpsimd_state x3, 1f - save_fpsimd -1: ret - -__restore_fpsimd: - skip_fpsimd_state x3, 1f - restore_fpsimd -1: ret - -switch_to_guest_fpsimd: - push x4, lr - - mrs x2, cptr_el2 - bic x2, x2, #CPTR_EL2_TFP - msr cptr_el2, x2 - isb - - mrs x0, tpidr_el2 - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - bl __save_fpsimd - - add x2, x0, #VCPU_CONTEXT - bl __restore_fpsimd - - skip_32bit_state x3, 1f - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] - msr fpexc32_el2, x4 -1: - pop x4, lr - pop x2, x3 - pop x0, x1 - - eret - -/* - * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu); - * - * This is the world switch. The first half of the function - * deals with entering the guest, and anything from __kvm_vcpu_return - * to the end of the function deals with reentering the host. - * On the enter path, only x0 (vcpu pointer) must be preserved until - * the last moment. On the exit path, x0 (vcpu pointer) and x1 (exception - * code) must both be preserved until the epilogue. - * In both cases, x2 points to the CPU context we're saving/restoring from/to. - */ -ENTRY(__kvm_vcpu_run) - kern_hyp_va x0 - msr tpidr_el2, x0 // Save the vcpu register - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - save_host_regs - bl __save_sysregs - - compute_debug_state 1f - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __save_debug -1: - activate_traps - activate_vm - - restore_vgic_state - restore_timer_state - - // Guest context - add x2, x0, #VCPU_CONTEXT - - // We must restore the 32-bit state before the sysregs, thanks - // to Cortex-A57 erratum #852523. - restore_guest_32bit_state - bl __restore_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __restore_debug -1: - restore_guest_regs - - // That's it, no more messing around. - eret - -__kvm_vcpu_return: - // Assume x0 is the vcpu pointer, x1 the return code - // Guest's x0-x3 are on the stack - - // Guest context - add x2, x0, #VCPU_CONTEXT - - save_guest_regs - bl __save_fpsimd - bl __save_sysregs - - skip_debug_state x3, 1f - ldr x3, [x0, #VCPU_DEBUG_PTR] - kern_hyp_va x3 - bl __save_debug -1: - save_guest_32bit_state - - save_timer_state - save_vgic_state - - deactivate_traps - deactivate_vm - - // Host context - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - bl __restore_fpsimd - /* Clear FPSIMD and Trace trapping */ - msr cptr_el2, xzr - - skip_debug_state x3, 1f - // Clear the dirty flag for the next run, as all the state has - // already been saved. Note that we nuke the whole 64bit word. - // If we ever add more flags, we'll have to be more careful... - str xzr, [x0, #VCPU_DEBUG_FLAGS] - add x3, x0, #VCPU_HOST_DEBUG_STATE - bl __restore_debug -1: - restore_host_regs - - mov x0, x1 - ret -END(__kvm_vcpu_run) - -// void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); -ENTRY(__kvm_tlb_flush_vmid_ipa) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - /* - * We could do so much better if we had the VA as well. - * Instead, we invalidate Stage-2 for this IPA, and the - * whole of Stage-1. Weep... 
- */ - lsr x1, x1, #12 - tlbi ipas2e1is, x1 - /* - * We have to ensure completion of the invalidation at Stage-2, - * since a table walk on another CPU could refill a TLB with a - * complete (S1 + S2) walk based on the old Stage-2 mapping if - * the Stage-1 invalidation happened first. - */ - dsb ish - tlbi vmalle1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid_ipa) - -/** - * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs - * @struct kvm *kvm - pointer to kvm structure - * - * Invalidates all Stage 1 and 2 TLB entries for current VMID. - */ -ENTRY(__kvm_tlb_flush_vmid) - dsb ishst - - kern_hyp_va x0 - ldr x2, [x0, #KVM_VTTBR] - msr vttbr_el2, x2 - isb - - tlbi vmalls12e1is - dsb ish - isb - - msr vttbr_el2, xzr - ret -ENDPROC(__kvm_tlb_flush_vmid) - -ENTRY(__kvm_flush_vm_context) - dsb ishst - tlbi alle1is - ic ialluis - dsb ish - ret -ENDPROC(__kvm_flush_vm_context) - -__kvm_hyp_panic: - // Stash PAR_EL1 before corrupting it in __restore_sysregs - mrs x0, par_el1 - push x0, xzr - - // Guess the context by looking at VTTBR: - // If zero, then we're already a host. - // Otherwise restore a minimal host context before panicing. - mrs x0, vttbr_el2 - cbz x0, 1f - - mrs x0, tpidr_el2 - - deactivate_traps - deactivate_vm - - ldr x2, [x0, #VCPU_HOST_CONTEXT] - kern_hyp_va x2 - - bl __restore_sysregs - - /* - * Make sure we have a valid host stack, and don't leave junk in the - * frame pointer that will give us a misleading host stack unwinding. - */ - ldr x22, [x2, #CPU_GP_REG_OFFSET(CPU_SP_EL1)] - msr sp_el1, x22 - mov x29, xzr - -1: adr x0, __hyp_panic_str - adr x1, 2f - ldp x2, x3, [x1] - sub x0, x0, x2 - add x0, x0, x3 - mrs x1, spsr_el2 - mrs x2, elr_el2 - mrs x3, esr_el2 - mrs x4, far_el2 - mrs x5, hpfar_el2 - pop x6, xzr // active context PAR_EL1 - mrs x7, tpidr_el2 - - mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ - PSR_MODE_EL1h) - msr spsr_el2, lr - ldr lr, =panic - msr elr_el2, lr - eret - - .align 3 -2: .quad HYP_PAGE_OFFSET - .quad PAGE_OFFSET -ENDPROC(__kvm_hyp_panic) - -__hyp_panic_str: - .ascii "HYP panic:\nPS:%08x PC:%016x ESR:%08x\nFAR:%016x HPFAR:%016x PAR:%016x\nVCPU:%p\n\0" - - .align 2 /* * u64 kvm_call_hyp(void *hypfn, ...); @@ -934,7 +31,7 @@ __hyp_panic_str: * passed as x0, x1, and x2 (a maximum of 3 arguments in addition to the * function pointer can be passed). The function being called must be mapped * in Hyp mode (see init_hyp_mode in arch/arm/kvm/arm.c). Return values are - * passed in r0 and r1. + * passed in x0. 
* * A function pointer with a value of 0 has a special meaning, and is * used to implement __hyp_get_vectors in the same way as in @@ -944,179 +41,3 @@ ENTRY(kvm_call_hyp) hvc #0 ret ENDPROC(kvm_call_hyp) - -.macro invalid_vector label, target - .align 2 -\label: - b \target -ENDPROC(\label) -.endm - - /* None of these should ever happen */ - invalid_vector el2t_sync_invalid, __kvm_hyp_panic - invalid_vector el2t_irq_invalid, __kvm_hyp_panic - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic - invalid_vector el2t_error_invalid, __kvm_hyp_panic - invalid_vector el2h_sync_invalid, __kvm_hyp_panic - invalid_vector el2h_irq_invalid, __kvm_hyp_panic - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic - invalid_vector el2h_error_invalid, __kvm_hyp_panic - invalid_vector el1_sync_invalid, __kvm_hyp_panic - invalid_vector el1_irq_invalid, __kvm_hyp_panic - invalid_vector el1_fiq_invalid, __kvm_hyp_panic - invalid_vector el1_error_invalid, __kvm_hyp_panic - -el1_sync: // Guest trapped into EL2 - push x0, x1 - push x2, x3 - - mrs x1, esr_el2 - lsr x2, x1, #ESR_ELx_EC_SHIFT - - cmp x2, #ESR_ELx_EC_HVC64 - b.ne el1_trap - - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x3, el1_trap // called HVC - - /* Here, we're pretty sure the host called HVC. */ - pop x2, x3 - pop x0, x1 - - /* Check for __hyp_get_vectors */ - cbnz x0, 1f - mrs x0, vbar_el2 - b 2f - -1: push lr, xzr - - /* - * Compute the function address in EL2, and shuffle the parameters. - */ - kern_hyp_va x0 - mov lr, x0 - mov x0, x1 - mov x1, x2 - mov x2, x3 - blr lr - - pop lr, xzr -2: eret - -el1_trap: - /* - * x1: ESR - * x2: ESR_EC - */ - - /* Guest accessed VFP/SIMD registers, save host, restore Guest */ - cmp x2, #ESR_ELx_EC_FP_ASIMD - b.eq switch_to_guest_fpsimd - - cmp x2, #ESR_ELx_EC_DABT_LOW - mov x0, #ESR_ELx_EC_IABT_LOW - ccmp x2, x0, #4, ne - b.ne 1f // Not an abort we care about - - /* This is an abort. Check for permission fault */ -alternative_if_not ARM64_WORKAROUND_834220 - and x2, x1, #ESR_ELx_FSC_TYPE - cmp x2, #FSC_PERM - b.ne 1f // Not a permission fault -alternative_else - nop // Use the permission fault path to - nop // check for a valid S1 translation, - nop // regardless of the ESR value. -alternative_endif - - /* - * Check for Stage-1 page table walk, which is guaranteed - * to give a valid HPFAR_EL2. - */ - tbnz x1, #7, 1f // S1PTW is set - - /* Preserve PAR_EL1 */ - mrs x3, par_el1 - push x3, xzr - - /* - * Permission fault, HPFAR_EL2 is invalid. - * Resolve the IPA the hard way using the guest VA. - * Stage-1 translation already validated the memory access rights. - * As such, we can use the EL1 translation regime, and don't have - * to distinguish between EL0 and EL1 access. - */ - mrs x2, far_el2 - at s1e1r, x2 - isb - - /* Read result */ - mrs x3, par_el1 - pop x0, xzr // Restore PAR_EL1 from the stack - msr par_el1, x0 - tbnz x3, #0, 3f // Bail out if we failed the translation - ubfx x3, x3, #12, #36 // Extract IPA - lsl x3, x3, #4 // and present it like HPFAR - b 2f - -1: mrs x3, hpfar_el2 - mrs x2, far_el2 - -2: mrs x0, tpidr_el2 - str w1, [x0, #VCPU_ESR_EL2] - str x2, [x0, #VCPU_FAR_EL2] - str x3, [x0, #VCPU_HPFAR_EL2] - - mov x1, #ARM_EXCEPTION_TRAP - b __kvm_vcpu_return - - /* - * Translation failed. Just return to the guest and - * let it fault again. Another CPU is probably playing - * behind our back. 
- */ -3: pop x2, x3 - pop x0, x1 - - eret - -el1_irq: - push x0, x1 - push x2, x3 - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_IRQ - b __kvm_vcpu_return - - .ltorg - - .align 11 - -ENTRY(__kvm_hyp_vector) - ventry el2t_sync_invalid // Synchronous EL2t - ventry el2t_irq_invalid // IRQ EL2t - ventry el2t_fiq_invalid // FIQ EL2t - ventry el2t_error_invalid // Error EL2t - - ventry el2h_sync_invalid // Synchronous EL2h - ventry el2h_irq_invalid // IRQ EL2h - ventry el2h_fiq_invalid // FIQ EL2h - ventry el2h_error_invalid // Error EL2h - - ventry el1_sync // Synchronous 64-bit EL1 - ventry el1_irq // IRQ 64-bit EL1 - ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 - - ventry el1_sync // Synchronous 32-bit EL1 - ventry el1_irq // IRQ 32-bit EL1 - ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__kvm_hyp_vector) - - -ENTRY(__kvm_get_mdcr_el2) - mrs x0, mdcr_el2 - ret -ENDPROC(__kvm_get_mdcr_el2) - - .popsection diff --git a/arch/arm64/kvm/vgic-v2-switch.S b/arch/arm64/kvm/vgic-v2-switch.S deleted file mode 100644 index 3f00071..0000000 --- a/arch/arm64/kvm/vgic-v2-switch.S +++ /dev/null @@ -1,134 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.text, "ax" - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! 
- */ -ENTRY(__save_vgic_v2_state) -__save_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* Save all interesting registers */ - ldr w5, [x2, #GICH_VMCR] - ldr w6, [x2, #GICH_MISR] - ldr w7, [x2, #GICH_EISR0] - ldr w8, [x2, #GICH_EISR1] - ldr w9, [x2, #GICH_ELRSR0] - ldr w10, [x2, #GICH_ELRSR1] - ldr w11, [x2, #GICH_APR] -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) -CPU_BE( rev w7, w7 ) -CPU_BE( rev w8, w8 ) -CPU_BE( rev w9, w9 ) -CPU_BE( rev w10, w10 ) -CPU_BE( rev w11, w11 ) - - str w5, [x3, #VGIC_V2_CPU_VMCR] - str w6, [x3, #VGIC_V2_CPU_MISR] -CPU_LE( str w7, [x3, #VGIC_V2_CPU_EISR] ) -CPU_LE( str w8, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_LE( str w9, [x3, #VGIC_V2_CPU_ELRSR] ) -CPU_LE( str w10, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w7, [x3, #(VGIC_V2_CPU_EISR + 4)] ) -CPU_BE( str w8, [x3, #VGIC_V2_CPU_EISR] ) -CPU_BE( str w9, [x3, #(VGIC_V2_CPU_ELRSR + 4)] ) -CPU_BE( str w10, [x3, #VGIC_V2_CPU_ELRSR] ) - str w11, [x3, #VGIC_V2_CPU_APR] - - /* Clear GICH_HCR */ - str wzr, [x2, #GICH_HCR] - - /* Save list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x2], #4 -CPU_BE( rev w5, w5 ) - str w5, [x3], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__save_vgic_v2_state) - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -ENTRY(__restore_vgic_v2_state) -__restore_vgic_v2_state: - /* Get VGIC VCTRL base into x2 */ - ldr x2, [x0, #VCPU_KVM] - kern_hyp_va x2 - ldr x2, [x2, #KVM_VGIC_VCTRL] - kern_hyp_va x2 - cbz x2, 2f // disabled - - /* Compute the address of struct vgic_cpu */ - add x3, x0, #VCPU_VGIC_CPU - - /* We only restore a minimal set of registers */ - ldr w4, [x3, #VGIC_V2_CPU_HCR] - ldr w5, [x3, #VGIC_V2_CPU_VMCR] - ldr w6, [x3, #VGIC_V2_CPU_APR] -CPU_BE( rev w4, w4 ) -CPU_BE( rev w5, w5 ) -CPU_BE( rev w6, w6 ) - - str w4, [x2, #GICH_HCR] - str w5, [x2, #GICH_VMCR] - str w6, [x2, #GICH_APR] - - /* Restore list registers */ - add x2, x2, #GICH_LR0 - ldr w4, [x3, #VGIC_CPU_NR_LR] - add x3, x3, #VGIC_V2_CPU_LR -1: ldr w5, [x3], #4 -CPU_BE( rev w5, w5 ) - str w5, [x2], #4 - sub w4, w4, #1 - cbnz w4, 1b -2: - ret -ENDPROC(__restore_vgic_v2_state) - - .popsection diff --git a/arch/arm64/kvm/vgic-v3-switch.S b/arch/arm64/kvm/vgic-v3-switch.S deleted file mode 100644 index 3c20730..0000000 --- a/arch/arm64/kvm/vgic-v3-switch.S +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Copyright (C) 2012,2013 - ARM Ltd - * Author: Marc Zyngier - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include -#include -#include -#include -#include - - .text - .pushsection .hyp.text, "ax" - -/* - * We store LRs in reverse order to let the CPU deal with streaming - * access. Use this macro to make it look saner... 
- */ -#define LR_OFFSET(n) (VGIC_V3_CPU_LR + (15 - n) * 8) - -/* - * Save the VGIC CPU state into memory - * x0: Register pointing to VCPU struct - * Do not corrupt x1!!! - */ -.macro save_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Make sure stores to the GIC via the memory mapped interface - // are now visible to the system register interface - dsb st - - // Save all interesting registers - mrs_s x5, ICH_VMCR_EL2 - mrs_s x6, ICH_MISR_EL2 - mrs_s x7, ICH_EISR_EL2 - mrs_s x8, ICH_ELSR_EL2 - - str w5, [x3, #VGIC_V3_CPU_VMCR] - str w6, [x3, #VGIC_V3_CPU_MISR] - str w7, [x3, #VGIC_V3_CPU_EISR] - str w8, [x3, #VGIC_V3_CPU_ELRSR] - - msr_s ICH_HCR_EL2, xzr - - mrs_s x21, ICH_VTR_EL2 - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - mrs_s x20, ICH_LR15_EL2 - mrs_s x19, ICH_LR14_EL2 - mrs_s x18, ICH_LR13_EL2 - mrs_s x17, ICH_LR12_EL2 - mrs_s x16, ICH_LR11_EL2 - mrs_s x15, ICH_LR10_EL2 - mrs_s x14, ICH_LR9_EL2 - mrs_s x13, ICH_LR8_EL2 - mrs_s x12, ICH_LR7_EL2 - mrs_s x11, ICH_LR6_EL2 - mrs_s x10, ICH_LR5_EL2 - mrs_s x9, ICH_LR4_EL2 - mrs_s x8, ICH_LR3_EL2 - mrs_s x7, ICH_LR2_EL2 - mrs_s x6, ICH_LR1_EL2 - mrs_s x5, ICH_LR0_EL2 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - str x20, [x3, #LR_OFFSET(15)] - str x19, [x3, #LR_OFFSET(14)] - str x18, [x3, #LR_OFFSET(13)] - str x17, [x3, #LR_OFFSET(12)] - str x16, [x3, #LR_OFFSET(11)] - str x15, [x3, #LR_OFFSET(10)] - str x14, [x3, #LR_OFFSET(9)] - str x13, [x3, #LR_OFFSET(8)] - str x12, [x3, #LR_OFFSET(7)] - str x11, [x3, #LR_OFFSET(6)] - str x10, [x3, #LR_OFFSET(5)] - str x9, [x3, #LR_OFFSET(4)] - str x8, [x3, #LR_OFFSET(3)] - str x7, [x3, #LR_OFFSET(2)] - str x6, [x3, #LR_OFFSET(1)] - str x5, [x3, #LR_OFFSET(0)] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP0R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - mrs_s x19, ICH_AP0R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] -6: mrs_s x18, ICH_AP0R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] -5: mrs_s x17, ICH_AP0R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP0R] - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - mrs_s x20, ICH_AP1R3_EL2 - str w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - mrs_s x19, ICH_AP1R2_EL2 - str w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] -6: mrs_s x18, ICH_AP1R1_EL2 - str w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] -5: mrs_s x17, ICH_AP1R0_EL2 - str w17, [x3, #VGIC_V3_CPU_AP1R] - - // Restore SRE_EL1 access and re-enable SRE at EL1. 
- mrs_s x5, ICC_SRE_EL2 - orr x5, x5, #ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 - isb - mov x5, #1 - msr_s ICC_SRE_EL1, x5 -.endm - -/* - * Restore the VGIC CPU state from memory - * x0: Register pointing to VCPU struct - */ -.macro restore_vgic_v3_state - // Compute the address of struct vgic_cpu - add x3, x0, #VCPU_VGIC_CPU - - // Restore all interesting registers - ldr w4, [x3, #VGIC_V3_CPU_HCR] - ldr w5, [x3, #VGIC_V3_CPU_VMCR] - ldr w25, [x3, #VGIC_V3_CPU_SRE] - - msr_s ICC_SRE_EL1, x25 - - // make sure SRE is valid before writing the other registers - isb - - msr_s ICH_HCR_EL2, x4 - msr_s ICH_VMCR_EL2, x5 - - mrs_s x21, ICH_VTR_EL2 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP1R + 3*4)] - msr_s ICH_AP1R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP1R + 2*4)] - msr_s ICH_AP1R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP1R + 1*4)] - msr_s ICH_AP1R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP1R] - msr_s ICH_AP1R0_EL2, x17 - - tbnz w21, #29, 6f // 6 bits - tbz w21, #30, 5f // 5 bits - // 7 bits - ldr w20, [x3, #(VGIC_V3_CPU_AP0R + 3*4)] - msr_s ICH_AP0R3_EL2, x20 - ldr w19, [x3, #(VGIC_V3_CPU_AP0R + 2*4)] - msr_s ICH_AP0R2_EL2, x19 -6: ldr w18, [x3, #(VGIC_V3_CPU_AP0R + 1*4)] - msr_s ICH_AP0R1_EL2, x18 -5: ldr w17, [x3, #VGIC_V3_CPU_AP0R] - msr_s ICH_AP0R0_EL2, x17 - - and w22, w21, #0xf - mvn w22, w21 - ubfiz w23, w22, 2, 4 // w23 = (15 - ListRegs) * 4 - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - ldr x20, [x3, #LR_OFFSET(15)] - ldr x19, [x3, #LR_OFFSET(14)] - ldr x18, [x3, #LR_OFFSET(13)] - ldr x17, [x3, #LR_OFFSET(12)] - ldr x16, [x3, #LR_OFFSET(11)] - ldr x15, [x3, #LR_OFFSET(10)] - ldr x14, [x3, #LR_OFFSET(9)] - ldr x13, [x3, #LR_OFFSET(8)] - ldr x12, [x3, #LR_OFFSET(7)] - ldr x11, [x3, #LR_OFFSET(6)] - ldr x10, [x3, #LR_OFFSET(5)] - ldr x9, [x3, #LR_OFFSET(4)] - ldr x8, [x3, #LR_OFFSET(3)] - ldr x7, [x3, #LR_OFFSET(2)] - ldr x6, [x3, #LR_OFFSET(1)] - ldr x5, [x3, #LR_OFFSET(0)] - - adr x24, 1f - add x24, x24, x23 - br x24 - -1: - msr_s ICH_LR15_EL2, x20 - msr_s ICH_LR14_EL2, x19 - msr_s ICH_LR13_EL2, x18 - msr_s ICH_LR12_EL2, x17 - msr_s ICH_LR11_EL2, x16 - msr_s ICH_LR10_EL2, x15 - msr_s ICH_LR9_EL2, x14 - msr_s ICH_LR8_EL2, x13 - msr_s ICH_LR7_EL2, x12 - msr_s ICH_LR6_EL2, x11 - msr_s ICH_LR5_EL2, x10 - msr_s ICH_LR4_EL2, x9 - msr_s ICH_LR3_EL2, x8 - msr_s ICH_LR2_EL2, x7 - msr_s ICH_LR1_EL2, x6 - msr_s ICH_LR0_EL2, x5 - - // Ensure that the above will have reached the - // (re)distributors. This ensure the guest will read - // the correct values from the memory-mapped interface. - isb - dsb sy - - // Prevent the guest from touching the GIC system registers - // if SRE isn't enabled for GICv3 emulation - cbnz x25, 1f - mrs_s x5, ICC_SRE_EL2 - and x5, x5, #~ICC_SRE_EL2_ENABLE - msr_s ICC_SRE_EL2, x5 -1: -.endm - -ENTRY(__save_vgic_v3_state) - save_vgic_v3_state - ret -ENDPROC(__save_vgic_v3_state) - -ENTRY(__restore_vgic_v3_state) - restore_vgic_v3_state - ret -ENDPROC(__restore_vgic_v3_state) - -ENTRY(__vgic_v3_get_ich_vtr_el2) - mrs_s x0, ICH_VTR_EL2 - ret -ENDPROC(__vgic_v3_get_ich_vtr_el2) - - .popsection -- cgit v0.10.2 From 9d8415d6c148a16b6d906a96f0596851d7e4d607 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 19:57:11 +0000 Subject: arm64: KVM: Turn system register numbers to an enum Having the system register numbers as #defines has been a pain since day one, as the ordering is pretty fragile, and moving things around leads to renumbering and epic conflict resolutions. 
Now that we're mostly acessing the sysreg file in C, an enum is a much better type to use, and we can clean things up a bit. Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 5e37710..52b777b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,82 +20,6 @@ #include -/* - * 0 is reserved as an invalid value. - * Order *must* be kept in sync with the hyp switch code. - */ -#define MPIDR_EL1 1 /* MultiProcessor Affinity Register */ -#define CSSELR_EL1 2 /* Cache Size Selection Register */ -#define SCTLR_EL1 3 /* System Control Register */ -#define ACTLR_EL1 4 /* Auxiliary Control Register */ -#define CPACR_EL1 5 /* Coprocessor Access Control */ -#define TTBR0_EL1 6 /* Translation Table Base Register 0 */ -#define TTBR1_EL1 7 /* Translation Table Base Register 1 */ -#define TCR_EL1 8 /* Translation Control Register */ -#define ESR_EL1 9 /* Exception Syndrome Register */ -#define AFSR0_EL1 10 /* Auxilary Fault Status Register 0 */ -#define AFSR1_EL1 11 /* Auxilary Fault Status Register 1 */ -#define FAR_EL1 12 /* Fault Address Register */ -#define MAIR_EL1 13 /* Memory Attribute Indirection Register */ -#define VBAR_EL1 14 /* Vector Base Address Register */ -#define CONTEXTIDR_EL1 15 /* Context ID Register */ -#define TPIDR_EL0 16 /* Thread ID, User R/W */ -#define TPIDRRO_EL0 17 /* Thread ID, User R/O */ -#define TPIDR_EL1 18 /* Thread ID, Privileged */ -#define AMAIR_EL1 19 /* Aux Memory Attribute Indirection Register */ -#define CNTKCTL_EL1 20 /* Timer Control Register (EL1) */ -#define PAR_EL1 21 /* Physical Address Register */ -#define MDSCR_EL1 22 /* Monitor Debug System Control Register */ -#define MDCCINT_EL1 23 /* Monitor Debug Comms Channel Interrupt Enable Reg */ - -/* 32bit specific registers. Keep them at the end of the range */ -#define DACR32_EL2 24 /* Domain Access Control Register */ -#define IFSR32_EL2 25 /* Instruction Fault Status Register */ -#define FPEXC32_EL2 26 /* Floating-Point Exception Control Register */ -#define DBGVCR32_EL2 27 /* Debug Vector Catch Register */ -#define NR_SYS_REGS 28 - -/* 32bit mapping */ -#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ -#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ -#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ -#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ -#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ -#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ -#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ -#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ -#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ -#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. 
*/ -#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ -#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ -#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ -#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ -#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ -#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ -#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ -#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ -#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ -#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ -#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ -#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ -#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ -#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ -#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ -#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ -#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ -#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ -#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ - -#define cp14_DBGDSCRext (MDSCR_EL1 * 2) -#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) -#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) -#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) -#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) -#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) -#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) - -#define NR_COPRO_REGS (NR_SYS_REGS * 2) - #define ARM_EXCEPTION_IRQ 0 #define ARM_EXCEPTION_TRAP 1 diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 25a4021..3066328 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -26,7 +26,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 19504aa..689d4c9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #define __KVM_HAVE_ARCH_INTC_INITIALIZED @@ -85,6 +84,86 @@ struct kvm_vcpu_fault_info { u64 hpfar_el2; /* Hyp IPA Fault Address Register */ }; +/* + * 0 is reserved as an invalid value. + * Order should be kept in sync with the save/restore code. 
+ */ +enum vcpu_sysreg { + __INVALID_SYSREG__, + MPIDR_EL1, /* MultiProcessor Affinity Register */ + CSSELR_EL1, /* Cache Size Selection Register */ + SCTLR_EL1, /* System Control Register */ + ACTLR_EL1, /* Auxiliary Control Register */ + CPACR_EL1, /* Coprocessor Access Control */ + TTBR0_EL1, /* Translation Table Base Register 0 */ + TTBR1_EL1, /* Translation Table Base Register 1 */ + TCR_EL1, /* Translation Control Register */ + ESR_EL1, /* Exception Syndrome Register */ + AFSR0_EL1, /* Auxilary Fault Status Register 0 */ + AFSR1_EL1, /* Auxilary Fault Status Register 1 */ + FAR_EL1, /* Fault Address Register */ + MAIR_EL1, /* Memory Attribute Indirection Register */ + VBAR_EL1, /* Vector Base Address Register */ + CONTEXTIDR_EL1, /* Context ID Register */ + TPIDR_EL0, /* Thread ID, User R/W */ + TPIDRRO_EL0, /* Thread ID, User R/O */ + TPIDR_EL1, /* Thread ID, Privileged */ + AMAIR_EL1, /* Aux Memory Attribute Indirection Register */ + CNTKCTL_EL1, /* Timer Control Register (EL1) */ + PAR_EL1, /* Physical Address Register */ + MDSCR_EL1, /* Monitor Debug System Control Register */ + MDCCINT_EL1, /* Monitor Debug Comms Channel Interrupt Enable Reg */ + + /* 32bit specific registers. Keep them at the end of the range */ + DACR32_EL2, /* Domain Access Control Register */ + IFSR32_EL2, /* Instruction Fault Status Register */ + FPEXC32_EL2, /* Floating-Point Exception Control Register */ + DBGVCR32_EL2, /* Debug Vector Catch Register */ + + NR_SYS_REGS /* Nothing after this line! */ +}; + +/* 32bit mapping */ +#define c0_MPIDR (MPIDR_EL1 * 2) /* MultiProcessor ID Register */ +#define c0_CSSELR (CSSELR_EL1 * 2)/* Cache Size Selection Register */ +#define c1_SCTLR (SCTLR_EL1 * 2) /* System Control Register */ +#define c1_ACTLR (ACTLR_EL1 * 2) /* Auxiliary Control Register */ +#define c1_CPACR (CPACR_EL1 * 2) /* Coprocessor Access Control */ +#define c2_TTBR0 (TTBR0_EL1 * 2) /* Translation Table Base Register 0 */ +#define c2_TTBR0_high (c2_TTBR0 + 1) /* TTBR0 top 32 bits */ +#define c2_TTBR1 (TTBR1_EL1 * 2) /* Translation Table Base Register 1 */ +#define c2_TTBR1_high (c2_TTBR1 + 1) /* TTBR1 top 32 bits */ +#define c2_TTBCR (TCR_EL1 * 2) /* Translation Table Base Control R. 
*/ +#define c3_DACR (DACR32_EL2 * 2)/* Domain Access Control Register */ +#define c5_DFSR (ESR_EL1 * 2) /* Data Fault Status Register */ +#define c5_IFSR (IFSR32_EL2 * 2)/* Instruction Fault Status Register */ +#define c5_ADFSR (AFSR0_EL1 * 2) /* Auxiliary Data Fault Status R */ +#define c5_AIFSR (AFSR1_EL1 * 2) /* Auxiliary Instr Fault Status R */ +#define c6_DFAR (FAR_EL1 * 2) /* Data Fault Address Register */ +#define c6_IFAR (c6_DFAR + 1) /* Instruction Fault Address Register */ +#define c7_PAR (PAR_EL1 * 2) /* Physical Address Register */ +#define c7_PAR_high (c7_PAR + 1) /* PAR top 32 bits */ +#define c10_PRRR (MAIR_EL1 * 2) /* Primary Region Remap Register */ +#define c10_NMRR (c10_PRRR + 1) /* Normal Memory Remap Register */ +#define c12_VBAR (VBAR_EL1 * 2) /* Vector Base Address Register */ +#define c13_CID (CONTEXTIDR_EL1 * 2) /* Context ID Register */ +#define c13_TID_URW (TPIDR_EL0 * 2) /* Thread ID, User R/W */ +#define c13_TID_URO (TPIDRRO_EL0 * 2)/* Thread ID, User R/O */ +#define c13_TID_PRIV (TPIDR_EL1 * 2) /* Thread ID, Privileged */ +#define c10_AMAIR0 (AMAIR_EL1 * 2) /* Aux Memory Attr Indirection Reg */ +#define c10_AMAIR1 (c10_AMAIR0 + 1)/* Aux Memory Attr Indirection Reg */ +#define c14_CNTKCTL (CNTKCTL_EL1 * 2) /* Timer Control Register (PL1) */ + +#define cp14_DBGDSCRext (MDSCR_EL1 * 2) +#define cp14_DBGBCR0 (DBGBCR0_EL1 * 2) +#define cp14_DBGBVR0 (DBGBVR0_EL1 * 2) +#define cp14_DBGBXVR0 (cp14_DBGBVR0 + 1) +#define cp14_DBGWCR0 (DBGWCR0_EL1 * 2) +#define cp14_DBGWVR0 (DBGWVR0_EL1 * 2) +#define cp14_DBGDCCINT (MDCCINT_EL1 * 2) + +#define NR_COPRO_REGS (NR_SYS_REGS * 2) + struct kvm_cpu_context { struct kvm_regs gp_regs; union { diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h index 889c908..fe612a9 100644 --- a/arch/arm64/include/asm/kvm_mmio.h +++ b/arch/arm64/include/asm/kvm_mmio.h @@ -19,7 +19,6 @@ #define __ARM64_KVM_MMIO_H__ #include -#include #include /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 25de8b2..4b72231 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -112,6 +112,7 @@ int main(void) DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 115522b..fcb7788 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8bddae1..eba89e4 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,6 +23,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index d071f45..567a0d6 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "hyp.h" diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 1050b2b..fd0fbe9 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -27,7 +27,6 @@ #define 
CPU_GP_REG_OFFSET(x) (CPU_GP_REGS + x) #define CPU_XREG_OFFSET(x) CPU_GP_REG_OFFSET(CPU_USER_PT_REGS + 8*x) -#define CPU_SYSREG_OFFSET(x) (CPU_SYSREGS + 8*x) .text .pushsection .hyp.text, "ax" @@ -150,7 +149,7 @@ ENTRY(__fpsimd_guest_restore) // Skip restoring fpexc32 for AArch64 guests mrs x1, hcr_el2 tbnz x1, #HCR_RW_SHIFT, 1f - ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)] + ldr x4, [x3, #VCPU_FPEXC32_EL2] msr fpexc32_el2, x4 1: ldp x4, lr, [sp], #16 diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 3603541..42563098 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -18,6 +18,7 @@ #include #include +#include #include #include "hyp.h" diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index d2650e8..88adebf 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 3813d23..453eafd 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -28,6 +28,7 @@ #include #include +#include #include /* These are for GICv2 emulation only */ -- cgit v0.10.2 From 23a13465c84c51ec4330863b59e9d50ee671f8b4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 25 Oct 2015 20:03:08 +0000 Subject: arm64: KVM: Cleanup asm-offset.c As we've now rewritten most of our code-base in C, most of the KVM-specific code in asm-offset.c is useless. Delete-time again! Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 4b72231..94090a6 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -108,50 +108,11 @@ int main(void) DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); - DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); - DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); - DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); - DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); DEFINE(VCPU_FPEXC32_EL2, offsetof(struct kvm_vcpu, arch.ctxt.sys_regs[FPEXC32_EL2])); DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); - DEFINE(VCPU_DEBUG_FLAGS, offsetof(struct kvm_vcpu, arch.debug_flags)); - DEFINE(VCPU_DEBUG_PTR, offsetof(struct kvm_vcpu, arch.debug_ptr)); - DEFINE(DEBUG_BCR, offsetof(struct kvm_guest_debug_arch, dbg_bcr)); - DEFINE(DEBUG_BVR, offsetof(struct kvm_guest_debug_arch, dbg_bvr)); - DEFINE(DEBUG_WCR, offsetof(struct kvm_guest_debug_arch, dbg_wcr)); - DEFINE(DEBUG_WVR, offsetof(struct kvm_guest_debug_arch, dbg_wvr)); - DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); - DEFINE(VCPU_MDCR_EL2, offsetof(struct kvm_vcpu, arch.mdcr_el2)); - DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); - DEFINE(VCPU_HOST_DEBUG_STATE, offsetof(struct kvm_vcpu, arch.host_debug_state)); - DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); - DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); - DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); - DEFINE(KVM_TIMER_ENABLED, 
offsetof(struct kvm, arch.timer.enabled)); - DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); - DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); - DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr)); - DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr)); - DEFINE(VGIC_V2_CPU_MISR, offsetof(struct vgic_cpu, vgic_v2.vgic_misr)); - DEFINE(VGIC_V2_CPU_EISR, offsetof(struct vgic_cpu, vgic_v2.vgic_eisr)); - DEFINE(VGIC_V2_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v2.vgic_elrsr)); - DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr)); - DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr)); - DEFINE(VGIC_V3_CPU_SRE, offsetof(struct vgic_cpu, vgic_v3.vgic_sre)); - DEFINE(VGIC_V3_CPU_HCR, offsetof(struct vgic_cpu, vgic_v3.vgic_hcr)); - DEFINE(VGIC_V3_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v3.vgic_vmcr)); - DEFINE(VGIC_V3_CPU_MISR, offsetof(struct vgic_cpu, vgic_v3.vgic_misr)); - DEFINE(VGIC_V3_CPU_EISR, offsetof(struct vgic_cpu, vgic_v3.vgic_eisr)); - DEFINE(VGIC_V3_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_v3.vgic_elrsr)); - DEFINE(VGIC_V3_CPU_AP0R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap0r)); - DEFINE(VGIC_V3_CPU_AP1R, offsetof(struct vgic_cpu, vgic_v3.vgic_ap1r)); - DEFINE(VGIC_V3_CPU_LR, offsetof(struct vgic_cpu, vgic_v3.vgic_lr)); - DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); - DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); - DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); #endif #ifdef CONFIG_CPU_PM DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); -- cgit v0.10.2 From 3ffa75cd18134a03f86f9d9b8b6e9128e0eda254 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 26 Oct 2015 09:10:07 +0000 Subject: arm64: KVM: Remove weak attributes As we've now switched to the new world switch implementation, remove the weak attributes, as nobody is supposed to override it anymore. 
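For readers unfamiliar with the aliasing pattern used in the hunks below, here is a minimal, hedged sketch of the idea (the names example_impl and example_public and the local __alias macro are invented for illustration; in the kernel the macro comes from the compiler attribute headers): the implementation is defined static under its internal name, and the public KVM symbol is declared as a GCC alias to it, so there is exactly one definition and nothing left for a weak symbol to override.

/* Illustrative only -- not part of the patch. */
#define __alias(sym) __attribute__((__alias__(#sym)))

static int example_impl(int v)
{
        /* The real body stays static and keeps its internal name. */
        return v + 1;
}

/* The public symbol is emitted as an alias of the static definition above. */
__alias(example_impl) int example_public(int v);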
Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 567a0d6..c9c1e97 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -132,10 +132,9 @@ void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; } -u32 __hyp_text __debug_read_mdcr_el2(void) +static u32 __hyp_text __debug_read_mdcr_el2(void) { return read_sysreg(mdcr_el2); } -__alias(__debug_read_mdcr_el2) -u32 __weak __kvm_get_mdcr_el2(void); +__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 10d6d2a..93e8d983 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -189,9 +189,7 @@ ENDPROC(\label) .align 11 - .weak __kvm_hyp_vector ENTRY(__kvm_hyp_vector) -ENTRY(__hyp_vector) ventry el2t_sync_invalid // Synchronous EL2t ventry el2t_irq_invalid // IRQ EL2t ventry el2t_fiq_invalid // FIQ EL2t @@ -211,5 +209,4 @@ ENTRY(__hyp_vector) ventry el1_irq // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__hyp_vector) ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 7457ae4..ca8f5a5 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -85,7 +85,7 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) __vgic_call_restore_state()(vcpu); } -int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -142,8 +142,7 @@ int __hyp_text __guest_run(struct kvm_vcpu *vcpu) return exit_code; } -__alias(__guest_run) -int __weak __kvm_vcpu_run(struct kvm_vcpu *vcpu); +__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index 5f815cf..2a7e0d8 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -17,7 +17,7 @@ #include "hyp.h" -void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); @@ -48,10 +48,10 @@ void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) write_sysreg(0, vttbr_el2); } -__alias(__tlb_flush_vmid_ipa) -void __weak __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, + phys_addr_t ipa); -void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) { dsb(ishst); @@ -67,10 +67,9 @@ void __hyp_text __tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, vttbr_el2); } -__alias(__tlb_flush_vmid) -void __weak __kvm_tlb_flush_vmid(struct kvm *kvm); +__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); -void __hyp_text __tlb_flush_vm_context(void) +static void __hyp_text __tlb_flush_vm_context(void) { dsb(ishst); asm volatile("tlbi alle1is \n" @@ -78,5 +77,4 @@ void __hyp_text __tlb_flush_vm_context(void) dsb(ish); } -__alias(__tlb_flush_vm_context) -void __weak __kvm_flush_vm_context(void); +__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git 
a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index a769458..9142e08 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -220,10 +220,9 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) } } -u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } -__alias(__vgic_v3_read_ich_vtr_el2) -u64 __weak __vgic_v3_get_ich_vtr_el2(void); +__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void); -- cgit v0.10.2 From 7ec7c8c70b26de90f61be7523a6ad14df911219f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 2 Dec 2015 14:27:03 +0100 Subject: KVM: s390: use assignment instead of memcpy Replace two memcpy with proper assignment. Suggested-by: Paolo Bonzini Reviewed-by: David Hildenbrand Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6857262..6dec01d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2120,7 +2120,8 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu) */ kvm_check_async_pf_completion(vcpu); - memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); + vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14]; + vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15]; if (need_resched()) schedule(); @@ -2185,7 +2186,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (guestdbg_enabled(vcpu)) kvm_s390_restore_guest_per_regs(vcpu); - memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); + vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14; + vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15; if (vcpu->arch.sie_block->icptcode > 0) { int rc = kvm_handle_sie_intercept(vcpu); -- cgit v0.10.2 From 8335713ad08caf2c3dfcb5bc2c93d7e0276142d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Tue, 8 Dec 2015 16:55:27 +0100 Subject: KVM: s390: obey kptr_restrict in traces The s390dbf and trace events provide a debugfs interface. If kptr_restrict is active, we should not expose kernel pointers. We can fence the debugfs output by using %pK instead of %p. 
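As a quick, hedged illustration of the %pK convention relied on below (the function name example_log_object is invented; the rest is standard printk behaviour): with kptr_restrict set to 1 or 2, %pK prints zeroes to readers that lack the required capability, whereas plain %p at this point in time printed the raw kernel address.

#include <linux/kernel.h>

/* Illustrative only -- not from the patch. */
static void example_log_object(const void *obj)
{
        /* %pK honours kptr_restrict; %p here would leak the raw address. */
        pr_info("object at %pK\n", obj);
}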
Cc: Kees Cook Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 6dec01d..c14845c 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1185,7 +1185,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.epoch = 0; spin_lock_init(&kvm->arch.start_stop_lock); - KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid); + KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; out_err: @@ -1245,7 +1245,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) gmap_free(kvm->arch.gmap); kvm_s390_destroy_adapters(kvm); kvm_s390_clear_float_irqs(kvm); - KVM_EVENT(3, "vm 0x%p destroyed", kvm); + KVM_EVENT(3, "vm 0x%pK destroyed", kvm); } /* Section: vcpu related */ @@ -1349,7 +1349,8 @@ static int sca_switch_to_extended(struct kvm *kvm) free_page((unsigned long)old_sca); - VM_EVENT(kvm, 2, "Switched to ESCA (%p -> %p)", old_sca, kvm->arch.sca); + VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)", + old_sca, kvm->arch.sca); return 0; } @@ -1624,7 +1625,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, rc = kvm_vcpu_init(vcpu, kvm, id); if (rc) goto out_free_sie_block; - VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu, vcpu->arch.sie_block); trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block); diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h index cc1d6c6..396485b 100644 --- a/arch/s390/kvm/trace-s390.h +++ b/arch/s390/kvm/trace-s390.h @@ -55,8 +55,8 @@ TRACE_EVENT(kvm_s390_create_vcpu, __entry->sie_block = sie_block; ), - TP_printk("create cpu %d at %p, sie block at %p", __entry->id, - __entry->vcpu, __entry->sie_block) + TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK", + __entry->id, __entry->vcpu, __entry->sie_block) ); TRACE_EVENT(kvm_s390_destroy_vcpu, @@ -254,7 +254,7 @@ TRACE_EVENT(kvm_s390_enable_css, __entry->kvm = kvm; ), - TP_printk("enabling channel I/O support (kvm @ %p)\n", + TP_printk("enabling channel I/O support (kvm @ %pK)\n", __entry->kvm) ); -- cgit v0.10.2 From a3a92c31bf0b57ad0ca7f092a6f3a57168ba9ae2 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 1 Dec 2014 17:24:42 +0100 Subject: KVM: s390: fix mismatch between user and in-kernel guest limit While the userspace interface requests the maximum size the gmap code expects to get a maximum address. This error resulted in bigger page tables than necessary for some guest sizes, e.g. a 2GB guest used 3 levels instead of 2. At the same time we introduce KVM_S390_NO_MEM_LIMIT, which allows in a bright future that a guest spans the complete 64 bit address space. We also switch to TASK_MAX_SIZE for the initial memory size, this is a cosmetic change as the previous size also resulted in a 4 level pagetable creation. Reported-by: David Hildenbrand Reviewed-by: Cornelia Huck Signed-off-by: Dominik Dingel Signed-off-by: Christian Borntraeger diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt index 2d09d1e..f083a16 100644 --- a/Documentation/virtual/kvm/devices/vm.txt +++ b/Documentation/virtual/kvm/devices/vm.txt @@ -37,7 +37,8 @@ Returns: -EFAULT if the given address is not accessible Allows userspace to query the actual limit and set a new limit for the maximum guest memory size. The limit will be rounded up to 2048 MB, 4096 GB, 8192 TB respectively, as this limit is governed by -the number of page table levels. 
+the number of page table levels. In the case that there is no limit we will set +the limit to KVM_S390_NO_MEM_LIMIT (U64_MAX). 2. GROUP: KVM_S390_VM_CPU_MODEL Architectures: s390 diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 12e9291..c831441 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -627,6 +627,7 @@ struct kvm_arch{ struct kvm_s390_float_interrupt float_int; struct kvm_device *flic; struct gmap *gmap; + unsigned long mem_limit; int css_support; int use_irqchip; int use_cmma; diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index ef1a5fc..d2aea31 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -66,6 +66,8 @@ struct kvm_s390_io_adapter_req { #define KVM_S390_VM_MEM_CLR_CMMA 1 #define KVM_S390_VM_MEM_LIMIT_SIZE 2 +#define KVM_S390_NO_MEM_LIMIT U64_MAX + /* kvm attributes for KVM_S390_VM_TOD */ #define KVM_S390_VM_TOD_LOW 0 #define KVM_S390_VM_TOD_HIGH 1 diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index c14845c..8aa5e55 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -378,8 +378,8 @@ static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *att case KVM_S390_VM_MEM_LIMIT_SIZE: ret = 0; VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes", - kvm->arch.gmap->asce_end); - if (put_user(kvm->arch.gmap->asce_end, (u64 __user *)attr->addr)) + kvm->arch.mem_limit); + if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr)) ret = -EFAULT; break; default: @@ -431,9 +431,17 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT; - if (new_limit > kvm->arch.gmap->asce_end) + if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT && + new_limit > kvm->arch.mem_limit) return -E2BIG; + if (!new_limit) + return -EINVAL; + + /* gmap_alloc takes last usable address */ + if (new_limit != KVM_S390_NO_MEM_LIMIT) + new_limit -= 1; + ret = -EBUSY; mutex_lock(&kvm->lock); if (atomic_read(&kvm->online_vcpus) == 0) { @@ -450,7 +458,9 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att } } mutex_unlock(&kvm->lock); - VM_EVENT(kvm, 3, "SET: max guest memory: %lu bytes", new_limit); + VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit); + VM_EVENT(kvm, 3, "New guest asce: 0x%pK", + (void *) kvm->arch.gmap->asce); break; } default: @@ -1172,8 +1182,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (type & KVM_VM_S390_UCONTROL) { kvm->arch.gmap = NULL; + kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1); + kvm->arch.mem_limit = TASK_MAX_SIZE; + kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; kvm->arch.gmap->private = kvm; @@ -2829,6 +2841,9 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (mem->memory_size & 0xffffful) return -EINVAL; + if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit) + return -EINVAL; + return 0; } diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 54ef3bc..63b0398 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -133,7 +133,7 @@ void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) /** * gmap_alloc - allocate a guest address space * @mm: pointer to the parent mm_struct - * @limit: maximum size of the gmap address space + * @limit: maximum address 
of the gmap address space * * Returns a guest address space structure. */ @@ -402,7 +402,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from, if ((from | to | len) & (PMD_SIZE - 1)) return -EINVAL; if (len == 0 || from + len < from || to + len < to || - from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) return -EINVAL; flush = 0; -- cgit v0.10.2 From 32e6b236d26946eb076d1450bfb8f9978f15d6b9 Mon Sep 17 00:00:00 2001 From: Guenther Hutzl Date: Mon, 1 Dec 2014 17:24:42 +0100 Subject: KVM: s390: consider system MHA for guest storage Verify that the guest maximum storage address is below the MHA (maximum host address) value allowed on the host. Acked-by: Michael Holzheu Reviewed-by: Cornelia Huck Reviewed-by: David Hildenbrand Signed-off-by: Guenther Hutzl Signed-off-by: Dominik Dingel [adopt to match recent limit,size changes] Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 8aa5e55..940e9ff 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1184,7 +1184,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.gmap = NULL; kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT; } else { - kvm->arch.mem_limit = TASK_MAX_SIZE; + if (sclp.hamax == U64_MAX) + kvm->arch.mem_limit = TASK_MAX_SIZE; + else + kvm->arch.mem_limit = min_t(unsigned long, TASK_MAX_SIZE, + sclp.hamax + 1); kvm->arch.gmap = gmap_alloc(current->mm, kvm->arch.mem_limit - 1); if (!kvm->arch.gmap) goto out_err; diff --git a/drivers/s390/char/sclp_early.c b/drivers/s390/char/sclp_early.c index e0a1f4e..6804354 100644 --- a/drivers/s390/char/sclp_early.c +++ b/drivers/s390/char/sclp_early.c @@ -40,7 +40,8 @@ struct read_info_sccb { u8 fac85; /* 85 */ u8 _pad_86[91 - 86]; /* 86-90 */ u8 flags; /* 91 */ - u8 _pad_92[100 - 92]; /* 92-99 */ + u8 _pad_92[99 - 92]; /* 92-98 */ + u8 hamaxpow; /* 99 */ u32 rnsize2; /* 100-103 */ u64 rnmax2; /* 104-111 */ u8 _pad_112[116 - 112]; /* 112-115 */ @@ -120,6 +121,11 @@ static void __init sclp_facilities_detect(struct read_info_sccb *sccb) sclp.rzm <<= 20; sclp.ibc = sccb->ibc; + if (sccb->hamaxpow && sccb->hamaxpow < 64) + sclp.hamax = (1UL << sccb->hamaxpow) - 1; + else + sclp.hamax = U64_MAX; + if (!sccb->hcpua) { if (MACHINE_IS_VM) sclp.max_cores = 64; -- cgit v0.10.2 From 7797dcf63f11b6e1d34822daf2317223d0f4ad46 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:13 +0300 Subject: drivers/hv: replace enum hv_message_type by u32 enum hv_message_type inside struct hv_message, hv_post_message is not size portable. Replace enum by u32. Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 3782636..ab3be44 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -174,7 +174,7 @@ union hv_message_flags { /* Define synthetic interrupt controller message header. 
*/ struct hv_message_header { - enum hv_message_type message_type; + u32 message_type; u8 payload_size; union hv_message_flags message_flags; u8 reserved[2]; @@ -347,7 +347,7 @@ enum hv_call_code { struct hv_input_post_message { union hv_connection_id connectionid; u32 reserved; - enum hv_message_type message_type; + u32 message_type; u32 payload_size; u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; }; -- cgit v0.10.2 From 4f39bcfd1c132522380138a323f9af7902766301 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:14 +0300 Subject: drivers/hv: Move HV_SYNIC_STIMER_COUNT into Hyper-V UAPI x86 header This constant is required for Hyper-V SynIC timers MSR's support by userspace(QEMU). Signed-off-by: Andrey Smetanin Acked-by: K. Y. Srinivasan Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 040d408..07981f0 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -269,4 +269,6 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) #define HV_SYNIC_SINT_VECTOR_MASK (0xFF) +#define HV_SYNIC_STIMER_COUNT (4) + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index ab3be44..bf01b11 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -102,8 +102,6 @@ enum hv_message_type { HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -#define HV_SYNIC_STIMER_COUNT (4) - /* Define invalid partition identifier. */ #define HV_PARTITION_ID_INVALID ((u64)0x0) -- cgit v0.10.2 From 5b423efe11e822e092e8c911a6bad17eadf718eb Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:15 +0300 Subject: drivers/hv: Move struct hv_message into UAPI Hyper-V x86 header This struct is required for Hyper-V SynIC timers implementation inside KVM and for upcoming Hyper-V VMBus support by userspace(QEMU). So place it into Hyper-V UAPI header. Signed-off-by: Andrey Smetanin Acked-by: K. Y. Srinivasan Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 07981f0..a41cdee 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -271,4 +271,80 @@ typedef struct _HV_REFERENCE_TSC_PAGE { #define HV_SYNIC_STIMER_COUNT (4) +/* Define synthetic interrupt controller message constants. */ +#define HV_MESSAGE_SIZE (256) +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) + +/* Define hypervisor message types. */ +enum hv_message_type { + HVMSG_NONE = 0x00000000, + + /* Memory access messages. */ + HVMSG_UNMAPPED_GPA = 0x80000000, + HVMSG_GPA_INTERCEPT = 0x80000001, + + /* Timer notification messages. */ + HVMSG_TIMER_EXPIRED = 0x80000010, + + /* Error messages. */ + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + + /* Trace buffer complete messages. */ + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, + + /* Platform-specific processor intercept messages. 
*/ + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_MSR_INTERCEPT = 0x80010001, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 +}; + +/* Define synthetic interrupt controller message flags. */ +union hv_message_flags { + __u8 asu8; + struct { + __u8 msg_pending:1; + __u8 reserved:7; + }; +}; + +/* Define port identifier type. */ +union hv_port_id { + __u32 asu32; + struct { + __u32 id:24; + __u32 reserved:8; + } u; +}; + +/* Define synthetic interrupt controller message header. */ +struct hv_message_header { + __u32 message_type; + __u8 payload_size; + union hv_message_flags message_flags; + __u8 reserved[2]; + union { + __u64 sender; + union hv_port_id port; + }; +}; + +/* Define synthetic interrupt controller message format. */ +struct hv_message { + struct hv_message_header header; + union { + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; + } u; +}; + +/* Define the synthetic interrupt message page layout. */ +struct hv_message_page { + struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; +}; + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index bf01b11..d9d5063 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -63,10 +63,6 @@ enum hv_cpuid_function { /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) #define HV_ANY_VP (0xFFFFFFFF) /* Define synthetic interrupt controller flag constants. */ @@ -74,46 +70,9 @@ enum hv_cpuid_function { #define HV_EVENT_FLAGS_BYTE_COUNT (256) #define HV_EVENT_FLAGS_DWORD_COUNT (256 / sizeof(u32)) -/* Define hypervisor message types. */ -enum hv_message_type { - HVMSG_NONE = 0x00000000, - - /* Memory access messages. */ - HVMSG_UNMAPPED_GPA = 0x80000000, - HVMSG_GPA_INTERCEPT = 0x80000001, - - /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, - - /* Error messages. */ - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, - - /* Trace buffer complete messages. */ - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, - - /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, - HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - /* Define invalid partition identifier. */ #define HV_PARTITION_ID_INVALID ((u64)0x0) -/* Define port identifier type. */ -union hv_port_id { - u32 asu32; - struct { - u32 id:24; - u32 reserved:8; - } u ; -}; - /* Define port type. */ enum hv_port_type { HVPORT_MSG = 1, @@ -161,27 +120,6 @@ struct hv_connection_info { }; }; -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - u8 asu8; - struct { - u8 msg_pending:1; - u8 reserved:7; - }; -}; - -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - u32 message_type; - u8 payload_size; - union hv_message_flags message_flags; - u8 reserved[2]; - union { - u64 sender; - union hv_port_id port; - }; -}; - /* * Timer configuration register. 
*/ @@ -207,22 +145,9 @@ struct hv_timer_message_payload { u64 delivery_time; /* When the message was delivered */ }; -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u ; -}; - /* Define the number of message buffers associated with each port. */ #define HV_PORT_MESSAGE_BUFFER_COUNT (16) -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -}; - /* Define the synthetic interrupt controller event flags format. */ union hv_synic_event_flags { u8 flags8[HV_EVENT_FLAGS_BYTE_COUNT]; -- cgit v0.10.2 From c71acc4c74dddebbbbeede69fdd4f0b1a124f9df Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:16 +0300 Subject: drivers/hv: Move struct hv_timer_message_payload into UAPI Hyper-V x86 header This struct is required for Hyper-V SynIC timers implementation inside KVM and for upcoming Hyper-V VMBus support by userspace(QEMU). So place it into Hyper-V UAPI header. Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index a41cdee..2a5629e 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -347,4 +347,12 @@ struct hv_message_page { struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; }; +/* Define timer message payload structure. */ +struct hv_timer_message_payload { + __u32 timer_index; + __u32 reserved; + __u64 expiration_time; /* When the timer expired */ + __u64 delivery_time; /* When the message was delivered */ +}; + #endif diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index d9d5063..678663e 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -136,15 +136,6 @@ union hv_timer_config { }; }; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - u32 timer_index; - u32 reserved; - u64 expiration_time; /* When the timer expired */ - u64 delivery_time; /* When the message was delivered */ -}; - /* Define the number of message buffers associated with each port. */ #define HV_PORT_MESSAGE_BUFFER_COUNT (16) -- cgit v0.10.2 From e18eaeed2b056094a5626288d47ceefc740c90e5 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:17 +0300 Subject: kvm/x86: Rearrange func's declarations inside Hyper-V header This rearrangement places functions declarations together according to their functionality, so future additions will be simplier. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 315af4b..9483d49 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -24,14 +24,6 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ -int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); -int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); -bool kvm_hv_hypercall_enabled(struct kvm *kvm); -int kvm_hv_hypercall(struct kvm_vcpu *vcpu); - -int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); -void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); - static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) { return &vcpu->arch.hyperv.synic; @@ -46,10 +38,18 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) arch = container_of(hv, struct kvm_vcpu_arch, hyperv); return container_of(arch, struct kvm_vcpu, arch); } -void kvm_hv_irq_routing_update(struct kvm *kvm); -void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); +int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); + +bool kvm_hv_hypercall_enabled(struct kvm *kvm); +int kvm_hv_hypercall(struct kvm_vcpu *vcpu); +void kvm_hv_irq_routing_update(struct kvm *kvm); +int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint); +void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); + #endif -- cgit v0.10.2 From 0ae80384b257b0a1ffa4e5d9eab2eb559bb063c5 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:18 +0300 Subject: kvm/x86: Added Hyper-V vcpu_to_hv_vcpu()/hv_vcpu_to_vcpu() helpers Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 9483d49..d5d8217 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -24,21 +24,29 @@ #ifndef __ARCH_X86_KVM_HYPERV_H__ #define __ARCH_X86_KVM_HYPERV_H__ -static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) { - return &vcpu->arch.hyperv.synic; + return &vcpu->arch.hyperv; } -static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +static inline struct kvm_vcpu *hv_vcpu_to_vcpu(struct kvm_vcpu_hv *hv_vcpu) { - struct kvm_vcpu_hv *hv; struct kvm_vcpu_arch *arch; - hv = container_of(synic, struct kvm_vcpu_hv, synic); - arch = container_of(hv, struct kvm_vcpu_arch, hyperv); + arch = container_of(hv_vcpu, struct kvm_vcpu_arch, hyperv); return container_of(arch, struct kvm_vcpu, arch); } +static inline struct kvm_vcpu_hv_synic *vcpu_to_synic(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.hyperv.synic; +} + +static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) +{ + return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); +} + int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata); -- cgit v0.10.2 From 93bf4172481c4b2a8544c83a687946252563edd0 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:19 +0300 Subject: kvm/x86: Hyper-V internal helper to read MSR HV_X64_MSR_TIME_REF_COUNT This helper will be used also in Hyper-V SynIC timers implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 41869a9..9958926 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -335,6 +335,11 @@ static void synic_init(struct kvm_vcpu_hv_synic *synic) } } +static u64 get_time_ref_counter(struct kvm *kvm) +{ + return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); +} + void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) { synic_init(vcpu_to_synic(vcpu)); @@ -576,11 +581,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = hv->hv_hypercall; break; - case HV_X64_MSR_TIME_REF_COUNT: { - data = - div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + case HV_X64_MSR_TIME_REF_COUNT: + data = get_time_ref_counter(kvm); break; - } case HV_X64_MSR_REFERENCE_TSC: data = hv->hv_tsc_page; break; -- cgit v0.10.2 From 765eaa0f70eaa274ec8b815d8c210c20cf7b6dbc Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:20 +0300 Subject: kvm/x86: Hyper-V SynIC message slot pending clearing at SINT ack The SynIC message protocol mandates that the message slot is claimed by atomically setting message type to something other than HVMSG_NONE. If another message is to be delivered while the slot is still busy, message pending flag is asserted to indicate to the guest that the hypervisor wants to be notified when the slot is released. To make sure the protocol works regardless of where the message sources are (kernel or userspace), clear the pending flag on SINT ACK notification, and let the message sources compete for the slot again. 
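A simplified, hedged sketch of the slot protocol described above (example_post_message and the slot/src parameter names are invented, and the snippet assumes the usual kernel cmpxchg/memcpy helpers; struct hv_message, HVMSG_NONE and the msg_pending flag come from the UAPI header introduced earlier in this series, and the corresponding kernel code appears in the hunks that follow). The sender claims the slot by atomically moving message_type away from HVMSG_NONE; when the slot is busy it only raises msg_pending, and clearing that flag on SINT ACK is what lets the message sources compete for the slot again.

/* Illustrative only -- a sketch of the claim step, not the patch code. */
static int example_post_message(struct hv_message *slot,
                                const struct hv_message *src)
{
        /* Claim the slot: HVMSG_NONE -> real message type, atomically. */
        if (cmpxchg(&slot->header.message_type, HVMSG_NONE,
                    src->header.message_type) != HVMSG_NONE) {
                /* Busy: ask to be notified when the guest frees the slot. */
                slot->header.message_flags.msg_pending = 1;
                return -EAGAIN;
        }
        slot->header.payload_size = src->header.payload_size;
        memcpy(slot->u.payload, src->u.payload, src->header.payload_size);
        return 0;
}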
Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 9958926..6412b6b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -27,6 +27,7 @@ #include "hyperv.h" #include +#include #include #include @@ -116,13 +117,43 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id) return (synic->active) ? synic : NULL; } +static void synic_clear_sint_msg_pending(struct kvm_vcpu_hv_synic *synic, + u32 sint) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *msg; + struct hv_message_page *msg_page; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) { + vcpu_err(vcpu, "Hyper-V SynIC can't get msg page, gpa 0x%llx\n", + gpa); + return; + } + msg_page = kmap_atomic(page); + + msg = &msg_page->sint_message[sint]; + msg->header.message_flags.msg_pending = 0; + + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); +} + static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) { struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); int gsi, idx; vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) + synic_clear_sint_msg_pending(synic, sint); + idx = srcu_read_lock(&kvm->irq_srcu); gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); if (gsi != -1) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 590c46e..f44c24b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -450,6 +450,8 @@ struct kvm { #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +#define vcpu_err(vcpu, fmt, ...) \ + kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { -- cgit v0.10.2 From 1f4b34f825e8cef6f493d06b46605384785b3d16 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 30 Nov 2015 19:22:21 +0300 Subject: kvm/x86: Hyper-V SynIC timers Per Hyper-V specification (and as required by Hyper-V-aware guests), SynIC provides 4 per-vCPU timers. Each timer is programmed via a pair of MSRs, and signals expiration by delivering a special format message to the configured SynIC message slot and triggering the corresponding synthetic interrupt. Note: as implemented by this patch, all periodic timers are "lazy" (i.e. if the vCPU wasn't scheduled for more than the timer period the timer events are lost), regardless of the corresponding configuration MSR. If deemed necessary, the "catch up" mode (the timer period is shortened until the timer catches up) will be implemented later. Changes v2: * Use remainder to calculate periodic timer expiration time Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: "K. Y. Srinivasan" CC: Haiyang Zhang CC: Vitaly Kuznetsov CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8140077..a7c8987 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -379,6 +379,17 @@ struct kvm_mtrr { struct list_head head; }; +/* Hyper-V SynIC timer */ +struct kvm_vcpu_hv_stimer { + struct hrtimer timer; + int index; + u64 config; + u64 count; + u64 exp_time; + struct hv_message msg; + bool msg_pending; +}; + /* Hyper-V synthetic interrupt controller (SynIC)*/ struct kvm_vcpu_hv_synic { u64 version; @@ -398,6 +409,8 @@ struct kvm_vcpu_hv { s64 runtime_offset; struct kvm_vcpu_hv_synic synic; struct kvm_hyperv_exit exit; + struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; + DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); }; struct kvm_vcpu_arch { diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 2a5629e..7956412 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -355,4 +355,10 @@ struct hv_timer_message_payload { __u64 delivery_time; /* When the message was delivered */ }; +#define HV_STIMER_ENABLE (1ULL << 0) +#define HV_STIMER_PERIODIC (1ULL << 1) +#define HV_STIMER_LAZY (1ULL << 2) +#define HV_STIMER_AUTOENABLE (1ULL << 3) +#define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) + #endif diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6412b6b..8ff8829 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -147,15 +147,32 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) { struct kvm *kvm = vcpu->kvm; struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); - int gsi, idx; + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + int gsi, idx, stimers_pending; vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) synic_clear_sint_msg_pending(synic, sint); + /* Try to deliver pending Hyper-V SynIC timers messages */ + stimers_pending = 0; + for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) { + stimer = &hv_vcpu->stimer[idx]; + if (stimer->msg_pending && + (stimer->config & HV_STIMER_ENABLE) && + HV_STIMER_SINT(stimer->config) == sint) { + set_bit(stimer->index, + hv_vcpu->stimer_pending_bitmap); + stimers_pending++; + } + } + if (stimers_pending) + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + idx = srcu_read_lock(&kvm->irq_srcu); - gsi = atomic_read(&vcpu_to_synic(vcpu)->sint_to_gsi[sint]); + gsi = atomic_read(&synic->sint_to_gsi[sint]); if (gsi != -1) kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); @@ -371,9 +388,268 @@ static u64 get_time_ref_counter(struct kvm *kvm) return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); } +static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, + bool vcpu_kick) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + set_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + kvm_make_request(KVM_REQ_HV_STIMER, vcpu); + if (vcpu_kick) + kvm_vcpu_kick(vcpu); +} + +static void stimer_stop(struct kvm_vcpu_hv_stimer *stimer) +{ + hrtimer_cancel(&stimer->timer); +} + +static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + + stimer_stop(stimer); + clear_bit(stimer->index, + vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); + stimer->msg_pending = false; +} + +static enum hrtimer_restart stimer_timer_callback(struct 
hrtimer *timer) +{ + struct kvm_vcpu_hv_stimer *stimer; + + stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); + stimer_mark_expired(stimer, true); + + return HRTIMER_NORESTART; +} + +static void stimer_restart(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + u64 remainder; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + div64_u64_rem(time_now - stimer->exp_time, stimer->count, &remainder); + stimer->exp_time = time_now + (stimer->count - remainder); + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, + 100 * (stimer->exp_time - time_now)), + HRTIMER_MODE_ABS); +} + +static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) +{ + u64 time_now; + ktime_t ktime_now; + + time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); + ktime_now = ktime_get(); + + if (stimer->config & HV_STIMER_PERIODIC) { + if (stimer->count == 0) + return -EINVAL; + + stimer->exp_time = time_now + stimer->count; + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * stimer->count), + HRTIMER_MODE_ABS); + return 0; + } + stimer->exp_time = stimer->count; + if (time_now >= stimer->count) { + /* + * Expire timer according to Hypervisor Top-Level Functional + * specification v4(15.3.1): + * "If a one shot is enabled and the specified count is in + * the past, it will expire immediately." + */ + stimer_mark_expired(stimer, false); + return 0; + } + + hrtimer_start(&stimer->timer, + ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), + HRTIMER_MODE_ABS); + return 0; +} + +static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, + bool host) +{ + if (stimer->count == 0 || HV_STIMER_SINT(config) == 0) + config &= ~HV_STIMER_ENABLE; + stimer->config = config; + stimer_cleanup(stimer); + if (stimer->config & HV_STIMER_ENABLE) + if (stimer_start(stimer)) + return 1; + return 0; +} + +static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, + bool host) +{ + stimer->count = count; + + stimer_cleanup(stimer); + if (stimer->count == 0) + stimer->config &= ~HV_STIMER_ENABLE; + else if (stimer->config & HV_STIMER_AUTOENABLE) { + stimer->config |= HV_STIMER_ENABLE; + if (stimer_start(stimer)) + return 1; + } + + return 0; +} + +static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig) +{ + *pconfig = stimer->config; + return 0; +} + +static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount) +{ + *pcount = stimer->count; + return 0; +} + +static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, + struct hv_message *src_msg) +{ + struct kvm_vcpu *vcpu = synic_to_vcpu(synic); + struct page *page; + gpa_t gpa; + struct hv_message *dst_msg; + int r; + struct hv_message_page *msg_page; + + if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE)) + return -ENOENT; + + gpa = synic->msg_page & PAGE_MASK; + page = kvm_vcpu_gfn_to_page(vcpu, gpa >> PAGE_SHIFT); + if (is_error_page(page)) + return -EFAULT; + + msg_page = kmap_atomic(page); + dst_msg = &msg_page->sint_message[sint]; + if (sync_cmpxchg(&dst_msg->header.message_type, HVMSG_NONE, + src_msg->header.message_type) != HVMSG_NONE) { + dst_msg->header.message_flags.msg_pending = 1; + r = -EAGAIN; + } else { + memcpy(&dst_msg->u.payload, &src_msg->u.payload, + src_msg->header.payload_size); + dst_msg->header.message_type = src_msg->header.message_type; + dst_msg->header.payload_size = src_msg->header.payload_size; + r = synic_set_irq(synic, sint); + if (r >= 1) + r = 0; + else if (r == 0) + r 
= -EFAULT; + } + kunmap_atomic(msg_page); + kvm_release_page_dirty(page); + kvm_vcpu_mark_page_dirty(vcpu, gpa >> PAGE_SHIFT); + return r; +} + +static void stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + int r; + + stimer->msg_pending = true; + payload->expiration_time = stimer->exp_time; + payload->delivery_time = get_time_ref_counter(vcpu->kvm); + r = synic_deliver_msg(vcpu_to_synic(vcpu), + HV_STIMER_SINT(stimer->config), msg); + if (!r) + stimer->msg_pending = false; +} + +static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) +{ + stimer_send_msg(stimer); + if (!(stimer->config & HV_STIMER_PERIODIC)) + stimer->config |= ~HV_STIMER_ENABLE; + else + stimer_restart(stimer); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + struct kvm_vcpu_hv_stimer *stimer; + u64 time_now; + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { + stimer = &hv_vcpu->stimer[i]; + stimer_stop(stimer); + if (stimer->config & HV_STIMER_ENABLE) { + time_now = get_time_ref_counter(vcpu->kvm); + if (time_now >= stimer->exp_time) + stimer_expiration(stimer); + } + } +} + +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_cleanup(&hv_vcpu->stimer[i]); +} + +static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer) +{ + struct hv_message *msg = &stimer->msg; + struct hv_timer_message_payload *payload = + (struct hv_timer_message_payload *)&msg->u.payload; + + memset(&msg->header, 0, sizeof(msg->header)); + msg->header.message_type = HVMSG_TIMER_EXPIRED; + msg->header.payload_size = sizeof(*payload); + + payload->timer_index = stimer->index; + payload->expiration_time = 0; + payload->delivery_time = 0; +} + +static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index) +{ + memset(stimer, 0, sizeof(*stimer)); + stimer->index = timer_index; + hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + stimer->timer.function = stimer_timer_callback; + stimer_prepare_msg(stimer); +} + void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) { - synic_init(vcpu_to_synic(vcpu)); + struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); + int i; + + synic_init(&hv_vcpu->synic); + + bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); + for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) + stimer_init(&hv_vcpu->stimer[i], i); } int kvm_hv_activate_synic(struct kvm_vcpu *vcpu) @@ -590,6 +866,24 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... 
HV_X64_MSR_SINT15: return synic_set_msr(vcpu_to_synic(vcpu), msr, data, host); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_set_config(vcpu_to_stimer(vcpu, timer_index), + data, host); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_set_count(vcpu_to_stimer(vcpu, timer_index), + data, host); + } default: vcpu_unimpl(vcpu, "Hyper-V uhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -673,6 +967,24 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_EOM: case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: return synic_get_msr(vcpu_to_synic(vcpu), msr, pdata); + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: { + int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2; + + return stimer_get_config(vcpu_to_stimer(vcpu, timer_index), + pdata); + } + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: { + int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2; + + return stimer_get_count(vcpu_to_stimer(vcpu, timer_index), + pdata); + } default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index d5d8217..60eccd4 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -59,5 +59,29 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector); int kvm_hv_activate_synic(struct kvm_vcpu *vcpu); void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu); +void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); + +static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu, + int timer_index) +{ + return &vcpu_to_hv_vcpu(vcpu)->stimer[timer_index]; +} + +static inline struct kvm_vcpu *stimer_to_vcpu(struct kvm_vcpu_hv_stimer *stimer) +{ + struct kvm_vcpu_hv *hv_vcpu; + + hv_vcpu = container_of(stimer - stimer->index, struct kvm_vcpu_hv, + stimer[0]); + return hv_vcpu_to_vcpu(hv_vcpu); +} + +static inline bool kvm_hv_has_stimer_pending(struct kvm_vcpu *vcpu) +{ + return !bitmap_empty(vcpu->arch.hyperv.stimer_pending_bitmap, + HV_SYNIC_STIMER_COUNT); +} + +void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1d6501..b6102c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -967,6 +967,7 @@ static u32 emulated_msrs[] = { HV_X64_MSR_VP_INDEX, HV_X64_MSR_VP_RUNTIME, HV_X64_MSR_SCONTROL, + HV_X64_MSR_STIMER0_CONFIG, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, @@ -2199,6 +2200,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_set_msr_common(vcpu, msr, data, msr_info->host_initiated); case MSR_IA32_BBL_CR_CTL3: @@ -2403,6 +2405,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: + case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: return kvm_hv_get_msr_common(vcpu, msr_info->index, &msr_info->data); break; @@ -6489,6 +6492,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) + kvm_hv_process_stimers(vcpu); } /* @@ -7649,6 +7654,7 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { int idx; + kvm_hv_vcpu_uninit(vcpu); kvm_pmu_destroy(vcpu); kfree(vcpu->arch.mce_banks); kvm_free_lapic(vcpu); @@ -8043,6 +8049,9 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) kvm_cpu_has_interrupt(vcpu)) return true; + if (kvm_hv_has_stimer_pending(vcpu)) + return true; + return false; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f44c24b..2969c47 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -144,6 +144,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_REQ_HV_RESET 29 #define KVM_REQ_HV_EXIT 30 +#define KVM_REQ_HV_STIMER 31 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v0.10.2 From 0bcf261cc86d082923082f79febe2d13c055f217 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 3 Dec 2015 13:29:34 +0800 Subject: KVM: VMX: fix the writing POSTED_INTR_NV POSTED_INTR_NV is 16bit, should not use 64bit write function [ 5311.676074] vmwrite error: reg 3 value 0 (err 12) [ 5311.680001] CPU: 49 PID: 4240 Comm: qemu-system-i38 Tainted: G I 4.1.13-WR8.0.0.0_standard #1 [ 5311.689343] Hardware name: Intel Corporation S2600WT2/S2600WT2, BIOS SE5C610.86B.01.01.0008.021120151325 02/11/2015 [ 5311.699550] 00000000 00000000 e69a7e1c c1950de1 00000000 e69a7e38 fafcff45 fafebd24 [ 5311.706924] 00000003 00000000 0000000c b6a06dfa e69a7e40 fafcff79 e69a7eb0 fafd5f57 [ 5311.714296] e69a7ec0 c1080600 00000000 00000001 c0e18018 000001be 00000000 00000b43 [ 5311.721651] Call Trace: [ 5311.722942] [] dump_stack+0x4b/0x75 [ 5311.726467] [] vmwrite_error+0x35/0x40 [kvm_intel] [ 5311.731444] [] vmcs_writel+0x29/0x30 [kvm_intel] [ 5311.736228] [] vmx_create_vcpu+0x337/0xb90 [kvm_intel] [ 5311.741600] [] ? dequeue_task_fair+0x2e0/0xf60 [ 5311.746197] [] kvm_arch_vcpu_create+0x3a/0x70 [kvm] [ 5311.751278] [] kvm_vm_ioctl+0x14d/0x640 [kvm] [ 5311.755771] [] ? free_pages_prepare+0x1a4/0x2d0 [ 5311.760455] [] ? debug_smp_processor_id+0x12/0x20 [ 5311.765333] [] ? sched_move_task+0xbe/0x170 [ 5311.769621] [] ? kmem_cache_free+0x213/0x230 [ 5311.774016] [] ? kvm_set_memory_region+0x60/0x60 [kvm] [ 5311.779379] [] do_vfs_ioctl+0x2e2/0x500 [ 5311.783285] [] ? kmem_cache_free+0x213/0x230 [ 5311.787677] [] ? __mmdrop+0x63/0xd0 [ 5311.791196] [] ? __mmdrop+0x63/0xd0 [ 5311.794712] [] ? __mmdrop+0x63/0xd0 [ 5311.798234] [] ? __fget+0x57/0x90 [ 5311.801559] [] ? 
__fget_light+0x22/0x50 [ 5311.805464] [] SyS_ioctl+0x80/0x90 [ 5311.808885] [] sysenter_do_call+0x12/0x12 [ 5312.059280] kvm: zapping shadow pages for mmio generation wraparound [ 5313.678415] kvm [4231]: vcpu0 disabled perfctr wrmsr: 0xc2 data 0xffff [ 5313.726518] kvm [4231]: vcpu0 unhandled rdmsr: 0x570 Signed-off-by: Li RongQing Cc: Yang Zhang Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1a8bfaa..c39737f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4776,7 +4776,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) vmcs_write16(GUEST_INTR_STATUS, 0); - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, __pa((&vmx->pi_desc))); } @@ -9498,7 +9498,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) */ vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; - vmcs_write64(POSTED_INTR_NV, POSTED_INTR_VECTOR); + vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); vmcs_write64(POSTED_INTR_DESC_ADDR, page_to_phys(vmx->nested.pi_desc_page) + (unsigned long)(vmcs12->posted_intr_desc_addr & -- cgit v0.10.2 From f35310546399eb77f03d37e760320e021f9a8568 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2015 15:49:56 +0100 Subject: KVM: VMX: fix read/write sizes of VMCS fields In theory this should have broken EPT on 32-bit kernels (due to reading the high part of natural-width field GUEST_CR3). Not sure if no one noticed or the processor behaves differently from the documentation. Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c39737f..b1af1e4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4868,7 +4868,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) seg_setup(VCPU_SREG_CS); vmcs_write16(GUEST_CS_SELECTOR, 0xf000); - vmcs_write32(GUEST_CS_BASE, 0xffff0000); + vmcs_writel(GUEST_CS_BASE, 0xffff0000ul); seg_setup(VCPU_SREG_DS); seg_setup(VCPU_SREG_ES); @@ -4904,7 +4904,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE); vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, 0); - vmcs_write32(GUEST_PENDING_DBG_EXCEPTIONS, 0); + vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, 0); setup_msrs(vmx); @@ -7893,7 +7893,7 @@ static void dump_vmcs(void) u32 pin_based_exec_ctrl = vmcs_read32(PIN_BASED_VM_EXEC_CONTROL); u32 secondary_exec_control = 0; unsigned long cr4 = vmcs_readl(GUEST_CR4); - u64 efer = vmcs_readl(GUEST_IA32_EFER); + u64 efer = vmcs_read64(GUEST_IA32_EFER); int i, n; if (cpu_has_secondary_exec_ctrls()) @@ -10159,7 +10159,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, * Additionally, restore L2's PDPTR to vmcs12. */ if (enable_ept) { - vmcs12->guest_cr3 = vmcs_read64(GUEST_CR3); + vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3); vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0); vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1); vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2); -- cgit v0.10.2 From 845c5b4054635f98eb6f1c783c0cc14b28772cb0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2015 15:51:00 +0100 Subject: KVM: VMX: fix read/write sizes of VMCS fields in dump_vmcs This was not printing the high parts of several 64-bit fields on 32-bit kernels. Separate from the previous one to make the patches easier to review. 
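To make the width issue concrete, here is a hedged sketch of why the accessor choice matters (example_vmcs_read64 is an invented name; vmcs_readl is the existing helper in this file): on a 32-bit kernel unsigned long is 32 bits, so reading a 64-bit VMCS field through the natural-width accessor silently drops the high half, while a proper 64-bit read also fetches the companion "+1" high field.

/* Illustrative only -- mirrors the shape of the existing vmx.c helpers. */
static __always_inline u64 example_vmcs_read64(unsigned long field)
{
        u64 value = vmcs_readl(field);               /* only the low half on 32-bit */
#ifdef CONFIG_X86_32
        value |= (u64)vmcs_readl(field + 1) << 32;   /* fetch the high field too */
#endif
        return value;
}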
Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1af1e4..b1a453d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7909,10 +7909,10 @@ static void dump_vmcs(void) if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT) && (cr4 & X86_CR4_PAE) && !(efer & EFER_LMA)) { - pr_err("PDPTR0 = 0x%016lx PDPTR1 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR0), vmcs_readl(GUEST_PDPTR1)); - pr_err("PDPTR2 = 0x%016lx PDPTR3 = 0x%016lx\n", - vmcs_readl(GUEST_PDPTR2), vmcs_readl(GUEST_PDPTR3)); + pr_err("PDPTR0 = 0x%016llx PDPTR1 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR0), vmcs_read64(GUEST_PDPTR1)); + pr_err("PDPTR2 = 0x%016llx PDPTR3 = 0x%016llx\n", + vmcs_read64(GUEST_PDPTR2), vmcs_read64(GUEST_PDPTR3)); } pr_err("RSP = 0x%016lx RIP = 0x%016lx\n", vmcs_readl(GUEST_RSP), vmcs_readl(GUEST_RIP)); @@ -7933,16 +7933,16 @@ static void dump_vmcs(void) vmx_dump_sel("TR: ", GUEST_TR_SELECTOR); if ((vmexit_ctl & (VM_EXIT_SAVE_IA32_PAT | VM_EXIT_SAVE_IA32_EFER)) || (vmentry_ctl & (VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_IA32_EFER))) - pr_err("EFER = 0x%016llx PAT = 0x%016lx\n", - efer, vmcs_readl(GUEST_IA32_PAT)); - pr_err("DebugCtl = 0x%016lx DebugExceptions = 0x%016lx\n", - vmcs_readl(GUEST_IA32_DEBUGCTL), + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + efer, vmcs_read64(GUEST_IA32_PAT)); + pr_err("DebugCtl = 0x%016llx DebugExceptions = 0x%016lx\n", + vmcs_read64(GUEST_IA32_DEBUGCTL), vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS)); if (vmentry_ctl & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(GUEST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(GUEST_IA32_PERF_GLOBAL_CTRL)); if (vmentry_ctl & VM_ENTRY_LOAD_BNDCFGS) - pr_err("BndCfgS = 0x%016lx\n", vmcs_readl(GUEST_BNDCFGS)); + pr_err("BndCfgS = 0x%016llx\n", vmcs_read64(GUEST_BNDCFGS)); pr_err("Interruptibility = %08x ActivityState = %08x\n", vmcs_read32(GUEST_INTERRUPTIBILITY_INFO), vmcs_read32(GUEST_ACTIVITY_STATE)); @@ -7971,11 +7971,12 @@ static void dump_vmcs(void) vmcs_read32(HOST_IA32_SYSENTER_CS), vmcs_readl(HOST_IA32_SYSENTER_EIP)); if (vmexit_ctl & (VM_EXIT_LOAD_IA32_PAT | VM_EXIT_LOAD_IA32_EFER)) - pr_err("EFER = 0x%016lx PAT = 0x%016lx\n", - vmcs_readl(HOST_IA32_EFER), vmcs_readl(HOST_IA32_PAT)); + pr_err("EFER = 0x%016llx PAT = 0x%016llx\n", + vmcs_read64(HOST_IA32_EFER), + vmcs_read64(HOST_IA32_PAT)); if (vmexit_ctl & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) - pr_err("PerfGlobCtl = 0x%016lx\n", - vmcs_readl(HOST_IA32_PERF_GLOBAL_CTRL)); + pr_err("PerfGlobCtl = 0x%016llx\n", + vmcs_read64(HOST_IA32_PERF_GLOBAL_CTRL)); pr_err("*** Control State ***\n"); pr_err("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n", @@ -7998,16 +7999,16 @@ static void dump_vmcs(void) pr_err("IDTVectoring: info=%08x errcode=%08x\n", vmcs_read32(IDT_VECTORING_INFO_FIELD), vmcs_read32(IDT_VECTORING_ERROR_CODE)); - pr_err("TSC Offset = 0x%016lx\n", vmcs_readl(TSC_OFFSET)); + pr_err("TSC Offset = 0x%016llx\n", vmcs_read64(TSC_OFFSET)); if (secondary_exec_control & SECONDARY_EXEC_TSC_SCALING) - pr_err("TSC Multiplier = 0x%016lx\n", - vmcs_readl(TSC_MULTIPLIER)); + pr_err("TSC Multiplier = 0x%016llx\n", + vmcs_read64(TSC_MULTIPLIER)); if (cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW) pr_err("TPR Threshold = 0x%02x\n", vmcs_read32(TPR_THRESHOLD)); if (pin_based_exec_ctrl & PIN_BASED_POSTED_INTR) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) - pr_err("EPT pointer = 0x%016lx\n", 
vmcs_readl(EPT_POINTER)); + pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); n = vmcs_read32(CR3_TARGET_COUNT); for (i = 0; i + 1 < n; i += 4) pr_err("CR3 target%u=%016lx target%u=%016lx\n", -- cgit v0.10.2 From 8a86aea920f1bb1cf7050e48112227a1c42cafe5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 3 Dec 2015 15:56:55 +0100 Subject: KVM: vmx: detect mismatched size in VMCS read/write Signed-off-by: Paolo Bonzini --- I am sending this as RFC because the error messages it produces are very ugly. Because of inlining, the original line is lost. The alternative is to change vmcs_read/write/checkXX into macros, but then you need to have a single huge BUILD_BUG_ON or BUILD_BUG_ON_MSG because multiple BUILD_BUG_ON* with the same __LINE__ are not supported well. diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b1a453d..62d958a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1447,7 +1447,51 @@ static inline void ept_sync_context(u64 eptp) } } -static __always_inline unsigned long vmcs_readl(unsigned long field) +static __always_inline void vmcs_check16(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "16-bit accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "16-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "16-bit accessor invalid for 32-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "16-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check32(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "32-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "32-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_check64(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "64-bit accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "64-bit accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "64-bit accessor invalid for 32-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x6000, + "64-bit accessor invalid for natural width field"); +} + +static __always_inline void vmcs_checkl(unsigned long field) +{ + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0, + "Natural width accessor invalid for 16-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, + "Natural width accessor invalid for 64-bit field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2001, + "Natural width accessor invalid for 64-bit high field"); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x4000, + "Natural width accessor invalid for 32-bit field"); +} + +static __always_inline unsigned long __vmcs_readl(unsigned long field) { unsigned long value; @@ -1458,23 +1502,32 @@ static __always_inline unsigned long vmcs_readl(unsigned long field) static __always_inline u16 vmcs_read16(unsigned long field) { - return vmcs_readl(field); + vmcs_check16(field); + return __vmcs_readl(field); } static __always_inline u32 
vmcs_read32(unsigned long field) { - return vmcs_readl(field); + vmcs_check32(field); + return __vmcs_readl(field); } static __always_inline u64 vmcs_read64(unsigned long field) { + vmcs_check64(field); #ifdef CONFIG_X86_64 - return vmcs_readl(field); + return __vmcs_readl(field); #else - return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); + return __vmcs_readl(field) | ((u64)__vmcs_readl(field+1) << 32); #endif } +static __always_inline unsigned long vmcs_readl(unsigned long field) +{ + vmcs_checkl(field); + return __vmcs_readl(field); +} + static noinline void vmwrite_error(unsigned long field, unsigned long value) { printk(KERN_ERR "vmwrite error: reg %lx value %lx (err %d)\n", @@ -1482,7 +1535,7 @@ static noinline void vmwrite_error(unsigned long field, unsigned long value) dump_stack(); } -static void vmcs_writel(unsigned long field, unsigned long value) +static __always_inline void __vmcs_writel(unsigned long field, unsigned long value) { u8 error; @@ -1492,33 +1545,46 @@ static void vmcs_writel(unsigned long field, unsigned long value) vmwrite_error(field, value); } -static void vmcs_write16(unsigned long field, u16 value) +static __always_inline void vmcs_write16(unsigned long field, u16 value) { - vmcs_writel(field, value); + vmcs_check16(field); + __vmcs_writel(field, value); } -static void vmcs_write32(unsigned long field, u32 value) +static __always_inline void vmcs_write32(unsigned long field, u32 value) { - vmcs_writel(field, value); + vmcs_check32(field); + __vmcs_writel(field, value); } -static void vmcs_write64(unsigned long field, u64 value) +static __always_inline void vmcs_write64(unsigned long field, u64 value) { - vmcs_writel(field, value); + vmcs_check64(field); + __vmcs_writel(field, value); #ifndef CONFIG_X86_64 asm volatile (""); - vmcs_writel(field+1, value >> 32); + __vmcs_writel(field+1, value >> 32); #endif } -static void vmcs_clear_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_writel(unsigned long field, unsigned long value) +{ + vmcs_checkl(field); + __vmcs_writel(field, value); +} + +static __always_inline void vmcs_clear_bits(unsigned long field, u32 mask) { - vmcs_writel(field, vmcs_readl(field) & ~mask); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_clear_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) & ~mask); } -static void vmcs_set_bits(unsigned long field, u32 mask) +static __always_inline void vmcs_set_bits(unsigned long field, u32 mask) { - vmcs_writel(field, vmcs_readl(field) | mask); + BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6000) == 0x2000, + "vmcs_set_bits does not support 64-bit fields"); + __vmcs_writel(field, __vmcs_readl(field) | mask); } static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) -- cgit v0.10.2 From 671d9ab38097fae45ff4f24562789b98b51d37ec Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Nov 2015 19:52:12 +0100 Subject: kvm: Dump guest rIP when the guest tried something unsupported It looks like this in action: kvm [5197]: vcpu0, guest rIP: 0xffffffff810187ba unhandled rdmsr: 0xc001102 and helps to pinpoint quickly where in the guest we did the unsupported thing. Signed-off-by: Borislav Petkov Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2969c47..61c3e6c6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -447,7 +447,8 @@ struct kvm { /* The guest did something we don't support. 
*/ #define vcpu_unimpl(vcpu, fmt, ...) \ - kvm_pr_unimpl("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) + kvm_pr_unimpl("vcpu%i, guest rIP: 0x%lx " fmt, \ + (vcpu)->vcpu_id, kvm_rip_read(vcpu), ## __VA_ARGS__) #define vcpu_debug(vcpu, fmt, ...) \ kvm_debug("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) -- cgit v0.10.2 From 481d2bcc8454a44811db2bb68ac216fc6c5a23db Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 14 Dec 2015 18:33:05 +0300 Subject: kvm/x86: Remove Hyper-V SynIC timer stopping It's possible that guest send us Hyper-V EOM at the middle of Hyper-V SynIC timer running, so we start processing of Hyper-V SynIC timers in vcpu context and stop the Hyper-V SynIC timer unconditionally: host guest ------------------------------------------------------------------------------ start periodic stimer start periodic timer timer expires after 15ms send expiration message into guest restart periodic timer timer expires again after 15 ms msg slot is still not cleared so setup ->msg_pending (1) restart periodic timer process timer msg and clear slot ->msg_pending was set: send EOM into host received EOM kvm_make_request(KVM_REQ_HV_STIMER) kvm_hv_process_stimers(): ... stimer_stop() if (time_now >= stimer->exp_time) stimer_expiration(stimer); Because the timer was rearmed at (1), time_now < stimer->exp_time and stimer_expiration is not called. The timer then never fires. The patch fixes such situation by not stopping Hyper-V SynIC timer at all, because it's safe to restart it without stop in vcpu context and timer callback always returns HRTIMER_NORESTART. Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 8ff8829..f34f666 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -598,7 +598,6 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { stimer = &hv_vcpu->stimer[i]; - stimer_stop(stimer); if (stimer->config & HV_STIMER_ENABLE) { time_now = get_time_ref_counter(vcpu->kvm); if (time_now >= stimer->exp_time) -- cgit v0.10.2 From e078ef81514222ffc10bf1767c15df16ca0b84db Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 14 Dec 2015 17:58:33 +0000 Subject: ARM: KVM: Cleanup exception injection David Binderman reported that the exception injection code had a couple of unused variables lingering around. Upon examination, it looked like this code could do with an anticipated spring cleaning, which amounts to deduplicating the CPSR/SPSR update, and making it look a bit more like the architecture spec. The spurious variables are removed in the process. Reported-by: David Binderman Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index d6c0052..dc99159 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -275,6 +275,40 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) return vbar; } +/* + * Switch to an exception mode, updating both CPSR and SPSR. Follow + * the logic described in AArch32.EnterMode() from the ARMv8 ARM. 
+ */ +static void kvm_update_psr(struct kvm_vcpu *vcpu, unsigned long mode) +{ + unsigned long cpsr = *vcpu_cpsr(vcpu); + u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; + + *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | mode; + + switch (mode) { + case FIQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_F_BIT; + /* Fall through */ + case ABT_MODE: + case IRQ_MODE: + *vcpu_cpsr(vcpu) |= PSR_A_BIT; + /* Fall through */ + default: + *vcpu_cpsr(vcpu) |= PSR_I_BIT; + } + + *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); + + if (sctlr & SCTLR_TE) + *vcpu_cpsr(vcpu) |= PSR_T_BIT; + if (sctlr & SCTLR_EE) + *vcpu_cpsr(vcpu) |= PSR_E_BIT; + + /* Note: These now point to the mode banked copies */ + *vcpu_spsr(vcpu) = cpsr; +} + /** * kvm_inject_undefined - inject an undefined exception into the guest * @vcpu: The VCPU to receive the undefined exception @@ -286,29 +320,13 @@ static u32 exc_vector_base(struct kvm_vcpu *vcpu) */ void kvm_inject_undefined(struct kvm_vcpu *vcpu) { - unsigned long new_lr_value; - unsigned long new_spsr_value; unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset = 4; u32 return_offset = (is_thumb) ? 2 : 4; - new_spsr_value = cpsr; - new_lr_value = *vcpu_pc(vcpu) - return_offset; - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | UND_MODE; - *vcpu_cpsr(vcpu) |= PSR_I_BIT; - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to UND banked copies */ - *vcpu_spsr(vcpu) = cpsr; - *vcpu_reg(vcpu, 14) = new_lr_value; + kvm_update_psr(vcpu, UND_MODE); + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset; /* Branch to exception vector */ *vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset; @@ -320,30 +338,14 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) */ static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr) { - unsigned long new_lr_value; - unsigned long new_spsr_value; unsigned long cpsr = *vcpu_cpsr(vcpu); - u32 sctlr = vcpu->arch.cp15[c1_SCTLR]; bool is_thumb = (cpsr & PSR_T_BIT); u32 vect_offset; u32 return_offset = (is_thumb) ? 4 : 0; bool is_lpae; - new_spsr_value = cpsr; - new_lr_value = *vcpu_pc(vcpu) + return_offset; - - *vcpu_cpsr(vcpu) = (cpsr & ~MODE_MASK) | ABT_MODE; - *vcpu_cpsr(vcpu) |= PSR_I_BIT | PSR_A_BIT; - *vcpu_cpsr(vcpu) &= ~(PSR_IT_MASK | PSR_J_BIT | PSR_E_BIT | PSR_T_BIT); - - if (sctlr & SCTLR_TE) - *vcpu_cpsr(vcpu) |= PSR_T_BIT; - if (sctlr & SCTLR_EE) - *vcpu_cpsr(vcpu) |= PSR_E_BIT; - - /* Note: These now point to ABT banked copies */ - *vcpu_spsr(vcpu) = cpsr; - *vcpu_reg(vcpu, 14) = new_lr_value; + kvm_update_psr(vcpu, ABT_MODE); + *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; if (is_pabt) vect_offset = 12; -- cgit v0.10.2 From 281243cbe075d27ab884858d6e0b15d8ed61bc25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 16 Dec 2015 15:41:12 +0000 Subject: arm64: KVM: debug: Remove spurious inline attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The debug trapping code is pretty heavy on the "inline" attribute, but most functions are actually referenced in the sysreg tables, making the inlining imposible. Removing the useless inline qualifier seems the right thing to do, having verified that the output code is similar. 
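For context, the "inline" keyword is only a hint, and it becomes moot once a function's address is stored in a descriptor table: the compiler must emit an out-of-line body for the table entry, and calls made through the table are indirect. A small stand-alone sketch of that situation (hypothetical names, not the kernel's sys_reg_desc machinery):

/*
 * Sketch: a handler referenced from a table needs an out-of-line body,
 * so marking it "inline" does nothing useful for those call sites.
 */
#include <stdio.h>

struct desc {
	int (*handler)(int reg);
};

static inline int trap_example(int reg)		/* the inline hint is moot here */
{
	return reg + 1;
}

/* Taking the address forces an out-of-line copy of trap_example(). */
static const struct desc table[] = {
	{ .handler = trap_example },
};

int main(void)
{
	printf("%d\n", table[0].handler(41));	/* indirect call, prints 42 */
	return 0;
}
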
Cc: Alex Bennée Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 88adebf..eec3598 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -220,9 +220,9 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu, * All writes will set the KVM_ARM64_DEBUG_DIRTY flag to ensure the * hyp.S code switches between host and guest values in future. */ -static inline void reg_to_dbg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void reg_to_dbg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { u64 val = p->regval; @@ -235,18 +235,18 @@ static inline void reg_to_dbg(struct kvm_vcpu *vcpu, vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY; } -static inline void dbg_to_reg(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - u64 *dbg_reg) +static void dbg_to_reg(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + u64 *dbg_reg) { p->regval = *dbg_reg; if (p->is_32bit) p->regval &= 0xffffffffUL; } -static inline bool trap_bvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; @@ -280,15 +280,15 @@ static int get_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg] = rd->val; } -static inline bool trap_bcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_bcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg]; @@ -323,15 +323,15 @@ static int get_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_bcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_bcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg] = rd->val; } -static inline bool trap_wvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg]; @@ -366,15 +366,15 @@ static int get_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wvr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wvr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg] = rd->val; } -static inline bool trap_wcr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_wcr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg]; @@ -408,8 +408,8 @@ static int get_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return 0; } -static inline void reset_wcr(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *rd) +static void reset_wcr(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd) { vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg] = rd->val; } @@ -723,9 +723,9 @@ static bool trap_debug32(struct kvm_vcpu 
*vcpu, * system is in. */ -static inline bool trap_xvr(struct kvm_vcpu *vcpu, - struct sys_reg_params *p, - const struct sys_reg_desc *rd) +static bool trap_xvr(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *rd) { u64 *dbg_reg = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg]; -- cgit v0.10.2 From 9d4dc688342a3cbda43a1789cd2c6c888658c60d Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Nov 2015 11:28:16 +0000 Subject: arm/arm64: KVM: Remove unreferenced S2_PGD_ORDER Since commit a987370 ("arm64: KVM: Fix stage-2 PGD allocation to have per-page refcounting") there is no reference to S2_PGD_ORDER, so kill it for the good. Acked-by: Christoffer Dall Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index dc641dd..b05bb5a 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -135,7 +135,6 @@ #define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) #define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) #define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 61d96a6..22f7fa0 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -656,9 +656,9 @@ static void *kvm_alloc_hwpgd(void) * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * - * Allocates the 1st level table only of size defined by S2_PGD_ORDER (can - * support either full 40-bit input addresses or limited to 32-bit input - * addresses). Clears the allocated pages. + * Allocates only the stage-2 HW PGD level table(s) (can support either full + * 40-bit input addresses or limited to 32-bit input addresses). Clears the + * allocated pages. * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 6150567..54cba80 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -158,7 +158,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd) #define PTRS_PER_S2_PGD_SHIFT (KVM_PHYS_SHIFT - PGDIR_SHIFT) #endif #define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT) -#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t)) #define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1)) -- cgit v0.10.2 From 8420dcd37ef34040c8fc5a27bf66887b3b2faf80 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Nov 2015 11:28:17 +0000 Subject: arm: KVM: Make kvm_arm.h friendly to assembly code kvm_arm.h is included from both C code and assembly code; however some definitions in this header supplied with U/UL/ULL suffixes which might confuse assembly once they got evaluated. We have _AC macro for such cases, so just wrap problem places with it. Acked-by: Christoffer Dall Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index b05bb5a..01d4d7a 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -19,6 +19,7 @@ #ifndef __ARM_KVM_ARM_H__ #define __ARM_KVM_ARM_H__ +#include #include /* Hyp Configuration Register (HCR) bits */ @@ -132,9 +133,9 @@ * space. 
*/ #define KVM_PHYS_SHIFT (40) -#define KVM_PHYS_SIZE (1ULL << KVM_PHYS_SHIFT) -#define KVM_PHYS_MASK (KVM_PHYS_SIZE - 1ULL) -#define PTRS_PER_S2_PGD (1ULL << (KVM_PHYS_SHIFT - 30)) +#define KVM_PHYS_SIZE (_AC(1, ULL) << KVM_PHYS_SHIFT) +#define KVM_PHYS_MASK (KVM_PHYS_SIZE - _AC(1, ULL)) +#define PTRS_PER_S2_PGD (_AC(1, ULL) << (KVM_PHYS_SHIFT - 30)) /* Virtualization Translation Control Register (VTCR) bits */ #define VTCR_SH0 (3 << 12) @@ -161,17 +162,17 @@ #define VTTBR_X (5 - KVM_T0SZ) #endif #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) -#define VTTBR_BADDR_MASK (((1LLU << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) -#define VTTBR_VMID_SHIFT (48LLU) -#define VTTBR_VMID_MASK (0xffLLU << VTTBR_VMID_SHIFT) +#define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) +#define VTTBR_VMID_SHIFT _AC(48, ULL) +#define VTTBR_VMID_MASK (_AC(0xff, ULL) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) -#define HSR_EC (0x3fU << HSR_EC_SHIFT) -#define HSR_IL (1U << 25) +#define HSR_EC (_AC(0x3f, UL) << HSR_EC_SHIFT) +#define HSR_IL (_AC(1, UL) << 25) #define HSR_ISS (HSR_IL - 1) #define HSR_ISV_SHIFT (24) -#define HSR_ISV (1U << HSR_ISV_SHIFT) +#define HSR_ISV (_AC(1, UL) << HSR_ISV_SHIFT) #define HSR_SRT_SHIFT (16) #define HSR_SRT_MASK (0xf << HSR_SRT_SHIFT) #define HSR_FSC (0x3f) @@ -179,9 +180,9 @@ #define HSR_SSE (1 << 21) #define HSR_WNR (1 << 6) #define HSR_CV_SHIFT (24) -#define HSR_CV (1U << HSR_CV_SHIFT) +#define HSR_CV (_AC(1, UL) << HSR_CV_SHIFT) #define HSR_COND_SHIFT (20) -#define HSR_COND (0xfU << HSR_COND_SHIFT) +#define HSR_COND (_AC(0xf, UL) << HSR_COND_SHIFT) #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) @@ -209,13 +210,13 @@ #define HSR_EC_DABT (0x24) #define HSR_EC_DABT_HYP (0x25) -#define HSR_WFI_IS_WFE (1U << 0) +#define HSR_WFI_IS_WFE (_AC(1, UL) << 0) -#define HSR_HVC_IMM_MASK ((1UL << 16) - 1) +#define HSR_HVC_IMM_MASK ((_AC(1, UL) << 16) - 1) -#define HSR_DABT_S1PTW (1U << 7) -#define HSR_DABT_CM (1U << 8) -#define HSR_DABT_EA (1U << 9) +#define HSR_DABT_S1PTW (_AC(1, UL) << 7) +#define HSR_DABT_CM (_AC(1, UL) << 8) +#define HSR_DABT_EA (_AC(1, UL) << 9) #define kvm_arm_exception_type \ {0, "RESET" }, \ -- cgit v0.10.2 From 20475f784d29991b3b843c80c38a36f2ebb35ac4 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin Date: Mon, 16 Nov 2015 11:28:18 +0000 Subject: arm64: KVM: Add support for 16-bit VMID The ARMv8.1 architecture extension allows to choose between 8-bit and 16-bit of VMID, so use this capability for KVM. 
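As a quick illustration of the parameterised mask: the VMID occupies VTTBR bits 48 and up, so VTTBR_VMID_MASK(size) is just (2^size - 1) shifted to bit 48, and an over-wide VMID is silently truncated by the AND. A stand-alone sketch of the packing (the constants mirror the patch, but this is a model, not the kernel's update_vttbr()):

#include <stdint.h>
#include <stdio.h>

#define VTTBR_VMID_SHIFT	48
#define VTTBR_VMID_MASK(bits)	((((uint64_t)1 << (bits)) - 1) << VTTBR_VMID_SHIFT)

/* Pack a stage-2 PGD physical address and a VMID into a VTTBR value. */
static uint64_t make_vttbr(uint64_t pgd_phys, uint32_t vmid, unsigned bits)
{
	uint64_t v = ((uint64_t)vmid << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(bits);

	return pgd_phys | v;
}

int main(void)
{
	/* With 8-bit VMIDs, VMID 0x1ff is truncated to 0xff by the mask. */
	printf("8-bit : %#llx\n", (unsigned long long)make_vttbr(0x40000000, 0x1ff, 8));
	printf("16-bit: %#llx\n", (unsigned long long)make_vttbr(0x40000000, 0x1ff, 16));
	return 0;
}
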
Reviewed-by: Christoffer Dall Signed-off-by: Vladimir Murzin Signed-off-by: Marc Zyngier diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index 01d4d7a..e22089f 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -164,7 +164,7 @@ #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((_AC(1, ULL) << (40 - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT _AC(48, ULL) -#define VTTBR_VMID_MASK (_AC(0xff, ULL) << VTTBR_VMID_SHIFT) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp Syndrome Register (HSR) bits */ #define HSR_EC_SHIFT (26) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 405aa18..9203c21 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -279,6 +279,11 @@ static inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, pgd_t *merged_hyp_pgd, unsigned long hyp_idmap_start) { } +static inline unsigned int kvm_get_vmid_bits(void) +{ + return 8; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ARM_KVM_MMU_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 6e35d1d..f6bcc2e 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -59,7 +59,8 @@ static DEFINE_PER_CPU(struct kvm_vcpu *, kvm_arm_running_vcpu); /* The VMID used in the VTTBR */ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); -static u8 kvm_next_vmid; +static u32 kvm_next_vmid; +static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) @@ -434,11 +435,12 @@ static void update_vttbr(struct kvm *kvm) kvm->arch.vmid_gen = atomic64_read(&kvm_vmid_gen); kvm->arch.vmid = kvm_next_vmid; kvm_next_vmid++; + kvm_next_vmid &= (1 << kvm_vmid_bits) - 1; /* update vttbr to be used with the new vmid */ pgd_phys = virt_to_phys(kvm_get_hwpgd(kvm)); BUG_ON(pgd_phys & ~VTTBR_BADDR_MASK); - vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK; + vmid = ((u64)(kvm->arch.vmid) << VTTBR_VMID_SHIFT) & VTTBR_VMID_MASK(kvm_vmid_bits); kvm->arch.vttbr = pgd_phys | vmid; spin_unlock(&kvm_vmid_lock); @@ -1135,6 +1137,10 @@ static int init_hyp_mode(void) kvm_perf_init(); + /* set size of VMID supported by CPU */ + kvm_vmid_bits = kvm_get_vmid_bits(); + kvm_info("%d-bit VMID\n", kvm_vmid_bits); + kvm_info("Hyp mode initialized successfully\n"); return 0; diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 5e6857b..738a95f 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -125,6 +125,7 @@ #define VTCR_EL2_SL0_LVL1 (1 << 6) #define VTCR_EL2_T0SZ_MASK 0x3f #define VTCR_EL2_T0SZ_40B 24 +#define VTCR_EL2_VS 19 /* * We configure the Stage-2 page tables to always restrict the IPA space to be @@ -169,7 +170,7 @@ #define VTTBR_BADDR_SHIFT (VTTBR_X - 1) #define VTTBR_BADDR_MASK (((UL(1) << (PHYS_MASK_SHIFT - VTTBR_X)) - 1) << VTTBR_BADDR_SHIFT) #define VTTBR_VMID_SHIFT (UL(48)) -#define VTTBR_VMID_MASK (UL(0xFF) << VTTBR_VMID_SHIFT) +#define VTTBR_VMID_MASK(size) (_AT(u64, (1 << size) - 1) << VTTBR_VMID_SHIFT) /* Hyp System Trap Register */ #define HSTR_EL2_T(x) (1 << x) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 54cba80..0bf8b43 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -20,6 +20,7 @@ #include #include +#include /* * As we only have the TTBR0_EL2 register, we cannot express @@ -301,5 +302,12 @@ static 
inline void __kvm_extend_hypmap(pgd_t *boot_hyp_pgd, merged_hyp_pgd[idmap_idx] = __pgd(__pa(boot_hyp_pgd) | PMD_TYPE_TABLE); } +static inline unsigned int kvm_get_vmid_bits(void) +{ + int reg = read_system_reg(SYS_ID_AA64MMFR1_EL1); + + return (cpuid_feature_extract_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index 178ba22..3e568dc 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -94,6 +94,15 @@ __do_hyp_init: */ mrs x5, ID_AA64MMFR0_EL1 bfi x4, x5, #16, #3 + /* + * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS bit in + * VTCR_EL2. + */ + mrs x5, ID_AA64MMFR1_EL1 + ubfx x5, x5, #5, #1 + lsl x5, x5, #VTCR_EL2_VS + orr x4, x4, x5 + msr vtcr_el2, x4 mrs x4, mair_el1 -- cgit v0.10.2 From 1b1ebe820fcb446146dfb2d04a1f0b7905645f75 Mon Sep 17 00:00:00 2001 From: Fengguang Wu Date: Fri, 18 Dec 2015 15:51:44 +0800 Subject: MAINTAINERS: add git URL for KVM/ARM Acked-by: Christoffer Dall Signed-off-by: Fengguang Wu Signed-off-by: Marc Zyngier diff --git a/MAINTAINERS b/MAINTAINERS index 9bff63c..8e92b45 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6102,6 +6102,7 @@ M: Marc Zyngier L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) L: kvmarm@lists.cs.columbia.edu W: http://systems.cs.columbia.edu/projects/kvm-arm +T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git S: Supported F: arch/arm/include/uapi/asm/kvm* F: arch/arm/include/asm/kvm* -- cgit v0.10.2 From c7da6fa43cb1c5e649da0f478a491feb9208cae7 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Fri, 18 Dec 2015 14:38:43 +0300 Subject: arm/arm64: KVM: Detect vGIC presence at runtime Before commit 662d9715840aef44dcb573b0f9fab9e8319c868a ("arm/arm64: KVM: Kill CONFIG_KVM_ARM_{VGIC,TIMER}") is was possible to compile the kernel without vGIC and vTimer support. Commit message says about possibility to detect vGIC support in runtime, but this has never been implemented. This patch introduces runtime check, restoring the lost functionality. It again allows to use KVM on hardware without vGIC. Interrupt controller has to be emulated in userspace in this case. -ENODEV return code from probe function means there's no GIC at all. -ENXIO happens when, for example, there is GIC node in the device tree, but it does not specify vGIC resources. Any other error code is still treated as full stop because it might mean some really serious problems. Signed-off-by: Pavel Fedin Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f6bcc2e..dda1959 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -63,6 +63,8 @@ static u32 kvm_next_vmid; static unsigned int kvm_vmid_bits __read_mostly; static DEFINE_SPINLOCK(kvm_vmid_lock); +static bool vgic_present; + static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu) { BUG_ON(preemptible()); @@ -134,7 +136,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vmid_gen = 0; /* The maximum number of VCPUs is limited by the host's GIC model */ - kvm->arch.max_vcpus = kvm_vgic_get_max_vcpus(); + kvm->arch.max_vcpus = vgic_present ? 
+ kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; return ret; out_free_stage2_pgd: @@ -174,6 +177,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) int r; switch (ext) { case KVM_CAP_IRQCHIP: + r = vgic_present; + break; case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: @@ -917,6 +922,8 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, switch (dev_id) { case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; @@ -931,6 +938,8 @@ long kvm_arch_vm_ioctl(struct file *filp, switch (ioctl) { case KVM_CREATE_IRQCHIP: { + if (!vgic_present) + return -ENXIO; return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); } case KVM_ARM_SET_DEVICE_ADDR: { @@ -1121,8 +1130,17 @@ static int init_hyp_mode(void) * Init HYP view of VGIC */ err = kvm_vgic_hyp_init(); - if (err) + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + break; + default: goto out_free_context; + } /* * Init HYP architected timer support -- cgit v0.10.2 From 774926641d1968a4839da3a6ac79d914742aac2f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 18 Dec 2015 18:54:49 +0900 Subject: KVM: x86: MMU: Use clear_page() instead of init_shadow_page_table() Not just in order to clean up the code, but to make it faster by using enhanced instructions: the initialization became 20-30% faster on our testing machine. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a1a3d19..7f5a82b 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2041,14 +2041,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu, } } -static void init_shadow_page_table(struct kvm_mmu_page *sp) -{ - int i; - - for (i = 0; i < PT64_ENT_PER_PAGE; ++i) - sp->spt[i] = 0ull; -} - static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp) { sp->write_flooding_count = 0; @@ -2128,7 +2120,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, account_shadowed(vcpu->kvm, sp); } sp->mmu_valid_gen = vcpu->kvm->arch.mmu_valid_gen; - init_shadow_page_table(sp); + clear_page(sp->spt); trace_kvm_mmu_get_page(sp, true); return sp; } -- cgit v0.10.2 From 0af2593b2ad125880a78d1bba966e450cc2330df Mon Sep 17 00:00:00 2001 From: David Matlack Date: Wed, 30 Dec 2015 08:26:17 -0800 Subject: kvm: x86: fix comment about {mmu,nested_mmu}.gva_to_gpa The comment had the meaning of mmu.gva_to_gpa and nested_mmu.gva_to_gpa swapped. Fix that, and also add some details describing how each translation works. Signed-off-by: David Matlack Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7f5a82b..420a5ca 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4024,10 +4024,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->inject_page_fault = kvm_inject_page_fault; /* - * Note that arch.mmu.gva_to_gpa translates l2_gva to l1_gpa. The - * translation of l2_gpa to l1_gpa addresses is done using the - * arch.nested_mmu.gva_to_gpa function. Basically the gva_to_gpa - * functions between mmu and nested_mmu are swapped. + * Note that arch.mmu.gva_to_gpa translates l2_gpa to l1_gpa using + * L1's nested page tables (e.g. EPT12). The nested translation + * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using + * L2's page tables as the first level of translation and L1's + * nested page tables as the second level of translation. 
Basically + * the gva_to_gpa functions between mmu and nested_mmu are swapped. */ if (!is_paging(vcpu)) { g_context->nx = false; -- cgit v0.10.2 From c57ee5faf4503b0cd586c3af663262a3d5599fc1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 5 Jan 2016 18:20:24 +0200 Subject: kvm/s390: drop unpaired smp_mb smp_mb on vcpu destroy isn't paired with anything, violating pairing rules, and seems to be useless. Drop it. Signed-off-by: Michael S. Tsirkin Message-Id: <1452010811-25486-1-git-send-email-mst@redhat.com> Signed-off-by: Christian Borntraeger diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 940e9ff..9f8eea3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1221,7 +1221,6 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_is_ucontrol(vcpu->kvm)) sca_del_vcpu(vcpu); - smp_mb(); if (kvm_is_ucontrol(vcpu->kvm)) gmap_free(vcpu->arch.gmap); -- cgit v0.10.2 From c6e5f166373a20a433c38a34bd16f2b62bd0864c Mon Sep 17 00:00:00 2001 From: Fan Zhang Date: Thu, 7 Jan 2016 18:24:29 +0800 Subject: KVM: s390: implement the RI support of guest This patch adds runtime instrumentation support for KVM guest. We need to setup a save area for the runtime instrumentation-controls control block(RICCB) and implement the necessary interfaces to live migrate the guest settings. We setup the sie control block in a way, that the runtime instrumentation instructions of a guest are handled by hardware. We also add a capability KVM_CAP_S390_RI to make this feature opt-in as it needs migration support. Signed-off-by: Fan Zhang Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index c831441..df0acff 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -217,7 +217,8 @@ struct kvm_s390_sie_block { __u64 pp; /* 0x01de */ __u8 reserved1e6[2]; /* 0x01e6 */ __u64 itdba; /* 0x01e8 */ - __u8 reserved1f0[16]; /* 0x01f0 */ + __u64 riccbd; /* 0x01f0 */ + __u8 reserved1f8[8]; /* 0x01f8 */ } __attribute__((packed)); struct kvm_s390_itdb { diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h index d2aea31..fe84bd5 100644 --- a/arch/s390/include/uapi/asm/kvm.h +++ b/arch/s390/include/uapi/asm/kvm.h @@ -153,6 +153,7 @@ struct kvm_guest_debug_arch { #define KVM_SYNC_ARCH0 (1UL << 4) #define KVM_SYNC_PFAULT (1UL << 5) #define KVM_SYNC_VRS (1UL << 6) +#define KVM_SYNC_RICCB (1UL << 7) /* definition of registers in kvm_run */ struct kvm_sync_regs { __u64 prefix; /* prefix register */ @@ -170,6 +171,8 @@ struct kvm_sync_regs { __u64 vrs[32][2]; /* vector registers */ __u8 reserved[512]; /* for future vector expansion */ __u32 fpc; /* only valid with vector registers */ + __u8 padding[52]; /* riccb needs to be 64byte aligned */ + __u8 riccb[64]; /* runtime instrumentation controls block */ }; #define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9f8eea3..5927c61 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -258,6 +258,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VECTOR_REGISTERS: r = MACHINE_HAS_VX; break; + case KVM_CAP_S390_RI: + r = test_facility(64); + break; default: r = 0; } @@ -358,6 +361,20 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", 
r ? "(not available)" : "(success)"); break; + case KVM_CAP_S390_RI: + r = -EINVAL; + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (test_facility(64)) { + set_kvm_facility(kvm->arch.model.fac->mask, 64); + set_kvm_facility(kvm->arch.model.fac->list, 64); + r = 0; + } + mutex_unlock(&kvm->lock); + VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s", + r ? "(not available)" : "(success)"); + break; case KVM_CAP_S390_USER_STSI: VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI"); kvm->arch.user_stsi = 1; @@ -1395,6 +1412,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) KVM_SYNC_CRS | KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; if (test_kvm_facility(vcpu->kvm, 129)) vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; @@ -1578,10 +1597,13 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->eca |= 1; if (sclp.has_sigpif) vcpu->arch.sie_block->eca |= 0x10000000U; + if (test_kvm_facility(vcpu->kvm, 64)) + vcpu->arch.sie_block->ecb3 |= 0x01; if (test_kvm_facility(vcpu->kvm, 129)) { vcpu->arch.sie_block->eca |= 0x00020000; vcpu->arch.sie_block->ecd |= 0x20000000; } + vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE; if (vcpu->kvm->arch.use_cmma) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 6e32f75..9da9051 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -849,6 +849,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_SPLIT_IRQCHIP 121 #define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 #define KVM_CAP_HYPERV_SYNIC 123 +#define KVM_CAP_S390_RI 124 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From 1dab1345d885819a3a0d5d08ce0b5c2e12c65343 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Wed, 30 Dec 2015 13:08:46 -0500 Subject: kvm: x86: Check kvm_write_guest return value in kvm_write_wall_clock This makes sure the wall clock is updated only after an odd version value is successfully written to guest memory. Signed-off-by: Nicholas Krause Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b6102c1..102c302 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1169,7 +1169,8 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) ++version; - kvm_write_guest(kvm, wall_clock, &version, sizeof(version)); + if (kvm_write_guest(kvm, wall_clock, &version, sizeof(version))) + return; /* * The guest calculates current wall clock time by adding -- cgit v0.10.2 From 6c71f8ae155422a030b4c382cb985dde006ccc3f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 14:53:46 +0100 Subject: KVM: Remove unused KVM_REQ_KICK to save a bit in vcpu->requests Suggested-by: Takuya Yoshikawa [Takuya moved all subsequent constants to fill the void, but that is useless in view of the following patches. So this change looks nothing like the original. 
- Paolo] Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 61c3e6c6..5ac775b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -122,7 +122,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_UNHALT 6 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_CLOCK_UPDATE 8 -#define KVM_REQ_KICK 9 +/* 9 is unused */ #define KVM_REQ_DEACTIVATE_FPU 10 #define KVM_REQ_EVENT 11 #define KVM_REQ_APF_HALT 12 -- cgit v0.10.2 From 0cd310437255be81cd2413407c1d61eb70286fe2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 15:00:53 +0100 Subject: KVM: document which architecture uses each request bit Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5ac775b..48abf67 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -114,12 +114,14 @@ static inline bool is_error_page(struct page *page) * vcpu->requests bit members */ #define KVM_REQ_TLB_FLUSH 0 -#define KVM_REQ_MIGRATE_TIMER 1 -#define KVM_REQ_REPORT_TPR_ACCESS 2 #define KVM_REQ_MMU_RELOAD 3 -#define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_PENDING_TIMER 5 #define KVM_REQ_UNHALT 6 + +/* x86-specific requests */ +#define KVM_REQ_MIGRATE_TIMER 1 +#define KVM_REQ_REPORT_TPR_ACCESS 2 +#define KVM_REQ_TRIPLE_FAULT 4 #define KVM_REQ_MMU_SYNC 7 #define KVM_REQ_CLOCK_UPDATE 8 /* 9 is unused */ @@ -130,14 +132,10 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_NMI 14 #define KVM_REQ_PMU 15 #define KVM_REQ_PMI 16 -#define KVM_REQ_WATCHDOG 17 #define KVM_REQ_MASTERCLOCK_UPDATE 18 #define KVM_REQ_MCLOCK_INPROGRESS 19 -#define KVM_REQ_EPR_EXIT 20 #define KVM_REQ_SCAN_IOAPIC 21 #define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 -#define KVM_REQ_ENABLE_IBS 23 -#define KVM_REQ_DISABLE_IBS 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 @@ -146,6 +144,14 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_HV_EXIT 30 #define KVM_REQ_HV_STIMER 31 +/* PPC-specific requests */ +#define KVM_REQ_WATCHDOG 17 +#define KVM_REQ_EPR_EXIT 20 + +/* s390-specific requests */ +#define KVM_REQ_ENABLE_IBS 23 +#define KVM_REQ_DISABLE_IBS 24 + #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v0.10.2 From 6662ba347b29b6df0756ffedb167fa4d89bab06f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 15:02:44 +0100 Subject: KVM: renumber vcpu->request bits Leave room for 4 more arch-independent requests. 
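Since each request is just a bit index into vcpu->requests, the renumbering only has to keep the architecture-independent bits below the range the architectures start using. A toy user-space model of the set / test-and-clear protocol (plain bit operations for clarity; the real kvm_make_request()/kvm_check_request() helpers operate on vcpu->requests with the kernel's bitops):

#include <stdio.h>

/* Shared bits stay low; arch-specific numbering may start at 8 on every arch. */
#define REQ_TLB_FLUSH	0
#define REQ_UNHALT	3
#define REQ_ARCH_FIRST	8	/* e.g. the first x86- or s390-only request */

static unsigned long requests;

static void make_request(int bit)
{
	requests |= 1UL << bit;
}

static int check_request(int bit)	/* test and clear, like the kernel helper */
{
	if (requests & (1UL << bit)) {
		requests &= ~(1UL << bit);
		return 1;
	}
	return 0;
}

int main(void)
{
	make_request(REQ_ARCH_FIRST);
	printf("pending: %d\n", check_request(REQ_ARCH_FIRST));	/* 1 */
	printf("cleared: %d\n", check_request(REQ_ARCH_FIRST));	/* 0 */
	return 0;
}
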
Reviewed-by: Christian Borntraeger Signed-off-by: Paolo Bonzini diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 48abf67..b0ec0f7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -114,43 +114,42 @@ static inline bool is_error_page(struct page *page) * vcpu->requests bit members */ #define KVM_REQ_TLB_FLUSH 0 -#define KVM_REQ_MMU_RELOAD 3 -#define KVM_REQ_PENDING_TIMER 5 -#define KVM_REQ_UNHALT 6 +#define KVM_REQ_MMU_RELOAD 1 +#define KVM_REQ_PENDING_TIMER 2 +#define KVM_REQ_UNHALT 3 /* x86-specific requests */ -#define KVM_REQ_MIGRATE_TIMER 1 -#define KVM_REQ_REPORT_TPR_ACCESS 2 -#define KVM_REQ_TRIPLE_FAULT 4 -#define KVM_REQ_MMU_SYNC 7 -#define KVM_REQ_CLOCK_UPDATE 8 -/* 9 is unused */ -#define KVM_REQ_DEACTIVATE_FPU 10 -#define KVM_REQ_EVENT 11 -#define KVM_REQ_APF_HALT 12 -#define KVM_REQ_STEAL_UPDATE 13 -#define KVM_REQ_NMI 14 -#define KVM_REQ_PMU 15 -#define KVM_REQ_PMI 16 -#define KVM_REQ_MASTERCLOCK_UPDATE 18 -#define KVM_REQ_MCLOCK_INPROGRESS 19 -#define KVM_REQ_SCAN_IOAPIC 21 -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 22 +#define KVM_REQ_MIGRATE_TIMER 8 +#define KVM_REQ_REPORT_TPR_ACCESS 9 +#define KVM_REQ_TRIPLE_FAULT 10 +#define KVM_REQ_MMU_SYNC 11 +#define KVM_REQ_CLOCK_UPDATE 12 +#define KVM_REQ_DEACTIVATE_FPU 13 +#define KVM_REQ_EVENT 14 +#define KVM_REQ_APF_HALT 15 +#define KVM_REQ_STEAL_UPDATE 16 +#define KVM_REQ_NMI 17 +#define KVM_REQ_PMU 18 +#define KVM_REQ_PMI 19 +#define KVM_REQ_SMI 20 +#define KVM_REQ_MASTERCLOCK_UPDATE 21 +#define KVM_REQ_MCLOCK_INPROGRESS 22 +#define KVM_REQ_SCAN_IOAPIC 23 +#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 #define KVM_REQ_APIC_PAGE_RELOAD 25 -#define KVM_REQ_SMI 26 -#define KVM_REQ_HV_CRASH 27 -#define KVM_REQ_IOAPIC_EOI_EXIT 28 -#define KVM_REQ_HV_RESET 29 -#define KVM_REQ_HV_EXIT 30 -#define KVM_REQ_HV_STIMER 31 +#define KVM_REQ_HV_CRASH 26 +#define KVM_REQ_IOAPIC_EOI_EXIT 27 +#define KVM_REQ_HV_RESET 28 +#define KVM_REQ_HV_EXIT 29 +#define KVM_REQ_HV_STIMER 30 /* PPC-specific requests */ -#define KVM_REQ_WATCHDOG 17 -#define KVM_REQ_EPR_EXIT 20 +#define KVM_REQ_WATCHDOG 8 +#define KVM_REQ_EPR_EXIT 9 /* s390-specific requests */ -#define KVM_REQ_ENABLE_IBS 23 -#define KVM_REQ_DISABLE_IBS 24 +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v0.10.2 From 2860c4b1678646c99f5f1d77d026cd12ffd8a3a9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 Jan 2016 15:05:10 +0100 Subject: KVM: move architecture-dependent requests to arch/ Since the numbers now overlap, it makes sense to enumerate them in asm/kvm_host.h rather than linux/kvm_host.h. Functions that refer to architecture-specific requests are also moved to arch/. 
Signed-off-by: Paolo Bonzini diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cfa758c..271fefb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -50,6 +50,10 @@ #define KVM_NR_IRQCHIPS 1 #define KVM_IRQCHIP_NUM_PINS 256 +/* PPC-specific vcpu->requests bit members */ +#define KVM_REQ_WATCHDOG 8 +#define KVM_REQ_EPR_EXIT 9 + #include #define KVM_ARCH_WANT_MMU_NOTIFIER diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index df0acff..6742414 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -39,6 +39,10 @@ #define KVM_IRQCHIP_NUM_PINS 4096 #define KVM_HALT_POLL_NS_DEFAULT 0 +/* s390-specific vcpu->requests bit members */ +#define KVM_REQ_ENABLE_IBS 8 +#define KVM_REQ_DISABLE_IBS 9 + #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a7c8987..44adbb8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -46,6 +46,31 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS +/* x86-specific vcpu->requests bit members */ +#define KVM_REQ_MIGRATE_TIMER 8 +#define KVM_REQ_REPORT_TPR_ACCESS 9 +#define KVM_REQ_TRIPLE_FAULT 10 +#define KVM_REQ_MMU_SYNC 11 +#define KVM_REQ_CLOCK_UPDATE 12 +#define KVM_REQ_DEACTIVATE_FPU 13 +#define KVM_REQ_EVENT 14 +#define KVM_REQ_APF_HALT 15 +#define KVM_REQ_STEAL_UPDATE 16 +#define KVM_REQ_NMI 17 +#define KVM_REQ_PMU 18 +#define KVM_REQ_PMI 19 +#define KVM_REQ_SMI 20 +#define KVM_REQ_MASTERCLOCK_UPDATE 21 +#define KVM_REQ_MCLOCK_INPROGRESS 22 +#define KVM_REQ_SCAN_IOAPIC 23 +#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 +#define KVM_REQ_APIC_PAGE_RELOAD 25 +#define KVM_REQ_HV_CRASH 26 +#define KVM_REQ_IOAPIC_EOI_EXIT 27 +#define KVM_REQ_HV_RESET 28 +#define KVM_REQ_HV_EXIT 29 +#define KVM_REQ_HV_STIMER 30 + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ @@ -1268,6 +1293,9 @@ u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); +void kvm_make_mclock_inprogress_request(struct kvm *kvm); +void kvm_make_scan_ioapic_request(struct kvm *kvm); + void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 102c302..107ceaf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1686,6 +1686,11 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm) #endif } +void kvm_make_mclock_inprogress_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); +} + static void kvm_gen_update_masterclock(struct kvm *kvm) { #ifdef CONFIG_X86_64 @@ -2699,6 +2704,11 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu) return kvm_arch_has_noncoherent_dma(vcpu->kvm); } +static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) +{ + set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { /* Address WBINVD may be executed by guest */ @@ -6337,6 +6347,11 @@ static void process_smi(struct kvm_vcpu *vcpu) kvm_mmu_reset_context(vcpu); } +void kvm_make_scan_ioapic_request(struct kvm *kvm) +{ + kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); +} + static void 
vcpu_scan_ioapic(struct kvm_vcpu *vcpu) { u64 eoi_exit_bitmap[4]; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b0ec0f7..f707f74 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -111,46 +111,14 @@ static inline bool is_error_page(struct page *page) } /* - * vcpu->requests bit members + * Architecture-independent vcpu->requests bit members + * Bits 4-7 are reserved for more arch-independent bits. */ #define KVM_REQ_TLB_FLUSH 0 #define KVM_REQ_MMU_RELOAD 1 #define KVM_REQ_PENDING_TIMER 2 #define KVM_REQ_UNHALT 3 -/* x86-specific requests */ -#define KVM_REQ_MIGRATE_TIMER 8 -#define KVM_REQ_REPORT_TPR_ACCESS 9 -#define KVM_REQ_TRIPLE_FAULT 10 -#define KVM_REQ_MMU_SYNC 11 -#define KVM_REQ_CLOCK_UPDATE 12 -#define KVM_REQ_DEACTIVATE_FPU 13 -#define KVM_REQ_EVENT 14 -#define KVM_REQ_APF_HALT 15 -#define KVM_REQ_STEAL_UPDATE 16 -#define KVM_REQ_NMI 17 -#define KVM_REQ_PMU 18 -#define KVM_REQ_PMI 19 -#define KVM_REQ_SMI 20 -#define KVM_REQ_MASTERCLOCK_UPDATE 21 -#define KVM_REQ_MCLOCK_INPROGRESS 22 -#define KVM_REQ_SCAN_IOAPIC 23 -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 -#define KVM_REQ_APIC_PAGE_RELOAD 25 -#define KVM_REQ_HV_CRASH 26 -#define KVM_REQ_IOAPIC_EOI_EXIT 27 -#define KVM_REQ_HV_RESET 28 -#define KVM_REQ_HV_EXIT 29 -#define KVM_REQ_HV_STIMER 30 - -/* PPC-specific requests */ -#define KVM_REQ_WATCHDOG 8 -#define KVM_REQ_EPR_EXIT 9 - -/* s390-specific requests */ -#define KVM_REQ_ENABLE_IBS 8 -#define KVM_REQ_DISABLE_IBS 9 - #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -689,8 +657,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu); void kvm_flush_remote_tlbs(struct kvm *kvm); void kvm_reload_remote_mmus(struct kvm *kvm); -void kvm_make_mclock_inprogress_request(struct kvm *kvm); -void kvm_make_scan_ioapic_request(struct kvm *kvm); bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); long kvm_arch_dev_ioctl(struct file *filp, @@ -1011,11 +977,6 @@ static inline bool kvm_is_error_gpa(struct kvm *kvm, gpa_t gpa) return kvm_is_error_hva(hva); } -static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu) -{ - set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests); -} - enum kvm_stat_kind { KVM_STAT_VM, KVM_STAT_VCPU, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index be3cef1..314c777 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -206,16 +206,6 @@ void kvm_reload_remote_mmus(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD); } -void kvm_make_mclock_inprogress_request(struct kvm *kvm) -{ - kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS); -} - -void kvm_make_scan_ioapic_request(struct kvm *kvm) -{ - kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC); -} - int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) { struct page *page; -- cgit v0.10.2 From 1ac1b65ac199205724a8077d37ba7e64a1b7e77c Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:18 +0300 Subject: kvm/x86: Hyper-V timers fix incorrect logical operation Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f34f666..e4ef13a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -583,7 +583,7 @@ static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) { stimer_send_msg(stimer); if (!(stimer->config & HV_STIMER_PERIODIC)) - stimer->config |= ~HV_STIMER_ENABLE; + stimer->config &= ~HV_STIMER_ENABLE; else stimer_restart(stimer); } -- cgit v0.10.2 From 019b9781ccd667d4160f3636c8735e3baa085555 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:19 +0300 Subject: kvm/x86: Drop stimer_stop() function The function stimer_stop() is called in one place so remove the function and replace it's call by function content. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index e4ef13a..6b2ed93 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -400,16 +400,11 @@ static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, kvm_vcpu_kick(vcpu); } -static void stimer_stop(struct kvm_vcpu_hv_stimer *stimer) -{ - hrtimer_cancel(&stimer->timer); -} - static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); - stimer_stop(stimer); + hrtimer_cancel(&stimer->timer); clear_bit(stimer->index, vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); stimer->msg_pending = false; -- cgit v0.10.2 From f808495da56f28e94c6448125158f1175009fcfc Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:20 +0300 Subject: kvm/x86: Hyper-V unify stimer_start() and stimer_restart() This will be used in future to start Hyper-V SynIC timer in several places by one logic in one function. Changes v2: * drop stimer->count == 0 check inside stimer_start() * comment stimer_start() assumptions Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 6b2ed93..0dd7d17 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -408,6 +408,7 @@ static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) clear_bit(stimer->index, vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); stimer->msg_pending = false; + stimer->exp_time = 0; } static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) @@ -420,24 +421,11 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void stimer_restart(struct kvm_vcpu_hv_stimer *stimer) -{ - u64 time_now; - ktime_t ktime_now; - u64 remainder; - - time_now = get_time_ref_counter(stimer_to_vcpu(stimer)->kvm); - ktime_now = ktime_get(); - - div64_u64_rem(time_now - stimer->exp_time, stimer->count, &remainder); - stimer->exp_time = time_now + (stimer->count - remainder); - - hrtimer_start(&stimer->timer, - ktime_add_ns(ktime_now, - 100 * (stimer->exp_time - time_now)), - HRTIMER_MODE_ABS); -} - +/* + * stimer_start() assumptions: + * a) stimer->count is not equal to 0 + * b) stimer->config has HV_STIMER_ENABLE flag + */ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) { u64 time_now; @@ -447,12 +435,21 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) ktime_now = ktime_get(); if (stimer->config & HV_STIMER_PERIODIC) { - if (stimer->count == 0) - return -EINVAL; + if (stimer->exp_time) { + if (time_now >= stimer->exp_time) { + u64 remainder; + + div64_u64_rem(time_now - stimer->exp_time, + stimer->count, &remainder); + stimer->exp_time = + time_now + (stimer->count - remainder); + } + } else + stimer->exp_time = time_now + stimer->count; - stimer->exp_time = time_now + stimer->count; hrtimer_start(&stimer->timer, - ktime_add_ns(ktime_now, 100 * stimer->count), + ktime_add_ns(ktime_now, + 100 * (stimer->exp_time - time_now)), HRTIMER_MODE_ABS); return 0; } @@ -580,7 +577,7 @@ static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) if (!(stimer->config & HV_STIMER_PERIODIC)) stimer->config &= ~HV_STIMER_ENABLE; else - stimer_restart(stimer); + stimer_start(stimer); } void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) -- cgit v0.10.2 From 0cdeabb1186fc3a6c7854f05cec7c99e32935ebc Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:21 +0300 Subject: kvm/x86: Reorg stimer_expiration() to better control timer restart Split stimer_expiration() into two parts - timer expiration message sending and timer restart/cleanup based on timer state(config). This also fixes a bug where a one-shot timer message whose delivery failed once would get lost for good. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0dd7d17..5f85c12 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -554,30 +554,27 @@ static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint, return r; } -static void stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) +static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); struct hv_message *msg = &stimer->msg; struct hv_timer_message_payload *payload = (struct hv_timer_message_payload *)&msg->u.payload; - int r; - stimer->msg_pending = true; payload->expiration_time = stimer->exp_time; payload->delivery_time = get_time_ref_counter(vcpu->kvm); - r = synic_deliver_msg(vcpu_to_synic(vcpu), - HV_STIMER_SINT(stimer->config), msg); - if (!r) - stimer->msg_pending = false; + return synic_deliver_msg(vcpu_to_synic(vcpu), + HV_STIMER_SINT(stimer->config), msg); } static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) { - stimer_send_msg(stimer); - if (!(stimer->config & HV_STIMER_PERIODIC)) - stimer->config &= ~HV_STIMER_ENABLE; - else - stimer_start(stimer); + stimer->msg_pending = true; + if (!stimer_send_msg(stimer)) { + stimer->msg_pending = false; + if (!(stimer->config & HV_STIMER_PERIODIC)) + stimer->config &= ~HV_STIMER_ENABLE; + } } void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) @@ -594,6 +591,11 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) time_now = get_time_ref_counter(vcpu->kvm); if (time_now >= stimer->exp_time) stimer_expiration(stimer); + + if (stimer->config & HV_STIMER_ENABLE) + stimer_start(stimer); + else + stimer_cleanup(stimer); } } } -- cgit v0.10.2 From 23a3b201fd187f1e7af573b3794c3c5ebf7d2c06 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:22 +0300 Subject: kvm/x86: Hyper-V fix SynIC timer disabling condition The Hypervisor Function Specification (HFS) does not require disabling a SynIC timer on a timer config write when timer->count == 0. So drop this check; this allows the timer MSRs to be loaded during migration restore, because on the QEMU side config is set before count. Also fix the condition according to the HFS doc (15.3.1): "It is not permitted to set the SINTx field to zero for an enabled timer. If attempted, the timer will be marked disabled (that is, bit 0 cleared) immediately." Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 5f85c12..abfb920 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -474,7 +474,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, bool host) { - if (stimer->count == 0 || HV_STIMER_SINT(config) == 0) + if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) config &= ~HV_STIMER_ENABLE; stimer->config = config; stimer_cleanup(stimer); -- cgit v0.10.2 From 7be58a6488a9d36886d9423a1ed54fe104c7b182 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:23 +0300 Subject: kvm/x86: Skip SynIC vector check for QEMU side QEMU zero-initializes Hyper-V SynIC vectors. We should allow that and not reject zero values when they are set by the host. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V.
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index abfb920..ddae13e 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -72,12 +72,13 @@ static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic, return false; } -static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, u64 data) +static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint, + u64 data, bool host) { int vector; vector = data & HV_SYNIC_SINT_VECTOR_MASK; - if (vector < 16) + if (vector < 16 && !host) return 1; /* * Guest may configure multiple SINTs to use the same vector, so @@ -247,7 +248,7 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, break; } case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: - ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data); + ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host); break; default: ret = 1; -- cgit v0.10.2 From f3b138c5d89a1f74a2b46adaa1067aea9a7e3cbb Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Mon, 28 Dec 2015 18:27:24 +0300 Subject: kvm/x86: Update SynIC timers on guest entry only Consolidate updating the Hyper-V SynIC timers in a single place: on guest entry in processing KVM_REQ_HV_STIMER request. This simplifies the overall logic, and makes sure the most current state of msrs and guest clock is used for arming the timers (to achieve that, KVM_REQ_HV_STIMER has to be processed after KVM_REQ_CLOCK_UPDATE). Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index ddae13e..101c2e4 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -389,7 +389,7 @@ static u64 get_time_ref_counter(struct kvm *kvm) return div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); } -static void stimer_mark_expired(struct kvm_vcpu_hv_stimer *stimer, +static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, bool vcpu_kick) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); @@ -417,7 +417,7 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) struct kvm_vcpu_hv_stimer *stimer; stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); - stimer_mark_expired(stimer, true); + stimer_mark_pending(stimer, true); return HRTIMER_NORESTART; } @@ -462,7 +462,7 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) * "If a one shot is enabled and the specified count is in * the past, it will expire immediately." 
*/ - stimer_mark_expired(stimer, false); + stimer_mark_pending(stimer, false); return 0; } @@ -475,30 +475,24 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, bool host) { + stimer_cleanup(stimer); if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) config &= ~HV_STIMER_ENABLE; stimer->config = config; - stimer_cleanup(stimer); - if (stimer->config & HV_STIMER_ENABLE) - if (stimer_start(stimer)) - return 1; + stimer_mark_pending(stimer, false); return 0; } static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, bool host) { - stimer->count = count; - stimer_cleanup(stimer); + stimer->count = count; if (stimer->count == 0) stimer->config &= ~HV_STIMER_ENABLE; - else if (stimer->config & HV_STIMER_AUTOENABLE) { + else if (stimer->config & HV_STIMER_AUTOENABLE) stimer->config |= HV_STIMER_ENABLE; - if (stimer_start(stimer)) - return 1; - } - + stimer_mark_pending(stimer, false); return 0; } @@ -582,18 +576,24 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) { struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu); struct kvm_vcpu_hv_stimer *stimer; - u64 time_now; + u64 time_now, exp_time; int i; for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++) if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) { stimer = &hv_vcpu->stimer[i]; if (stimer->config & HV_STIMER_ENABLE) { - time_now = get_time_ref_counter(vcpu->kvm); - if (time_now >= stimer->exp_time) - stimer_expiration(stimer); + exp_time = stimer->exp_time; + + if (exp_time) { + time_now = + get_time_ref_counter(vcpu->kvm); + if (time_now >= exp_time) + stimer_expiration(stimer); + } - if (stimer->config & HV_STIMER_ENABLE) + if ((stimer->config & HV_STIMER_ENABLE) && + stimer->count) stimer_start(stimer); else stimer_cleanup(stimer); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 107ceaf..fad1d096 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6508,6 +6508,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } + + /* + * KVM_REQ_HV_STIMER has to be processed after + * KVM_REQ_CLOCK_UPDATE, because Hyper-V SynIC timers + * depend on the guest clock being up-to-date + */ if (kvm_check_request(KVM_REQ_HV_STIMER, vcpu)) kvm_hv_process_stimers(vcpu); } -- cgit v0.10.2 From 18659a9cb1885d00dd428f8857f7f628e54a45ee Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 23 Dec 2015 16:53:59 +0300 Subject: kvm/x86: Hyper-V SynIC tracepoints Trace the following Hyper SynIC events: * set msr * set sint irq * ack sint * sint irq eoi Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. 
Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 101c2e4..2d83d459 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -152,7 +152,7 @@ static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint) struct kvm_vcpu_hv_stimer *stimer; int gsi, idx, stimers_pending; - vcpu_debug(vcpu, "Hyper-V SynIC acked sint %d\n", sint); + trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint); if (synic->msg_page & HV_SYNIC_SIMP_ENABLE) synic_clear_sint_msg_pending(synic, sint); @@ -202,8 +202,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, if (!synic->active) return 1; - vcpu_debug(vcpu, "Hyper-V SynIC set msr 0x%x 0x%llx host %d\n", - msr, data, host); + trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host); + ret = 0; switch (msr) { case HV_X64_MSR_SCONTROL: @@ -312,7 +312,7 @@ int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) irq.level = 1; ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); - vcpu_debug(vcpu, "Hyper-V SynIC set irq ret %d\n", ret); + trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); return ret; } @@ -332,7 +332,7 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector) struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu); int i; - vcpu_debug(vcpu, "Hyper-V SynIC send eoi vec %d\n", vector); + trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector); for (i = 0; i < ARRAY_SIZE(synic->sint); i++) if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector) diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index ab9ae67..4be3500 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1025,6 +1025,99 @@ TRACE_EVENT(kvm_pi_irte_update, __entry->pi_desc_addr) ); +/* + * Tracepoint for kvm_hv_notify_acked_sint. + */ +TRACE_EVENT(kvm_hv_notify_acked_sint, + TP_PROTO(int vcpu_id, u32 sint), + TP_ARGS(vcpu_id, sint), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->sint = sint; + ), + + TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) +); + +/* + * Tracepoint for synic_set_irq. + */ +TRACE_EVENT(kvm_hv_synic_set_irq, + TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), + TP_ARGS(vcpu_id, sint, vector, ret), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + __field(int, vector) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->sint = sint; + __entry->vector = vector; + __entry->ret = ret; + ), + + TP_printk("vcpu_id %d sint %u vector %d ret %d", + __entry->vcpu_id, __entry->sint, __entry->vector, + __entry->ret) +); + +/* + * Tracepoint for kvm_hv_synic_send_eoi. + */ +TRACE_EVENT(kvm_hv_synic_send_eoi, + TP_PROTO(int vcpu_id, int vector), + TP_ARGS(vcpu_id, vector), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, sint) + __field(int, vector) + __field(int, ret) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vector = vector; + ), + + TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) +); + +/* + * Tracepoint for synic_set_msr. 
+ */ +TRACE_EVENT(kvm_hv_synic_set_msr, + TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), + TP_ARGS(vcpu_id, msr, data, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, msr) + __field(u64, data) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->msr = msr; + __entry->data = data; + __entry->host = host + ), + + TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", + __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From ac3e5fcae8ca658e7dcc3fdcd50af7e4779f58c1 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 23 Dec 2015 16:54:00 +0300 Subject: kvm/x86: Hyper-V SynIC timers tracepoints Trace the following Hyper SynIC timers events: * periodic timer start * one-shot timer start * timer callback * timer expiration and message delivery result * timer config setup * timer count setup * timer cleanup Signed-off-by: Andrey Smetanin CC: Gleb Natapov CC: Paolo Bonzini CC: Roman Kagan CC: Denis V. Lunev CC: qemu-devel@nongnu.org Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2d83d459..c58ba67 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -405,6 +405,9 @@ static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer) { struct kvm_vcpu *vcpu = stimer_to_vcpu(stimer); + trace_kvm_hv_stimer_cleanup(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index); + hrtimer_cancel(&stimer->timer); clear_bit(stimer->index, vcpu_to_hv_vcpu(vcpu)->stimer_pending_bitmap); @@ -417,6 +420,8 @@ static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer) struct kvm_vcpu_hv_stimer *stimer; stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer); + trace_kvm_hv_stimer_callback(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index); stimer_mark_pending(stimer, true); return HRTIMER_NORESTART; @@ -448,6 +453,11 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) } else stimer->exp_time = time_now + stimer->count; + trace_kvm_hv_stimer_start_periodic( + stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, + time_now, stimer->exp_time); + hrtimer_start(&stimer->timer, ktime_add_ns(ktime_now, 100 * (stimer->exp_time - time_now)), @@ -466,6 +476,10 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) return 0; } + trace_kvm_hv_stimer_start_one_shot(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, + time_now, stimer->count); + hrtimer_start(&stimer->timer, ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)), HRTIMER_MODE_ABS); @@ -475,6 +489,9 @@ static int stimer_start(struct kvm_vcpu_hv_stimer *stimer) static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, bool host) { + trace_kvm_hv_stimer_set_config(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, config, host); + stimer_cleanup(stimer); if ((stimer->config & HV_STIMER_ENABLE) && HV_STIMER_SINT(config) == 0) config &= ~HV_STIMER_ENABLE; @@ -486,6 +503,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, bool host) { + trace_kvm_hv_stimer_set_count(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, count, host); + stimer_cleanup(stimer); stimer->count = count; if (stimer->count == 0) @@ -564,8 +584,13 @@ static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer) static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer) { + int r; + stimer->msg_pending = true; - if (!stimer_send_msg(stimer)) { + r = 
stimer_send_msg(stimer); + trace_kvm_hv_stimer_expiration(stimer_to_vcpu(stimer)->vcpu_id, + stimer->index, r); + if (!r) { stimer->msg_pending = false; if (!(stimer->config & HV_STIMER_PERIODIC)) stimer->config &= ~HV_STIMER_ENABLE; diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 4be3500..ad9f6a2 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -1118,6 +1118,176 @@ TRACE_EVENT(kvm_hv_synic_set_msr, __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) ); +/* + * Tracepoint for stimer_set_config. + */ +TRACE_EVENT(kvm_hv_stimer_set_config, + TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), + TP_ARGS(vcpu_id, timer_index, config, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, config) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->config = config; + __entry->host = host; + ), + + TP_printk("vcpu_id %d timer %d config 0x%llx host %d", + __entry->vcpu_id, __entry->timer_index, __entry->config, + __entry->host) +); + +/* + * Tracepoint for stimer_set_count. + */ +TRACE_EVENT(kvm_hv_stimer_set_count, + TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), + TP_ARGS(vcpu_id, timer_index, count, host), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, count) + __field(bool, host) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->count = count; + __entry->host = host; + ), + + TP_printk("vcpu_id %d timer %d count %llu host %d", + __entry->vcpu_id, __entry->timer_index, __entry->count, + __entry->host) +); + +/* + * Tracepoint for stimer_start(periodic timer case). + */ +TRACE_EVENT(kvm_hv_stimer_start_periodic, + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), + TP_ARGS(vcpu_id, timer_index, time_now, exp_time), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, time_now) + __field(u64, exp_time) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->time_now = time_now; + __entry->exp_time = exp_time; + ), + + TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", + __entry->vcpu_id, __entry->timer_index, __entry->time_now, + __entry->exp_time) +); + +/* + * Tracepoint for stimer_start(one-shot timer case). + */ +TRACE_EVENT(kvm_hv_stimer_start_one_shot, + TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), + TP_ARGS(vcpu_id, timer_index, time_now, count), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(u64, time_now) + __field(u64, count) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->time_now = time_now; + __entry->count = count; + ), + + TP_printk("vcpu_id %d timer %d time_now %llu count %llu", + __entry->vcpu_id, __entry->timer_index, __entry->time_now, + __entry->count) +); + +/* + * Tracepoint for stimer_timer_callback. + */ +TRACE_EVENT(kvm_hv_stimer_callback, + TP_PROTO(int vcpu_id, int timer_index), + TP_ARGS(vcpu_id, timer_index), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + ), + + TP_printk("vcpu_id %d timer %d", + __entry->vcpu_id, __entry->timer_index) +); + +/* + * Tracepoint for stimer_expiration. 
+ */ +TRACE_EVENT(kvm_hv_stimer_expiration, + TP_PROTO(int vcpu_id, int timer_index, int msg_send_result), + TP_ARGS(vcpu_id, timer_index, msg_send_result), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + __field(int, msg_send_result) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + __entry->msg_send_result = msg_send_result; + ), + + TP_printk("vcpu_id %d timer %d msg send result %d", + __entry->vcpu_id, __entry->timer_index, + __entry->msg_send_result) +); + +/* + * Tracepoint for stimer_cleanup. + */ +TRACE_EVENT(kvm_hv_stimer_cleanup, + TP_PROTO(int vcpu_id, int timer_index), + TP_ARGS(vcpu_id, timer_index), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(int, timer_index) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->timer_index = timer_index; + ), + + TP_printk("vcpu_id %d timer %d", + __entry->vcpu_id, __entry->timer_index) +); + #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH -- cgit v0.10.2 From 45bdbcfdf241149642fb6c25ab0c209d59c371b7 Mon Sep 17 00:00:00 2001 From: Huaitong Han Date: Tue, 12 Jan 2016 16:04:20 +0800 Subject: kvm: x86: Fix vmwrite to SECONDARY_VM_EXEC_CONTROL vmx_cpuid_tries to update SECONDARY_VM_EXEC_CONTROL in the VMCS, but it will cause a vmwrite error on older CPUs because the code does not check for the presence of CPU_BASED_ACTIVATE_SECONDARY_CONTROLS. This will get rid of the following trace on e.g. Core2 6600: vmwrite error: reg 401e value 10 (err 12) Call Trace: [] dump_stack+0x40/0x57 [] vmx_cpuid_update+0x5d/0x150 [kvm_intel] [] kvm_vcpu_ioctl_set_cpuid2+0x4c/0x70 [kvm] [] kvm_arch_vcpu_ioctl+0x903/0xfa0 [kvm] Fixes: feda805fe7c4ed9cf78158e73b1218752e3b4314 Cc: stable@vger.kernel.org Reported-by: Zdenek Kaspar Signed-off-by: Huaitong Han Signed-off-by: Paolo Bonzini diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 62d958a..be3f1735 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8989,7 +8989,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) best->ebx &= ~bit(X86_FEATURE_INVPCID); } - vmcs_set_secondary_exec_control(secondary_exec_ctl); + if (cpu_has_secondary_exec_ctrls()) + vmcs_set_secondary_exec_control(secondary_exec_ctl); if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { if (guest_cpuid_has_pcommit(vcpu)) -- cgit v0.10.2
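The periodic re-arm arithmetic introduced by the unified stimer_start() in the patches above (and refined by the "Update SynIC timers on guest entry only" change) can be illustrated outside the kernel. The sketch below is a minimal user-space model, not kernel code: the struct is a simplified stand-in for struct kvm_vcpu_hv_stimer, and the Hyper-V time reference counter (in 100 ns units) is passed in as a plain value instead of coming from get_time_ref_counter(). It shows how a missed periodic deadline is advanced to the next period boundary (the div64_u64_rem() step) and why the hrtimer delta is scaled by 100, since stimer counts are in 100 ns units while hrtimers take nanoseconds.

/*
 * Minimal user-space sketch of the periodic re-arm arithmetic used by
 * stimer_start() in the patches above.  Not kernel code: struct stimer
 * is a stand-in for struct kvm_vcpu_hv_stimer, and "now" replaces
 * get_time_ref_counter().
 */
#include <stdint.h>
#include <stdio.h>

struct stimer {
	uint64_t count;     /* period, in 100 ns units */
	uint64_t exp_time;  /* next expiration, in 100 ns units; 0 = not armed */
};

/*
 * Next expiration for a periodic timer.  If one or more periods were
 * missed (time_now >= exp_time), skip ahead to the next period
 * boundary -- the same computation the kernel does with
 * div64_u64_rem() -- instead of firing once per missed period.
 */
static uint64_t next_periodic_exp(const struct stimer *t, uint64_t time_now)
{
	if (!t->exp_time)                /* first arming after cleanup */
		return time_now + t->count;
	if (time_now < t->exp_time)      /* deadline still in the future */
		return t->exp_time;
	return time_now + (t->count - (time_now - t->exp_time) % t->count);
}

int main(void)
{
	struct stimer t = { .count = 100000 /* 10 ms */, .exp_time = 0 };
	uint64_t now = 5000000;

	t.exp_time = next_periodic_exp(&t, now);
	printf("armed: exp_time = %llu\n", (unsigned long long)t.exp_time);

	/* pretend the vcpu was away for 2.5 periods */
	now = t.exp_time + (5 * t.count) / 2;
	t.exp_time = next_periodic_exp(&t, now);
	printf("re-armed: exp_time = %llu\n", (unsigned long long)t.exp_time);

	/*
	 * hrtimers take nanoseconds while exp_time is in 100 ns units,
	 * hence the "100 * (stimer->exp_time - time_now)" in the patch.
	 */
	printf("hrtimer delta = %llu ns\n",
	       (unsigned long long)(100 * (t.exp_time - now)));
	return 0;
}

Skipping ahead to the next boundary, rather than queueing one expiration per missed period, mirrors what the kernel computation above does and keeps a vcpu that was descheduled for a long time from seeing a burst of stale timer messages on re-entry.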