From 04bdfa8ab5bab929cc57f73952c503a88372601d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 4 Sep 2015 16:24:38 +0200
Subject: arm/arm64: KVM: vgic: Move active state handling to flush_hwstate

We currently set the physical active state only when we *inject* a new
pending virtual interrupt, but this is actually not correct, because we
could have been preempted and run something else on the system that
resets the active state to clear.  This causes us to run the VM with the
timer set to fire, but without setting the physical active state.

The solution is to always check the LR configurations, and we if have a
mapped interrupt in the LR in either the pending or active state
(virtual), then set the physical active state.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 9eb489a..6bd1c9b 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1144,26 +1144,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
 		struct irq_phys_map *map;
 		map = vgic_irq_map_search(vcpu, irq);
 
-		/*
-		 * If we have a mapping, and the virtual interrupt is
-		 * being injected, then we must set the state to
-		 * active in the physical world. Otherwise the
-		 * physical interrupt will fire and the guest will
-		 * exit before processing the virtual interrupt.
-		 */
 		if (map) {
-			int ret;
-
-			BUG_ON(!map->active);
 			vlr.hwirq = map->phys_irq;
 			vlr.state |= LR_HW;
 			vlr.state &= ~LR_EOI_INT;
 
-			ret = irq_set_irqchip_state(map->irq,
-						    IRQCHIP_STATE_ACTIVE,
-						    true);
-			WARN_ON(ret);
-
 			/*
 			 * Make sure we're not going to sample this
 			 * again, as a HW-backed interrupt cannot be
@@ -1255,7 +1240,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
 	unsigned long *pa_percpu, *pa_shared;
-	int i, vcpu_id;
+	int i, vcpu_id, lr, ret;
 	int overflow = 0;
 	int nr_shared = vgic_nr_shared_irqs(dist);
 
@@ -1310,6 +1295,31 @@ epilog:
 		 */
 		clear_bit(vcpu_id, dist->irq_pending_on_cpu);
 	}
+
+	for (lr = 0; lr < vgic->nr_lr; lr++) {
+		struct vgic_lr vlr;
+
+		if (!test_bit(lr, vgic_cpu->lr_used))
+			continue;
+
+		vlr = vgic_get_lr(vcpu, lr);
+
+		/*
+		 * If we have a mapping, and the virtual interrupt is
+		 * presented to the guest (as pending or active), then we must
+		 * set the state to active in the physical world. See
+		 * Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt.
+		 */
+		if (vlr.state & LR_HW) {
+			struct irq_phys_map *map;
+			map = vgic_irq_map_search(vcpu, vlr.irq);
+
+			ret = irq_set_irqchip_state(map->irq,
+						    IRQCHIP_STATE_ACTIVE,
+						    true);
+			WARN_ON(ret);
+		}
+	}
 }
 
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
-- 
cgit v0.10.2


From 4ad9e16af36bbe8657aabe494ff912acbc213ce4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 4 Sep 2015 16:24:39 +0200
Subject: arm/arm64: KVM: arch timer: Reset CNTV_CTL to 0

Provide a better quality of implementation and be architecture compliant
on ARMv7 for the architected timer by resetting the CNTV_CTL to 0 on
reset of the timer.

This change alone fixes the UEFI reset issue reported by Laszlo back in
February.

Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Drew Jones <drjones@redhat.com>
Cc: Wei Huang <wei@redhat.com>
Cc: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 76e38d2..48c6e1a 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -200,6 +200,14 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 	timer->irq = irq;
 
 	/*
+	 * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
+	 * and to 0 for ARMv7.  We provide an implementation that always
+	 * resets the timer to be disabled and unmasked and is compliant with
+	 * the ARMv7 architecture.
+	 */
+	timer->cntv_ctl = 0;
+
+	/*
 	 * Tell the VGIC that the virtual interrupt is tied to a
 	 * physical interrupt. We do that once per VCPU.
 	 */
-- 
cgit v0.10.2


From 857d1a973077245f03b351e2539529c86267bfe4 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 24 Aug 2015 14:42:05 +0100
Subject: arm64: KVM: set {v,}TCR_EL2 RES1 bits

Currently we don't set the RES1 bits of TCR_EL2 and VTCR_EL2 when
configuring them, which could lead to unexpected behaviour when an
architectural meaning is defined for those bits.

Set the RES1 bits to avoid issues.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7605e09..cbc5e1a 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -95,6 +95,7 @@
 			 SCTLR_EL2_SA | SCTLR_EL2_I)
 
 /* TCR_EL2 Registers bits */
+#define TCR_EL2_RES1	((1 << 31) | (1 << 23))
 #define TCR_EL2_TBI	(1 << 20)
 #define TCR_EL2_PS	(7 << 16)
 #define TCR_EL2_PS_40B	(2 << 16)
@@ -106,9 +107,10 @@
 #define TCR_EL2_MASK	(TCR_EL2_TG0 | TCR_EL2_SH0 | \
 			 TCR_EL2_ORGN0 | TCR_EL2_IRGN0 | TCR_EL2_T0SZ)
 
-#define TCR_EL2_FLAGS	(TCR_EL2_PS_40B)
+#define TCR_EL2_FLAGS	(TCR_EL2_RES1 | TCR_EL2_PS_40B)
 
 /* VTCR_EL2 Registers bits */
+#define VTCR_EL2_RES1		(1 << 31)
 #define VTCR_EL2_PS_MASK	(7 << 16)
 #define VTCR_EL2_TG0_MASK	(1 << 14)
 #define VTCR_EL2_TG0_4K		(0 << 14)
@@ -147,7 +149,8 @@
  */
 #define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
 				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \
+				 VTCR_EL2_RES1)
 #define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
 #else
 /*
@@ -158,7 +161,8 @@
  */
 #define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
 				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \
+				 VTCR_EL2_RES1)
 #define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
 #endif
 
-- 
cgit v0.10.2


From 0c0672922dcc70ffba11d96385e98e42fb3ae08d Mon Sep 17 00:00:00 2001
From: Alexander Spyridakis <a.spyridakis@virtualopensystems.com>
Date: Fri, 4 Sep 2015 17:06:24 +0200
Subject: arm/arm64: KVM: Fix PSCI affinity info return value for non valid
 cores

If a guest requests the affinity info for a non-existing vCPU we need to
properly return an error, instead of erroneously reporting an off state.

Signed-off-by: Alexander Spyridakis <a.spyridakis@virtualopensystems.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 4b94b51..ad6f642 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -126,7 +126,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 
 static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 {
-	int i;
+	int i, matching_cpus = 0;
 	unsigned long mpidr;
 	unsigned long target_affinity;
 	unsigned long target_affinity_mask;
@@ -151,12 +151,16 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 	 */
 	kvm_for_each_vcpu(i, tmp, kvm) {
 		mpidr = kvm_vcpu_get_mpidr_aff(tmp);
-		if (((mpidr & target_affinity_mask) == target_affinity) &&
-		    !tmp->arch.pause) {
-			return PSCI_0_2_AFFINITY_LEVEL_ON;
+		if ((mpidr & target_affinity_mask) == target_affinity) {
+			matching_cpus++;
+			if (!tmp->arch.pause)
+				return PSCI_0_2_AFFINITY_LEVEL_ON;
 		}
 	}
 
+	if (!matching_cpus)
+		return PSCI_RET_INVALID_PARAMS;
+
 	return PSCI_0_2_AFFINITY_LEVEL_OFF;
 }
 
-- 
cgit v0.10.2


From edb9272f35d8bc97c86101a13f67d0ba16f3eecc Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Mon, 14 Sep 2015 17:38:51 +0800
Subject: KVM: fix polling for guest halt continued even if disable it

If there is already some polling ongoing, it's impossible to disable the
polling, since as soon as somebody sets halt_poll_ns to 0, polling will
never stop, as grow and shrink are only handled if halt_poll_ns is != 0.

This patch fix it by reset vcpu->halt_poll_ns in order to stop polling
when polling is disabled.

Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a25a731..eb4c9d2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2043,7 +2043,8 @@ out:
 		else if (vcpu->halt_poll_ns < halt_poll_ns &&
 			block_ns < halt_poll_ns)
 			grow_halt_poll_ns(vcpu);
-	}
+	} else
+		vcpu->halt_poll_ns = 0;
 
 	trace_kvm_vcpu_wakeup(block_ns, waited);
 }
-- 
cgit v0.10.2


From 43297dda0a51e4ffed0888ce727c218cfb7474b6 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 14 Sep 2015 16:06:03 +0100
Subject: KVM: arm64: add workaround for Cortex-A57 erratum #852523

When restoring the system register state for an AArch32 guest at EL2,
writes to DACR32_EL2 may not be correctly synchronised by Cortex-A57,
which can lead to the guest effectively running with junk in the DACR
and running into unexpected domain faults.

This patch works around the issue by re-ordering our restoration of the
AArch32 register aliases so that they happen before the AArch64 system
registers. Ensuring that the registers are restored in this order
guarantees that they will be correctly synchronised by the core.

Cc: <stable@vger.kernel.org>
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8188f6a..39aa322 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -745,6 +745,9 @@ ENTRY(__kvm_vcpu_run)
 	// Guest context
 	add	x2, x0, #VCPU_CONTEXT
 
+	// We must restore the 32-bit state before the sysregs, thanks
+	// to Cortex-A57 erratum #852523.
+	restore_guest_32bit_state
 	bl __restore_sysregs
 
 	skip_debug_state x3, 1f
@@ -752,7 +755,6 @@ ENTRY(__kvm_vcpu_run)
 	kern_hyp_va x3
 	bl	__restore_debug
 1:
-	restore_guest_32bit_state
 	restore_guest_regs
 
 	// That's it, no more messing around.
-- 
cgit v0.10.2


From 81523aac01a7f68af2576800c8623edfb6323017 Mon Sep 17 00:00:00 2001
From: Wei Yang <weiyang@linux.vnet.ibm.com>
Date: Fri, 11 Sep 2015 14:12:53 +0800
Subject: KVM: make the declaration of functions within 80 characters

After 'commit 0b8ba4a2b658 ("KVM: fix checkpatch.pl errors in
kvm/coalesced_mmio.h")', the declaration of the two function will exceed 80
characters.

This patch reduces the TAPs to make each line in 80 characters.

Signed-off-by: Wei Yang <weiyang@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 5cbf190..6bca74c 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -24,9 +24,9 @@ struct kvm_coalesced_mmio_dev {
 int kvm_coalesced_mmio_init(struct kvm *kvm);
 void kvm_coalesced_mmio_free(struct kvm *kvm);
 int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
-																				struct kvm_coalesced_mmio_zone *zone);
+					struct kvm_coalesced_mmio_zone *zone);
 int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
-																				struct kvm_coalesced_mmio_zone *zone);
+					struct kvm_coalesced_mmio_zone *zone);
 
 #else
 
-- 
cgit v0.10.2


From 8453fecbecae26edb3f278627376caab05d9a88d Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 15 Sep 2015 14:41:54 +0800
Subject: kvm: don't try to register to KVM_FAST_MMIO_BUS for non mmio eventfd

We only want zero length mmio eventfd to be registered on
KVM_FAST_MMIO_BUS. So check this explicitly when arg->len is zero to
make sure this.

Cc: stable@vger.kernel.org
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9ff4193..e404806 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -846,7 +846,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	/* When length is ignored, MMIO is also put on a separate bus, for
 	 * faster lookups.
 	 */
-	if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
+	if (!args->len && bus_idx == KVM_MMIO_BUS) {
 		ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
 					      p->addr, 0, &p->dev);
 		if (ret < 0)
@@ -901,7 +901,7 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-		if (!p->length) {
+		if (!p->length && p->bus_idx == KVM_MMIO_BUS) {
 			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
 						  &p->dev);
 		}
-- 
cgit v0.10.2


From 85da11ca587c8eb73993a1b503052391a73586f9 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 15 Sep 2015 14:41:55 +0800
Subject: kvm: factor out core eventfd assign/deassign logic

This patch factors out core eventfd assign/deassign logic and leaves
the argument checking and bus index selection to callers.

Cc: stable@vger.kernel.org
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index e404806..0829c7f 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -771,40 +771,14 @@ static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
 	return KVM_MMIO_BUS;
 }
 
-static int
-kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
+				enum kvm_bus bus_idx,
+				struct kvm_ioeventfd *args)
 {
-	enum kvm_bus              bus_idx;
-	struct _ioeventfd        *p;
-	struct eventfd_ctx       *eventfd;
-	int                       ret;
-
-	bus_idx = ioeventfd_bus_from_flags(args->flags);
-	/* must be natural-word sized, or 0 to ignore length */
-	switch (args->len) {
-	case 0:
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	/* check for range overflow */
-	if (args->addr + args->len < args->addr)
-		return -EINVAL;
 
-	/* check for extra flags that we don't understand */
-	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
-		return -EINVAL;
-
-	/* ioeventfd with no length can't be combined with DATAMATCH */
-	if (!args->len &&
-	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
-			   KVM_IOEVENTFD_FLAG_DATAMATCH))
-		return -EINVAL;
+	struct eventfd_ctx *eventfd;
+	struct _ioeventfd *p;
+	int ret;
 
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
@@ -873,14 +847,13 @@ fail:
 }
 
 static int
-kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
+			   struct kvm_ioeventfd *args)
 {
-	enum kvm_bus              bus_idx;
 	struct _ioeventfd        *p, *tmp;
 	struct eventfd_ctx       *eventfd;
 	int                       ret = -ENOENT;
 
-	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	eventfd = eventfd_ctx_fdget(args->fd);
 	if (IS_ERR(eventfd))
 		return PTR_ERR(eventfd);
@@ -918,6 +891,48 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	return ret;
 }
 
+static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
+
+	return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	enum kvm_bus              bus_idx;
+
+	bus_idx = ioeventfd_bus_from_flags(args->flags);
+	/* must be natural-word sized, or 0 to ignore length */
+	switch (args->len) {
+	case 0:
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check for range overflow */
+	if (args->addr + args->len < args->addr)
+		return -EINVAL;
+
+	/* check for extra flags that we don't understand */
+	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+		return -EINVAL;
+
+	/* ioeventfd with no length can't be combined with DATAMATCH */
+	if (!args->len &&
+	    args->flags & (KVM_IOEVENTFD_FLAG_PIO |
+			   KVM_IOEVENTFD_FLAG_DATAMATCH))
+		return -EINVAL;
+
+	return kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
+}
+
 int
 kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
-- 
cgit v0.10.2


From eefd6b06b17c5478e7c24bea6f64beaa2c431ca6 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 15 Sep 2015 14:41:56 +0800
Subject: kvm: fix double free for fast mmio eventfd

We register wildcard mmio eventfd on two buses, once for KVM_MMIO_BUS
and once on KVM_FAST_MMIO_BUS but with a single iodev
instance. This will lead to an issue: kvm_io_bus_destroy() knows
nothing about the devices on two buses pointing to a single dev. Which
will lead to double free[1] during exit. Fix this by allocating two
instances of iodevs then registering one on KVM_MMIO_BUS and another
on KVM_FAST_MMIO_BUS.

CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic #28-Ubuntu
Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 09/12/2013
task: ffff88009ae0c4b0 ti: ffff88020e7f0000 task.ti: ffff88020e7f0000
RIP: 0010:[<ffffffffc07e25d8>]  [<ffffffffc07e25d8>] ioeventfd_release+0x28/0x60 [kvm]
RSP: 0018:ffff88020e7f3bc8  EFLAGS: 00010292
RAX: dead000000200200 RBX: ffff8801ec19c900 RCX: 000000018200016d
RDX: ffff8801ec19cf80 RSI: ffffea0008bf1d40 RDI: ffff8801ec19c900
RBP: ffff88020e7f3bd8 R08: 000000002fc75a01 R09: 000000018200016d
R10: ffffffffc07df6ae R11: ffff88022fc75a98 R12: ffff88021e7cc000
R13: ffff88021e7cca48 R14: ffff88021e7cca50 R15: ffff8801ec19c880
FS:  00007fc1ee3e6700(0000) GS:ffff88023e240000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f8f389d8000 CR3: 000000023dc13000 CR4: 00000000001427e0
Stack:
ffff88021e7cc000 0000000000000000 ffff88020e7f3be8 ffffffffc07e2622
ffff88020e7f3c38 ffffffffc07df69a ffff880232524160 ffff88020e792d80
 0000000000000000 ffff880219b78c00 0000000000000008 ffff8802321686a8
Call Trace:
[<ffffffffc07e2622>] ioeventfd_destructor+0x12/0x20 [kvm]
[<ffffffffc07df69a>] kvm_put_kvm+0xca/0x210 [kvm]
[<ffffffffc07df818>] kvm_vcpu_release+0x18/0x20 [kvm]
[<ffffffff811f69f7>] __fput+0xe7/0x250
[<ffffffff811f6bae>] ____fput+0xe/0x10
[<ffffffff81093f04>] task_work_run+0xd4/0xf0
[<ffffffff81079358>] do_exit+0x368/0xa50
[<ffffffff81082c8f>] ? recalc_sigpending+0x1f/0x60
[<ffffffff81079ad5>] do_group_exit+0x45/0xb0
[<ffffffff81085c71>] get_signal+0x291/0x750
[<ffffffff810144d8>] do_signal+0x28/0xab0
[<ffffffff810f3a3b>] ? do_futex+0xdb/0x5d0
[<ffffffff810b7028>] ? __wake_up_locked_key+0x18/0x20
[<ffffffff810f3fa6>] ? SyS_futex+0x76/0x170
[<ffffffff81014fc9>] do_notify_resume+0x69/0xb0
[<ffffffff817cb9af>] int_signal+0x12/0x17
Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 7f 20 e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 <48> 89 10 48 b8 00 01 10 00 00
 RIP  [<ffffffffc07e25d8>] ioeventfd_release+0x28/0x60 [kvm]
 RSP <ffff88020e7f3bc8>

Cc: stable@vger.kernel.org
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0829c7f..79db453 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -817,16 +817,6 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
 	if (ret < 0)
 		goto unlock_fail;
 
-	/* When length is ignored, MMIO is also put on a separate bus, for
-	 * faster lookups.
-	 */
-	if (!args->len && bus_idx == KVM_MMIO_BUS) {
-		ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
-					      p->addr, 0, &p->dev);
-		if (ret < 0)
-			goto register_fail;
-	}
-
 	kvm->buses[bus_idx]->ioeventfd_count++;
 	list_add_tail(&p->list, &kvm->ioeventfds);
 
@@ -834,8 +824,6 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
 
 	return 0;
 
-register_fail:
-	kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
 unlock_fail:
 	mutex_unlock(&kvm->slots_lock);
 
@@ -874,10 +862,6 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 			continue;
 
 		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-		if (!p->length && p->bus_idx == KVM_MMIO_BUS) {
-			kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
-						  &p->dev);
-		}
 		kvm->buses[bus_idx]->ioeventfd_count--;
 		ioeventfd_release(p);
 		ret = 0;
@@ -894,14 +878,19 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
+	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+
+	if (!args->len && bus_idx == KVM_MMIO_BUS)
+		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
 
-	return kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+	return ret;
 }
 
 static int
 kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
 	enum kvm_bus              bus_idx;
+	int ret;
 
 	bus_idx = ioeventfd_bus_from_flags(args->flags);
 	/* must be natural-word sized, or 0 to ignore length */
@@ -930,7 +919,25 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 			   KVM_IOEVENTFD_FLAG_DATAMATCH))
 		return -EINVAL;
 
-	return kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
+	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
+	if (ret)
+		goto fail;
+
+	/* When length is ignored, MMIO is also put on a separate bus, for
+	 * faster lookups.
+	 */
+	if (!args->len && bus_idx == KVM_MMIO_BUS) {
+		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
+		if (ret < 0)
+			goto fast_fail;
+	}
+
+	return 0;
+
+fast_fail:
+	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
+fail:
+	return ret;
 }
 
 int
-- 
cgit v0.10.2


From 8f4216c7d28976f7ec1b2bcbfa0a9f787133c45e Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Tue, 15 Sep 2015 14:41:57 +0800
Subject: kvm: fix zero length mmio searching

Currently, if we had a zero length mmio eventfd assigned on
KVM_MMIO_BUS. It will never be found by kvm_io_bus_cmp() since it
always compares the kvm_io_range() with the length that guest
wrote. This will cause e.g for vhost, kick will be trapped by qemu
userspace instead of vhost. Fixing this by using zero length if an
iodevice is zero length.

Cc: stable@vger.kernel.org
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eb4c9d2..9af68db 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3157,10 +3157,25 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 static inline int kvm_io_bus_cmp(const struct kvm_io_range *r1,
 				 const struct kvm_io_range *r2)
 {
-	if (r1->addr < r2->addr)
+	gpa_t addr1 = r1->addr;
+	gpa_t addr2 = r2->addr;
+
+	if (addr1 < addr2)
 		return -1;
-	if (r1->addr + r1->len > r2->addr + r2->len)
+
+	/* If r2->len == 0, match the exact address.  If r2->len != 0,
+	 * accept any overlapping write.  Any order is acceptable for
+	 * overlapping ranges, because kvm_io_bus_get_first_dev ensures
+	 * we process all of them.
+	 */
+	if (r2->len) {
+		addr1 += r1->len;
+		addr2 += r2->len;
+	}
+
+	if (addr1 > addr2)
 		return 1;
+
 	return 0;
 }
 
-- 
cgit v0.10.2


From 62bea5bff486644ecf363fe8a1a2f6f32c614a49 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 15 Sep 2015 18:27:57 +0200
Subject: KVM: add halt_attempted_poll to VCPU stats

This new statistic can help diagnosing VCPUs that, for any reason,
trigger bad behavior of halt_poll_ns autotuning.

For example, say halt_poll_ns = 480000, and wakeups are spaced exactly
like 479us, 481us, 479us, 481us. Then KVM always fails polling and wastes
10+20+40+80+160+320+480 = 1110 microseconds out of every
479+481+479+481+479+481+479 = 3359 microseconds. The VCPU then
is consuming about 30% more CPU than it would use without
polling.  This would show as an abnormally high number of
attempted polling compared to the successful polls.

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com<
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dcba0fa..687ddeb 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -148,6 +148,7 @@ struct kvm_vm_stat {
 
 struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 };
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 415938d..4865945 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -195,6 +195,7 @@ struct kvm_vm_stat {
 
 struct kvm_vcpu_stat {
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 };
 
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index e8c8d9d..3a54dbc 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -128,6 +128,7 @@ struct kvm_vcpu_stat {
 	u32 msa_disabled_exits;
 	u32 flush_dcache_exits;
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 };
 
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index cd4c129..49ff3bf 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -55,6 +55,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
 	{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
+	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
 	{ "halt_wakeup",  VCPU_STAT(halt_wakeup),	 KVM_STAT_VCPU },
 	{NULL}
 };
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 98eebbf..195886a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -108,6 +108,7 @@ struct kvm_vcpu_stat {
 	u32 dec_exits;
 	u32 ext_intr_exits;
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 	u32 dbell_exits;
 	u32 gdbell_exits;
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index d75bf32..cf00916 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -53,6 +53,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "ext_intr",    VCPU_STAT(ext_intr_exits) },
 	{ "queue_intr",  VCPU_STAT(queue_intr) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), },
+	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "pf_storage",  VCPU_STAT(pf_storage) },
 	{ "sp_storage",  VCPU_STAT(sp_storage) },
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ae458f0..fd58751 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "dec",        VCPU_STAT(dec_exits) },
 	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "doorbell", VCPU_STAT(dbell_exits) },
 	{ "guest doorbell", VCPU_STAT(gdbell_exits) },
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3d012e0..6ce4a0b 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -210,6 +210,7 @@ struct kvm_vcpu_stat {
 	u32 exit_validity;
 	u32 exit_instruction;
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 	u32 instruction_lctl;
 	u32 instruction_lctlg;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index c91eb94..2f807ab 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -63,6 +63,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c12e845..349f80a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -711,6 +711,7 @@ struct kvm_vcpu_stat {
 	u32 nmi_window_exits;
 	u32 halt_exits;
 	u32 halt_successful_poll;
+	u32 halt_attempted_poll;
 	u32 halt_wakeup;
 	u32 request_irq_exits;
 	u32 irq_exits;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a60bdbc..6bbb0df 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -149,6 +149,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 	{ "nmi_window", VCPU_STAT(nmi_window_exits) },
 	{ "halt_exits", VCPU_STAT(halt_exits) },
 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
+	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
 	{ "hypercalls", VCPU_STAT(hypercalls) },
 	{ "request_irq", VCPU_STAT(request_irq_exits) },
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9af68db..04146a2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2004,6 +2004,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 	if (vcpu->halt_poll_ns) {
 		ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
 
+		++vcpu->stat.halt_attempted_poll;
 		do {
 			/*
 			 * This sets KVM_REQ_UNHALT if an interrupt
-- 
cgit v0.10.2


From 04bb92e4b4cf06a66889d37b892b78f926faa9d4 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpeng.li@hotmail.com>
Date: Wed, 16 Sep 2015 19:31:11 +0800
Subject: KVM: vmx: fix VPID is 0000H in non-root operation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reference SDM 28.1:

The current VPID is 0000H in the following situations:
- Outside VMX operation. (This includes operation in system-management
  mode under the default treatment of SMIs and SMM with VMX operation;
  see Section 34.14.)
- In VMX root operation.
- In VMX non-root operation when the “enable VPID” VM-execution control
  is 0.

The VPID should never be 0000H in non-root operation when "enable VPID"
VM-execution control is 1. However, commit 34a1cd60 ("kvm: x86: vmx:
move some vmx setting from vmx_init() to hardware_setup()") remove the
codes which reserve 0000H for VMX root operation.

This patch fix it by again reserving 0000H for VMX root operation.

Cc: stable@vger.kernel.org # 3.19+
Fixes: 34a1cd60d17f62c1f077c1478a6c2ca8c3d17af4
Reported-by: Wincy Van <fanwenyi0529@gmail.com>
Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d019868..6407674 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6064,6 +6064,8 @@ static __init int hardware_setup(void)
 	memcpy(vmx_msr_bitmap_longmode_x2apic,
 			vmx_msr_bitmap_longmode, PAGE_SIZE);
 
+	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+
 	if (enable_apicv) {
 		for (msr = 0x800; msr <= 0x8ff; msr++)
 			vmx_disable_intercept_msr_read_x2apic(msr);
-- 
cgit v0.10.2


From 1713e5aa05fff3951e747548b373bd2c81be4e7a Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 16 Sep 2015 10:54:37 +0100
Subject: arm64: KVM: Fix user access for debug registers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When setting the debug register from userspace, make sure that
copy_from_user() is called with its parameters in the expected
order. It otherwise doesn't do what you think.

Fixes: 84e690bfbed1 ("KVM: arm64: introduce vcpu->arch.debug_ptr")
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Cc: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b41607d..1d0463e 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -272,7 +272,7 @@ static int set_bvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 {
 	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bvr[rd->reg];
 
-	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
 	return 0;
 }
@@ -314,7 +314,7 @@ static int set_bcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 {
 	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_bcr[rd->reg];
 
-	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
 
 	return 0;
@@ -358,7 +358,7 @@ static int set_wvr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 {
 	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wvr[rd->reg];
 
-	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
 	return 0;
 }
@@ -400,7 +400,7 @@ static int set_wcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd,
 {
 	__u64 *r = &vcpu->arch.vcpu_debug_state.dbg_wcr[rd->reg];
 
-	if (copy_from_user(uaddr, r, KVM_REG_SIZE(reg->id)) != 0)
+	if (copy_from_user(r, uaddr, KVM_REG_SIZE(reg->id)) != 0)
 		return -EFAULT;
 	return 0;
 }
-- 
cgit v0.10.2


From ca09f02f122b2ecb0f5ddfc5fd47b29ed657d4fd Mon Sep 17 00:00:00 2001
From: Marek Majtyka <marek.majtyka@tieto.com>
Date: Wed, 16 Sep 2015 12:04:55 +0200
Subject: arm: KVM: Fix incorrect device to IPA mapping

A critical bug has been found in device memory stage1 translation for
VMs with more then 4GB of address space. Once vm_pgoff size is smaller
then pa (which is true for LPAE case, u32 and u64 respectively) some
more significant bits of pa may be lost as a shift operation is performed
on u32 and later cast onto u64.

Example: vm_pgoff(u32)=0x00210030, PAGE_SHIFT=12
        expected pa(u64):   0x0000002010030000
        produced pa(u64):   0x0000000010030000

The fix is to change the order of operations (casting first onto phys_addr_t
and then shifting).

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
[maz: fixed changelog and patch formatting]
Cc: stable@vger.kernel.org
Signed-off-by: Marek Majtyka <marek.majtyka@tieto.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 7b42012..6984342 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -1792,8 +1792,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		if (vma->vm_flags & VM_PFNMAP) {
 			gpa_t gpa = mem->guest_phys_addr +
 				    (vm_start - mem->userspace_addr);
-			phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) +
-					 vm_start - vma->vm_start;
+			phys_addr_t pa;
+
+			pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT;
+			pa += vm_start - vma->vm_start;
 
 			/* IO region dirty page logging not allowed */
 			if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES)
-- 
cgit v0.10.2


From 9bf9fde2c98ba8362ea1d41d8bd8b32a23776e67 Mon Sep 17 00:00:00 2001
From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com>
Date: Wed, 16 Sep 2015 09:13:50 -0400
Subject: KVM: s390: Replace incorrect atomic_or with atomic_andnot

The offending commit accidentally replaces an atomic_clear with an
atomic_or instead of an atomic_andnot in kvm_s390_vcpu_request_handled.
The symptom is that kvm guests on s390 hang on startup.
This patch simply replaces the incorrect atomic_or with atomic_andnot

Fixes: 805de8f43c20 (atomic: Replace atomic_{set,clear}_mask() usage)
Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2f807ab..0a67c40 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1575,7 +1575,7 @@ static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
 
 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
 {
-	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
+	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
 }
 
 /*
-- 
cgit v0.10.2


From c2f58514cfb374d5368c9da945f1765cd48eb0da Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Wed, 5 Aug 2015 11:53:57 +0100
Subject: arm/arm64: KVM: vgic: Check for !irqchip_in_kernel() when mapping
 resources

Until b26e5fdac43c ("arm/arm64: KVM: introduce per-VM ops"),
kvm_vgic_map_resources() used to include a check on irqchip_in_kernel(),
and vgic_v2_map_resources() still has it.

But now vm_ops are not initialized until we call kvm_vgic_create().
Therefore kvm_vgic_map_resources() can being called without a VGIC,
and we die because vm_ops.map_resources is NULL.

Fixing this restores QEMU's kernel-irqchip=off option to a working state,
allowing to use GIC emulation in userspace.

Fixes: b26e5fdac43c ("arm/arm64: KVM: introduce per-VM ops")
Cc: stable@vger.kernel.org
Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
[maz: reworked commit message]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ce404a5..dc017ad 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -446,7 +446,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Map the VGIC hardware resources before running a vcpu the first
 	 * time on this VM.
 	 */
-	if (unlikely(!vgic_ready(kvm))) {
+	if (unlikely(irqchip_in_kernel(kvm) && !vgic_ready(kvm))) {
 		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
-- 
cgit v0.10.2


From c4cbba9fa078f55d9f6d081dbb4aec7cf969e7c7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 16 Sep 2015 16:18:59 +0100
Subject: arm64: KVM: Disable virtual timer even if the guest is not using it

When running a guest with the architected timer disabled (with QEMU and
the kernel_irqchip=off option, for example), it is important to make
sure the timer gets turned off. Otherwise, the guest may try to
enable it anyway, leading to a screaming HW interrupt.

The fix is to unconditionally turn off the virtual timer on guest
exit.

Cc: stable@vger.kernel.org
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 39aa322..60a83e2 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -562,8 +562,6 @@
 	mrs	x3, cntv_ctl_el0
 	and	x3, x3, #3
 	str	w3, [x0, #VCPU_TIMER_CNTV_CTL]
-	bic	x3, x3, #1		// Clear Enable
-	msr	cntv_ctl_el0, x3
 
 	isb
 
@@ -571,6 +569,9 @@
 	str	x3, [x0, #VCPU_TIMER_CNTV_CVAL]
 
 1:
+	// Disable the virtual timer
+	msr	cntv_ctl_el0, xzr
+
 	// Allow physical timer/counter access for the host
 	mrs	x2, cnthctl_el2
 	orr	x2, x2, #3
-- 
cgit v0.10.2


From 688bc577ac42ae3d07c889a1f0a72f0b23763d58 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 16 Sep 2015 16:18:59 +0100
Subject: arm: KVM: Disable virtual timer even if the guest is not using it

When running a guest with the architected timer disabled (with QEMU and
the kernel_irqchip=off option, for example), it is important to make
sure the timer gets turned off. Otherwise, the guest may try to
enable it anyway, leading to a screaming HW interrupt.

The fix is to unconditionally turn off the virtual timer on guest
exit.

Cc: stable@vger.kernel.org
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 702740d..51a5950 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -515,8 +515,7 @@ ARM_BE8(rev	r6, r6  )
 
 	mrc	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
 	str	r2, [vcpu, #VCPU_TIMER_CNTV_CTL]
-	bic	r2, #1			@ Clear ENABLE
-	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
+
 	isb
 
 	mrrc	p15, 3, rr_lo_hi(r2, r3), c14	@ CNTV_CVAL
@@ -529,6 +528,9 @@ ARM_BE8(rev	r6, r6  )
 	mcrr	p15, 4, r2, r2, c14	@ CNTVOFF
 
 1:
+	mov	r2, #0			@ Clear ENABLE
+	mcr	p15, 0, r2, c14, c3, 1	@ CNTV_CTL
+
 	@ Allow physical timer/counter access for the host
 	mrc	p15, 4, r2, c14, c1, 0	@ CNTHCTL
 	orr	r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
-- 
cgit v0.10.2


From 34c3faa353db8f5d3ce9966cf854d5643c64c4db Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Tue, 15 Sep 2015 17:15:33 +0100
Subject: arm64: KVM: Remove all traces of the ThumbEE registers

Although the ThumbEE registers and traps were present in earlier
versions of the v8 architecture, it was retrospectively removed and so
we can do the same.

Whilst this breaks migrating a guest started on a previous version of
the kernel, it is much better to kill these (non existent) registers
as soon as possible.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
[maz: added commend about migration]
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index cbc5e1a..9694f26 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -172,7 +172,6 @@
 #define VTTBR_VMID_MASK	  (UL(0xFF) << VTTBR_VMID_SHIFT)
 
 /* Hyp System Trap Register */
-#define HSTR_EL2_TTEE	(1 << 16)
 #define HSTR_EL2_T(x)	(1 << x)
 
 /* Hyp Coproccessor Trap Register Shifts */
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 67fa0de..5e37710 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -53,9 +53,7 @@
 #define	IFSR32_EL2	25	/* Instruction Fault Status Register */
 #define	FPEXC32_EL2	26	/* Floating-Point Exception Control Register */
 #define	DBGVCR32_EL2	27	/* Debug Vector Catch Register */
-#define	TEECR32_EL1	28	/* ThumbEE Configuration Register */
-#define	TEEHBR32_EL1	29	/* ThumbEE Handler Base Register */
-#define	NR_SYS_REGS	30
+#define	NR_SYS_REGS	28
 
 /* 32bit mapping */
 #define c0_MPIDR	(MPIDR_EL1 * 2)	/* MultiProcessor ID Register */
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 60a83e2..8563477 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -433,20 +433,13 @@
 	mrs	x5, ifsr32_el2
 	stp	x4, x5, [x3]
 
-	skip_fpsimd_state x8, 3f
+	skip_fpsimd_state x8, 2f
 	mrs	x6, fpexc32_el2
 	str	x6, [x3, #16]
-3:
-	skip_debug_state x8, 2f
+2:
+	skip_debug_state x8, 1f
 	mrs	x7, dbgvcr32_el2
 	str	x7, [x3, #24]
-2:
-	skip_tee_state x8, 1f
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
-	mrs	x4, teecr32_el1
-	mrs	x5, teehbr32_el1
-	stp	x4, x5, [x3]
 1:
 .endm
 
@@ -466,16 +459,9 @@
 	msr	dacr32_el2, x4
 	msr	ifsr32_el2, x5
 
-	skip_debug_state x8, 2f
+	skip_debug_state x8, 1f
 	ldr	x7, [x3, #24]
 	msr	dbgvcr32_el2, x7
-2:
-	skip_tee_state x8, 1f
-
-	add	x3, x2, #CPU_SYSREG_OFFSET(TEECR32_EL1)
-	ldp	x4, x5, [x3]
-	msr	teecr32_el1, x4
-	msr	teehbr32_el1, x5
 1:
 .endm
 
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 1d0463e..d03d3af 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -539,13 +539,6 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	{ Op0(0b10), Op1(0b000), CRn(0b0111), CRm(0b1110), Op2(0b110),
 	  trap_dbgauthstatus_el1 },
 
-	/* TEECR32_EL1 */
-	{ Op0(0b10), Op1(0b010), CRn(0b0000), CRm(0b0000), Op2(0b000),
-	  NULL, reset_val, TEECR32_EL1, 0 },
-	/* TEEHBR32_EL1 */
-	{ Op0(0b10), Op1(0b010), CRn(0b0001), CRm(0b0000), Op2(0b000),
-	  NULL, reset_val, TEEHBR32_EL1, 0 },
-
 	/* MDCCSR_EL1 */
 	{ Op0(0b10), Op1(0b011), CRn(0b0000), CRm(0b0001), Op2(0b000),
 	  trap_raz_wi },
-- 
cgit v0.10.2


From ef748917b529847277f07c98c55e1c0ce416449f Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Wed, 2 Sep 2015 14:31:21 +0800
Subject: arm/arm64: KVM: Remove 'config KVM_ARM_MAX_VCPUS'

This patch removes config option of KVM_ARM_MAX_VCPUS,
and like other ARCHs, just choose the maximum allowed
value from hardware, and follows the reasons:

1) from distribution view, the option has to be
defined as the max allowed value because it need to
meet all kinds of virtulization applications and
need to support most of SoCs;

2) using a bigger value doesn't introduce extra memory
consumption, and the help text in Kconfig isn't accurate
because kvm_vpu structure isn't allocated until request
of creating VCPU is sent from QEMU;

3) the main effect is that the field of vcpus[] in 'struct kvm'
becomes a bit bigger(sizeof(void *) per vcpu) and need more cache
lines to hold the structure, but 'struct kvm' is one generic struct,
and it has worked well on other ARCHs already in this way. Also,
the world switch frequecy is often low, for example, it is ~2000
when running kernel building load in VM from APM xgene KVM host,
so the effect is very small, and the difference can't be observed
in my test at all.

Cc: Dann Frazier <dann.frazier@canonical.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dcba0fa..c8c226a 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -29,12 +29,6 @@
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
-#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
-#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
-#else
-#define KVM_MAX_VCPUS 0
-#endif
-
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -44,6 +38,8 @@
 
 #include <kvm/arm_vgic.h>
 
+#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
+
 u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
 int __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index bfb915d..210ecca 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -45,15 +45,4 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
-config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM"
-	depends on KVM_ARM_HOST
-	default 4
-	help
-	  Static number of max supported virtual CPUs per VM.
-
-	  If you choose a high number, the vcpu structures will be quite
-	  large, so only choose a reasonable number that you expect to
-	  actually use.
-
 endif # VIRTUALIZATION
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 415938d..3fb58ea 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -30,12 +30,6 @@
 
 #define __KVM_HAVE_ARCH_INTC_INITIALIZED
 
-#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
-#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
-#else
-#define KVM_MAX_VCPUS 0
-#endif
-
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
@@ -43,6 +37,8 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 
+#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
+
 #define KVM_VCPU_MAX_FEATURES 3
 
 int __attribute_const__ kvm_target_cpu(void);
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index bfffe8f..5c7e920e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -41,15 +41,4 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
-config KVM_ARM_MAX_VCPUS
-	int "Number maximum supported virtual CPUs per VM"
-	depends on KVM_ARM_HOST
-	default 4
-	help
-	  Static number of max supported virtual CPUs per VM.
-
-	  If you choose a high number, the vcpu structures will be quite
-	  large, so only choose a reasonable number that you expect to
-	  actually use.
-
 endif # VIRTUALIZATION
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d901f1a..4e14dac 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -35,11 +35,7 @@
 #define VGIC_V3_MAX_LRS		16
 #define VGIC_MAX_IRQS		1024
 #define VGIC_V2_MAX_CPUS	8
-
-/* Sanity checks... */
-#if (KVM_MAX_VCPUS > 255)
-#error Too many KVM VCPUs, the VGIC only supports up to 255 VCPUs for now
-#endif
+#define VGIC_V3_MAX_CPUS	255
 
 #if (VGIC_NR_IRQS_LEGACY & 31)
 #error "VGIC_NR_IRQS must be a multiple of 32"
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index afbf925..7dd5d62 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -288,7 +288,7 @@ int vgic_v3_probe(struct device_node *vgic_node,
 
 	vgic->vctrl_base = NULL;
 	vgic->type = VGIC_V3;
-	vgic->max_gic_vcpus = KVM_MAX_VCPUS;
+	vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
 
 	kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
 		 vcpu_res.start, vgic->maint_irq);
-- 
cgit v0.10.2


From 00cc1633816de8c95f337608a1ea64e228faf771 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Fri, 18 Sep 2015 11:27:45 +0200
Subject: sched: access local runqueue directly in single_task_running

Commit 2ee507c47293 ("sched: Add function single_task_running to let a task
check if it is the only task running on a cpu") referenced the current
runqueue with the smp_processor_id.  When CONFIG_DEBUG_PREEMPT is enabled,
that is only allowed if preemption is disabled or the currrent task is
bound to the local cpu (e.g. kernel worker).

With commit f78195129963 ("kvm: add halt_poll_ns module parameter") KVM
calls single_task_running. If CONFIG_DEBUG_PREEMPT is enabled that
generates a lot of kernel messages.

To avoid adding preemption in that cases, as it would limit the usefulness,
we change single_task_running to access directly the cpu local runqueue.

Cc: Tim Chen <tim.c.chen@linux.intel.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Fixes: 2ee507c472939db4b146d545352b8a7c79ef47f8
Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3595403..4064f79 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2666,13 +2666,20 @@ unsigned long nr_running(void)
 
 /*
  * Check if only the current task is running on the cpu.
+ *
+ * Caution: this function does not check that the caller has disabled
+ * preemption, thus the result might have a time-of-check-to-time-of-use
+ * race.  The caller is responsible to use it correctly, for example:
+ *
+ * - from a non-preemptable section (of course)
+ *
+ * - from a thread that is bound to a single CPU
+ *
+ * - in a loop with very short iterations (e.g. a polling loop)
  */
 bool single_task_running(void)
 {
-	if (cpu_rq(smp_processor_id())->nr_running == 1)
-		return true;
-	else
-		return false;
+	return raw_rq()->nr_running == 1;
 }
 EXPORT_SYMBOL(single_task_running);
 
-- 
cgit v0.10.2