From 3cd60e31185343d4132ca7cf3c9becb903b3ec1b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 4 Jun 2014 16:47:55 +0530 Subject: KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation We use time base for PURR and SPURR emulation with PR KVM since we are emulating a single threaded core. When using time base we need to make sure that we don't accumulate time spent in the host in PURR and SPURR value. Also we don't need to emulate mtspr because both the registers are hypervisor resource. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index f52f656..a20cc0b 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -83,8 +83,6 @@ struct kvmppc_vcpu_book3s { u64 sdr1; u64 hior; u64 msr_mask; - u64 purr_offset; - u64 spurr_offset; #ifdef CONFIG_PPC_BOOK3S_32 u32 vsid_pool[VSID_POOL_SIZE]; u32 vsid_next; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bb66d8b..4a58731 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -503,8 +503,8 @@ struct kvm_vcpu_arch { #ifdef CONFIG_BOOKE u32 decar; #endif - u32 tbl; - u32 tbu; + /* Time base value when we entered the guest */ + u64 entry_tb; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3f29526..3565e77 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -439,12 +439,6 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) (mfmsr() & MSR_HV)) vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32; break; - case SPRN_PURR: - to_book3s(vcpu)->purr_offset = spr_val - get_tb(); - break; - case SPRN_SPURR: - to_book3s(vcpu)->spurr_offset = spr_val - get_tb(); - break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: @@ -572,10 +566,16 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val *spr_val = 0; break; case SPRN_PURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated purr + */ + *spr_val = vcpu->arch.purr; break; case SPRN_SPURR: - *spr_val = get_tb() + to_book3s(vcpu)->purr_offset; + /* + * On exit we would have updated spurr + */ + *spr_val = vcpu->arch.spurr; break; case SPRN_GQR0: case SPRN_GQR1: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8eef1e5..671f5c92 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -120,6 +120,11 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, #ifdef CONFIG_PPC_BOOK3S_64 svcpu->shadow_fscr = vcpu->arch.shadow_fscr; #endif + /* + * Now also save the current time base value. We use this + * to find the guest purr and spurr value. + */ + vcpu->arch.entry_tb = get_tb(); svcpu->in_use = true; } @@ -166,6 +171,12 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, #ifdef CONFIG_PPC_BOOK3S_64 vcpu->arch.shadow_fscr = svcpu->shadow_fscr; #endif + /* + * Update purr and spurr using time base on exit. 
+ */ + vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; + vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; + svcpu->in_use = false; out: -- cgit v0.10.2 From 8f42ab2749d00ea15157ab896cfbed73a247b3e1 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2014 17:38:02 +0530 Subject: KVM: PPC: BOOK3S: PR: Emulate virtual timebase register virtual time base register is a per VM, per cpu register that needs to be saved and restored on vm exit and entry. Writing to VTB is not allowed in the privileged mode. Signed-off-by: Aneesh Kumar K.V [agraf: fix compile error] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 4a58731..bd3caea 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -505,6 +505,7 @@ struct kvm_vcpu_arch { #endif /* Time base value when we entered the guest */ u64 entry_tb; + u64 entry_vtb; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bffd89d..c8f3381 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1203,6 +1203,15 @@ : "r" ((unsigned long)(v)) \ : "memory") +static inline unsigned long mfvtb (void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfspr(SPRN_VTB); +#endif + return 0; +} + #ifdef __powerpc64__ #if defined(CONFIG_PPC_CELL) || defined(CONFIG_PPC_FSL_BOOK3E) #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h index 1d428e60..03cbada 100644 --- a/arch/powerpc/include/asm/time.h +++ b/arch/powerpc/include/asm/time.h @@ -102,6 +102,15 @@ static inline u64 get_rtc(void) return (u64)hi * 1000000000 + lo; } +static inline u64 get_vtb(void) +{ +#ifdef CONFIG_PPC_BOOK3S_64 + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + return mfvtb(); +#endif + return 0; +} + #ifdef CONFIG_PPC64 static inline u64 get_tb(void) { diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index c254c27..ddce1ea 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -646,6 +646,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: val = get_reg_val(reg->id, vcpu->arch.bescr); break; + case KVM_REG_PPC_VTB: + val = get_reg_val(reg->id, vcpu->arch.vtb); + break; default: r = -EINVAL; break; @@ -750,6 +753,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_BESCR: vcpu->arch.bescr = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_VTB: + vcpu->arch.vtb = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 3565e77..1bb16a5 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -577,6 +577,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val */ *spr_val = vcpu->arch.spurr; break; + case SPRN_VTB: + *spr_val = vcpu->arch.vtb; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7a12edb..315e884 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -897,9 +897,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IC: *val = get_reg_val(id, vcpu->arch.ic); break; - case 
KVM_REG_PPC_VTB: - *val = get_reg_val(id, vcpu->arch.vtb); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -1097,9 +1094,6 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_IC: vcpu->arch.ic = set_reg_val(id, *val); break; - case KVM_REG_PPC_VTB: - vcpu->arch.vtb = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 671f5c92..d2deb9e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -125,6 +125,7 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, * to find the guest purr and spurr value. */ vcpu->arch.entry_tb = get_tb(); + vcpu->arch.entry_vtb = get_vtb(); svcpu->in_use = true; } @@ -176,7 +177,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, */ vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; - + vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; svcpu->in_use = false; out: -- cgit v0.10.2 From 06da28e76b87331ebccdb6d486cfd94835b8be5e Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 5 Jun 2014 17:38:05 +0530 Subject: KVM: PPC: BOOK3S: PR: Emulate instruction counter Writing to IC is not allowed in the privileged mode. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index bd3caea..f9ae696 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -506,6 +506,7 @@ struct kvm_vcpu_arch { /* Time base value when we entered the guest */ u64 entry_tb; u64 entry_vtb; + u64 entry_ic; u32 tcr; ulong tsr; /* we need to perform set/clr_bits() which requires ulong */ u32 ivor[64]; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index ddce1ea..90aa5c7 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -649,6 +649,9 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_VTB: val = get_reg_val(reg->id, vcpu->arch.vtb); break; + case KVM_REG_PPC_IC: + val = get_reg_val(reg->id, vcpu->arch.ic); + break; default: r = -EINVAL; break; @@ -756,6 +759,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) case KVM_REG_PPC_VTB: vcpu->arch.vtb = set_reg_val(reg->id, val); break; + case KVM_REG_PPC_IC: + vcpu->arch.ic = set_reg_val(reg->id, val); + break; default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 1bb16a5..84fddcd 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -580,6 +580,9 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_VTB: *spr_val = vcpu->arch.vtb; break; + case SPRN_IC: + *spr_val = vcpu->arch.ic; + break; case SPRN_GQR0: case SPRN_GQR1: case SPRN_GQR2: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 315e884..1562acf 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -894,9 +894,6 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; - case KVM_REG_PPC_IC: - *val = get_reg_val(id, vcpu->arch.ic); - break; case KVM_REG_PPC_CSIGR: *val = get_reg_val(id, vcpu->arch.csigr); break; @@ -1091,9 +1088,6 @@ static int kvmppc_set_one_reg_hv(struct 
kvm_vcpu *vcpu, u64 id, if ((vcpu->arch.ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) vcpu->arch.ciabr &= ~CIABR_PRIV; /* disable */ break; - case KVM_REG_PPC_IC: - vcpu->arch.ic = set_reg_val(id, *val); - break; case KVM_REG_PPC_CSIGR: vcpu->arch.csigr = set_reg_val(id, *val); break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index d2deb9e..3da412e 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -126,6 +126,8 @@ void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, */ vcpu->arch.entry_tb = get_tb(); vcpu->arch.entry_vtb = get_vtb(); + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.entry_ic = mfspr(SPRN_IC); svcpu->in_use = true; } @@ -178,6 +180,8 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, vcpu->arch.purr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.spurr += get_tb() - vcpu->arch.entry_tb; vcpu->arch.vtb += get_vtb() - vcpu->arch.entry_vtb; + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcpu->arch.ic += mfspr(SPRN_IC) - vcpu->arch.entry_ic; svcpu->in_use = false; out: -- cgit v0.10.2 From fb4188bad02f4871b26cf19b98e8d92499ca5d31 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 9 Jun 2014 01:16:32 +0200 Subject: KVM: PPC: Book3s PR: Disable AIL mode with OPAL When we're using PR KVM we must not allow the CPU to take interrupts in virtual mode, as the SLB does not contain host kernel mappings when running inside the guest context. To make sure we get good performance for non-KVM tasks but still properly functioning PR KVM, let's just disable AIL whenever a vcpu is scheduled in. This is fundamentally different from how we deal with AIL on pSeries type machines where we disable AIL for the whole machine as soon as a single KVM VM is up. The reason for that is easy - on pSeries we do not have control over per-cpu configuration of AIL. We also don't want to mess with CPU hotplug races and AIL configuration, so setting it per CPU is easier and more flexible. This patch fixes running PR KVM on POWER8 bare metal for me. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3da412e..8ea7da4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -71,6 +71,12 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) svcpu->in_use = 0; svcpu_put(svcpu); #endif + + /* Disable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~LPCR_AIL); + vcpu->cpu = smp_processor_id(); #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; @@ -91,6 +97,12 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_HVMODE) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_AIL_3); + vcpu->cpu = -1; } -- cgit v0.10.2 From f6bf3a66227447f89f25b9db0ae39357decf2509 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 17:13:55 +0200 Subject: KVM: PPC: Book3s HV: Fix tlbie compile error Some compilers complain about uninitialized variables in the compute_tlbie_rb function. When you follow the code path you'll realize that we'll never get to that point, but the compiler isn't all that smart. 
So just default to 4k page sizes for everything, making the compiler happy and the code slightly easier to read. Signed-off-by: Alexander Graf Acked-by: Paul Mackerras diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b..c7871f3 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -110,16 +110,12 @@ static inline int __hpte_actual_psize(unsigned int lp, int psize) static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, unsigned long pte_index) { - int b_psize, a_psize; + int b_psize = MMU_PAGE_4K, a_psize = MMU_PAGE_4K; unsigned int penc; unsigned long rb = 0, va_low, sllp; unsigned int lp = (r >> LP_SHIFT) & ((1 << LP_BITS) - 1); - if (!(v & HPTE_V_LARGE)) { - /* both base and actual psize is 4k */ - b_psize = MMU_PAGE_4K; - a_psize = MMU_PAGE_4K; - } else { + if (v & HPTE_V_LARGE) { for (b_psize = 0; b_psize < MMU_PAGE_COUNT; b_psize++) { /* valid entries have a shift value */ -- cgit v0.10.2 From 568fccc43f901889b94b228cd0238916cb40e0bd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 16:37:38 +0200 Subject: KVM: PPC: Book3S PR: Handle hyp doorbell exits If we're running PR KVM in HV mode, we may get hypervisor doorbell interrupts. Handle those the same way we treat normal doorbells. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 8ea7da4..3b82e86 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -988,6 +988,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, case BOOK3S_INTERRUPT_DECREMENTER: case BOOK3S_INTERRUPT_HV_DECREMENTER: case BOOK3S_INTERRUPT_DOORBELL: + case BOOK3S_INTERRUPT_H_DOORBELL: vcpu->stat.dec_exits++; r = RESUME_GUEST; break; -- cgit v0.10.2 From 05a308c722822b0fbcc706b54be70f9bb9d52539 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:10 +1000 Subject: KVM: PPC: Book3S HV: Fix ABIv2 indirect branch issue To establish addressability quickly, ABIv2 requires the target address of the function being called to be in r12. Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 868347e..da1cac5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1913,8 +1913,8 @@ hcall_try_real_mode: lwax r3,r3,r4 cmpwi r3,0 beq guest_exit_cont - add r3,r3,r4 - mtctr r3 + add r12,r3,r4 + mtctr r12 mr r3,r9 /* get vcpu pointer */ ld r4,VCPU_GPR(R4)(r9) bctrl -- cgit v0.10.2 From ad7d4584a225e3c1ac634dc803421fe842491ecf Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:53 +1000 Subject: KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC() Both kvmppc_hv_entry_trampoline and kvmppc_entry_trampoline are assembly functions that are exported to modules and also require a valid r2. As such we need to use _GLOBAL_TOC so we provide a global entry point that establishes the TOC (r2). 
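For context, on an ABIv2 kernel _GLOBAL_TOC emits a dual entry point: the global entry derives r2 from the function address in r12 and then falls through to the local entry. A rough sketch of the expansion (paraphrased from the ppc_asm.h definition, not a verbatim copy):

0:	addis	r2,r12,(.TOC.-0b)@ha	/* global entry: rebuild the TOC */
	addi	r2,r2,(.TOC.-0b)@l	/* pointer from the address in r12 */
	.localentry name,.-name		/* local entry starts here */

Same-module callers branch to the local entry and skip the TOC setup; cross-module callers (such as the kvm module calling these trampolines) enter at the top, which is also why the previous patch had to put the target address in r12.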
Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index da1cac5..64ac56f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675..4850a22 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -146,7 +146,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) -- cgit v0.10.2 From da166facd432d0edde509f941304915f9bed704d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:37:53 +0200 Subject: KVM: PPC: Book3S PR: Fix ABIv2 on LE We switched to ABIv2 on Little Endian systems now which gets rid of the dotted function names. Branch to the actual functions when we see such a system. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e3..d044b8b 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 4850a22..16c4d88 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) -- cgit v0.10.2 From f396df35188c59a5ecb83932190505ef297754e6 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 13:58:11 +0200 Subject: KVM: PPC: Book3S PR: Fix sparse endian checks While sending sparse with endian checks over the code base, it triggered at some places that were missing casts or had wrong types. Fix them up. 
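The pattern being enforced is worth spelling out; as a minimal sketch (variable names hypothetical), HPT entries stay big-endian in memory and are converted exactly once at the access boundary:

	__be64 pteg[2];				/* raw HPT entry, big-endian in memory */
	u64 v = be64_to_cpu(pteg[0]);		/* convert once for native computation */
	pteg[0] = cpu_to_be64(v | HPTE_V_VALID);	/* convert back on store */

The (__force __be64) casts in the hunks below cover the cases where an existing u64 array really does hold big-endian data; __force tells sparse that the reinterpretation is intentional rather than a missed conversion.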
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 52a63bf..f7c25c6 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -40,8 +40,9 @@ static int kvmppc_h_pr_enter(struct kvm_vcpu *vcpu) { long flags = kvmppc_get_gpr(vcpu, 4); long pte_index = kvmppc_get_gpr(vcpu, 5); - unsigned long pteg[2 * 8]; - unsigned long pteg_addr, i, *hpte; + __be64 pteg[2 * 8]; + __be64 *hpte; + unsigned long pteg_addr, i; long int ret; i = pte_index & 7; @@ -93,8 +94,8 @@ static int kvmppc_h_pr_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -171,8 +172,8 @@ static int kvmppc_h_pr_bulk_remove(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, tsh & H_BULK_REMOVE_PTEX); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); /* tsl = AVPN */ flags = (tsh & H_BULK_REMOVE_FLAGS) >> 26; @@ -211,8 +212,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) pteg = get_pteg_addr(vcpu, pte_index); mutex_lock(&vcpu->kvm->arch.hpt_mutex); copy_from_user(pte, (void __user *)pteg, sizeof(pte)); - pte[0] = be64_to_cpu(pte[0]); - pte[1] = be64_to_cpu(pte[1]); + pte[0] = be64_to_cpu((__force __be64)pte[0]); + pte[1] = be64_to_cpu((__force __be64)pte[1]); ret = H_NOT_FOUND; if ((pte[0] & HPTE_V_VALID) == 0 || @@ -231,8 +232,8 @@ static int kvmppc_h_pr_protect(struct kvm_vcpu *vcpu) rb = compute_tlbie_rb(v, r, pte_index); vcpu->arch.mmu.tlbie(vcpu, rb, rb & 1 ? true : false); - pte[0] = cpu_to_be64(pte[0]); - pte[1] = cpu_to_be64(pte[1]); + pte[0] = (__force u64)cpu_to_be64(pte[0]); + pte[1] = (__force u64)cpu_to_be64(pte[1]); copy_to_user((void __user *)pteg, pte, sizeof(pte)); ret = H_SUCCESS; -- cgit v0.10.2 From 1f0eeb7e1a88f46afa0f435cf7c34b0c84cf2394 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 18 Jun 2014 10:15:22 +0300 Subject: KVM: PPC: e500mc: Enhance tlb invalidation condition on vcpu schedule On vcpu schedule, the condition checked for tlb pollution is too loose. The tlb entries of a vcpu become polluted (vs stale) only when a different vcpu within the same logical partition runs in-between. Optimize the tlb invalidation condition keeping last_vcpu per logical partition id. With the new invalidation condition, a guest shows 4% performance improvement on P5020DS while running a memory stress application with the cpu oversubscribed, the other guest running a cpu intensive workload. 
Guest - old invalidation condition real 3.89 user 3.87 sys 0.01 Guest - enhanced invalidation condition real 3.75 user 3.73 sys 0.01 Host real 3.70 user 1.85 sys 0.00 The memory stress application accesses 4KB pages backed by 75% of available TLB0 entries: char foo[ENTRIES][4096] __attribute__ ((aligned (4096))); int main() { char bar; int i, j; for (i = 0; i < ITERATIONS; i++) for (j = 0; j < ENTRIES; j++) bar = foo[j][0]; return 0; } Signed-off-by: Mihai Caraman Reviewed-by: Scott Wood Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 17e4562..690499d 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -110,7 +110,7 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) { } -static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu_on_cpu); +static DEFINE_PER_CPU(struct kvm_vcpu *[KVMPPC_NR_LPIDS], last_vcpu_of_lpid); static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) { @@ -141,9 +141,9 @@ static void kvmppc_core_vcpu_load_e500mc(struct kvm_vcpu *vcpu, int cpu) mtspr(SPRN_GESR, vcpu->arch.shared->esr); if (vcpu->arch.oldpir != mfspr(SPRN_PIR) || - __get_cpu_var(last_vcpu_on_cpu) != vcpu) { + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] != vcpu) { kvmppc_e500_tlbil_all(vcpu_e500); - __get_cpu_var(last_vcpu_on_cpu) = vcpu; + __get_cpu_var(last_vcpu_of_lpid)[vcpu->kvm->arch.lpid] = vcpu; } kvmppc_load_guest_fp(vcpu); -- cgit v0.10.2 From 699a0ea0823d32030b0666b28ff8633960f7ffa7 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:02:59 +1000 Subject: KVM: PPC: Book3S: Controls for in-kernel sPAPR hypercall handling This provides a way for userspace controls which sPAPR hcalls get handled in the kernel. Each hcall can be individually enabled or disabled for in-kernel handling, except for H_RTAS. The exception for H_RTAS is because userspace can already control whether individual RTAS functions are handled in-kernel or not via the KVM_PPC_RTAS_DEFINE_TOKEN ioctl, and because the numeric value for H_RTAS is out of the normal sequence of hcall numbers. Hcalls are enabled or disabled using the KVM_ENABLE_CAP ioctl for the KVM_CAP_PPC_ENABLE_HCALL capability on the file descriptor for the VM. The args field of the struct kvm_enable_cap specifies the hcall number in args[0] and the enable/disable flag in args[1]; 0 means disable in-kernel handling (so that the hcall will always cause an exit to userspace) and 1 means enable. Enabling or disabling in-kernel handling of an hcall is effective across the whole VM. The ability for KVM_ENABLE_CAP to be used on a VM file descriptor on PowerPC is new, added by this commit. The KVM_CAP_ENABLE_CAP_VM capability advertises that this ability exists. When a VM is created, an initial set of hcalls are enabled for in-kernel handling. The set that is enabled is the set that have an in-kernel implementation at this point. Any new hcall implementations from this point onwards should not be added to the default set without a good reason. No distinction is made between real-mode and virtual-mode hcall implementations; the one setting controls them both. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 0fe3649..5c54d19 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2863,8 +2863,8 @@ The fields in each entry are defined as follows: this function/index combination -6. 
Capabilities that can be enabled ------------------------------------ +6. Capabilities that can be enabled on vCPUs +-------------------------------------------- There are certain capabilities that change the behavior of the virtual CPU when enabled. To enable them, please see section 4.37. Below you can find a list of @@ -3002,3 +3002,40 @@ Parameters: args[0] is the XICS device fd args[1] is the XICS CPU number (server ID) for this vcpu This capability connects the vcpu to an in-kernel XICS device. + + +7. Capabilities that can be enabled on VMs +------------------------------------------ + +There are certain capabilities that change the behavior of the virtual +machine when enabled. To enable them, please see section 4.37. Below +you can find a list of capabilities and what their effect on the VM +is when enabling them. + +The following information is provided along with the description: + + Architectures: which instruction set architectures provide this ioctl. + x86 includes both i386 and x86_64. + + Parameters: what parameters are accepted by the capability. + + Returns: the return value. General error numbers (EBADF, ENOMEM, EINVAL) + are not detailed, but errors with specific meanings are. + + +7.1 KVM_CAP_PPC_ENABLE_HCALL + +Architectures: ppc +Parameters: args[0] is the sPAPR hcall number + args[1] is 0 to disable, 1 to enable in-kernel handling + +This capability controls whether individual sPAPR hypercalls (hcalls) +get handled by the kernel or not. Enabling or disabling in-kernel +handling of an hcall is effective across the VM. On creation, an +initial set of hcalls are enabled for in-kernel handling, which +consists of those hcalls for which in-kernel handlers were implemented +before this capability was implemented. If disabled, the kernel will +not to attempt to handle the hcall, but will always exit to userspace +to handle it. Note that it may not make sense to enable some and +disable others of a group of related hcalls, but KVM does not prevent +userspace from doing that. 
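From userspace the control path looks like the following sketch (not part of the patch; vm_fd is assumed to be an open VM file descriptor, and the H_CEDE value comes from PAPR since the sPAPR hcall numbers are not in the uapi headers):

#include <sys/ioctl.h>
#include <linux/kvm.h>

#define H_CEDE	0xE0	/* sPAPR hcall number, from PAPR / asm/hvcall.h */

/* Make H_CEDE exit to userspace instead of being handled in-kernel. */
static int disable_h_cede(int vm_fd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_PPC_ENABLE_HCALL,
		.args = { H_CEDE, 0 },	/* args[1] = 0: disable in-kernel handling */
	};

	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
}

Note the ioctl is issued on the VM file descriptor rather than a vcpu descriptor, matching the VM-wide scope described above.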
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a20cc0b..052ab2a 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -187,6 +187,7 @@ extern void kvmppc_hv_entry_trampoline(void); extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); +extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index f9ae696..62b2cee 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -34,6 +34,7 @@ #include #include #include +#include #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS @@ -263,6 +264,7 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; struct list_head rtas_tokens; + DECLARE_BITMAP(enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); #endif #ifdef CONFIG_KVM_MPIC struct openpic *mpic; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index f5995a9..17ffcb4 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -493,6 +493,7 @@ int main(void) DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1)); DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock)); DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits)); + DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls)); DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr)); DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor)); DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v)); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1562acf..cf445d2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -67,6 +67,8 @@ /* Used as a "null" value for timebase values */ #define TB_NIL (~(u64)0) +static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -562,6 +564,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) struct kvm_vcpu *tvcpu; int idx, rc; + if (req <= MAX_HCALL_OPCODE && + !test_bit(req/4, vcpu->kvm->arch.enabled_hcalls)) + return RESUME_HOST; + switch (req) { case H_ENTER: idx = srcu_read_lock(&vcpu->kvm->srcu); @@ -2269,6 +2275,10 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm) */ cpumask_setall(&kvm->arch.need_tlb_flush); + /* Start out with the default set of hcalls enabled */ + memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls, + sizeof(kvm->arch.enabled_hcalls)); + kvm->arch.rma = NULL; kvm->arch.host_sdr1 = mfspr(SPRN_SDR1); @@ -2407,6 +2417,45 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp, return r; } +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. 
+ */ +static unsigned int default_hcall_list[] = { + H_REMOVE, + H_ENTER, + H_READ, + H_PROTECT, + H_BULK_REMOVE, + H_GET_TCE, + H_PUT_TCE, + H_SET_DABR, + H_SET_XDABR, + H_CEDE, + H_PROD, + H_CONFER, + H_REGISTER_VPA, +#ifdef CONFIG_KVM_XICS + H_EOI, + H_CPPR, + H_IPI, + H_IPOLL, + H_XIRR, + H_XIRR_X, +#endif + 0 +}; + +static void init_default_hcalls(void) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -2454,6 +2503,8 @@ static int kvmppc_book3s_init_hv(void) kvm_ops_hv.owner = THIS_MODULE; kvmppc_hv_ops = &kvm_ops_hv; + init_default_hcalls(); + r = kvmppc_mmu_hv_init(); return r; } diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 64ac56f..33aaade 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1909,6 +1909,17 @@ hcall_try_real_mode: clrrdi r3,r3,2 cmpldi r3,hcall_real_table_end - hcall_real_table bge guest_exit_cont + /* See if this hcall is enabled for in-kernel handling */ + ld r4, VCPU_KVM(r9) + srdi r0, r3, 8 /* r0 = (r3 / 4) >> 6 */ + sldi r0, r0, 3 /* index into kvm->arch.enabled_hcalls[] */ + add r4, r4, r0 + ld r0, KVM_ENABLED_HCALLS(r4) + rlwinm r4, r3, 32-2, 0x3f /* r4 = (r3 / 4) & 0x3f */ + srd r0, r0, r4 + andi. r0, r0, 1 + beq guest_exit_cont + /* Get pointer to handler, if any, and call it */ LOAD_REG_ADDR(r4, hcall_real_table) lwax r3,r3,r4 cmpwi r3,0 diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 3b82e86..123ac7d 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1597,6 +1597,11 @@ static int kvmppc_core_init_vm_pr(struct kvm *kvm) { mutex_init(&kvm->arch.hpt_mutex); +#ifdef CONFIG_PPC_BOOK3S_64 + /* Start out with the default set of hcalls enabled */ + kvmppc_pr_init_default_hcalls(kvm); +#endif + if (firmware_has_feature(FW_FEATURE_SET_MODE)) { spin_lock(&kvm_global_user_count_lock); if (++kvm_global_user_count == 1) diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index f7c25c6..eacaa6e 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,10 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + if (cmd <= MAX_HCALL_OPCODE && + !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) + return EMULATE_FAIL; + switch (cmd) { case H_ENTER: return kvmppc_h_pr_enter(vcpu); @@ -304,3 +308,36 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } + + +/* + * List of hcall numbers to enable by default. + * For compatibility with old userspace, we enable by default + * all hcalls that were implemented before the hcall-enabling + * facility was added. Note this list should not include H_RTAS. 
+ */ +static unsigned int default_hcall_list[] = { + H_ENTER, + H_REMOVE, + H_PROTECT, + H_BULK_REMOVE, + H_PUT_TCE, + H_CEDE, +#ifdef CONFIG_KVM_XICS + H_XIRR, + H_CPPR, + H_EOI, + H_IPI, + H_IPOLL, + H_XIRR_X, +#endif + 0 +}; + +void kvmppc_pr_init_default_hcalls(struct kvm *kvm) +{ + int i; + + for (i = 0; default_hcall_list[i]; ++i) + __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 61c738a..3222a4d 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -387,6 +387,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -417,6 +418,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PPC_ALLOC_HTAB: case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: + case KVM_CAP_PPC_ENABLE_HCALL: #ifdef CONFIG_KVM_XICS case KVM_CAP_IRQ_XICS: #endif @@ -1099,6 +1101,40 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, return 0; } + +static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + case KVM_CAP_PPC_ENABLE_HCALL: { + unsigned long hcall = cap->args[0]; + + r = -EINVAL; + if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || + cap->args[1] > 1) + break; + if (cap->args[1]) + set_bit(hcall / 4, kvm->arch.enabled_hcalls); + else + clear_bit(hcall / 4, kvm->arch.enabled_hcalls); + r = 0; + break; + } +#endif + default: + r = -EINVAL; + break; + } + + return r; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -1118,6 +1154,15 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } + case KVM_ENABLE_CAP: + { + struct kvm_enable_cap cap; + r = -EFAULT; + if (copy_from_user(&cap, argp, sizeof(cap))) + goto out; + r = kvm_vm_ioctl_enable_cap(kvm, &cap); + break; + } #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CREATE_SPAPR_TCE: { struct kvm_create_spapr_tce create_tce; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index e11d8f1..0418b74 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -758,6 +758,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_VM_ATTRIBUTES 101 #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 +#define KVM_CAP_PPC_ENABLE_HCALL 104 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v0.10.2 From ae2113a4f1a6cd5a3cd3d75f394547922758e9ac Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 2 Jun 2014 11:03:00 +1000 Subject: KVM: PPC: Book3S: Allow only implemented hcalls to be enabled or disabled This adds code to check that when the KVM_CAP_PPC_ENABLE_HCALL capability is used to enable or disable in-kernel handling of an hcall, that the hcall is actually implemented by the kernel. If not an EINVAL error is returned. This also checks the default-enabled list of hcalls and prints a warning if any hcall there is not actually implemented. Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 5c54d19..6955318 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3039,3 +3039,7 @@ not to attempt to handle the hcall, but will always exit to userspace to handle it. 
Note that it may not make sense to enable some and disable others of a group of related hcalls, but KVM does not prevent userspace from doing that. + +If the hcall number specified is not one that has an in-kernel +implementation, the KVM_ENABLE_CAP ioctl will fail with an EINVAL +error. diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 052ab2a..ceb70aa 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -146,6 +146,7 @@ extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache * extern int kvmppc_mmu_hpte_sysinit(void); extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); +extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); @@ -188,6 +189,8 @@ extern u32 kvmppc_alignment_dsisr(struct kvm_vcpu *vcpu, unsigned int inst); extern ulong kvmppc_alignment_dar(struct kvm_vcpu *vcpu, unsigned int inst); extern int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd); extern void kvmppc_pr_init_default_hcalls(struct kvm *kvm); +extern int kvmppc_hcall_impl_pr(unsigned long cmd); +extern int kvmppc_hcall_impl_hv_realmode(unsigned long cmd); extern void kvmppc_copy_to_svcpu(struct kvmppc_book3s_shadow_vcpu *svcpu, struct kvm_vcpu *vcpu); extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9c89cdd..e2fd5a1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -228,7 +228,7 @@ struct kvmppc_ops { void (*fast_vcpu_kick)(struct kvm_vcpu *vcpu); long (*arch_vm_ioctl)(struct file *filp, unsigned int ioctl, unsigned long arg); - + int (*hcall_implemented)(unsigned long hcall); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 90aa5c7..bd75902 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -925,6 +925,11 @@ int kvmppc_core_check_processor_compat(void) return 0; } +int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hcall) +{ + return kvm->arch.kvm_ops->hcall_implemented(hcall); +} + static int kvmppc_book3s_init(void) { int r; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index cf445d2..c4377c7 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -645,6 +645,28 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +static int kvmppc_hcall_impl_hv(unsigned long cmd) +{ + switch (cmd) { + case H_CEDE: + case H_PROD: + case H_CONFER: + case H_REGISTER_VPA: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + + /* See if it's in the real-mode table */ + return kvmppc_hcall_impl_hv_realmode(cmd); +} + static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, struct task_struct *tsk) { @@ -2451,9 +2473,13 @@ static unsigned int default_hcall_list[] = { static void init_default_hcalls(void) { int i; + unsigned int hcall; - for (i = 0; default_hcall_list[i]; ++i) - __set_bit(default_hcall_list[i] / 4, default_enabled_hcalls); + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_hv(hcall)); + 
__set_bit(hcall / 4, default_enabled_hcalls); + } } static struct kvmppc_ops kvm_ops_hv = { @@ -2488,6 +2514,7 @@ static struct kvmppc_ops kvm_ops_hv = { .emulate_mfspr = kvmppc_core_emulate_mfspr_hv, .fast_vcpu_kick = kvmppc_fast_vcpu_kick_hv, .arch_vm_ioctl = kvm_arch_vm_ioctl_hv, + .hcall_implemented = kvmppc_hcall_impl_hv, }; static int kvmppc_book3s_init_hv(void) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cde8a6..3b41447 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -212,3 +212,16 @@ bool kvm_hv_mode_active(void) { return atomic_read(&hv_vm_count) != 0; } + +extern int hcall_real_table[], hcall_real_table_end[]; + +int kvmppc_hcall_impl_hv_realmode(unsigned long cmd) +{ + cmd /= 4; + if (cmd < hcall_real_table_end - hcall_real_table && + hcall_real_table[cmd]) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 33aaade..e66c1e38 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2042,6 +2042,7 @@ hcall_real_table: .long 0 /* 0x12c */ .long 0 /* 0x130 */ .long DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table + .globl hcall_real_table_end hcall_real_table_end: ignore_hdec: diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 123ac7d..15fd6c2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1670,6 +1670,9 @@ static struct kvmppc_ops kvm_ops_pr = { .emulate_mfspr = kvmppc_core_emulate_mfspr_pr, .fast_vcpu_kick = kvm_vcpu_kick, .arch_vm_ioctl = kvm_arch_vm_ioctl_pr, +#ifdef CONFIG_PPC_BOOK3S_64 + .hcall_implemented = kvmppc_hcall_impl_pr, +#endif }; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index eacaa6e..6d0143f 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -309,6 +309,27 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) return EMULATE_FAIL; } +int kvmppc_hcall_impl_pr(unsigned long cmd) +{ + switch (cmd) { + case H_ENTER: + case H_REMOVE: + case H_PROTECT: + case H_BULK_REMOVE: + case H_PUT_TCE: + case H_CEDE: +#ifdef CONFIG_KVM_XICS + case H_XIRR: + case H_CPPR: + case H_EOI: + case H_IPI: + case H_IPOLL: + case H_XIRR_X: +#endif + return 1; + } + return 0; +} /* * List of hcall numbers to enable by default. 
@@ -337,7 +358,11 @@ static unsigned int default_hcall_list[] = { void kvmppc_pr_init_default_hcalls(struct kvm *kvm) { int i; + unsigned int hcall; - for (i = 0; default_hcall_list[i]; ++i) - __set_bit(default_hcall_list[i] / 4, kvm->arch.enabled_hcalls); + for (i = 0; default_hcall_list[i]; ++i) { + hcall = default_hcall_list[i]; + WARN_ON(!kvmppc_hcall_impl_pr(hcall)); + __set_bit(hcall / 4, kvm->arch.enabled_hcalls); + } } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3222a4d..7efc2b7 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1119,6 +1119,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, if (hcall > MAX_HCALL_OPCODE || (hcall & 3) || cap->args[1] > 1) break; + if (!kvmppc_book3s_hcall_implemented(kvm, hcall)) + break; if (cap->args[1]) set_bit(hcall / 4, kvm->arch.enabled_hcalls); else -- cgit v0.10.2 From 9642382e826066c2d30d1b23d1b45410cdd8e07d Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Mon, 2 Jun 2014 11:03:01 +1000 Subject: KVM: PPC: Book3S HV: Add H_SET_MODE hcall handling This adds support for the H_SET_MODE hcall. This hcall is a multiplexer that has several functions, some of which are called rarely, and some which are potentially called very frequently. Here we add support for the functions that set the debug registers CIABR (Completed Instruction Address Breakpoint Register) and DAWR/DAWRX (Data Address Watchpoint Register and eXtension), since they could be updated by the guest as often as every context switch. This also adds a kvmppc_power8_compatible() function to test to see if a guest is compatible with POWER8 or not. The CIABR and DAWR/X only exist on POWER8. Signed-off-by: Michael Neuling Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 5dbbb29..85bc8c0 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -279,6 +279,12 @@ #define H_GET_24X7_DATA 0xF07C #define H_GET_PERF_COUNTER_INFO 0xF080 +/* Values for 2nd argument to H_SET_MODE */ +#define H_SET_MODE_RESOURCE_SET_CIABR 1 +#define H_SET_MODE_RESOURCE_SET_DAWR 2 +#define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE 3 +#define H_SET_MODE_RESOURCE_LE 4 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c4377c7..7db9df2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -557,6 +557,48 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, vcpu->arch.dtl.dirty = true; } +static bool kvmppc_power8_compatible(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.vcore->arch_compat >= PVR_ARCH_207) + return true; + if ((!vcpu->arch.vcore->arch_compat) && + cpu_has_feature(CPU_FTR_ARCH_207S)) + return true; + return false; +} + +static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, + unsigned long resource, unsigned long value1, + unsigned long value2) +{ + switch (resource) { + case H_SET_MODE_RESOURCE_SET_CIABR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (value2) + return H_P4; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + /* Guests can't breakpoint the hypervisor */ + if ((value1 & CIABR_PRIV) == CIABR_PRIV_HYPER) + return H_P3; + vcpu->arch.ciabr = value1; + return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr = value1; + 
vcpu->arch.dawrx = value2; + return H_SUCCESS; + default: + return H_TOO_HARD; + } +} + int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) { unsigned long req = kvmppc_get_gpr(vcpu, 3); @@ -626,7 +668,14 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) /* Send the error out to userspace via KVM_RUN */ return rc; - + case H_SET_MODE: + ret = kvmppc_h_set_mode(vcpu, kvmppc_get_gpr(vcpu, 4), + kvmppc_get_gpr(vcpu, 5), + kvmppc_get_gpr(vcpu, 6), + kvmppc_get_gpr(vcpu, 7)); + if (ret == H_TOO_HARD) + return RESUME_HOST; + break; case H_XIRR: case H_CPPR: case H_EOI: @@ -652,6 +701,7 @@ static int kvmppc_hcall_impl_hv(unsigned long cmd) case H_PROD: case H_CONFER: case H_REGISTER_VPA: + case H_SET_MODE: #ifdef CONFIG_KVM_XICS case H_XIRR: case H_CPPR: -- cgit v0.10.2 From d57cef91a0c30d3439a4d235eb94ab9efbf797a0 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Mon, 30 Jun 2014 15:54:58 +0300 Subject: KVM: PPC: e500: Fix default tlb for victim hint Tlb search operation used for victim hint relies on the default tlb set by the host. When hardware tablewalk support is enabled in the host, the default tlb is TLB1 which leads KVM to evict the bolted entry. Set and restore the default tlb when searching for victim hint. Signed-off-by: Mihai Caraman Reviewed-by: Scott Wood Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index d0918e0..8d24f78 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,7 +40,9 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL(x) (((x) << 28) & 0x30000000) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -86,6 +88,7 @@ #define MAS3_SPSIZE 0x0000003e #define MAS3_SPSIZE_SHIFT 1 +#define MAS4_TLBSEL_MASK MAS0_TLBSEL_MASK #define MAS4_TLBSELD(x) MAS0_TLBSEL(x) #define MAS4_INDD 0x00008000 /* Default IND */ #define MAS4_TSIZED(x) MAS1_TSIZE(x) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03..79677d7 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -107,11 +107,15 @@ static u32 get_host_mas0(unsigned long eaddr) { unsigned long flags; u32 mas0; + u32 mas4; local_irq_save(flags); mtspr(SPRN_MAS6, 0); + mas4 = mfspr(SPRN_MAS4); + mtspr(SPRN_MAS4, mas4 & ~MAS4_TLBSEL_MASK); asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); mas0 = mfspr(SPRN_MAS0); + mtspr(SPRN_MAS4, mas4); local_irq_restore(flags); return mas0; -- cgit v0.10.2 From 8f6822c4b9fac6e47414d2f1e11dbabda9bc2163 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:07:40 +0200 Subject: PPC: Add asm helpers for BE 32bit load/store From assembly code we might not only have to explicitly BE access 64bit values, but sometimes also 32bit ones. Add helpers that allow for easy use of lwzx/stwx in their respective byte-reverse or native form. 
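Used in a .S file the new helpers read as follows (registers arbitrary, shown only to illustrate the indexed-form operands; a sketch, not taken from the patch):

	LWZX_BE	r5, r4, r6	/* load the BE word at r4+r6 into r5, native order */
	STWX_BE	r5, r4, r6	/* store r5 back as a big-endian word */

On a big-endian host these assemble to plain lwzx/stwx; on a little-endian host they become the byte-reversing lwbrx/stwbrx, so the in-memory format stays big-endian either way.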
Signed-off-by: Alexander Graf CC: Benjamin Herrenschmidt diff --git a/arch/powerpc/include/asm/asm-compat.h b/arch/powerpc/include/asm/asm-compat.h index 4b237aa..21be8ae 100644 --- a/arch/powerpc/include/asm/asm-compat.h +++ b/arch/powerpc/include/asm/asm-compat.h @@ -34,10 +34,14 @@ #define PPC_MIN_STKFRM 112 #ifdef __BIG_ENDIAN__ +#define LWZX_BE stringify_in_c(lwzx) #define LDX_BE stringify_in_c(ldx) +#define STWX_BE stringify_in_c(stwx) #define STDX_BE stringify_in_c(stdx) #else +#define LWZX_BE stringify_in_c(lwbrx) #define LDX_BE stringify_in_c(ldbrx) +#define STWX_BE stringify_in_c(stwbrx) #define STDX_BE stringify_in_c(stdbrx) #endif -- cgit v0.10.2 From 6f22bd3265fb542acb2697026b953ec07298242d Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:16:06 +0200 Subject: KVM: PPC: Book3S HV: Make HTAB code LE host aware When running on an LE host all data structures are kept in little endian byte order. However, the HTAB still needs to be maintained in big endian. So every time we access any HTAB we need to make sure we do so in the right byte order. Fix up all accesses to manually byte swap. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index ceb70aa..8ac5392 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -162,9 +162,9 @@ extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr, unsigned long *nb_ret); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index c7871f3..e504f88 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -59,20 +59,29 @@ extern unsigned long kvm_rma_pages; /* These bits are reserved in the guest view of the HPTE */ #define HPTE_GR_RESERVED HPTE_GR_MODIFIED -static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits) +static inline long try_lock_hpte(__be64 *hpte, unsigned long bits) { unsigned long tmp, old; + __be64 be_lockbit, be_bits; + + /* + * We load/store in native endian, but the HTAB is in big endian. If + * we byte swap all data we apply on the PTE we're implicitly correct + * again. + */ + be_lockbit = cpu_to_be64(HPTE_V_HVLOCK); + be_bits = cpu_to_be64(bits); asm volatile(" ldarx %0,0,%2\n" " and. %1,%0,%3\n" " bne 2f\n" - " ori %0,%0,%4\n" + " or %0,%0,%4\n" " stdcx. 
%0,0,%2\n" " beq+ 2f\n" " mr %1,%3\n" "2: isync" : "=&r" (tmp), "=&r" (old) - : "r" (hpte), "r" (bits), "i" (HPTE_V_HVLOCK) + : "r" (hpte), "r" (be_bits), "r" (be_lockbit) : "cc", "memory"); return old == 0; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8056107..2d154d9 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -450,7 +450,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned long slb_v; unsigned long pp, key; unsigned long v, gr; - unsigned long *hptep; + __be64 *hptep; int index; int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); @@ -473,13 +473,13 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, preempt_enable(); return -ENOENT; } - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hptep[0] & ~HPTE_V_HVLOCK; + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; gr = kvm->arch.revmap[index].guest_rpte; /* Unlock the HPTE */ asm volatile("lwsync" : : : "memory"); - hptep[0] = v; + hptep[0] = cpu_to_be64(v); preempt_enable(); gpte->eaddr = eaddr; @@ -583,7 +583,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr) { struct kvm *kvm = vcpu->kvm; - unsigned long *hptep, hpte[3], r; + unsigned long hpte[3], r; + __be64 *hptep; unsigned long mmu_seq, psize, pte_size; unsigned long gpa_base, gfn_base; unsigned long gpa, gfn, hva, pfn; @@ -606,16 +607,16 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, if (ea != vcpu->arch.pgfault_addr) return RESUME_GUEST; index = vcpu->arch.pgfault_index; - hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); + hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); rev = &kvm->arch.revmap[index]; preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; - hpte[1] = hptep[1]; + hpte[0] = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK; + hpte[1] = be64_to_cpu(hptep[1]); hpte[2] = r = rev->guest_rpte; asm volatile("lwsync" : : : "memory"); - hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); preempt_enable(); if (hpte[0] != vcpu->arch.pgfault_hpte[0] || @@ -731,8 +732,9 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, preempt_disable(); while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) cpu_relax(); - if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || - rev->guest_rpte != hpte[2]) + if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] || + be64_to_cpu(hptep[1]) != hpte[1] || + rev->guest_rpte != hpte[2]) /* HPTE has been changed under us; let the guest retry */ goto out_unlock; hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; @@ -752,20 +754,20 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; r &= rcbits | ~(HPTE_R_R | HPTE_R_C); - if (hptep[0] & HPTE_V_VALID) { + if (be64_to_cpu(hptep[0]) & HPTE_V_VALID) { /* HPTE was previously valid, so we need to invalidate it */ unlock_rmap(rmap); - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, index); /* don't lose previous R and C bits */ - r |= hptep[1] & (HPTE_R_R | HPTE_R_C); + r |= be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); } else { kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); } - hptep[1] = r; + hptep[1] = cpu_to_be64(r); eieio(); - 
hptep[0] = hpte[0]; + hptep[0] = cpu_to_be64(hpte[0]); asm volatile("ptesync" : : : "memory"); preempt_enable(); if (page && hpte_is_writable(r)) @@ -784,7 +786,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; out_unlock: - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); goto out_put; } @@ -860,7 +862,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long h, i, j; - unsigned long *hptep; + __be64 *hptep; unsigned long ptel, psize, rcbits; for (;;) { @@ -876,11 +878,11 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, * rmap chain lock. */ i = *rmapp & KVMPPC_RMAP_INDEX; - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); continue; } @@ -899,14 +901,14 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ ptel = rev[i].guest_rpte; - psize = hpte_page_size(hptep[0], ptel); - if ((hptep[0] & HPTE_V_VALID) && + psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel); + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && hpte_rpn(ptel, psize) == gfn) { if (kvm->arch.using_mmu_notifiers) - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); /* Harvest R and C */ - rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); + rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; if (rcbits & ~rev[i].guest_rpte) { rev[i].guest_rpte = ptel | rcbits; @@ -914,7 +916,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, } } unlock_rmap(rmapp); - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } return 0; } @@ -961,7 +963,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, { struct revmap_entry *rev = kvm->arch.revmap; unsigned long head, i, j; - unsigned long *hptep; + __be64 *hptep; int ret = 0; retry: @@ -977,23 +979,24 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* If this HPTE isn't referenced, ignore it */ - if (!(hptep[1] & HPTE_R_R)) + if (!(be64_to_cpu(hptep[1]) & HPTE_R_R)) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (be64_to_cpu(hptep[0]) & HPTE_V_HVLOCK) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { + if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptep[1]) & HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); if (!(rev[i].guest_rpte & HPTE_R_R)) { rev[i].guest_rpte |= HPTE_R_R; @@ -1001,7 +1004,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, } ret = 1; } - hptep[0] &= ~HPTE_V_HVLOCK; + hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1035,7 +1038,7 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, do { hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; - 
if (hp[1] & HPTE_R_R) + if (be64_to_cpu(hp[1]) & HPTE_R_R) goto out; } while ((i = j) != head); } @@ -1075,7 +1078,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) unsigned long head, i, j; unsigned long n; unsigned long v, r; - unsigned long *hptep; + __be64 *hptep; int npages_dirty = 0; retry: @@ -1091,7 +1094,8 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) i = head = *rmapp & KVMPPC_RMAP_INDEX; do { - hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); + unsigned long hptep1; + hptep = (__be64 *) (kvm->arch.hpt_virt + (i << 4)); j = rev[i].forw; /* @@ -1108,29 +1112,30 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) * Otherwise we need to do the tlbie even if C==0 in * order to pick up any delayed writeback of C. */ - if (!(hptep[1] & HPTE_R_C) && - (!hpte_is_writable(hptep[1]) || vcpus_running(kvm))) + hptep1 = be64_to_cpu(hptep[1]); + if (!(hptep1 & HPTE_R_C) && + (!hpte_is_writable(hptep1) || vcpus_running(kvm))) continue; if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { /* unlock rmap before spinning on the HPTE lock */ unlock_rmap(rmapp); - while (hptep[0] & HPTE_V_HVLOCK) + while (hptep[0] & cpu_to_be64(HPTE_V_HVLOCK)) cpu_relax(); goto retry; } /* Now check and modify the HPTE */ - if (!(hptep[0] & HPTE_V_VALID)) + if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) continue; /* need to make it temporarily absent so C is stable */ - hptep[0] |= HPTE_V_ABSENT; + hptep[0] |= cpu_to_be64(HPTE_V_ABSENT); kvmppc_invalidate_hpte(kvm, hptep, i); - v = hptep[0]; - r = hptep[1]; + v = be64_to_cpu(hptep[0]); + r = be64_to_cpu(hptep[1]); if (r & HPTE_R_C) { - hptep[1] = r & ~HPTE_R_C; + hptep[1] = cpu_to_be64(r & ~HPTE_R_C); if (!(rev[i].guest_rpte & HPTE_R_C)) { rev[i].guest_rpte |= HPTE_R_C; note_hpte_modification(kvm, &rev[i]); @@ -1143,7 +1148,7 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp) } v &= ~(HPTE_V_ABSENT | HPTE_V_HVLOCK); v |= HPTE_V_VALID; - hptep[0] = v; + hptep[0] = cpu_to_be64(v); } while ((i = j) != head); unlock_rmap(rmapp); @@ -1307,7 +1312,7 @@ struct kvm_htab_ctx { * Returns 1 if this HPT entry has been modified or has pending * R/C bit changes. 
*/ -static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +static int hpte_dirty(struct revmap_entry *revp, __be64 *hptp) { unsigned long rcbits_unset; @@ -1316,13 +1321,14 @@ static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) /* Also need to consider changes in reference and changed bits */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if ((hptp[0] & HPTE_V_VALID) && (hptp[1] & rcbits_unset)) + if ((be64_to_cpu(hptp[0]) & HPTE_V_VALID) && + (be64_to_cpu(hptp[1]) & rcbits_unset)) return 1; return 0; } -static long record_hpte(unsigned long flags, unsigned long *hptp, +static long record_hpte(unsigned long flags, __be64 *hptp, unsigned long *hpte, struct revmap_entry *revp, int want_valid, int first_pass) { @@ -1337,10 +1343,10 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, return 0; valid = 0; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) { valid = 1; if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && - !(hptp[0] & HPTE_V_BOLTED)) + !(be64_to_cpu(hptp[0]) & HPTE_V_BOLTED)) valid = 0; } if (valid != want_valid) @@ -1352,7 +1358,7 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, preempt_disable(); while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) cpu_relax(); - v = hptp[0]; + v = be64_to_cpu(hptp[0]); /* re-evaluate valid and dirty from synchronized HPTE value */ valid = !!(v & HPTE_V_VALID); @@ -1360,9 +1366,9 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, /* Harvest R and C into guest view if necessary */ rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C); - if (valid && (rcbits_unset & hptp[1])) { - revp->guest_rpte |= (hptp[1] & (HPTE_R_R | HPTE_R_C)) | - HPTE_GR_MODIFIED; + if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) { + revp->guest_rpte |= (be64_to_cpu(hptp[1]) & + (HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED; dirty = 1; } @@ -1381,13 +1387,13 @@ static long record_hpte(unsigned long flags, unsigned long *hptp, revp->guest_rpte = r; } asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hptp[0] &= ~HPTE_V_HVLOCK; + hptp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); preempt_enable(); if (!(valid == want_valid && (first_pass || dirty))) ok = 0; } - hpte[0] = v; - hpte[1] = r; + hpte[0] = cpu_to_be64(v); + hpte[1] = cpu_to_be64(r); return ok; } @@ -1397,7 +1403,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, struct kvm_htab_ctx *ctx = file->private_data; struct kvm *kvm = ctx->kvm; struct kvm_get_htab_header hdr; - unsigned long *hptp; + __be64 *hptp; struct revmap_entry *revp; unsigned long i, nb, nw; unsigned long __user *lbuf; @@ -1413,7 +1419,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf, flags = ctx->flags; i = ctx->index; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); revp = kvm->arch.revmap + i; lbuf = (unsigned long __user *)buf; @@ -1497,7 +1503,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, unsigned long i, j; unsigned long v, r; unsigned long __user *lbuf; - unsigned long *hptp; + __be64 *hptp; unsigned long tmp[2]; ssize_t nb; long int err, ret; @@ -1539,7 +1545,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) break; - hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); lbuf = (unsigned long __user *)buf; for 
(j = 0; j < hdr.n_valid; ++j) { err = -EFAULT; @@ -1551,7 +1557,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, lbuf += 2; nb += HPTE_SIZE; - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); err = -EIO; ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, @@ -1577,7 +1583,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, } for (j = 0; j < hdr.n_invalid; ++j) { - if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + if (be64_to_cpu(hptp[0]) & (HPTE_V_VALID | HPTE_V_ABSENT)) kvmppc_do_h_remove(kvm, 0, i, 0, tmp); ++i; hptp += 2; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e62243..e5c6063 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -154,10 +154,10 @@ static pte_t lookup_linux_pte_and_update(pgd_t *pgdir, unsigned long hva, return kvmppc_read_update_linux_pte(ptep, writing, hugepage_shift); } -static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v) +static inline void unlock_hpte(__be64 *hpte, unsigned long hpte_v) { asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); - hpte[0] = hpte_v; + hpte[0] = cpu_to_be64(hpte_v); } long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, @@ -166,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, { unsigned long i, pa, gpa, gfn, psize; unsigned long slot_fn, hva; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long g_ptel; struct kvm_memory_slot *memslot; @@ -275,9 +275,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); for (i = 0; i < 8; ++i) { - if ((*hpte & HPTE_V_VALID) == 0 && + if ((be64_to_cpu(*hpte) & HPTE_V_VALID) == 0 && try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) break; @@ -292,11 +292,13 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, */ hpte -= 16; for (i = 0; i < 8; ++i) { + u64 pte; while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (!(*hpte & (HPTE_V_VALID | HPTE_V_ABSENT))) + pte = be64_to_cpu(*hpte); + if (!(pte & (HPTE_V_VALID | HPTE_V_ABSENT))) break; - *hpte &= ~HPTE_V_HVLOCK; + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); hpte += 2; } if (i == 8) @@ -304,14 +306,17 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } pte_index += i; } else { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); if (!try_lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID | HPTE_V_ABSENT)) { /* Lock the slot and check again */ + u64 pte; + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if (*hpte & (HPTE_V_VALID | HPTE_V_ABSENT)) { - *hpte &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(*hpte); + if (pte & (HPTE_V_VALID | HPTE_V_ABSENT)) { + *hpte &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_PTEG_FULL; } } @@ -347,11 +352,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, } } - hpte[1] = ptel; + hpte[1] = cpu_to_be64(ptel); /* Write the first HPTE dword, unlocking the HPTE and making it valid */ eieio(); - hpte[0] = pteh; + hpte[0] = cpu_to_be64(pteh); asm volatile("ptesync" : : : "memory"); *pte_idx_ret = pte_index; @@ -468,30 +473,35 @@ long kvmppc_do_h_remove(struct kvm *kvm, 
unsigned long flags, unsigned long pte_index, unsigned long avpn, unsigned long *hpret) { - unsigned long *hpte; + __be64 *hpte; unsigned long v, r, rb; struct revmap_entry *rev; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn) || - ((flags & H_ANDCOND) && (hpte[0] & avpn) != 0)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) || + ((flags & H_ANDCOND) && (pte & avpn) != 0)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); - v = hpte[0] & ~HPTE_V_HVLOCK; + v = pte & ~HPTE_V_HVLOCK; if (v & HPTE_V_VALID) { - hpte[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(v, hpte[1], pte_index); + u64 pte1; + + pte1 = be64_to_cpu(hpte[1]); + hpte[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(v, pte1, pte_index); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* Read PTE low word after tlbie to get final R/C values */ - remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]); + remove_revmap_chain(kvm, pte_index, rev, v, pte1); } r = rev->guest_rpte & ~HPTE_GR_RESERVED; note_hpte_modification(kvm, rev); @@ -514,12 +524,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; unsigned long *args = &vcpu->arch.gpr[4]; - unsigned long *hp, *hptes[4], tlbrb[4]; + __be64 *hp, *hptes[4]; + unsigned long tlbrb[4]; long int i, j, k, n, found, indexes[4]; unsigned long flags, req, pte_index, rcbits; int global; long int ret = H_SUCCESS; struct revmap_entry *rev, *revs[4]; + u64 hp0; global = global_invalidates(kvm, 0); for (i = 0; i < 4 && ret == H_SUCCESS; ) { @@ -542,8 +554,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) ret = H_PARAMETER; break; } - hp = (unsigned long *) - (kvm->arch.hpt_virt + (pte_index << 4)); + hp = (__be64 *) (kvm->arch.hpt_virt + (pte_index << 4)); /* to avoid deadlock, don't spin except for first */ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) { if (n) @@ -552,23 +563,24 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) cpu_relax(); } found = 0; - if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) { + hp0 = be64_to_cpu(hp[0]); + if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) { switch (flags & 3) { case 0: /* absolute */ found = 1; break; case 1: /* andcond */ - if (!(hp[0] & args[j + 1])) + if (!(hp0 & args[j + 1])) found = 1; break; case 2: /* AVPN */ - if ((hp[0] & ~0x7fUL) == args[j + 1]) + if ((hp0 & ~0x7fUL) == args[j + 1]) found = 1; break; } } if (!found) { - hp[0] &= ~HPTE_V_HVLOCK; + hp[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); args[j] = ((0x90 | flags) << 56) + pte_index; continue; } @@ -577,7 +589,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); note_hpte_modification(kvm, rev); - if (!(hp[0] & HPTE_V_VALID)) { + if (!(hp0 & HPTE_V_VALID)) { /* insert R and C bits from PTE */ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); @@ -585,8 +597,10 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) continue; } - hp[0] &= ~HPTE_V_VALID; /* leave it locked */ - tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index); + /* leave it locked */ + hp[0] &= 
~cpu_to_be64(HPTE_V_VALID); + tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]), + be64_to_cpu(hp[1]), pte_index); indexes[n] = j; hptes[n] = hp; revs[n] = rev; @@ -605,7 +619,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) pte_index = args[j] & ((1ul << 56) - 1); hp = hptes[k]; rev = revs[k]; - remove_revmap_chain(kvm, pte_index, rev, hp[0], hp[1]); + remove_revmap_chain(kvm, pte_index, rev, + be64_to_cpu(hp[0]), be64_to_cpu(hp[1])); rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C); args[j] |= rcbits << (56 - 5); hp[0] = 0; @@ -620,23 +635,25 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long va) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte; + __be64 *hpte; struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; + u64 pte; if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) cpu_relax(); - if ((hpte[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || - ((flags & H_AVPN) && (hpte[0] & ~0x7fUL) != avpn)) { - hpte[0] &= ~HPTE_V_HVLOCK; + pte = be64_to_cpu(hpte[0]); + if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 || + ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) { + hpte[0] &= ~cpu_to_be64(HPTE_V_HVLOCK); return H_NOT_FOUND; } - v = hpte[0]; + v = pte; bits = (flags << 55) & HPTE_R_PP0; bits |= (flags << 48) & HPTE_R_KEY_HI; bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO); @@ -650,12 +667,12 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, rev->guest_rpte = r; note_hpte_modification(kvm, rev); } - r = (hpte[1] & ~mask) | bits; + r = (be64_to_cpu(hpte[1]) & ~mask) | bits; /* Update HPTE */ if (v & HPTE_V_VALID) { rb = compute_tlbie_rb(v, r, pte_index); - hpte[0] = v & ~HPTE_V_VALID; + hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID); do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true); /* * If the host has this page as readonly but the guest @@ -681,9 +698,9 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, } } } - hpte[1] = r; + hpte[1] = cpu_to_be64(r); eieio(); - hpte[0] = v & ~HPTE_V_HVLOCK; + hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK); asm volatile("ptesync" : : : "memory"); return H_SUCCESS; } @@ -692,7 +709,8 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long pte_index) { struct kvm *kvm = vcpu->kvm; - unsigned long *hpte, v, r; + __be64 *hpte; + unsigned long v, r; int i, n = 1; struct revmap_entry *rev = NULL; @@ -704,9 +722,9 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, } rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); for (i = 0; i < n; ++i, ++pte_index) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); if (v & HPTE_V_ABSENT) { v &= ~HPTE_V_ABSENT; v |= HPTE_V_VALID; @@ -721,25 +739,27 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, return H_SUCCESS; } -void kvmppc_invalidate_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; - hptep[0] &= ~HPTE_V_VALID; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); + hptep[0] &= ~cpu_to_be64(HPTE_V_VALID); + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); do_tlbies(kvm, &rb, 1, 
1, true); } EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte); -void kvmppc_clear_ref_hpte(struct kvm *kvm, unsigned long *hptep, +void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index) { unsigned long rb; unsigned char rbyte; - rb = compute_tlbie_rb(hptep[0], hptep[1], pte_index); - rbyte = (hptep[1] & ~HPTE_R_R) >> 8; + rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]), + pte_index); + rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8; /* modify only the second-last byte, which contains the ref bit */ *((char *)hptep + 14) = rbyte; do_tlbies(kvm, &rb, 1, 1, false); @@ -765,7 +785,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, unsigned long somask; unsigned long vsid, hash; unsigned long avpn; - unsigned long *hpte; + __be64 *hpte; unsigned long mask, val; unsigned long v, r; @@ -797,11 +817,11 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, val |= avpn; for (;;) { - hpte = (unsigned long *)(kvm->arch.hpt_virt + (hash << 7)); + hpte = (__be64 *)(kvm->arch.hpt_virt + (hash << 7)); for (i = 0; i < 16; i += 2) { /* Read the PTE racily */ - v = hpte[i] & ~HPTE_V_HVLOCK; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; /* Check valid/absent, hash, segment size and AVPN */ if (!(v & valid) || (v & mask) != val) @@ -810,8 +830,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, /* Lock the PTE and read it under the lock */ while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK)) cpu_relax(); - v = hpte[i] & ~HPTE_V_HVLOCK; - r = hpte[i+1]; + v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[i+1]); /* * Check the HPTE again, including large page size @@ -825,7 +845,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, return (hash << 3) + (i >> 1); /* Unlock and move on */ - hpte[i] = v; + hpte[i] = cpu_to_be64(v); } if (val & HPTE_V_SECONDARY) @@ -854,7 +874,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, struct kvm *kvm = vcpu->kvm; long int index; unsigned long v, r, gr; - unsigned long *hpte; + __be64 *hpte; unsigned long valid; struct revmap_entry *rev; unsigned long pp, key; @@ -870,9 +890,9 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, return status; /* there really was no HPTE */ return 0; /* for prot fault, HPTE disappeared */ } - hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); - v = hpte[0] & ~HPTE_V_HVLOCK; - r = hpte[1]; + hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4)); + v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK; + r = be64_to_cpu(hpte[1]); rev = real_vmalloc_addr(&kvm->arch.revmap[index]); gr = rev->guest_rpte; -- cgit v0.10.2 From 02407552256111479fbfd23a3e01218b399aaa35 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:34:19 +0200 Subject: KVM: PPC: Book3S HV: Access guest VPA in BE There are a few shared data structures between the host and the guest. Most of them get registered through the VPA interface. These data structures are defined to always be in big endian byte order, so let's make sure we always access them in big endian. 
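For illustration, the access pattern this enforces looks roughly like the
sketch below (helper names invented here; the conversion is a byte swap on
little endian hosts and a no-op on big endian hosts):

	/* sketch only: read and bump a big endian guest-shared field */
	static inline u32 vpa_yield_count(struct lppaca *vpa)
	{
		return be32_to_cpu(vpa->yield_count);
	}

	static inline void vpa_bump_yield_count(struct lppaca *vpa)
	{
		vpa->yield_count = cpu_to_be32(vpa_yield_count(vpa) + 1);
	}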
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7db9df2..f1281c4 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -272,7 +272,7 @@ struct kvm_vcpu *kvmppc_find_vcpu(struct kvm *kvm, int id) static void init_vpa(struct kvm_vcpu *vcpu, struct lppaca *vpa) { vpa->__old_status |= LPPACA_OLD_SHARED_PROC; - vpa->yield_count = 1; + vpa->yield_count = cpu_to_be32(1); } static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, @@ -295,8 +295,8 @@ static int set_vpa(struct kvm_vcpu *vcpu, struct kvmppc_vpa *v, struct reg_vpa { u32 dummy; union { - u16 hword; - u32 word; + __be16 hword; + __be32 word; } length; }; @@ -335,9 +335,9 @@ static unsigned long do_h_register_vpa(struct kvm_vcpu *vcpu, if (va == NULL) return H_PARAMETER; if (subfunc == H_VPA_REG_VPA) - len = ((struct reg_vpa *)va)->length.hword; + len = be16_to_cpu(((struct reg_vpa *)va)->length.hword); else - len = ((struct reg_vpa *)va)->length.word; + len = be32_to_cpu(((struct reg_vpa *)va)->length.word); kvmppc_unpin_guest_page(kvm, va, vpa, false); /* Check length */ @@ -542,18 +542,18 @@ static void kvmppc_create_dtl_entry(struct kvm_vcpu *vcpu, return; memset(dt, 0, sizeof(struct dtl_entry)); dt->dispatch_reason = 7; - dt->processor_id = vc->pcpu + vcpu->arch.ptid; - dt->timebase = now + vc->tb_offset; - dt->enqueue_to_dispatch_time = stolen; - dt->srr0 = kvmppc_get_pc(vcpu); - dt->srr1 = vcpu->arch.shregs.msr; + dt->processor_id = cpu_to_be16(vc->pcpu + vcpu->arch.ptid); + dt->timebase = cpu_to_be64(now + vc->tb_offset); + dt->enqueue_to_dispatch_time = cpu_to_be32(stolen); + dt->srr0 = cpu_to_be64(kvmppc_get_pc(vcpu)); + dt->srr1 = cpu_to_be64(vcpu->arch.shregs.msr); ++dt; if (dt == vcpu->arch.dtl.pinned_end) dt = vcpu->arch.dtl.pinned_addr; vcpu->arch.dtl_ptr = dt; /* order writing *dt vs. writing vpa->dtl_idx */ smp_wmb(); - vpa->dtl_idx = ++vcpu->arch.dtl_index; + vpa->dtl_idx = cpu_to_be64(++vcpu->arch.dtl_index); vcpu->arch.dtl.dirty = true; } diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index 3a5c568..d562c8e 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -45,14 +45,14 @@ static void reload_slb(struct kvm_vcpu *vcpu) return; /* Sanity check */ - n = min_t(u32, slb->persistent, SLB_MIN_SIZE); + n = min_t(u32, be32_to_cpu(slb->persistent), SLB_MIN_SIZE); if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end) return; /* Load up the SLB from that */ for (i = 0; i < n; ++i) { - unsigned long rb = slb->save_area[i].esid; - unsigned long rs = slb->save_area[i].vsid; + unsigned long rb = be64_to_cpu(slb->save_area[i].esid); + unsigned long rs = be64_to_cpu(slb->save_area[i].vsid); rb = (rb & ~0xFFFul) | i; /* insert entry number */ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb)); -- cgit v0.10.2 From 0865a583a4881975cc4b621f4886c02f01600302 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:36:17 +0200 Subject: KVM: PPC: Book3S HV: Access host lppaca and shadow slb in BE Some data structures are always stored in big endian. Among those are the LPPACA fields as well as the shadow slb. These structures might be shared with a hypervisor. So whenever we access those fields, make sure we do so in big endian byte order. 
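The LWZX_BE/STWX_BE/LDX_BE helpers used in the assembly below keep the
in-memory image big endian regardless of host endianness. Approximately
(the kernel's real definitions live in asm-compat.h and differ in how they
are stringified for the assembler):

	#ifdef __LITTLE_ENDIAN__
	#define LWZX_BE	lwbrx	/* byte-reversed indexed word load       */
	#define STWX_BE	stwbrx	/* byte-reversed indexed word store      */
	#define LDX_BE	ldbrx	/* byte-reversed indexed doubleword load */
	#else
	#define LWZX_BE	lwzx
	#define STWX_BE	stwx
	#define LDX_BE	ldx
	#endif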
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e66c1e38..bf5270e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -32,10 +32,6 @@ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM) -#ifdef __LITTLE_ENDIAN__ -#error Need to fix lppaca and SLB shadow accesses in little endian mode -#endif - /* Values in HSTATE_NAPPING(r13) */ #define NAPPING_CEDE 1 #define NAPPING_NOVCPU 2 @@ -595,9 +591,10 @@ kvmppc_got_guest: ld r3, VCPU_VPA(r4) cmpdi r3, 0 beq 25f - lwz r5, LPPACA_YIELDCOUNT(r3) + li r6, LPPACA_YIELDCOUNT + LWZX_BE r5, r3, r6 addi r5, r5, 1 - stw r5, LPPACA_YIELDCOUNT(r3) + STWX_BE r5, r3, r6 li r6, 1 stb r6, VCPU_VPA_DIRTY(r4) 25: @@ -1442,9 +1439,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) ld r8, VCPU_VPA(r9) /* do they have a VPA? */ cmpdi r8, 0 beq 25f - lwz r3, LPPACA_YIELDCOUNT(r8) + li r4, LPPACA_YIELDCOUNT + LWZX_BE r3, r8, r4 addi r3, r3, 1 - stw r3, LPPACA_YIELDCOUNT(r8) + STWX_BE r3, r8, r4 li r3, 1 stb r3, VCPU_VPA_DIRTY(r9) 25: @@ -1757,8 +1755,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 33: ld r8,PACA_SLBSHADOWPTR(r13) .rept SLB_NUM_BOLTED - ld r5,SLBSHADOW_SAVEAREA(r8) - ld r6,SLBSHADOW_SAVEAREA+8(r8) + li r3, SLBSHADOW_SAVEAREA + LDX_BE r5, r8, r3 + addi r3, r3, 8 + LDX_BE r6, r8, r3 andis. r7,r5,SLB_ESID_V@h beq 1f slbmte r6,r5 -- cgit v0.10.2 From 76d072fb05f646eb180f161bbe06ab185af52f38 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:37:52 +0200 Subject: KVM: PPC: Book3S HV: Access XICS in BE On the exit path from the guest we check what type of interrupt we received if we received one. This means we're doing hardware access to the XICS interrupt controller. However, when running on a little endian system, this access is byte reversed. So let's make sure to swizzle the bytes back again and virtually make XICS accesses big endian. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bf5270e..364ca0c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2350,7 +2350,18 @@ kvmppc_read_intr: cmpdi r6, 0 beq- 1f lwzcix r0, r6, r7 - rlwinm. r3, r0, 0, 0xffffff + /* + * Save XIRR for later. Since we get in in reverse endian on LE + * systems, save it byte reversed and fetch it back in host endian. + */ + li r3, HSTATE_SAVED_XIRR + STWX_BE r0, r3, r13 +#ifdef __LITTLE_ENDIAN__ + lwz r3, HSTATE_SAVED_XIRR(r13) +#else + mr r3, r0 +#endif + rlwinm. r3, r3, 0, 0xffffff sync beq 1f /* if nothing pending in the ICP */ @@ -2382,10 +2393,9 @@ kvmppc_read_intr: li r3, -1 1: blr -42: /* It's not an IPI and it's for the host, stash it in the PACA - * before exit, it will be picked up by the host ICP driver +42: /* It's not an IPI and it's for the host. We saved a copy of XIRR in + * the PACA earlier, it will be picked up by the host ICP driver */ - stw r0, HSTATE_SAVED_XIRR(r13) li r3, 1 b 1b -- cgit v0.10.2 From 9bf163f86d0dc2f9070d9b1b8c27cedcf8eec816 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:41:15 +0200 Subject: KVM: PPC: Book3S HV: Fix ABIv2 on LE For code that doesn't live in modules we can just branch to the real function names, giving us compatibility with ABIv1 and ABIv2. Do this for the compiled-in code of HV KVM. 
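Some background on why this works: the big endian ELFv1 ABI gives every C
function a descriptor symbol (load_fp_state) plus a code entry symbol
(.load_fp_state), while ELFv2, used on little endian, has no dot symbols at
all. For code linked into the kernel image the plain name should be safe on
both ABIs, because the ppc64 linker redirects a branch to a function
descriptor symbol to the function's entry point; module code cannot rely on
that, which is why this change is limited to the compiled-in parts.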
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 364ca0c..855521e 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -668,9 +668,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) mr r31, r4 addi r3, r31, VCPU_FPRS_TM - bl .load_fp_state + bl load_fp_state addi r3, r31, VCPU_VRS_TM - bl .load_vr_state + bl load_vr_state mr r4, r31 lwz r7, VCPU_VRSAVE_TM(r4) mtspr SPRN_VRSAVE, r7 @@ -1414,9 +1414,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM) /* Save FP/VSX. */ addi r3, r9, VCPU_FPRS_TM - bl .store_fp_state + bl store_fp_state addi r3, r9, VCPU_VRS_TM - bl .store_vr_state + bl store_vr_state mfspr r6, SPRN_VRSAVE stw r6, VCPU_VRSAVE_TM(r9) 1: @@ -2430,11 +2430,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r3,VCPU_FPRS - bl .store_fp_state + bl store_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .store_vr_state + bl store_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif mfspr r6,SPRN_VRSAVE @@ -2466,11 +2466,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX) mtmsrd r8 isync addi r3,r4,VCPU_FPRS - bl .load_fp_state + bl load_fp_state #ifdef CONFIG_ALTIVEC BEGIN_FTR_SECTION addi r3,r31,VCPU_VRS - bl .load_vr_state + bl load_vr_state END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) #endif lwz r7,VCPU_VRSAVE(r31) -- cgit v0.10.2 From 6947f948f06128409b94306afaca5ece873ee5a2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 11 Jun 2014 10:39:38 +0200 Subject: KVM: PPC: Book3S HV: Enable for little endian hosts Now that we've fixed all the issues that HV KVM code had on little endian hosts, we can enable it in the kernel configuration for users to play with. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index d6a53b9..8aeeda1 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,7 +75,6 @@ config KVM_BOOK3S_64 config KVM_BOOK3S_64_HV tristate "KVM support for POWER7 and PPC970 using hypervisor mode in host" depends on KVM_BOOK3S_64 - depends on !CPU_LITTLE_ENDIAN select KVM_BOOK3S_HV_POSSIBLE select MMU_NOTIFIER select CMA -- cgit v0.10.2 From debf27d6b92d7a98e0153ca8e3a990ea7a45b4da Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Fri, 4 Jul 2014 11:17:28 +0300 Subject: KVM: PPC: e500: Emulate power management control SPR For FSL e6500 core the kernel uses power management SPR register (PWRMGTCR0) to enable idle power down for cores and devices by setting up the idle count period at boot time. With the host already controlling the power management configuration the guest could simply benefit from it, so emulate guest request as a general store. 
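The net effect is that PWRMGTCR0 becomes plain vcpu-local state; a sketch
of the two emulation paths added below (the host's real register is never
written):

	vcpu->arch.pwrmgtcr0 = spr_val;		/* guest mtspr SPRN_PWRMGTCR0 */
	*spr_val = vcpu->arch.pwrmgtcr0;	/* guest mfspr SPRN_PWRMGTCR0 */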
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 62b2cee..faf2f0e 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -584,6 +584,7 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ struct debug_reg dbg_reg; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 002d517..c99c40e 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -250,6 +250,14 @@ int kvmppc_core_emulate_mtspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong spr_va spr_val); break; + case SPRN_PWRMGTCR0: + /* + * Guest relies on host power management configurations + * Treat the request as a general store + */ + vcpu->arch.pwrmgtcr0 = spr_val; + break; + /* extra exceptions */ case SPRN_IVOR32: vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL] = spr_val; @@ -368,6 +376,10 @@ int kvmppc_core_emulate_mfspr_e500(struct kvm_vcpu *vcpu, int sprn, ulong *spr_v *spr_val = vcpu->arch.eptcfg; break; + case SPRN_PWRMGTCR0: + *spr_val = vcpu->arch.pwrmgtcr0; + break; + /* extra exceptions */ case SPRN_IVOR32: *spr_val = vcpu->arch.ivor[BOOKE_IRQPRIO_SPE_UNAVAIL]; -- cgit v0.10.2 From 1287cb3fa85cd4a0d18402f6a23e1d4c6a9d7b8b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 4 Jul 2014 12:52:51 +0200 Subject: KVM: PPC: Book3S: Move vcore definition to end of kvm_arch struct When building KVM with a lot of vcores (NR_CPUS is big), we can potentially get out of the ld immediate range for dereferences inside that struct. Move the array to the end of our kvm_arch struct. This fixes compilation issues with NR_CPUS=2048 for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index faf2f0e..855ba4d 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,7 +255,6 @@ struct kvm_arch { atomic_t hpte_mod_interest; spinlock_t slot_phys_lock; cpumask_t need_tlb_flush; - struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; int hpt_cma_alloc; #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE @@ -273,6 +272,10 @@ struct kvm_arch { struct kvmppc_xics *xics; #endif struct kvmppc_ops *kvm_ops; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* This array can grow quite large, keep it at the end */ + struct kvmppc_vcore *vcores[KVM_MAX_VCORES]; +#endif }; /* -- cgit v0.10.2 From 17824b5afcf273f6fc3e04df2d2a9d90d5c864fd Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 10 Jul 2014 19:19:35 +0200 Subject: KVM: PPC: Deflect page write faults properly in kvmppc_st When we have a page that we're not allowed to write to, xlate() will already tell us -EPERM on lookup of that page. With the code as is we change it into a "page missing" error which a guest may get confused about. Instead, just tell the caller about the -EPERM directly. This fixes Mac OS X guests when run with DCBZ32 emulation. 
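With this change a caller can tell the two failures apart. A hypothetical
user of kvmppc_st() (the handle_* names are invented for illustration):

	int r = kvmppc_st(vcpu, &eaddr, sizeof(u32), &val, true);
	if (r == -EPERM)
		handle_write_protected_page(vcpu);	/* mapped, but read-only */
	else if (r == -ENOENT)
		handle_unmapped_page(vcpu);		/* no translation at all */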
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index bd75902..9624c56 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -418,11 +418,13 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; + int r; vcpu->stat.st++; - if (kvmppc_xlate(vcpu, *eaddr, data, true, &pte)) - return -ENOENT; + r = kvmppc_xlate(vcpu, *eaddr, data, true, &pte); + if (r < 0) + return r; *eaddr = pte.raddr; -- cgit v0.10.2 From 2e27ecc961044a2c5c05a4283888352961886a87 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 10 Jul 2014 19:22:03 +0200 Subject: KVM: PPC: Book3S: Stop PTE lookup on write errors When a page lookup failed because we're not allowed to write to the page, we should not overwrite that value with another lookup on the second PTEG which will return "page not found". Instead, we should just tell the caller that we had a permission problem. This fixes Mac OS X guests looping endlessly in page lookup code for me. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 93503bb..cd0b073 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -335,7 +335,7 @@ static int kvmppc_mmu_book3s_32_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, if (r < 0) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, true); - if (r < 0) + if (r == -ENOENT) r = kvmppc_mmu_book3s_32_xlate_pte(vcpu, eaddr, pte, data, iswrite, false); -- cgit v0.10.2 From c01e3f66cd5cdc1f727f4c7b0c10b3e3bdb91ba7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 11 Jul 2014 02:58:58 +0200 Subject: KVM: PPC: Book3S: Add hack for split real mode Today we handle split real mode by mapping both instruction and data faults into a special virtual address space that only exists during the split mode phase. This is good enough to catch 32bit Linux guests that use split real mode for copy_from/to_user. In this case we're always prefixed with 0xc0000000 for our instruction pointer and can map the user space process freely below there. However, that approach fails when we're running KVM inside of KVM. Here the 1st level last_inst reader may well be in the same virtual page as a 2nd level interrupt handler. It also fails when running Mac OS X guests. Here we have a 4G/4G split, so a kernel copy_from/to_user implementation can easily overlap with user space addresses. The architecturally correct way to fix this would be to implement an instruction interpreter in KVM that kicks in whenever we go into split real mode. This interpreter however would not receive a great amount of testing and be a lot of bloat for a reasonably isolated corner case. So I went back to the drawing board and tried to come up with a way to make split real mode work with a single flat address space. And then I realized that we could get away with the same trick that makes it work for Linux: Whenever we see an instruction address during split real mode that may collide, we just move it higher up the virtual address space to a place that hopefully does not collide (keep your fingers crossed!). That approach does work surprisingly well. I am able to successfully run Mac OS X guests with KVM and QEMU (no split real mode hacks like MOL) when I apply a tiny timing probe hack to QEMU. I'd say this is a win over even more broken split real mode :). 
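With SPLIT_HACK_MASK = 0xff000000 and SPLIT_HACK_OFFS = 0xfb000000 (both
defined below), the fixup is simple address arithmetic and is only applied
when the guest pc has a clear top byte. A worked example:

	ulong pc       = 0x00003f00;			/* guest pc in split real mode  */
	ulong fixed    = pc | SPLIT_HACK_OFFS;		/* 0xfb003f00, runs from up high */
	ulong restored = fixed & ~SPLIT_HACK_MASK;	/* 0x00003f00 again on unfixup  */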
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 9601741..3f3e530 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -131,6 +131,7 @@ #define BOOK3S_HFLAG_NATIVE_PS 0x8 #define BOOK3S_HFLAG_MULTI_PGSIZE 0x10 #define BOOK3S_HFLAG_NEW_TLBIE 0x20 +#define BOOK3S_HFLAG_SPLIT_HACK 0x40 #define RESUME_FLAG_NV (1<<0) /* Reload guest nonvolatile state? */ #define RESUME_FLAG_HOST (1<<1) /* Resume host? */ diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 8ac5392..b1cf18d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -324,4 +324,7 @@ static inline bool is_kvmppc_resume_guest(int r) /* LPIDs we support with this build -- runtime limit may be lower */ #define KVMPPC_NR_LPIDS (LPID_RSVD + 1) +#define SPLIT_HACK_MASK 0xff000000 +#define SPLIT_HACK_OFFS 0xfb000000 + #endif /* __ASM_KVM_BOOK3S_H__ */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 9624c56..1d13764 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -72,6 +72,17 @@ void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu) { } +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) { + ulong pc = kvmppc_get_pc(vcpu); + if ((pc & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS) + kvmppc_set_pc(vcpu, pc & ~SPLIT_HACK_MASK); + vcpu->arch.hflags &= ~BOOK3S_HFLAG_SPLIT_HACK; + } +} +EXPORT_SYMBOL_GPL(kvmppc_unfixup_split_real); + static inline unsigned long kvmppc_interrupt_offset(struct kvm_vcpu *vcpu) { if (!is_kvmppc_hv_enabled(vcpu->kvm)) @@ -118,6 +129,7 @@ static inline bool kvmppc_critical_section(struct kvm_vcpu *vcpu) void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags) { + kvmppc_unfixup_split_real(vcpu); kvmppc_set_srr0(vcpu, kvmppc_get_pc(vcpu)); kvmppc_set_srr1(vcpu, kvmppc_get_msr(vcpu) | flags); kvmppc_set_pc(vcpu, kvmppc_interrupt_offset(vcpu) + vec); @@ -384,6 +396,13 @@ static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, pte->may_write = true; pte->may_execute = true; r = 0; + + if ((kvmppc_get_msr(vcpu) & (MSR_IR | MSR_DR)) == MSR_DR && + !data) { + if ((vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((eaddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte->raddr &= ~SPLIT_HACK_MASK; + } } return r; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 15fd6c2..6125f60 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -62,6 +62,35 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); #define HW_PAGE_SIZE PAGE_SIZE #endif +static bool kvmppc_is_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + return (msr & (MSR_IR|MSR_DR)) == MSR_DR; +} + +static void kvmppc_fixup_split_real(struct kvm_vcpu *vcpu) +{ + ulong msr = kvmppc_get_msr(vcpu); + ulong pc = kvmppc_get_pc(vcpu); + + /* We are in DR only split real mode */ + if ((msr & (MSR_IR|MSR_DR)) != MSR_DR) + return; + + /* We have not fixed up the guest already */ + if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) + return; + + /* The code is in fixupable address space */ + if (pc & SPLIT_HACK_MASK) + return; + + vcpu->arch.hflags |= BOOK3S_HFLAG_SPLIT_HACK; + kvmppc_set_pc(vcpu, pc | SPLIT_HACK_OFFS); +} + +void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu); + static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) { #ifdef 
CONFIG_PPC_BOOK3S_64 @@ -81,6 +110,9 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, int cpu) #ifdef CONFIG_PPC_BOOK3S_32 current->thread.kvm_shadow_vcpu = vcpu->arch.shadow_vcpu; #endif + + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); } static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) @@ -95,6 +127,9 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu) svcpu_put(svcpu); #endif + if (kvmppc_is_split_real(vcpu)) + kvmppc_unfixup_split_real(vcpu); + kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX); kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); @@ -322,6 +357,11 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr) } } + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + else + kvmppc_unfixup_split_real(vcpu); + if ((kvmppc_get_msr(vcpu) & (MSR_PR|MSR_IR|MSR_DR)) != (old_msr & (MSR_PR|MSR_IR|MSR_DR))) { kvmppc_mmu_flush_segments(vcpu); @@ -522,6 +562,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, pte.vpage |= ((u64)VSID_REAL << (SID_SHIFT - 12)); break; case MSR_DR: + if (!data && + (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) && + ((pte.raddr & SPLIT_HACK_MASK) == SPLIT_HACK_OFFS)) + pte.raddr &= ~SPLIT_HACK_MASK; + /* fall through */ case MSR_IR: vcpu->arch.mmu.esid_to_vsid(vcpu, eaddr >> SID_SHIFT, &vsid); @@ -886,6 +931,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, ulong shadow_srr1 = vcpu->arch.shadow_srr1; vcpu->stat.pf_instruc++; + if (kvmppc_is_split_real(vcpu)) + kvmppc_fixup_split_real(vcpu); + #ifdef CONFIG_PPC_BOOK3S_32 /* We set segments as unused segments when invalidating them. So * treat the respective fault as segment fault. */ -- cgit v0.10.2 From 89b68c96a24f6520c8815f88254c8e7d09aeb40e Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Sun, 13 Jul 2014 16:37:12 +0200 Subject: KVM: PPC: Book3S: Make magic page properly 4k mappable The magic page is defined as a 4k page of per-vCPU data that is shared between the guest and the host to accelerate accesses to privileged registers. However, when the host is using 64k page size granularity we weren't quite as strict about that rule anymore. Instead, we partially treated all of the upper 64k as magic page and mapped only the uppermost 4k with the actual magic contents. This works well enough for Linux which doesn't use any memory in kernel space in the upper 64k, but Mac OS X got upset. So this patch makes magic page actually stay in a 4k range even on 64k page size hosts. This patch fixes magic page usage with Mac OS X (using MOL) on 64k PAGE_SIZE hosts for me. 
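To illustrate the constraint enforced below: with 64k host pages, bits
0xf000 of an address select the 4k slot inside a page, so the host's shared
struct has to sit in the same slot the guest chose for its magic page. A
worked example with made-up addresses:

	ulong mp_pa  = 0x0000f000;		/* guest magic page in slot 0xf000    */
	ulong shared = 0xc000000012342000UL;	/* host copy currently in slot 0x2000 */
	/* slots differ, so the 4k of contents are copied into the matching slot */
	ulong target = (shared & PAGE_MASK) | (mp_pa & 0xf000);	/* ...1234f000 */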
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index b1cf18d..20fb6f2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -158,7 +158,7 @@ extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); extern void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr); extern int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu); -extern pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +extern pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 1d13764..31facfc 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -354,18 +354,18 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_core_prepare_to_enter); -pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, +pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable) { - ulong mp_pa = vcpu->arch.magic_page_pa; + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM; + gfn_t gfn = gpa >> PAGE_SHIFT; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; /* Magic page override */ - if (unlikely(mp_pa) && - unlikely(((gfn << PAGE_SHIFT) & KVM_PAM) == - ((mp_pa & PAGE_MASK) & KVM_PAM))) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((gpa & KVM_PAM) == mp_pa)) { ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; pfn_t pfn; @@ -378,7 +378,7 @@ pfn_t kvmppc_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, bool writing, return gfn_to_pfn_prot(vcpu->kvm, gfn, writing, writable); } -EXPORT_SYMBOL_GPL(kvmppc_gfn_to_pfn); +EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, bool iswrite, struct kvmppc_pte *pte) diff --git a/arch/powerpc/kvm/book3s_32_mmu_host.c b/arch/powerpc/kvm/book3s_32_mmu_host.c index 678e753..2035d16 100644 --- a/arch/powerpc/kvm/book3s_32_mmu_host.c +++ b/arch/powerpc/kvm/book3s_32_mmu_host.c @@ -156,11 +156,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, bool writable; /* Get host physical address for gpa */ - hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT, - iswrite, &writable); + hpaddr = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(hpaddr)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", - orig_pte->eaddr); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c index 0ac9839..b982d92 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_host.c +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c @@ -104,9 +104,10 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte, smp_rmb(); /* Get host physical address for gpa */ - pfn = kvmppc_gfn_to_pfn(vcpu, gfn, iswrite, &writable); + pfn = kvmppc_gpa_to_pfn(vcpu, orig_pte->raddr, iswrite, &writable); if (is_error_noslot_pfn(pfn)) { - printk(KERN_INFO "Couldn't get guest page for gfn %lx!\n", gfn); + printk(KERN_INFO "Couldn't get guest page for gpa %lx!\n", + orig_pte->raddr); r = -EINVAL; goto out; } diff --git 
a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 6125f60..e40765f 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -511,19 +511,19 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) put_page(hpage); } -static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) +static int kvmppc_visible_gpa(struct kvm_vcpu *vcpu, gpa_t gpa) { ulong mp_pa = vcpu->arch.magic_page_pa; if (!(kvmppc_get_msr(vcpu) & MSR_SF)) mp_pa = (uint32_t)mp_pa; - if (unlikely(mp_pa) && - unlikely((mp_pa & KVM_PAM) >> PAGE_SHIFT == gfn)) { + gpa &= ~0xFFFULL; + if (unlikely(mp_pa) && unlikely((mp_pa & KVM_PAM) == (gpa & KVM_PAM))) { return 1; } - return kvm_is_visible_gfn(vcpu->kvm, gfn); + return kvm_is_visible_gfn(vcpu->kvm, gpa >> PAGE_SHIFT); } int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -614,7 +614,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu, kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu)); kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80); } else if (!is_mmio && - kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) { + kvmppc_visible_gpa(vcpu, pte.raddr)) { if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) { /* * There is already a host HPTE there, presumably @@ -1387,8 +1387,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_pr(struct kvm *kvm, p = __get_free_page(GFP_KERNEL|__GFP_ZERO); if (!p) goto uninit_vcpu; - /* the real shared page fills the last 4k of our page */ - vcpu->arch.shared = (void *)(p + PAGE_SIZE - 4096); + vcpu->arch.shared = (void *)p; #ifdef CONFIG_PPC_BOOK3S_64 /* Always start the shared struct in native endian mode */ #ifdef __BIG_ENDIAN__ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 7efc2b7..fe0257a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -190,6 +190,25 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) vcpu->arch.magic_page_pa = param1 & ~0xfffULL; vcpu->arch.magic_page_ea = param2 & ~0xfffULL; +#ifdef CONFIG_PPC_64K_PAGES + /* + * Make sure our 4k magic page is in the same window of a 64k + * page within the guest and within the host's page. + */ + if ((vcpu->arch.magic_page_pa & 0xf000) != + ((ulong)vcpu->arch.shared & 0xf000)) { + void *old_shared = vcpu->arch.shared; + ulong shared = (ulong)vcpu->arch.shared; + void *new_shared; + + shared &= PAGE_MASK; + shared |= vcpu->arch.magic_page_pa & 0xf000; + new_shared = (void*)shared; + memcpy(new_shared, old_shared, 0x1000); + vcpu->arch.shared = new_shared; + } +#endif + r2 = KVM_MAGIC_FEAT_SR | KVM_MAGIC_FEAT_MAS0_TO_SPRG7; r = EV_SUCCESS; -- cgit v0.10.2 From 1dc0c5b88cae1c211b37fed9187379a692bb469b Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:35 +0530 Subject: kvm: ppc: bookehv: Added wrapper macros for shadow registers There are shadow registers like, GSPRG[0-3], GSRR0, GSRR1 etc on BOOKE-HV and these shadow registers are guest accessible. So these shadow registers needs to be updated on BOOKE-HV. This patch adds new macro for get/set helper of shadow register . 
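As an example of what the new macros generate,
SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) expands on BOOKE-HV to accessors
that go straight to the guest shadow register:

	static inline ulong kvmppc_get_srr0(struct kvm_vcpu *vcpu)
	{
		return mfspr(SPRN_GSRR0);
	}

	static inline void kvmppc_set_srr0(struct kvm_vcpu *vcpu, ulong val)
	{
		mtspr(SPRN_GSRR0, val);
	}

On non-HV configurations it falls back to the existing shared struct
accessors.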
Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e2fd5a1..6520d09 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -472,8 +472,20 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } +#define SPRNG_WRAPPER_GET(reg, e500hv_spr) \ +static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +{ \ + return mfspr(e500hv_spr); \ +} \ + +#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ +{ \ + mtspr(e500hv_spr, val); \ +} \ + #define SHARED_WRAPPER_GET(reg, size) \ -static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ +static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ if (kvmppc_shared_big_endian(vcpu)) \ return be##size##_to_cpu(vcpu->arch.shared->reg); \ @@ -494,14 +506,30 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ +#define SPRNG_WRAPPER(reg, e500hv_spr) \ + SPRNG_WRAPPER_GET(reg, e500hv_spr) \ + SPRNG_WRAPPER_SET(reg, e500hv_spr) \ + +#ifdef CONFIG_KVM_BOOKE_HV + +#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ + SPRNG_WRAPPER(reg, e500hv_spr) \ + +#else + +#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ + SHARED_WRAPPER(reg, size) \ + +#endif + SHARED_WRAPPER(critical, 64) -SHARED_WRAPPER(sprg0, 64) -SHARED_WRAPPER(sprg1, 64) -SHARED_WRAPPER(sprg2, 64) -SHARED_WRAPPER(sprg3, 64) -SHARED_WRAPPER(srr0, 64) -SHARED_WRAPPER(srr1, 64) -SHARED_WRAPPER(dar, 64) +SHARED_SPRNG_WRAPPER(sprg0, 64, SPRN_GSPRG0) +SHARED_SPRNG_WRAPPER(sprg1, 64, SPRN_GSPRG1) +SHARED_SPRNG_WRAPPER(sprg2, 64, SPRN_GSPRG2) +SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) +SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) +SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) +SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { -- cgit v0.10.2 From 31579eea69c8088685a4dc82784ca839cfd5ae73 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:36 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers of SRR0 and SRR1 Use kvmppc_set_srr0/srr1() and kvmppc_get_srr0/srr1() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index ab62109..3b43adb 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -266,13 +266,8 @@ static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu) static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) { -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GSRR0, srr0); - mtspr(SPRN_GSRR1, srr1); -#else - vcpu->arch.shared->srr0 = srr0; - vcpu->arch.shared->srr1 = srr1; -#endif + kvmppc_set_srr0(vcpu, srr0); + kvmppc_set_srr1(vcpu, srr1); } static void set_guest_csrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) @@ -1265,8 +1260,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->lr = vcpu->arch.lr; regs->xer = kvmppc_get_xer(vcpu); regs->msr = vcpu->arch.shared->msr; - regs->srr0 = vcpu->arch.shared->srr0; - regs->srr1 = vcpu->arch.shared->srr1; + regs->srr0 = kvmppc_get_srr0(vcpu); + regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; regs->sprg0 = vcpu->arch.shared->sprg0; regs->sprg1 = vcpu->arch.shared->sprg1; @@ -1293,8 +1288,8 @@ int 
kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) vcpu->arch.lr = regs->lr; kvmppc_set_xer(vcpu, regs->xer); kvmppc_set_msr(vcpu, regs->msr); - vcpu->arch.shared->srr0 = regs->srr0; - vcpu->arch.shared->srr1 = regs->srr1; + kvmppc_set_srr0(vcpu, regs->srr0); + kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); vcpu->arch.shared->sprg0 = regs->sprg0; vcpu->arch.shared->sprg1 = regs->sprg1; -- cgit v0.10.2 From a5414d4b5ed8a2b10a1c8fe84f30ed4f19d0a35e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:37 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers of SPRN_DEAR Uses kvmppc_set_dar() and kvmppc_get_dar() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3b43adb..8e8b14b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,24 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_dear(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GDEAR); -#else - return vcpu->arch.shared->dar; -#endif -} - -static void set_guest_dear(struct kvm_vcpu *vcpu, unsigned long dear) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GDEAR, dear); -#else - vcpu->arch.shared->dar = dear; -#endif -} - static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV @@ -447,7 +429,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (update_esr == true) set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) - set_guest_dear(vcpu, vcpu->arch.queued_dear); + kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { if (vcpu->arch.epr_flags & KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); @@ -1317,7 +1299,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; sregs->u.e.esr = get_guest_esr(vcpu); - sregs->u.e.dear = get_guest_dear(vcpu); + sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; sregs->u.e.dec = kvmppc_get_dec(vcpu, tb); @@ -1335,7 +1317,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; set_guest_esr(vcpu, sregs->u.e.esr); - set_guest_dear(vcpu, sregs->u.e.dear); + kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); -- cgit v0.10.2 From dc168549d9a0fb55d3021ee408adf25786cfda23 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:38 +0530 Subject: kvm: ppc: booke: Add shared struct helpers of SPRN_ESR Add and use kvmppc_set_esr() and kvmppc_get_esr() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 6520d09..c95bdbd 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -530,6 +530,7 @@ SHARED_SPRNG_WRAPPER(sprg3, 64, SPRN_GSPRG3) SHARED_SPRNG_WRAPPER(srr0, 64, SPRN_GSRR0) SHARED_SPRNG_WRAPPER(srr1, 64, SPRN_GSRR1) SHARED_SPRNG_WRAPPER(dar, 64, SPRN_GDEAR) +SHARED_SPRNG_WRAPPER(esr, 64, SPRN_GESR) SHARED_WRAPPER_GET(msr, 64) static inline void kvmppc_set_msr_fast(struct kvm_vcpu *vcpu, u64 val) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 8e8b14b..25a7e70 
100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,24 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_esr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GESR); -#else - return vcpu->arch.shared->esr; -#endif -} - -static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) -{ -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_GESR, esr); -#else - vcpu->arch.shared->esr = esr; -#endif -} - static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) { #ifdef CONFIG_KVM_BOOKE_HV @@ -427,7 +409,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[priority]; if (update_esr == true) - set_guest_esr(vcpu, vcpu->arch.queued_esr); + kvmppc_set_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) kvmppc_set_dar(vcpu, vcpu->arch.queued_dear); if (update_epr == true) { @@ -1298,7 +1280,7 @@ static void get_sregs_base(struct kvm_vcpu *vcpu, sregs->u.e.csrr0 = vcpu->arch.csrr0; sregs->u.e.csrr1 = vcpu->arch.csrr1; sregs->u.e.mcsr = vcpu->arch.mcsr; - sregs->u.e.esr = get_guest_esr(vcpu); + sregs->u.e.esr = kvmppc_get_esr(vcpu); sregs->u.e.dear = kvmppc_get_dar(vcpu); sregs->u.e.tsr = vcpu->arch.tsr; sregs->u.e.tcr = vcpu->arch.tcr; @@ -1316,7 +1298,7 @@ static int set_sregs_base(struct kvm_vcpu *vcpu, vcpu->arch.csrr0 = sregs->u.e.csrr0; vcpu->arch.csrr1 = sregs->u.e.csrr1; vcpu->arch.mcsr = sregs->u.e.mcsr; - set_guest_esr(vcpu, sregs->u.e.esr); + kvmppc_set_esr(vcpu, sregs->u.e.esr); kvmppc_set_dar(vcpu, sregs->u.e.dear); vcpu->arch.vrsave = sregs->u.e.vrsave; kvmppc_set_tcr(vcpu, sregs->u.e.tcr); -- cgit v0.10.2 From c1b8a01bf959e87cff9c9dcaf51cb5c1d60a2c52 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:39 +0530 Subject: kvm: ppc: booke: Use the shared struct helpers for SPRN_SPRG0-7 Use kvmppc_set_sprg[0-7]() and kvmppc_get_sprg[0-7]() helper functions Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 25a7e70..34562d4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1227,14 +1227,14 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) regs->srr0 = kvmppc_get_srr0(vcpu); regs->srr1 = kvmppc_get_srr1(vcpu); regs->pid = vcpu->arch.pid; - regs->sprg0 = vcpu->arch.shared->sprg0; - regs->sprg1 = vcpu->arch.shared->sprg1; - regs->sprg2 = vcpu->arch.shared->sprg2; - regs->sprg3 = vcpu->arch.shared->sprg3; - regs->sprg4 = vcpu->arch.shared->sprg4; - regs->sprg5 = vcpu->arch.shared->sprg5; - regs->sprg6 = vcpu->arch.shared->sprg6; - regs->sprg7 = vcpu->arch.shared->sprg7; + regs->sprg0 = kvmppc_get_sprg0(vcpu); + regs->sprg1 = kvmppc_get_sprg1(vcpu); + regs->sprg2 = kvmppc_get_sprg2(vcpu); + regs->sprg3 = kvmppc_get_sprg3(vcpu); + regs->sprg4 = kvmppc_get_sprg4(vcpu); + regs->sprg5 = kvmppc_get_sprg5(vcpu); + regs->sprg6 = kvmppc_get_sprg6(vcpu); + regs->sprg7 = kvmppc_get_sprg7(vcpu); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) regs->gpr[i] = kvmppc_get_gpr(vcpu, i); @@ -1255,14 +1255,14 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) kvmppc_set_srr0(vcpu, regs->srr0); kvmppc_set_srr1(vcpu, regs->srr1); kvmppc_set_pid(vcpu, regs->pid); - vcpu->arch.shared->sprg0 = regs->sprg0; - vcpu->arch.shared->sprg1 = regs->sprg1; - vcpu->arch.shared->sprg2 = regs->sprg2; - vcpu->arch.shared->sprg3 = regs->sprg3; - 
vcpu->arch.shared->sprg4 = regs->sprg4; - vcpu->arch.shared->sprg5 = regs->sprg5; - vcpu->arch.shared->sprg6 = regs->sprg6; - vcpu->arch.shared->sprg7 = regs->sprg7; + kvmppc_set_sprg0(vcpu, regs->sprg0); + kvmppc_set_sprg1(vcpu, regs->sprg1); + kvmppc_set_sprg2(vcpu, regs->sprg2); + kvmppc_set_sprg3(vcpu, regs->sprg3); + kvmppc_set_sprg4(vcpu, regs->sprg4); + kvmppc_set_sprg5(vcpu, regs->sprg5); + kvmppc_set_sprg6(vcpu, regs->sprg6); + kvmppc_set_sprg7(vcpu, regs->sprg7); for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) kvmppc_set_gpr(vcpu, i, regs->gpr[i]); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 27a4b28..28c1588 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -165,16 +165,16 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) * guest (PR-mode only). */ case SPRN_SPRG4: - vcpu->arch.shared->sprg4 = spr_val; + kvmppc_set_sprg4(vcpu, spr_val); break; case SPRN_SPRG5: - vcpu->arch.shared->sprg5 = spr_val; + kvmppc_set_sprg5(vcpu, spr_val); break; case SPRN_SPRG6: - vcpu->arch.shared->sprg6 = spr_val; + kvmppc_set_sprg6(vcpu, spr_val); break; case SPRN_SPRG7: - vcpu->arch.shared->sprg7 = spr_val; + kvmppc_set_sprg7(vcpu, spr_val); break; case SPRN_IVPR: -- cgit v0.10.2 From 34f754b99e2f642c661967b456764b2c7ccc096e Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Thu, 17 Jul 2014 17:01:40 +0530 Subject: kvm: ppc: Add SPRN_EPR get helper function kvmppc_set_epr() is already defined in asm/kvm_ppc.h, so rename and move the get_epr helper function to the same file. Signed-off-by: Bharat Bhushan [agraf: remove duplicate return] Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index c95bdbd..246fb9a7 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -392,6 +392,17 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) { return 0; } #endif +static inline unsigned long kvmppc_get_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#elif defined(CONFIG_BOOKE) + return vcpu->arch.epr; +#else + return 0; +#endif +} + static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) { #ifdef CONFIG_KVM_BOOKE_HV diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 34562d4..a06ef6b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -292,15 +292,6 @@ static void set_guest_mcsrr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1) vcpu->arch.mcsrr1 = srr1; } -static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) -{ -#ifdef CONFIG_KVM_BOOKE_HV - return mfspr(SPRN_GEPR); -#else - return vcpu->arch.epr; -#endif -} - /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) @@ -1452,7 +1443,7 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) val = get_reg_val(reg->id, vcpu->arch.dbg_reg.dac2); break; case KVM_REG_PPC_EPR: { - u32 epr = get_guest_epr(vcpu); + u32 epr = kvmppc_get_epr(vcpu); val = get_reg_val(reg->id, epr); break; } -- cgit v0.10.2 From b5741bb3d4dc751a76f6caaaec8e2b271ff8ca48 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:18 +0300 Subject: KVM: PPC: e500mc: Revert "add load inst fixup" The commit 1d628af7 "add load inst fixup" made an attempt to handle failures generated by reading the guest's current instruction. 
The fixup code that was added works by chance, hiding the real issue. The load external pid (lwepx) instruction, used by KVM to read guest instructions, is executed in a substituted guest translation context (EPLC[EGS] = 1). As a consequence, lwepx's TLB error and data storage interrupts need to be handled by KVM, even though these interrupts are generated from host context (MSR[GS] = 0) where lwepx is executed. Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. As a result, the host kernel handles lwepx faults searching the faulting guest data address (loaded in DEAR) in its own Logical Partition ID (LPID) 0 context. In case a host translation is found, execution returns to the lwepx instruction instead of the fixup, and the host ends up in an infinite loop. Revert the commit "add load inst fixup". The lwepx issue will be addressed in a subsequent patch without needing fixup code. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index a1712b8..6ff4480 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -29,7 +29,6 @@ #include #include #include -#include #ifdef CONFIG_64BIT #include @@ -164,32 +163,9 @@ PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) mtspr SPRN_EPLC, r8 - - /* disable preemption, so we are sure we hit the fixup handler */ - CURRENT_THREAD_INFO(r8, r1) - li r7, 1 - stw r7, TI_PREEMPT(r8) - isync - - /* - * In case the read goes wrong, we catch it and write an invalid value - * in LAST_INST instead. - */ -1: lwepx r9, 0, r5 -2: -.section .fixup, "ax" -3: li r9, KVM_INST_FETCH_FAILED - b 2b -.previous -.section __ex_table,"a" - PPC_LONG_ALIGN - PPC_LONG 1b,3b -.previous - + lwepx r9, 0, r5 mtspr SPRN_EPLC, r3 - li r7, 0 - stw r7, TI_PREEMPT(r8) stw r9, VCPU_LAST_INST(r4) .endif -- cgit v0.10.2 From 9c0d4e0dcf69b1ab3a9f8debebd119f53964cb57 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:19 +0300 Subject: KVM: PPC: Book3e: Add TLBSEL/TSIZE defines for MAS0/1 Add missing defines MAS0_GET_TLBSEL() and MAS1_GET_TSIZE() for Book3E. 
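For illustration only (not part of the patch): the new extractors are simply the inverse of the existing MAS0_TLBSEL()/MAS1_TSIZE() encoders, and a hypothetical caller that has just issued a tlbsx could decode the result as sketched below. The "+ 10" page-size-shift conversion matches the later use of MAS1_GET_TSIZE() in e500_mmu_host.c in this series.

	/* Hedged sketch only; decode MAS0/MAS1 after a TLB search. */
	u32 mas0 = mfspr(SPRN_MAS0);
	u32 mas1 = mfspr(SPRN_MAS1);
	unsigned int tlbsel = MAS0_GET_TLBSEL(mas0);		/* which TLB array the entry lives in */
	unsigned int psize_shift = MAS1_GET_TSIZE(mas1) + 10;	/* page size is 1 << psize_shift bytes */
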
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 8d24f78..cd4f04a 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -40,9 +40,11 @@ /* MAS registers bit definitions */ -#define MAS0_TLBSEL_MASK 0x30000000 -#define MAS0_TLBSEL_SHIFT 28 -#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_TLBSEL_MASK 0x30000000 +#define MAS0_TLBSEL_SHIFT 28 +#define MAS0_TLBSEL(x) (((x) << MAS0_TLBSEL_SHIFT) & MAS0_TLBSEL_MASK) +#define MAS0_GET_TLBSEL(mas0) (((mas0) & MAS0_TLBSEL_MASK) >> \ + MAS0_TLBSEL_SHIFT) #define MAS0_ESEL_MASK 0x0FFF0000 #define MAS0_ESEL_SHIFT 16 #define MAS0_ESEL(x) (((x) << MAS0_ESEL_SHIFT) & MAS0_ESEL_MASK) @@ -60,6 +62,7 @@ #define MAS1_TSIZE_MASK 0x00000f80 #define MAS1_TSIZE_SHIFT 7 #define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK) +#define MAS1_GET_TSIZE(mas1) (((mas1) & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT) #define MAS2_EPN (~0xFFFUL) #define MAS2_X0 0x00000040 -- cgit v0.10.2 From 9a26af64d6bba72c9dfd62cc0cab0e79f8a66d7b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:20 +0300 Subject: KVM: PPC: Book3s: Remove kvmppc_read_inst() function In the context of replacing kvmppc_ld() function calls with a version of kvmppc_get_last_inst() which is allowed to fail, Alex Graf suggested this: "If we get EMULATE_AGAIN, we just have to make sure we go back into the guest. No need to inject an ISI into the guest - it'll do that all by itself. With an error returning kvmppc_get_last_inst we can just completely get rid of kvmppc_read_inst() and only use kvmppc_get_last_inst() instead." As an intermediate step, get rid of kvmppc_read_inst() and only use kvmppc_ld() instead. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e40765f..e76aec3 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -710,42 +710,6 @@ static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac) #endif } -static int kvmppc_read_inst(struct kvm_vcpu *vcpu) -{ - ulong srr0 = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int ret; - - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret == -ENOENT) { - ulong msr = kvmppc_get_msr(vcpu); - - msr = kvmppc_set_field(msr, 33, 33, 1); - msr = kvmppc_set_field(msr, 34, 36, 0); - msr = kvmppc_set_field(msr, 42, 47, 0); - kvmppc_set_msr_fast(vcpu, msr); - kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); - return EMULATE_AGAIN; - } - - return EMULATE_DONE; -} - -static int kvmppc_check_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr) -{ - - /* Need to do paired single emulation? 
*/ - if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE)) - return EMULATE_DONE; - - /* Read out the instruction */ - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) - /* Need to emulate */ - return EMULATE_FAIL; - - return EMULATE_AGAIN; -} - /* Handle external providers (FPU, Altivec, VSX) */ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr, ulong msr) @@ -1149,31 +1113,49 @@ program_interrupt: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; + int emul; + ulong pc; + u32 last_inst; + + if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { + /* Do paired single instruction emulation */ + pc = kvmppc_get_pc(vcpu); + last_inst = kvmppc_get_last_inst(vcpu); + emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, + false); + if (emul == EMULATE_DONE) + goto program_interrupt; + else + r = RESUME_GUEST; - switch (exit_nr) { - case BOOK3S_INTERRUPT_FP_UNAVAIL: ext_msr = MSR_FP; break; - case BOOK3S_INTERRUPT_ALTIVEC: ext_msr = MSR_VEC; break; - case BOOK3S_INTERRUPT_VSX: ext_msr = MSR_VSX; break; + break; } - switch (kvmppc_check_ext(vcpu, exit_nr)) { - case EMULATE_DONE: - /* everything ok - let's enable the ext */ - r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); + /* Enable external provider */ + switch (exit_nr) { + case BOOK3S_INTERRUPT_FP_UNAVAIL: + ext_msr = MSR_FP; break; - case EMULATE_FAIL: - /* we need to emulate this instruction */ - goto program_interrupt; + + case BOOK3S_INTERRUPT_ALTIVEC: + ext_msr = MSR_VEC; break; - default: - /* nothing to worry about - go again */ + + case BOOK3S_INTERRUPT_VSX: + ext_msr = MSR_VSX; break; } + + r = kvmppc_handle_ext(vcpu, exit_nr, ext_msr); break; } case BOOK3S_INTERRUPT_ALIGNMENT: - if (kvmppc_read_inst(vcpu) == EMULATE_DONE) { - u32 last_inst = kvmppc_get_last_inst(vcpu); + { + ulong pc = kvmppc_get_pc(vcpu); + u32 last_inst = kvmppc_get_last_inst(vcpu); + int emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, false); + + if (emul == EMULATE_DONE) { u32 dsisr; u64 dar; @@ -1187,6 +1169,7 @@ program_interrupt: } r = RESUME_GUEST; break; + } #ifdef CONFIG_PPC_BOOK3S_64 case BOOK3S_INTERRUPT_FAC_UNAVAIL: kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56); -- cgit v0.10.2 From 51f047261e717b74b226f837a16455994b61ae30 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:21 +0300 Subject: KVM: PPC: Allow kvmppc_get_last_inst() to fail On book3e, the guest's last instruction is read on the exit path using the dedicated load external pid (lwepx) instruction. This load operation may fail due to TLB eviction and execute-but-not-read entries. This patch lays down the path for an alternative solution to read the guest's last instruction, by allowing the kvmppc_get_last_inst() function to fail. Architecture-specific implementations of kvmppc_load_last_inst() may read the last guest instruction and instruct the emulation layer to re-execute the guest in case of failure. Make the kvmppc_get_last_inst() definition common between architectures. 
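As a sketch of the resulting calling convention (this mirrors the hunks below rather than introducing new API), every caller checks the return value and re-enters the guest when the fetch could not complete:

	u32 last_inst;
	int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst);
	if (emul != EMULATE_DONE) {
		/* Fetch failed; resume the guest so it can retry the instruction. */
		r = RESUME_GUEST;
		break;
	}
	/* last_inst now holds the byteswap-corrected guest instruction. */

The PAPR system-call path passes INST_SC instead, which makes the common loader fetch from pc - 4, because the sc instruction sets SRR0 to the following instruction.
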
Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 20fb6f2..a86ca65 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -276,32 +276,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return (kvmppc_get_msr(vcpu) & MSR_LE) != (MSR_KERNEL & MSR_LE); } -static inline u32 kvmppc_get_last_inst_internal(struct kvm_vcpu *vcpu, ulong pc) -{ - /* Load the instruction manually if it failed to do so in the - * exit path */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) - kvmppc_ld(vcpu, &pc, sizeof(u32), &vcpu->arch.last_inst, false); - - return kvmppc_need_byteswap(vcpu) ? swab32(vcpu->arch.last_inst) : - vcpu->arch.last_inst; -} - -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu)); -} - -/* - * Like kvmppc_get_last_inst(), but for fetching a sc instruction. - * Because the sc instruction sets SRR0 to point to the following - * instruction, we have to fetch from pc - 4. - */ -static inline u32 kvmppc_get_last_sc(struct kvm_vcpu *vcpu) -{ - return kvmppc_get_last_inst_internal(vcpu, kvmppc_get_pc(vcpu) - 4); -} - static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dar; diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index c7aed61..cbb1990 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -69,11 +69,6 @@ static inline bool kvmppc_need_byteswap(struct kvm_vcpu *vcpu) return false; } -static inline u32 kvmppc_get_last_inst(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.last_inst; -} - static inline void kvmppc_set_ctr(struct kvm_vcpu *vcpu, ulong val) { vcpu->arch.ctr = val; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 246fb9a7..e381363 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -47,6 +47,11 @@ enum emulation_result { EMULATE_EXIT_USER, /* emulation requires exit to user-space */ }; +enum instruction_type { + INST_GENERIC, + INST_SC, /* system call */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -62,6 +67,9 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, u64 val, unsigned int bytes, int is_default_endian); +extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst); + extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); @@ -234,6 +242,29 @@ struct kvmppc_ops { extern struct kvmppc_ops *kvmppc_hv_ops; extern struct kvmppc_ops *kvmppc_pr_ops; +static inline int kvmppc_get_last_inst(struct kvm_vcpu *vcpu, + enum instruction_type type, u32 *inst) +{ + int ret = EMULATE_DONE; + u32 fetched_inst; + + /* Load the instruction manually if it failed to do so in the + * exit path */ + if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) + ret = kvmppc_load_last_inst(vcpu, type, &vcpu->arch.last_inst); + + /* Write fetch_failed unswapped if the fetch failed */ + if (ret == EMULATE_DONE) + fetched_inst = kvmppc_need_byteswap(vcpu) ? 
+ swab32(vcpu->arch.last_inst) : + vcpu->arch.last_inst; + else + fetched_inst = vcpu->arch.last_inst; + + *inst = fetched_inst; + return ret; +} + static inline bool is_kvmppc_hv_enabled(struct kvm *kvm) { return kvm->arch.kvm_ops == kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 31facfc..37ca8a0 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -488,6 +488,23 @@ mmio: } EXPORT_SYMBOL_GPL(kvmppc_ld); +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *inst) +{ + ulong pc = kvmppc_get_pc(vcpu); + int r; + + if (type == INST_SC) + pc -= 4; + + r = kvmppc_ld(vcpu, &pc, sizeof(u32), inst, false); + if (r == EMULATE_DONE) + return r; + else + return EMULATE_AGAIN; +} +EXPORT_SYMBOL_GPL(kvmppc_load_last_inst); + int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { return 0; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2d154d9..fa944a3 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -530,21 +530,14 @@ static int instruction_is_store(unsigned int instr) static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long gpa, gva_t ea, int is_store) { - int ret; u32 last_inst; - unsigned long srr0 = kvmppc_get_pc(vcpu); - /* We try to load the last instruction. We don't let - * emulate_instruction do it as it doesn't check what - * kvmppc_ld returns. + /* * If we fail, we just return to the guest and try executing it again. */ - if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { - ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); - if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) - return RESUME_GUEST; - vcpu->arch.last_inst = last_inst; - } + if (kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst) != + EMULATE_DONE) + return RESUME_GUEST; /* * WARNING: We do not know for sure whether the instruction we just @@ -558,7 +551,7 @@ static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, * we just return and retry the instruction. */ - if (instruction_is_store(kvmppc_get_last_inst(vcpu)) != !!is_store) + if (instruction_is_store(last_inst) != !!is_store) return RESUME_GUEST; /* diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c index 6c8011f..bfb8035 100644 --- a/arch/powerpc/kvm/book3s_paired_singles.c +++ b/arch/powerpc/kvm/book3s_paired_singles.c @@ -639,26 +639,36 @@ static int kvmppc_ps_one_in(struct kvm_vcpu *vcpu, bool rc, int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); + u32 inst; enum emulation_result emulated = EMULATE_DONE; + int ax_rd, ax_ra, ax_rb, ax_rc; + short full_d; + u64 *fpr_d, *fpr_a, *fpr_b, *fpr_c; - int ax_rd = inst_get_field(inst, 6, 10); - int ax_ra = inst_get_field(inst, 11, 15); - int ax_rb = inst_get_field(inst, 16, 20); - int ax_rc = inst_get_field(inst, 21, 25); - short full_d = inst_get_field(inst, 16, 31); - - u64 *fpr_d = &VCPU_FPR(vcpu, ax_rd); - u64 *fpr_a = &VCPU_FPR(vcpu, ax_ra); - u64 *fpr_b = &VCPU_FPR(vcpu, ax_rb); - u64 *fpr_c = &VCPU_FPR(vcpu, ax_rc); - - bool rcomp = (inst & 1) ? 
true : false; - u32 cr = kvmppc_get_cr(vcpu); + bool rcomp; + u32 cr; #ifdef DEBUG int i; #endif + emulated = kvmppc_get_last_inst(vcpu, INST_GENERIC, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ax_rd = inst_get_field(inst, 6, 10); + ax_ra = inst_get_field(inst, 11, 15); + ax_rb = inst_get_field(inst, 16, 20); + ax_rc = inst_get_field(inst, 21, 25); + full_d = inst_get_field(inst, 16, 31); + + fpr_d = &VCPU_FPR(vcpu, ax_rd); + fpr_a = &VCPU_FPR(vcpu, ax_ra); + fpr_b = &VCPU_FPR(vcpu, ax_rb); + fpr_c = &VCPU_FPR(vcpu, ax_rc); + + rcomp = (inst & 1) ? true : false; + cr = kvmppc_get_cr(vcpu); + if (!kvmppc_inst_is_paired_single(vcpu, inst)) return EMULATE_FAIL; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e76aec3..b18f2d4 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1018,15 +1018,24 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, { enum emulation_result er; ulong flags; + u32 last_inst; + int emul; program_interrupt: flags = vcpu->arch.shadow_srr1 & 0x1f0000ull; + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul != EMULATE_DONE) { + r = RESUME_GUEST; + break; + } + if (kvmppc_get_msr(vcpu) & MSR_PR) { #ifdef EXIT_DEBUG - printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + pr_info("Userspace triggered 0x700 exception at\n 0x%lx (0x%x)\n", + kvmppc_get_pc(vcpu), last_inst); #endif - if ((kvmppc_get_last_inst(vcpu) & 0xff0007ff) != + if ((last_inst & 0xff0007ff) != (INS_DCBZ & 0xfffffff7)) { kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; @@ -1045,7 +1054,7 @@ program_interrupt: break; case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", - __func__, kvmppc_get_pc(vcpu), kvmppc_get_last_inst(vcpu)); + __func__, kvmppc_get_pc(vcpu), last_inst); kvmppc_core_queue_program(vcpu, flags); r = RESUME_GUEST; break; @@ -1062,8 +1071,23 @@ program_interrupt: break; } case BOOK3S_INTERRUPT_SYSCALL: + { + u32 last_sc; + int emul; + + /* Get last sc for papr */ + if (vcpu->arch.papr_enabled) { + /* The sc instruction points SRR0 to the next inst */ + emul = kvmppc_get_last_inst(vcpu, INST_SC, &last_sc); + if (emul != EMULATE_DONE) { + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) - 4); + r = RESUME_GUEST; + break; + } + } + if (vcpu->arch.papr_enabled && - (kvmppc_get_last_sc(vcpu) == 0x44000022) && + (last_sc == 0x44000022) && !(kvmppc_get_msr(vcpu) & MSR_PR)) { /* SC 1 papr hypercalls */ ulong cmd = kvmppc_get_gpr(vcpu, 3); @@ -1108,21 +1132,19 @@ program_interrupt: r = RESUME_GUEST; } break; + } case BOOK3S_INTERRUPT_FP_UNAVAIL: case BOOK3S_INTERRUPT_ALTIVEC: case BOOK3S_INTERRUPT_VSX: { int ext_msr = 0; int emul; - ulong pc; u32 last_inst; if (vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE) { /* Do paired single instruction emulation */ - pc = kvmppc_get_pc(vcpu); - last_inst = kvmppc_get_last_inst(vcpu); - emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, - false); + emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, + &last_inst); if (emul == EMULATE_DONE) goto program_interrupt; else @@ -1151,9 +1173,8 @@ program_interrupt: } case BOOK3S_INTERRUPT_ALIGNMENT: { - ulong pc = kvmppc_get_pc(vcpu); - u32 last_inst = kvmppc_get_last_inst(vcpu); - int emul = kvmppc_ld(vcpu, &pc, sizeof(u32), &last_inst, false); + u32 last_inst; + int emul = kvmppc_get_last_inst(vcpu, INST_GENERIC, &last_inst); + if (emul == EMULATE_DONE) { u32 dsisr; diff --git a/arch/powerpc/kvm/booke.c 
b/arch/powerpc/kvm/booke.c index a06ef6b..50df5e3 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -702,6 +702,9 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) * they were actually modified by emulation. */ return RESUME_GUEST_NV; + case EMULATE_AGAIN: + return RESUME_GUEST; + case EMULATE_DO_DCR: run->exit_reason = KVM_EXIT_DCR; return RESUME_HOST; diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 79677d7..4385c14 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -610,6 +610,12 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + return EMULATE_AGAIN; +} + /************* MMU Notifiers *************/ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index da86d9b..c5c64b6 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -224,19 +224,25 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) * from opcode tables in the future. */ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { - u32 inst = kvmppc_get_last_inst(vcpu); - int ra = get_ra(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int sprn = get_sprn(inst); - enum emulation_result emulated = EMULATE_DONE; + u32 inst; + int ra, rs, rt, sprn; + enum emulation_result emulated; int advance = 1; /* this default type might be overwritten by subcategories */ kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + sprn = get_sprn(inst); + switch (get_op(inst)) { case OP_TRAP: #ifdef CONFIG_PPC_BOOK3S diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index fe0257a..cfa6cfa 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -280,6 +280,9 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) * actually modified. */ r = RESUME_GUEST_NV; break; + case EMULATE_AGAIN: + r = RESUME_GUEST; + break; case EMULATE_DO_MMIO: run->exit_reason = KVM_EXIT_MMIO; /* We must reload nonvolatiles because "update" load/store @@ -289,11 +292,15 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST_NV; break; case EMULATE_FAIL: + { + u32 last_inst; + + kvmppc_get_last_inst(vcpu, false, &last_inst); /* XXX Deliver Program interrupt to guest. */ - printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, - kvmppc_get_last_inst(vcpu)); + pr_emerg("%s: emulation failed (%08x)\n", __func__, last_inst); r = RESUME_HOST; break; + } default: WARN_ON(1); r = RESUME_GUEST; -- cgit v0.10.2 From f5250471b2d6ad27d536cb34ce39d76b91b2b36b Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 23 Jul 2014 19:06:22 +0300 Subject: KVM: PPC: Bookehv: Get vcpu's last instruction for emulation On book3e, KVM uses the dedicated load external pid (lwepx) instruction to read the guest's last instruction on the exit path. lwepx exceptions (DTLB_MISS, DSI and LRAT), generated by loading a guest address, need to be handled by KVM. These exceptions are generated in a substituted guest translation context (EPLC[EGS] = 1) from host context (MSR[GS] = 0). 
Currently, KVM hooks only interrupts generated from guest context (MSR[GS] = 1), doing minimal checks on the fast path to avoid host performance degradation. lwepx exceptions originate from host state (MSR[GS] = 0), which implies additional checks in the DO_KVM macro (besides the current MSR[GS] = 1) by looking at the Exception Syndrome Register (ESR[EPID]) and the External PID Load Context Register (EPLC[EGS]). Doing this on each Data TLB miss exception is obviously too intrusive for the host. Instead, read the guest's last instruction in kvmppc_load_last_inst() by searching for the physical address and kmapping it. This addresses the TODO for TLB eviction and execute-but-not-read entries, and allows us to get rid of lwepx until we are able to handle failures. A simple stress benchmark shows a 1% sys performance degradation compared with the previous approach (lwepx without failure handling): time for i in `seq 1 10000`; do /bin/echo > /dev/null; done real 0m 8.85s user 0m 4.34s sys 0m 4.48s vs real 0m 8.84s user 0m 4.36s sys 0m 4.44s A solution that uses lwepx and handles its exceptions in KVM would be to temporarily hijack the interrupt vector from the host. This imposes additional synchronizations for cores like FSL e6500 that share host IVOR registers between hardware threads. This optimized solution can later be developed on top of this patch. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 50df5e3..97bcde2 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -819,6 +819,28 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, } } +static int kvmppc_resume_inst_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + enum emulation_result emulated, u32 last_inst) +{ + switch (emulated) { + case EMULATE_AGAIN: + return RESUME_GUEST; + + case EMULATE_FAIL: + pr_debug("%s: load instruction from guest address %lx failed\n", + __func__, vcpu->arch.pc); + /* For debugging, encode the failing instruction and + * report it to userspace. 
*/ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= last_inst; + kvmppc_core_queue_program(vcpu, ESR_PIL); + return RESUME_HOST; + + default: + BUG(); + } +} + /** * kvmppc_handle_exit * @@ -830,6 +852,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, int r = RESUME_HOST; int s; int idx; + u32 last_inst = KVM_INST_FETCH_FAILED; + enum emulation_result emulated = EMULATE_DONE; /* update before a new last_exit_type is rewritten */ kvmppc_update_timing_stats(vcpu); @@ -837,6 +861,20 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, /* restart interrupts if they were meant for the host */ kvmppc_restart_interrupt(vcpu, exit_nr); + /* + * get last instruction before being preempted + * TODO: for e6500 check also BOOKE_INTERRUPT_LRAT_ERROR & ESR_DATA + */ + switch (exit_nr) { + case BOOKE_INTERRUPT_DATA_STORAGE: + case BOOKE_INTERRUPT_DTLB_MISS: + case BOOKE_INTERRUPT_HV_PRIV: + emulated = kvmppc_get_last_inst(vcpu, false, &last_inst); + break; + default: + break; + } + local_irq_enable(); trace_kvm_exit(exit_nr, vcpu); @@ -845,6 +883,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, run->exit_reason = KVM_EXIT_UNKNOWN; run->ready_for_interrupt_injection = 1; + if (emulated != EMULATE_DONE) { + r = kvmppc_resume_inst_load(run, vcpu, emulated, last_inst); + goto out; + } + switch (exit_nr) { case BOOKE_INTERRUPT_MACHINE_CHECK: printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); @@ -1134,6 +1177,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, BUG(); } +out: /* * To avoid clobbering exit_reason, only check for signals if we * aren't already exiting to userspace for some other reason. diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 6ff4480..e000b39 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -121,38 +121,14 @@ 1: .if \flags & NEED_EMU - /* - * This assumes you have external PID support. - * To support a bookehv CPU without external PID, you'll - * need to look up the TLB entry and create a temporary mapping. - * - * FIXME: we don't currently handle if the lwepx faults. PR-mode - * booke doesn't handle it either. Since Linux doesn't use - * broadcast tlbivax anymore, the only way this should happen is - * if the guest maps its memory execute-but-not-read, or if we - * somehow take a TLB miss in the middle of this entry code and - * evict the relevant entry. On e500mc, all kernel lowmem is - * bolted into TLB1 large page mappings, and we don't use - * broadcast invalidates, so we should not take a TLB miss here. - * - * Later we'll need to deal with faults here. Disallowing guest - * mappings that are execute-but-not-read could be an option on - * e500mc, but not on chips with an LRAT if it is used. 
- */ - - mfspr r3, SPRN_EPLC /* will already have correct ELPID and EGS */ PPC_STL r15, VCPU_GPR(R15)(r4) PPC_STL r16, VCPU_GPR(R16)(r4) PPC_STL r17, VCPU_GPR(R17)(r4) PPC_STL r18, VCPU_GPR(R18)(r4) PPC_STL r19, VCPU_GPR(R19)(r4) - mr r8, r3 PPC_STL r20, VCPU_GPR(R20)(r4) - rlwimi r8, r6, EPC_EAS_SHIFT - MSR_IR_LG, EPC_EAS PPC_STL r21, VCPU_GPR(R21)(r4) - rlwimi r8, r6, EPC_EPR_SHIFT - MSR_PR_LG, EPC_EPR PPC_STL r22, VCPU_GPR(R22)(r4) - rlwimi r8, r10, EPC_EPID_SHIFT, EPC_EPID PPC_STL r23, VCPU_GPR(R23)(r4) PPC_STL r24, VCPU_GPR(R24)(r4) PPC_STL r25, VCPU_GPR(R25)(r4) @@ -162,10 +138,15 @@ PPC_STL r29, VCPU_GPR(R29)(r4) PPC_STL r30, VCPU_GPR(R30)(r4) PPC_STL r31, VCPU_GPR(R31)(r4) - mtspr SPRN_EPLC, r8 - isync - lwepx r9, 0, r5 - mtspr SPRN_EPLC, r3 + + /* + * We don't use external PID support. lwepx faults would need to be + * handled by KVM and this implies additional code in DO_KVM (for + * DTLB_MISS, DSI and LRAT) to check ESR[EPID] and EPLC[EGS] which + * is too intrusive for the host. Get last instruction in + * kvmppc_get_last_inst(). + */ + li r9, KVM_INST_FETCH_FAILED stw r9, VCPU_LAST_INST(r4) .endif diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4385c14..4150826 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -610,11 +610,103 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, } } +#ifdef CONFIG_KVM_BOOKE_HV +int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, + u32 *instr) +{ + gva_t geaddr; + hpa_t addr; + hfn_t pfn; + hva_t eaddr; + u32 mas1, mas2, mas3; + u64 mas7_mas3; + struct page *page; + unsigned int addr_space, psize_shift; + bool pr; + unsigned long flags; + + /* Search TLB for guest pc to get the real address */ + geaddr = kvmppc_get_pc(vcpu); + + addr_space = (vcpu->arch.shared->msr & MSR_IS) >> MSR_IR_LG; + + local_irq_save(flags); + mtspr(SPRN_MAS6, (vcpu->arch.pid << MAS6_SPID_SHIFT) | addr_space); + mtspr(SPRN_MAS5, MAS5_SGS | vcpu->kvm->arch.lpid); + asm volatile("tlbsx 0, %[geaddr]\n" : : + [geaddr] "r" (geaddr)); + mtspr(SPRN_MAS5, 0); + mtspr(SPRN_MAS8, 0); + mas1 = mfspr(SPRN_MAS1); + mas2 = mfspr(SPRN_MAS2); + mas3 = mfspr(SPRN_MAS3); +#ifdef CONFIG_64BIT + mas7_mas3 = mfspr(SPRN_MAS7_MAS3); +#else + mas7_mas3 = ((u64)mfspr(SPRN_MAS7) << 32) | mas3; +#endif + local_irq_restore(flags); + + /* + * If the TLB entry for guest pc was evicted, return to the guest. + * There are high chances to find a valid TLB entry next time. + */ + if (!(mas1 & MAS1_VALID)) + return EMULATE_AGAIN; + + /* + * Another thread may rewrite the TLB entry in parallel, don't + * execute from the address if the execute permission is not set + */ + pr = vcpu->arch.shared->msr & MSR_PR; + if (unlikely((pr && !(mas3 & MAS3_UX)) || + (!pr && !(mas3 & MAS3_SX)))) { + pr_err_ratelimited( + "%s: Instruction emulation from guest address %08lx without execute permission\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* + * The real address will be mapped by a cacheable, memory coherent, + * write-back page. Check for mismatches when LRAT is used. 
+ */ + if (has_feature(vcpu, VCPU_FTR_MMU_V2) && + unlikely((mas2 & MAS2_I) || (mas2 & MAS2_W) || !(mas2 & MAS2_M))) { + pr_err_ratelimited( + "%s: Instruction emulation from guest address %08lx mismatches storage attributes\n", + __func__, geaddr); + return EMULATE_AGAIN; + } + + /* Get pfn */ + psize_shift = MAS1_GET_TSIZE(mas1) + 10; + addr = (mas7_mas3 & (~0ULL << psize_shift)) | + (geaddr & ((1ULL << psize_shift) - 1ULL)); + pfn = addr >> PAGE_SHIFT; + + /* Guard against emulation from devices area */ + if (unlikely(!page_is_ram(pfn))) { + pr_err_ratelimited("%s: Instruction emulation from non-RAM host address %08llx is not supported\n", + __func__, addr); + return EMULATE_AGAIN; + } + + /* Map a page and get guest's instruction */ + page = pfn_to_page(pfn); + eaddr = (unsigned long)kmap_atomic(page); + *instr = *(u32 *)(eaddr | (unsigned long)(addr & ~PAGE_MASK)); + kunmap_atomic((u32 *)eaddr); + + return EMULATE_DONE; +} +#else int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *instr) { return EMULATE_AGAIN; } +#endif /************* MMU Notifiers *************/ -- cgit v0.10.2 From 99e99d19a86dc596703ed79dcecf9ca6b32a6a8a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Mon, 21 Jul 2014 11:23:26 +0530 Subject: kvm: ppc: bookehv: Save restore SPRN_SPRG9 on guest entry exit SPRN_SPRG9 is used by the debug interrupt handler, so this is required for debug support. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 855ba4d..562f685 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -587,6 +587,7 @@ struct kvm_vcpu_arch { u32 mmucfg; u32 eptcfg; u32 epr; + u64 sprg9; u32 pwrmgtcr0; u32 crit_save; /* guest debug registers*/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 17ffcb4..ab9ae04 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -668,6 +668,7 @@ int main(void) DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_SPRG9, offsetof(struct kvm_vcpu, arch.sprg9)); DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e000b39..b4f8fba 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -398,6 +398,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT PPC_LL r3, PACA_SPRG_VDSO(r13) #endif + mfspr r5, SPRN_SPRG9 PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) @@ -405,6 +406,7 @@ _GLOBAL(kvmppc_resume_host) #ifdef CONFIG_64BIT mtspr SPRN_SPRG_VDSO_WRITE, r3 #endif + PPC_STD(r5, VCPU_SPRG9, r4) PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 PPC_STD(r9, VCPU_SHARED_SPRG7, r11) @@ -639,7 +641,9 @@ lightweight_exit: mtspr SPRN_SPRG5W, r6 PPC_LD(r8, VCPU_SHARED_SPRG7, r11) mtspr SPRN_SPRG6W, r7 + PPC_LD(r5, VCPU_SPRG9, r4) mtspr SPRN_SPRG7W, r8 + mtspr SPRN_SPRG9, r5 /* Load some guest volatiles. 
*/ PPC_LL r3, VCPU_LR(r4) -- cgit v0.10.2 From 28d2f421bcff6781b80decac59da414f86f35c2a Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Jul 2014 11:21:08 +0530 Subject: KVM: PPC: Booke-hv: Add one reg interface for SPRG9 We now support SPRG9 for the guest, so also add a one-reg interface for it. Note: changes are in bookehv code only, as we do not have SPRG9 on booke-pr. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 2bc4a94..0e56d9e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -555,6 +555,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8) #define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9) +#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index 690499d..164bad2 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -267,14 +267,32 @@ static int kvmppc_core_set_sregs_e500mc(struct kvm_vcpu *vcpu, static int kvmppc_get_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + *val = get_reg_val(id, vcpu->arch.sprg9); + break; + default: + r = kvmppc_get_one_reg_e500_tlb(vcpu, id, val); + } + return r; } static int kvmppc_set_one_reg_e500mc(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *val) { - int r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + int r = 0; + + switch (id) { + case KVM_REG_PPC_SPRG9: + vcpu->arch.sprg9 = set_reg_val(id, *val); + break; + default: + r = kvmppc_set_one_reg_e500_tlb(vcpu, id, val); + } + return r; } -- cgit v0.10.2 From 8c95ead6039d46d2d5c375d3cadac8708121fbe4 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Fri, 25 Jul 2014 11:32:43 +0530 Subject: KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer Scott Wood pointed out that we are no longer using SPRG1 for the vcpu pointer, but using SPRN_SPRG_THREAD <=> SPRG3 (thread->vcpu). So this comment is not valid now. Note: SPRN_SPRG3R is not supported (we do not see any need for it as of now), and if we want to support it in the future then we will have to shift to using SPRG1 for the VCPU pointer. Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c8f3381..0ef17ad 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -944,9 +944,6 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors -- cgit v0.10.2 From b2677b8dd8de0dc1496ede4da09b9dfd59f15cea Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 25 Jul 2014 10:38:59 +0200 Subject: KVM: PPC: Remove 440 support The 440 target hasn't been functioning properly for a few releases, and recently I was the only one fixing a very serious bug there, which indicates to me that nobody else has been using it. Furthermore, KVM on 440 is slow to the point of being unusable. We don't have to carry along completely unused code. Remove 440 and give us one less thing to worry about. 
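(As a hypothetical usage sketch, returning to the SPRG9 one-reg interface added earlier in this series: userspace can reach the new register through the generic one-reg ioctl roughly as below. vcpu_fd is an assumed vcpu file descriptor, not something these patches define; the snippet needs <linux/kvm.h>, <sys/ioctl.h>, <stdint.h> and <stdio.h>.)

	struct kvm_one_reg reg;
	u64 sprg9;

	reg.id = KVM_REG_PPC_SPRG9;		/* id defined in uapi/asm/kvm.h above */
	reg.addr = (uintptr_t)&sprg9;		/* kernel copies the register value here */
	if (ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)	/* vcpu_fd is hypothetical */
		perror("KVM_GET_ONE_REG");
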
Signed-off-by: Alexander Graf diff --git a/Documentation/powerpc/00-INDEX b/Documentation/powerpc/00-INDEX index 6db73df..a68784d 100644 --- a/Documentation/powerpc/00-INDEX +++ b/Documentation/powerpc/00-INDEX @@ -17,8 +17,6 @@ firmware-assisted-dump.txt - Documentation on the firmware assisted dump mechanism "fadump". hvcs.txt - IBM "Hypervisor Virtual Console Server" Installation Guide -kvm_440.txt - - Various notes on the implementation of KVM for PowerPC 440. mpc52xx.txt - Linux 2.6.x on MPC52xx family pmu-ebb.txt diff --git a/Documentation/powerpc/kvm_440.txt b/Documentation/powerpc/kvm_440.txt deleted file mode 100644 index c02a003..0000000 --- a/Documentation/powerpc/kvm_440.txt +++ /dev/null @@ -1,41 +0,0 @@ -Hollis Blanchard -15 Apr 2008 - -Various notes on the implementation of KVM for PowerPC 440: - -To enforce isolation, host userspace, guest kernel, and guest userspace all -run at user privilege level. Only the host kernel runs in supervisor mode. -Executing privileged instructions in the guest traps into KVM (in the host -kernel), where we decode and emulate them. Through this technique, unmodified -440 Linux kernels can be run (slowly) as guests. Future performance work will -focus on reducing the overhead and frequency of these traps. - -The usual code flow is started from userspace invoking an "run" ioctl, which -causes KVM to switch into guest context. We use IVPR to hijack the host -interrupt vectors while running the guest, which allows us to direct all -interrupts to kvmppc_handle_interrupt(). At this point, we could either -- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or -- let the host interrupt handler run (e.g. when the decrementer fires), or -- return to host userspace (e.g. when the guest performs device MMIO) - -Address spaces: We take advantage of the fact that Linux doesn't use the AS=1 -address space (in host or guest), which gives us virtual address space to use -for guest mappings. While the guest is running, the host kernel remains mapped -in AS=0, but the guest can only use AS=1 mappings. - -TLB entries: The TLB entries covering the host linear mapping remain -present while running the guest. This reduces the overhead of lightweight -exits, which are handled by KVM running in the host kernel. We keep three -copies of the TLB: - - guest TLB: contents of the TLB as the guest sees it - - shadow TLB: the TLB that is actually in hardware while guest is running - - host TLB: to restore TLB state when context switching guest -> host -When a TLB miss occurs because a mapping was not present in the shadow TLB, -but was present in the guest TLB, KVM handles the fault without invoking the -guest. Large guest pages are backed by multiple 4KB shadow pages through this -mechanism. - -IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network -and block IO, so those drivers must be enabled in the guest. It's possible -that some qemu device emulation (e.g. e1000 or rtl8139) may also work with -little effort. diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 790352f..93500f6 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -202,9 +202,7 @@ config PPC_EARLY_DEBUG_BEAT config PPC_EARLY_DEBUG_44x bool "Early serial debugging for IBM/AMCC 44x CPUs" - # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water - # mark, which doesn't work with current 440 KVM. 
- depends on 44x && !KVM + depends on 44x help Select this to enable early debugging for IBM 44x chips via the inbuilt serial port. If you enable this, ensure you set diff --git a/arch/powerpc/configs/ppc44x_defconfig b/arch/powerpc/configs/ppc44x_defconfig index ccf66b9..924e10d 100644 --- a/arch/powerpc/configs/ppc44x_defconfig +++ b/arch/powerpc/configs/ppc44x_defconfig @@ -127,4 +127,3 @@ CONFIG_CRYPTO_PCBC=y # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set CONFIG_VIRTUALIZATION=y -CONFIG_KVM_440=y diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h deleted file mode 100644 index a0e5761..0000000 --- a/arch/powerpc/include/asm/kvm_44x.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#ifndef __ASM_44X_H__ -#define __ASM_44X_H__ - -#include - -#define PPC44x_TLB_SIZE 64 - -/* If the guest is expecting it, this can be as large as we like; we'd just - * need to find some way of advertising it. */ -#define KVM44x_GUEST_TLB_SIZE 64 - -struct kvmppc_44x_tlbe { - u32 tid; /* Only the low 8 bits are used. */ - u32 word0; - u32 word1; - u32 word2; -}; - -struct kvmppc_44x_shadow_ref { - struct page *page; - u16 gtlb_index; - u8 writeable; - u8 tid; -}; - -struct kvmppc_vcpu_44x { - /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - - /* References to guest pages in the hardware TLB. */ - struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; - - /* State of the shadow TLB at guest context switch time. */ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; - - struct kvm_vcpu vcpu; -}; - -static inline struct kvmppc_vcpu_44x *to_44x(struct kvm_vcpu *vcpu) -{ - return container_of(vcpu, struct kvmppc_vcpu_44x, vcpu); -} - -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); - -#endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h index 3f3e530..b8901c4 100644 --- a/arch/powerpc/include/asm/kvm_asm.h +++ b/arch/powerpc/include/asm/kvm_asm.h @@ -33,7 +33,6 @@ /* IVPR must be 64KiB-aligned. 
*/ #define VCPU_SIZE_ORDER 4 #define VCPU_SIZE_LOG (VCPU_SIZE_ORDER + 12) -#define VCPU_TLB_PGSZ PPC44x_TLB_64K #define VCPU_SIZE_BYTES (1< #define KVM_ARCH_WANT_MMU_NOTIFIER @@ -62,8 +61,6 @@ extern int kvm_age_hva(struct kvm *kvm, unsigned long hva); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); -#endif - #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 #define HPTEG_HASH_BITS_PTE_LONG 12 diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c deleted file mode 100644 index 9cb4b0a..0000000 --- a/arch/powerpc/kvm/44x.c +++ /dev/null @@ -1,237 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "44x_tlb.h" -#include "booke.h" - -static void kvmppc_core_vcpu_load_44x(struct kvm_vcpu *vcpu, int cpu) -{ - kvmppc_booke_vcpu_load(vcpu, cpu); - kvmppc_44x_tlb_load(vcpu); -} - -static void kvmppc_core_vcpu_put_44x(struct kvm_vcpu *vcpu) -{ - kvmppc_44x_tlb_put(vcpu); - kvmppc_booke_vcpu_put(vcpu); -} - -int kvmppc_core_check_processor_compat(void) -{ - int r; - - if (strncmp(cur_cpu_spec->platform, "ppc440", 6) == 0) - r = 0; - else - r = -ENOTSUPP; - - return r; -} - -int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[0]; - int i; - - tlbe->tid = 0; - tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; - tlbe->word1 = 0; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; - - tlbe++; - tlbe->tid = 0; - tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; - tlbe->word1 = 0xef600000; - tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR - | PPC44x_TLB_I | PPC44x_TLB_G; - - /* Since the guest can directly access the timebase, it must know the - * real timebase frequency. Accordingly, it must see the state of - * CCR1[TCS]. */ - /* XXX CCR1 doesn't exist on all 440 SoCs. */ - vcpu->arch.ccr1 = mfspr(SPRN_CCR1); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) - vcpu_44x->shadow_refs[i].gtlb_index = -1; - - vcpu->arch.cpu_type = KVM_CPU_440; - vcpu->arch.pvr = mfspr(SPRN_PVR); - - return 0; -} - -/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ -int kvmppc_core_vcpu_translate(struct kvm_vcpu *vcpu, - struct kvm_translation *tr) -{ - int index; - gva_t eaddr; - u8 pid; - u8 as; - - eaddr = tr->linear_address; - pid = (tr->linear_address >> 32) & 0xff; - as = (tr->linear_address >> 40) & 0x1; - - index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); - if (index == -1) { - tr->valid = 0; - return 0; - } - - tr->physical_address = kvmppc_mmu_xlate(vcpu, index, eaddr); - /* XXX what does "writeable" and "usermode" even mean? 
*/ - tr->valid = 1; - - return 0; -} - -static int kvmppc_core_get_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_get_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_core_set_sregs_44x(struct kvm_vcpu *vcpu, - struct kvm_sregs *sregs) -{ - return kvmppc_set_sregs_ivor(vcpu, sregs); -} - -static int kvmppc_get_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static int kvmppc_set_one_reg_44x(struct kvm_vcpu *vcpu, u64 id, - union kvmppc_one_reg *val) -{ - return -EINVAL; -} - -static struct kvm_vcpu *kvmppc_core_vcpu_create_44x(struct kvm *kvm, - unsigned int id) -{ - struct kvmppc_vcpu_44x *vcpu_44x; - struct kvm_vcpu *vcpu; - int err; - - vcpu_44x = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); - if (!vcpu_44x) { - err = -ENOMEM; - goto out; - } - - vcpu = &vcpu_44x->vcpu; - err = kvm_vcpu_init(vcpu, kvm, id); - if (err) - goto free_vcpu; - - vcpu->arch.shared = (void*)__get_free_page(GFP_KERNEL|__GFP_ZERO); - if (!vcpu->arch.shared) - goto uninit_vcpu; - - return vcpu; - -uninit_vcpu: - kvm_vcpu_uninit(vcpu); -free_vcpu: - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -out: - return ERR_PTR(err); -} - -static void kvmppc_core_vcpu_free_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - - free_page((unsigned long)vcpu->arch.shared); - kvm_vcpu_uninit(vcpu); - kmem_cache_free(kvm_vcpu_cache, vcpu_44x); -} - -static int kvmppc_core_init_vm_44x(struct kvm *kvm) -{ - return 0; -} - -static void kvmppc_core_destroy_vm_44x(struct kvm *kvm) -{ -} - -static struct kvmppc_ops kvm_ops_44x = { - .get_sregs = kvmppc_core_get_sregs_44x, - .set_sregs = kvmppc_core_set_sregs_44x, - .get_one_reg = kvmppc_get_one_reg_44x, - .set_one_reg = kvmppc_set_one_reg_44x, - .vcpu_load = kvmppc_core_vcpu_load_44x, - .vcpu_put = kvmppc_core_vcpu_put_44x, - .vcpu_create = kvmppc_core_vcpu_create_44x, - .vcpu_free = kvmppc_core_vcpu_free_44x, - .mmu_destroy = kvmppc_mmu_destroy_44x, - .init_vm = kvmppc_core_init_vm_44x, - .destroy_vm = kvmppc_core_destroy_vm_44x, - .emulate_op = kvmppc_core_emulate_op_44x, - .emulate_mtspr = kvmppc_core_emulate_mtspr_44x, - .emulate_mfspr = kvmppc_core_emulate_mfspr_44x, -}; - -static int __init kvmppc_44x_init(void) -{ - int r; - - r = kvmppc_booke_init(); - if (r) - goto err_out; - - r = kvm_init(NULL, sizeof(struct kvmppc_vcpu_44x), 0, THIS_MODULE); - if (r) - goto err_out; - kvm_ops_44x.owner = THIS_MODULE; - kvmppc_pr_ops = &kvm_ops_44x; - -err_out: - return r; -} - -static void __exit kvmppc_44x_exit(void) -{ - kvmppc_pr_ops = NULL; - kvmppc_booke_exit(); -} - -module_init(kvmppc_44x_init); -module_exit(kvmppc_44x_exit); -MODULE_ALIAS_MISCDEV(KVM_MINOR); -MODULE_ALIAS("devname:kvm"); diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c deleted file mode 100644 index 92c9ab4..0000000 --- a/arch/powerpc/kvm/44x_emulate.c +++ /dev/null @@ -1,194 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2008 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include -#include "timing.h" - -#include "booke.h" -#include "44x_tlb.h" - -#define XOP_MFDCRX 259 -#define XOP_MFDCR 323 -#define XOP_MTDCRX 387 -#define XOP_MTDCR 451 -#define XOP_TLBSX 914 -#define XOP_ICCCI 966 -#define XOP_TLBWE 978 - -static int emulate_mtdcr(struct kvm_vcpu *vcpu, int rs, int dcrn) -{ - /* emulate some access in kernel */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - vcpu->arch.cpr0_cfgaddr = kvmppc_get_gpr(vcpu, rs); - return EMULATE_DONE; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = kvmppc_get_gpr(vcpu, rs); - vcpu->run->dcr.is_write = 1; - vcpu->arch.dcr_is_write = 1; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } -} - -static int emulate_mfdcr(struct kvm_vcpu *vcpu, int rt, int dcrn) -{ - /* The guest may access CPR0 registers to determine the timebase - * frequency, and it must know the real host frequency because it - * can directly access the timebase registers. - * - * It would be possible to emulate those accesses in userspace, - * but userspace can really only figure out the end frequency. - * We could decompose that into the factors that compute it, but - * that's tricky math, and it's easier to just report the real - * CPR0 values. - */ - switch (dcrn) { - case DCRN_CPR0_CONFIG_ADDR: - kvmppc_set_gpr(vcpu, rt, vcpu->arch.cpr0_cfgaddr); - break; - case DCRN_CPR0_CONFIG_DATA: - local_irq_disable(); - mtdcr(DCRN_CPR0_CONFIG_ADDR, - vcpu->arch.cpr0_cfgaddr); - kvmppc_set_gpr(vcpu, rt, - mfdcr(DCRN_CPR0_CONFIG_DATA)); - local_irq_enable(); - break; - default: - vcpu->run->dcr.dcrn = dcrn; - vcpu->run->dcr.data = 0; - vcpu->run->dcr.is_write = 0; - vcpu->arch.dcr_is_write = 0; - vcpu->arch.io_gpr = rt; - vcpu->arch.dcr_needed = 1; - kvmppc_account_exit(vcpu, DCR_EXITS); - return EMULATE_DO_DCR; - } - - return EMULATE_DONE; -} - -int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance) -{ - int emulated = EMULATE_DONE; - int dcrn = get_dcrn(inst); - int ra = get_ra(inst); - int rb = get_rb(inst); - int rc = get_rc(inst); - int rs = get_rs(inst); - int rt = get_rt(inst); - int ws = get_ws(inst); - - switch (get_op(inst)) { - case 31: - switch (get_xop(inst)) { - - case XOP_MFDCR: - emulated = emulate_mfdcr(vcpu, rt, dcrn); - break; - - case XOP_MFDCRX: - emulated = emulate_mfdcr(vcpu, rt, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_MTDCR: - emulated = emulate_mtdcr(vcpu, rs, dcrn); - break; - - case XOP_MTDCRX: - emulated = emulate_mtdcr(vcpu, rs, - kvmppc_get_gpr(vcpu, ra)); - break; - - case XOP_TLBWE: - emulated = kvmppc_44x_emul_tlbwe(vcpu, ra, rs, ws); - break; - - case XOP_TLBSX: - emulated = kvmppc_44x_emul_tlbsx(vcpu, rt, ra, rb, rc); - break; - - case XOP_ICCCI: - break; - - default: - emulated = EMULATE_FAIL; - } - - break; - - default: - emulated = EMULATE_FAIL; - } - - if (emulated == EMULATE_FAIL) - emulated = kvmppc_booke_emulate_op(run, vcpu, inst, advance); - - return emulated; -} - -int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - kvmppc_set_pid(vcpu, spr_val); break; - case 
SPRN_MMUCR: - vcpu->arch.mmucr = spr_val; break; - case SPRN_CCR0: - vcpu->arch.ccr0 = spr_val; break; - case SPRN_CCR1: - vcpu->arch.ccr1 = spr_val; break; - default: - emulated = kvmppc_booke_emulate_mtspr(vcpu, sprn, spr_val); - } - - return emulated; -} - -int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) -{ - int emulated = EMULATE_DONE; - - switch (sprn) { - case SPRN_PID: - *spr_val = vcpu->arch.pid; break; - case SPRN_MMUCR: - *spr_val = vcpu->arch.mmucr; break; - case SPRN_CCR0: - *spr_val = vcpu->arch.ccr0; break; - case SPRN_CCR1: - *spr_val = vcpu->arch.ccr1; break; - default: - emulated = kvmppc_booke_emulate_mfspr(vcpu, sprn, spr_val); - } - - return emulated; -} - diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c deleted file mode 100644 index 0deef10..0000000 --- a/arch/powerpc/kvm/44x_tlb.c +++ /dev/null @@ -1,528 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 2007 - * - * Authors: Hollis Blanchard - */ - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include "timing.h" - -#include "44x_tlb.h" -#include "trace.h" - -#ifndef PPC44x_TLBE_SIZE -#define PPC44x_TLBE_SIZE PPC44x_TLB_4K -#endif - -#define PAGE_SIZE_4K (1<<12) -#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) - -#define PPC44x_TLB_UATTR_MASK \ - (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) -#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) -#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) - -#ifdef DEBUG -void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - int i; - - printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); - printk("| %2s | %3s | %8s | %8s | %8s |\n", - "nr", "tid", "word0", "word1", "word2"); - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - tlbe = &vcpu_44x->guest_tlb[i]; - if (tlbe->word0 & PPC44x_TLB_VALID) - printk(" G%2d | %02X | %08X | %08X | %08X |\n", - i, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - } -} -#endif - -static inline void kvmppc_44x_tlbie(unsigned int index) -{ - /* 0 <= index < 64, so the V bit is clear and we can use the index as - * word0. */ - asm volatile( - "tlbwe %[index], %[index], 0\n" - : - : [index] "r"(index) - ); -} - -static inline void kvmppc_44x_tlbre(unsigned int index, - struct kvmppc_44x_tlbe *tlbe) -{ - asm volatile( - "tlbre %[word0], %[index], 0\n" - "mfspr %[tid], %[sprn_mmucr]\n" - "andi. 
%[tid], %[tid], 0xff\n" - "tlbre %[word1], %[index], 1\n" - "tlbre %[word2], %[index], 2\n" - : [word0] "=r"(tlbe->word0), - [word1] "=r"(tlbe->word1), - [word2] "=r"(tlbe->word2), - [tid] "=r"(tlbe->tid) - : [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - : "cc" - ); -} - -static inline void kvmppc_44x_tlbwe(unsigned int index, - struct kvmppc_44x_tlbe *stlbe) -{ - unsigned long tmp; - - asm volatile( - "mfspr %[tmp], %[sprn_mmucr]\n" - "rlwimi %[tmp], %[tid], 0, 0xff\n" - "mtspr %[sprn_mmucr], %[tmp]\n" - "tlbwe %[word0], %[index], 0\n" - "tlbwe %[word1], %[index], 1\n" - "tlbwe %[word2], %[index], 2\n" - : [tmp] "=&r"(tmp) - : [word0] "r"(stlbe->word0), - [word1] "r"(stlbe->word1), - [word2] "r"(stlbe->word2), - [tid] "r"(stlbe->tid), - [index] "r"(index), - [sprn_mmucr] "i"(SPRN_MMUCR) - ); -} - -static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) -{ - /* We only care about the guest's permission and user bits. */ - attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_UATTR_MASK; - - if (!usermode) { - /* Guest is in supervisor mode, so we need to translate guest - * supervisor permissions into user permissions. */ - attrib &= ~PPC44x_TLB_USER_PERM_MASK; - attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; - } - - /* Make sure host can always access this memory. */ - attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; - - /* WIMGE = 0b00100 */ - attrib |= PPC44x_TLB_M; - - return attrib; -} - -/* Load shadow TLB back into hardware. */ -void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbwe(i, stlbe); - } -} - -static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int i) -{ - vcpu_44x->shadow_tlb_mod[i] = 1; -} - -/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ -void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) { - struct kvmppc_44x_tlbe *stlbe = &vcpu_44x->shadow_tlb[i]; - - if (vcpu_44x->shadow_tlb_mod[i]) - kvmppc_44x_tlbre(i, stlbe); - - if (get_tlb_v(stlbe) && get_tlb_ts(stlbe)) - kvmppc_44x_tlbie(i); - } -} - - -/* Search the guest TLB for a matching entry. */ -int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, - unsigned int as) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - /* XXX Replace loop with fancy data structures. 
*/ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->guest_tlb); i++) { - struct kvmppc_44x_tlbe *tlbe = &vcpu_44x->guest_tlb[i]; - unsigned int tid; - - if (eaddr < get_tlb_eaddr(tlbe)) - continue; - - if (eaddr > get_tlb_end(tlbe)) - continue; - - tid = get_tlb_tid(tlbe); - if (tid && (tid != pid)) - continue; - - if (!get_tlb_v(tlbe)) - continue; - - if (get_tlb_ts(tlbe) != as) - continue; - - return i; - } - - return -1; -} - -gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, - gva_t eaddr) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - unsigned int pgmask = get_tlb_bytes(gtlbe) - 1; - - return get_tlb_raddr(gtlbe) | (eaddr & pgmask); -} - -int kvmppc_mmu_itlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_IS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -int kvmppc_mmu_dtlb_index(struct kvm_vcpu *vcpu, gva_t eaddr) -{ - unsigned int as = !!(vcpu->arch.shared->msr & MSR_DS); - - return kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); -} - -void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu) -{ -} - -void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu) -{ -} - -static void kvmppc_44x_shadow_release(struct kvmppc_vcpu_44x *vcpu_44x, - unsigned int stlb_index) -{ - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[stlb_index]; - - if (!ref->page) - return; - - /* Discard from the TLB. */ - /* Note: we could actually invalidate a host mapping, if the host overwrote - * this TLB entry since we inserted a guest mapping. */ - kvmppc_44x_tlbie(stlb_index); - - /* Now release the page. */ - if (ref->writeable) - kvm_release_page_dirty(ref->page); - else - kvm_release_page_clean(ref->page); - - ref->page = NULL; - - /* XXX set tlb_44x_index to stlb_index? */ - - trace_kvm_stlb_inval(stlb_index); -} - -void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i <= tlb_44x_hwater; i++) - kvmppc_44x_shadow_release(vcpu_44x, i); -} - -/** - * kvmppc_mmu_map -- create a host mapping for guest memory - * - * If the guest wanted a larger page than the host supports, only the first - * host page is mapped here and the rest are demand faulted. - * - * If the guest wanted a smaller page than the host page size, we map only the - * guest-size page (i.e. not a full host page mapping). - * - * Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. - */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - unsigned int gtlb_index) -{ - struct kvmppc_44x_tlbe stlbe; - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *gtlbe = &vcpu_44x->guest_tlb[gtlb_index]; - struct kvmppc_44x_shadow_ref *ref; - struct page *new_page; - hpa_t hpaddr; - gfn_t gfn; - u32 asid = gtlbe->tid; - u32 flags = gtlbe->word2; - u32 max_bytes = get_tlb_bytes(gtlbe); - unsigned int victim; - - /* Select TLB entry to clobber. Indirectly guard against races with the TLB - * miss handler by disabling interrupts. */ - local_irq_disable(); - victim = ++tlb_44x_index; - if (victim > tlb_44x_hwater) - victim = 0; - tlb_44x_index = victim; - local_irq_enable(); - - /* Get reference to new page. 
*/ - gfn = gpaddr >> PAGE_SHIFT; - new_page = gfn_to_page(vcpu->kvm, gfn); - if (is_error_page(new_page)) { - printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n", - (unsigned long long)gfn); - return; - } - hpaddr = page_to_phys(new_page); - - /* Invalidate any previous shadow mappings. */ - kvmppc_44x_shadow_release(vcpu_44x, victim); - - /* XXX Make sure (va, size) doesn't overlap any other - * entries. 440x6 user manual says the result would be - * "undefined." */ - - /* XXX what about AS? */ - - /* Force TS=1 for all guest mappings. */ - stlbe.word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; - - if (max_bytes >= PAGE_SIZE) { - /* Guest mapping is larger than or equal to host page size. We can use - * a "native" host mapping. */ - stlbe.word0 |= (gvaddr & PAGE_MASK) | PPC44x_TLBE_SIZE; - } else { - /* Guest mapping is smaller than host page size. We must restrict the - * size of the mapping to be at most the smaller of the two, but for - * simplicity we fall back to a 4K mapping (this is probably what the - * guest is using anyways). */ - stlbe.word0 |= (gvaddr & PAGE_MASK_4K) | PPC44x_TLB_4K; - - /* 'hpaddr' is a host page, which is larger than the mapping we're - * inserting here. To compensate, we must add the in-page offset to the - * sub-page. */ - hpaddr |= gpaddr & (PAGE_MASK ^ PAGE_MASK_4K); - } - - stlbe.word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); - stlbe.word2 = kvmppc_44x_tlb_shadow_attrib(flags, - vcpu->arch.shared->msr & MSR_PR); - stlbe.tid = !(asid & 0xff); - - /* Keep track of the reference so we can properly release it later. */ - ref = &vcpu_44x->shadow_refs[victim]; - ref->page = new_page; - ref->gtlb_index = gtlb_index; - ref->writeable = !!(stlbe.word2 & PPC44x_TLB_UW); - ref->tid = stlbe.tid; - - /* Insert shadow mapping into hardware TLB. */ - kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); - kvmppc_44x_tlbwe(victim, &stlbe); - trace_kvm_stlb_write(victim, stlbe.tid, stlbe.word0, stlbe.word1, - stlbe.word2); -} - -/* For a particular guest TLB entry, invalidate the corresponding host TLB - * mappings and release the host pages. */ -static void kvmppc_44x_invalidate(struct kvm_vcpu *vcpu, - unsigned int gtlb_index) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - if (ref->gtlb_index == gtlb_index) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr) -{ - int usermode = vcpu->arch.shared->msr & MSR_PR; - - vcpu->arch.shadow_pid = !usermode; -} - -void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - int i; - - if (unlikely(vcpu->arch.pid == new_pid)) - return; - - vcpu->arch.pid = new_pid; - - /* Guest userspace runs with TID=0 mappings and PID=0, to make sure it - * can't access guest kernel mappings (TID=1). When we switch to a new - * guest PID, which will also use host PID=0, we must discard the old guest - * userspace mappings. */ - for (i = 0; i < ARRAY_SIZE(vcpu_44x->shadow_refs); i++) { - struct kvmppc_44x_shadow_ref *ref = &vcpu_44x->shadow_refs[i]; - - if (ref->tid == 0) - kvmppc_44x_shadow_release(vcpu_44x, i); - } -} - -static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, - const struct kvmppc_44x_tlbe *tlbe) -{ - gpa_t gpa; - - if (!get_tlb_v(tlbe)) - return 0; - - /* Does it match current guest AS? */ - /* XXX what about IS != DS? 
*/ - if (get_tlb_ts(tlbe) != !!(vcpu->arch.shared->msr & MSR_IS)) - return 0; - - gpa = get_tlb_raddr(tlbe); - if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) - /* Mapping is not for RAM. */ - return 0; - - return 1; -} - -int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) -{ - struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - struct kvmppc_44x_tlbe *tlbe; - unsigned int gtlb_index; - int idx; - - gtlb_index = kvmppc_get_gpr(vcpu, ra); - if (gtlb_index >= KVM44x_GUEST_TLB_SIZE) { - printk("%s: index %d\n", __func__, gtlb_index); - kvmppc_dump_vcpu(vcpu); - return EMULATE_FAIL; - } - - tlbe = &vcpu_44x->guest_tlb[gtlb_index]; - - /* Invalidate shadow mappings for the about-to-be-clobbered TLB entry. */ - if (tlbe->word0 & PPC44x_TLB_VALID) - kvmppc_44x_invalidate(vcpu, gtlb_index); - - switch (ws) { - case PPC44x_TLB_PAGEID: - tlbe->tid = get_mmucr_stid(vcpu); - tlbe->word0 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_XLAT: - tlbe->word1 = kvmppc_get_gpr(vcpu, rs); - break; - - case PPC44x_TLB_ATTRIB: - tlbe->word2 = kvmppc_get_gpr(vcpu, rs); - break; - - default: - return EMULATE_FAIL; - } - - idx = srcu_read_lock(&vcpu->kvm->srcu); - - if (tlbe_is_host_safe(vcpu, tlbe)) { - gva_t eaddr; - gpa_t gpaddr; - u32 bytes; - - eaddr = get_tlb_eaddr(tlbe); - gpaddr = get_tlb_raddr(tlbe); - - /* Use the advertised page size to mask effective and real addrs. */ - bytes = get_tlb_bytes(tlbe); - eaddr &= ~(bytes - 1); - gpaddr &= ~(bytes - 1); - - kvmppc_mmu_map(vcpu, eaddr, gpaddr, gtlb_index); - } - - srcu_read_unlock(&vcpu->kvm->srcu, idx); - - trace_kvm_gtlb_write(gtlb_index, tlbe->tid, tlbe->word0, tlbe->word1, - tlbe->word2); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); - return EMULATE_DONE; -} - -int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, u8 rc) -{ - u32 ea; - int gtlb_index; - unsigned int as = get_mmucr_sts(vcpu); - unsigned int pid = get_mmucr_stid(vcpu); - - ea = kvmppc_get_gpr(vcpu, rb); - if (ra) - ea += kvmppc_get_gpr(vcpu, ra); - - gtlb_index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); - if (rc) { - u32 cr = kvmppc_get_cr(vcpu); - - if (gtlb_index < 0) - kvmppc_set_cr(vcpu, cr & ~0x20000000); - else - kvmppc_set_cr(vcpu, cr | 0x20000000); - } - kvmppc_set_gpr(vcpu, rt, gtlb_index); - - kvmppc_set_exit_type(vcpu, EMULATED_TLBSX_EXITS); - return EMULATE_DONE; -} diff --git a/arch/powerpc/kvm/44x_tlb.h b/arch/powerpc/kvm/44x_tlb.h deleted file mode 100644 index a9ff80e..0000000 --- a/arch/powerpc/kvm/44x_tlb.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License, version 2, as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - * - * Copyright IBM Corp. 
2007 - * - * Authors: Hollis Blanchard - */ - -#ifndef __KVM_POWERPC_TLB_H__ -#define __KVM_POWERPC_TLB_H__ - -#include -#include - -extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, - unsigned int pid, unsigned int as); - -extern int kvmppc_44x_emul_tlbsx(struct kvm_vcpu *vcpu, u8 rt, u8 ra, u8 rb, - u8 rc); -extern int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws); - -/* TLB helper functions */ -static inline unsigned int get_tlb_size(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 4) & 0xf; -} - -static inline gva_t get_tlb_eaddr(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->word0 & 0xfffffc00; -} - -static inline gva_t get_tlb_bytes(const struct kvmppc_44x_tlbe *tlbe) -{ - unsigned int pgsize = get_tlb_size(tlbe); - return 1 << 10 << (pgsize << 1); -} - -static inline gva_t get_tlb_end(const struct kvmppc_44x_tlbe *tlbe) -{ - return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; -} - -static inline u64 get_tlb_raddr(const struct kvmppc_44x_tlbe *tlbe) -{ - u64 word1 = tlbe->word1; - return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); -} - -static inline unsigned int get_tlb_tid(const struct kvmppc_44x_tlbe *tlbe) -{ - return tlbe->tid & 0xff; -} - -static inline unsigned int get_tlb_ts(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 8) & 0x1; -} - -static inline unsigned int get_tlb_v(const struct kvmppc_44x_tlbe *tlbe) -{ - return (tlbe->word0 >> 9) & 0x1; -} - -static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.mmucr & 0xff; -} - -static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) -{ - return (vcpu->arch.mmucr >> 16) & 0x1; -} - -#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 8aeeda1..8f104a6 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -112,23 +112,9 @@ config KVM_BOOK3S_64_PR config KVM_BOOKE_HV bool -config KVM_440 - bool "KVM support for PowerPC 440 processors" - depends on 44x - select KVM - select KVM_MMIO - ---help--- - Support running unmodified 440 guest kernels in virtual machines on - 440 host processors. - - This module provides access to the hardware capabilities through - a character device node named /dev/kvm. - - If unsure, say N. - config KVM_EXIT_TIMING bool "Detailed exit timing" - depends on KVM_440 || KVM_E500V2 || KVM_E500MC + depends on KVM_E500V2 || KVM_E500MC ---help--- Calculate elapsed time for every exit/enter cycle. A per-vcpu report is available in debugfs kvm/vm#_vcpu#_timing. diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index ce569b6..777f894 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,7 +10,6 @@ KVM := ../../../virt/kvm common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ $(KVM)/eventfd.o -CFLAGS_44x_tlb.o := -I. CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. 
@@ -21,16 +20,6 @@ obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o AFLAGS_booke_interrupts.o := -I$(obj) -kvm-440-objs := \ - $(common-objs-y) \ - booke.o \ - booke_emulate.o \ - booke_interrupts.o \ - 44x.o \ - 44x_tlb.o \ - 44x_emulate.o -kvm-objs-$(CONFIG_KVM_440) := $(kvm-440-objs) - kvm-e500-objs := \ $(common-objs-y) \ booke.o \ @@ -127,7 +116,6 @@ kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o kvm-objs := $(kvm-objs-m) $(kvm-objs-y) -obj-$(CONFIG_KVM_440) += kvm.o obj-$(CONFIG_KVM_E500V2) += kvm.o obj-$(CONFIG_KVM_E500MC) += kvm.o obj-$(CONFIG_KVM_BOOK3S_64) += kvm.o diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index b632cd3..f753543 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -99,13 +99,6 @@ enum int_class { void kvmppc_set_pending_interrupt(struct kvm_vcpu *vcpu, enum int_class type); -extern void kvmppc_mmu_destroy_44x(struct kvm_vcpu *vcpu); -extern int kvmppc_core_emulate_op_44x(struct kvm_run *run, struct kvm_vcpu *vcpu, - unsigned int inst, int *advance); -extern int kvmppc_core_emulate_mtspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong spr_val); -extern int kvmppc_core_emulate_mfspr_44x(struct kvm_vcpu *vcpu, int sprn, - ulong *spr_val); extern void kvmppc_mmu_destroy_e500(struct kvm_vcpu *vcpu); extern int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 2c6deb5ef..84c308a 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -424,10 +423,6 @@ lightweight_exit: mtspr SPRN_PID1, r3 #endif -#ifdef CONFIG_44x - iccci 0, 0 /* XXX hack */ -#endif - /* Load some guest volatiles. */ lwz r0, VCPU_GPR(R0)(r4) lwz r2, VCPU_GPR(R2)(r4) diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index b4f8fba..e9fa56a 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cfa6cfa..8e03568 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -217,7 +217,6 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu) case KVM_HCALL_TOKEN(KVM_HC_FEATURES): r = EV_SUCCESS; #if defined(CONFIG_PPC_BOOK3S) || defined(CONFIG_KVM_E500V2) - /* XXX Missing magic page on 44x */ r2 |= (1 << KVM_FEATURE_MAGIC_PAGE); #endif -- cgit v0.10.2 From a0840240c0c6bcbac8f0f5db11f95c19aaf9b52f Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Sat, 19 Jul 2014 17:59:34 +1000 Subject: KVM: PPC: Book3S: Fix LPCR one_reg interface Unfortunately, the LPCR got defined as a 32-bit register in the one_reg interface. This is unfortunate because KVM allows userspace to control the DPFD (default prefetch depth) field, which is in the upper 32 bits. The result is that DPFD always get set to 0, which reduces performance in the guest. We can't just change KVM_REG_PPC_LPCR to be a 64-bit register ID, since that would break existing userspace binaries. Instead we define a new KVM_REG_PPC_LPCR_64 id which is 64-bit. Userspace can still use the old KVM_REG_PPC_LPCR id, but it now only modifies those fields in the bottom 32 bits that userspace can modify (ILE, TC and AIL). If userspace uses the new KVM_REG_PPC_LPCR_64 id, it can modify DPFD as well. 
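For illustration, the new id is reached through the usual one_reg interface; a minimal userspace sketch (the helper name and the vcpu fd handling are illustrative, not part of this patch):

	#include <stdint.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Set a full 64-bit LPCR value, including DPFD in the upper half.
	 * Assumes vcpu_fd is an open KVM vcpu file descriptor and that the
	 * headers carry the KVM_REG_PPC_LPCR_64 id added by this patch. */
	static int set_lpcr64(int vcpu_fd, uint64_t lpcr)
	{
		struct kvm_one_reg reg = {
			.id   = KVM_REG_PPC_LPCR_64,
			.addr = (uint64_t)&lpcr,
		};

		return ioctl(vcpu_fd, KVM_SET_ONE_REG, &reg);
	}

The old 32-bit KVM_REG_PPC_LPCR id keeps working, but as described above it can only touch ILE, TC and AIL.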
Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Cc: stable@vger.kernel.org Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6955318..884f819 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1869,7 +1869,8 @@ registers, find a list below: PPC | KVM_REG_PPC_PID | 64 PPC | KVM_REG_PPC_ACOP | 64 PPC | KVM_REG_PPC_VRSAVE | 32 - PPC | KVM_REG_PPC_LPCR | 64 + PPC | KVM_REG_PPC_LPCR | 32 + PPC | KVM_REG_PPC_LPCR_64 | 64 PPC | KVM_REG_PPC_PPR | 64 PPC | KVM_REG_PPC_ARCH_COMPAT 32 PPC | KVM_REG_PPC_DABRX | 32 diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 0e56d9e..e0e49db 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -548,6 +548,7 @@ struct kvm_get_htab_header { #define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4) #define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5) +#define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5) #define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6) /* Architecture compatibility level */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index f1281c4..0c5266e 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -863,7 +863,8 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu, return 0; } -static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) +static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr, + bool preserve_top32) { struct kvmppc_vcore *vc = vcpu->arch.vcore; u64 mask; @@ -898,6 +899,10 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr) mask = LPCR_DPFD | LPCR_ILE | LPCR_TC; if (cpu_has_feature(CPU_FTR_ARCH_207S)) mask |= LPCR_AIL; + + /* Broken 32-bit version of LPCR must not clear top bits */ + if (preserve_top32) + mask &= 0xFFFFFFFF; vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask); spin_unlock(&vc->lock); } @@ -1011,6 +1016,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, vcpu->arch.vcore->tb_offset); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: *val = get_reg_val(id, vcpu->arch.vcore->lpcr); break; case KVM_REG_PPC_PPR: @@ -1216,7 +1222,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, ALIGN(set_reg_val(id, *val), 1UL << 24); break; case KVM_REG_PPC_LPCR: - kvmppc_set_lpcr(vcpu, set_reg_val(id, *val)); + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), true); + break; + case KVM_REG_PPC_LPCR_64: + kvmppc_set_lpcr(vcpu, set_reg_val(id, *val), false); break; case KVM_REG_PPC_PPR: vcpu->arch.ppr = set_reg_val(id, *val); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index b18f2d4..e7a1fa2 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -1314,6 +1314,7 @@ static int kvmppc_get_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, *val = get_reg_val(id, to_book3s(vcpu)->hior); break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: /* * We are only interested in the LPCR_ILE bit */ @@ -1349,6 +1350,7 @@ static int kvmppc_set_one_reg_pr(struct kvm_vcpu *vcpu, u64 id, to_book3s(vcpu)->hior_explicit = true; break; case KVM_REG_PPC_LPCR: + case KVM_REG_PPC_LPCR_64: kvmppc_set_lpcr_pr(vcpu, set_reg_val(id, *val)); break; default: -- cgit v0.10.2 From ef1af2e29622ff3403926ae801a2b10da075a2de Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:59:35 +1000 Subject: 
KVM: PPC: Book3S PR: Take SRCU read lock around RTAS kvm_read_guest() call This does for PR KVM what c9438092cae4 ("KVM: PPC: Book3S HV: Take SRCU read lock around kvm_read_guest() call") did for HV KVM, that is, eliminate a "suspicious rcu_dereference_check() usage!" warning by taking the SRCU lock around the call to kvmppc_rtas_hcall(). It also fixes a return of RESUME_HOST to return EMULATE_FAIL instead, since kvmppc_h_pr() is supposed to return EMULATE_* values. Signed-off-by: Paul Mackerras Cc: stable@vger.kernel.org Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index 6d0143f..ce3c893 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -267,6 +267,8 @@ static int kvmppc_h_pr_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd) int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) { + int rc, idx; + if (cmd <= MAX_HCALL_OPCODE && !test_bit(cmd/4, vcpu->kvm->arch.enabled_hcalls)) return EMULATE_FAIL; @@ -299,8 +301,11 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) break; case H_RTAS: if (list_empty(&vcpu->kvm->arch.rtas_tokens)) - return RESUME_HOST; - if (kvmppc_rtas_hcall(vcpu)) + break; + idx = srcu_read_lock(&vcpu->kvm->srcu); + rc = kvmppc_rtas_hcall(vcpu); + srcu_read_unlock(&vcpu->kvm->srcu, idx); + if (rc) break; kvmppc_set_gpr(vcpu, 3, 0); return EMULATE_DONE; -- cgit v0.10.2 From 1b2e33b071b13980a1f0823fbf139242059697b4 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Sat, 19 Jul 2014 17:59:36 +1000 Subject: KVM: PPC: Book3S: Make kvmppc_ld return a more accurate error indication At present, kvmppc_ld calls kvmppc_xlate, and if kvmppc_xlate returns any error indication, it returns -ENOENT, which is taken to mean an HPTE not found error. However, the error could have been a segment found (no SLB entry) or a permission error. Similarly, kvmppc_pte_to_hva currently does permission checking, but any error from it is taken by kvmppc_ld to mean that the access is an emulated MMIO access. Also, kvmppc_ld does no execute permission checking. This fixes these problems by (a) returning any error from kvmppc_xlate directly, (b) moving the permission check from kvmppc_pte_to_hva into kvmppc_ld, and (c) adding an execute permission check to kvmppc_ld. This is similar to what was done for kvmppc_st() by commit 82ff911317c3 ("KVM: PPC: Deflect page write faults properly in kvmppc_st"). 
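The locking discipline being applied is the standard SRCU read-side critical section; schematically (a sketch of the pattern, not the exact call site, which appears in the diff below):

	int idx, rc;

	idx = srcu_read_lock(&vcpu->kvm->srcu);
	rc = kvmppc_rtas_hcall(vcpu);	/* may call kvm_read_guest() */
	srcu_read_unlock(&vcpu->kvm->srcu, idx);

kvm_read_guest() dereferences the memslot array under SRCU, so any path that can reach it must hold the SRCU read lock for the duration of the call.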
Signed-off-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 37ca8a0..a3cbada 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -413,17 +413,10 @@ static hva_t kvmppc_bad_hva(void) return PAGE_OFFSET; } -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte, - bool read) +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { hva_t hpage; - if (read && !pte->may_read) - goto err; - - if (!read && !pte->may_write) - goto err; - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); if (kvm_is_error_hva(hpage)) goto err; @@ -462,15 +455,23 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { struct kvmppc_pte pte; hva_t hva = *eaddr; + int rc; vcpu->stat.ld++; - if (kvmppc_xlate(vcpu, *eaddr, data, false, &pte)) - goto nopte; + rc = kvmppc_xlate(vcpu, *eaddr, data, false, &pte); + if (rc) + return rc; *eaddr = pte.raddr; - hva = kvmppc_pte_to_hva(vcpu, &pte, true); + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + hva = kvmppc_pte_to_hva(vcpu, &pte); if (kvm_is_error_hva(hva)) goto mmio; @@ -481,8 +482,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, return EMULATE_DONE; -nopte: - return -ENOENT; mmio: return EMULATE_DO_MMIO; } -- cgit v0.10.2 From de9bdd1a604d30b4e05dc18b5cc6354949253abd Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 18 Jul 2014 14:18:42 +1000 Subject: Split out struct kvmppc_vcore creation to separate function No code changes, just split it out to a function so that with the addition of micro partition prefetch buffer allocation (in subsequent patch) looks neater and doesn't require excessive indentation. 
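After the split, the call site reduces to the usual allocate-and-check shape (as the diff below shows):

	vcore = kvmppc_vcore_create(kvm, core);
	kvm->arch.vcores[core] = vcore;
	kvm->arch.online_vcores++;

with kvmppc_vcore_create() returning NULL on allocation failure, so the extra allocation added in the next patch can live inside the helper instead of pushing the surrounding code another indentation level deeper.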
Signed-off-by: Stewart Smith Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0c5266e..5042ccc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1303,6 +1303,26 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, return r; } +static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) +{ + struct kvmppc_vcore *vcore; + + vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); + + if (vcore == NULL) + return NULL; + + INIT_LIST_HEAD(&vcore->runnable_threads); + spin_lock_init(&vcore->lock); + init_waitqueue_head(&vcore->wq); + vcore->preempt_tb = TB_NIL; + vcore->lpcr = kvm->arch.lpcr; + vcore->first_vcpuid = core * threads_per_subcore; + vcore->kvm = kvm; + + return vcore; +} + static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, unsigned int id) { @@ -1354,16 +1374,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm, mutex_lock(&kvm->lock); vcore = kvm->arch.vcores[core]; if (!vcore) { - vcore = kzalloc(sizeof(struct kvmppc_vcore), GFP_KERNEL); - if (vcore) { - INIT_LIST_HEAD(&vcore->runnable_threads); - spin_lock_init(&vcore->lock); - init_waitqueue_head(&vcore->wq); - vcore->preempt_tb = TB_NIL; - vcore->lpcr = kvm->arch.lpcr; - vcore->first_vcpuid = core * threads_per_subcore; - vcore->kvm = kvm; - } + vcore = kvmppc_vcore_create(kvm, core); kvm->arch.vcores[core] = vcore; kvm->arch.online_vcores++; } -- cgit v0.10.2 From 9678cdaae93932473f696fdea5debf3eee1e1260 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 18 Jul 2014 14:18:43 +1000 Subject: Use the POWER8 Micro Partition Prefetch Engine in KVM HV on POWER8 The POWER8 processor has a Micro Partition Prefetch Engine, which is a fancy way of saying "has way to store and load contents of L2 or L2+MRU way of L3 cache". We initiate the storing of the log (list of addresses) using the logmpp instruction and start restore by writing to a SPR. The logmpp instruction takes parameters in a single 64bit register: - starting address of the table to store log of L2/L2+L3 cache contents - 32kb for L2 - 128kb for L2+L3 - Aligned relative to maximum size of the table (32kb or 128kb) - Log control (no-op, L2 only, L2 and L3, abort logout) We should abort any ongoing logging before initiating one. To initiate restore, we write to the MPPR SPR. The format of what to write to the SPR is similar to the logmpp instruction parameter: - starting address of the table to read from (same alignment requirements) - table size (no data, until end of table) - prefetch rate (from fastest possible to slower. about every 8, 16, 24 or 32 cycles) The idea behind loading and storing the contents of L2/L3 cache is to reduce memory latency in a system that is frequently swapping vcores on a physical CPU. The best case scenario for doing this is when some vcores are doing very cache heavy workloads. The worst case is when they have about 0 cache hits, so we just generate needless memory operations. This implementation just does L2 store/load. In my benchmarks this proves to be useful. Benchmark 1: - 16 core POWER8 - 3x Ubuntu 14.04LTS guests (LE) with 8 VCPUs each - No split core/SMT - two guests running sysbench memory test. 
sysbench --test=memory --num-threads=8 run - one guest running apache bench (of default HTML page) ab -n 490000 -c 400 http://localhost/ This benchmark aims to measure performance of real world application (apache) where other guests are cache hot with their own workloads. The sysbench memory benchmark does pointer sized writes to a (small) memory buffer in a loop. In this benchmark with this patch I can see an improvement both in requests per second (~5%) and in mean and median response times (again, about 5%). The spread of minimum and maximum response times were largely unchanged. benchmark 2: - Same VM config as benchmark 1 - all three guests running sysbench memory benchmark This benchmark aims to see if there is a positive or negative affect to this cache heavy benchmark. Although due to the nature of the benchmark (stores) we may not see a difference in performance, but rather hopefully an improvement in consistency of performance (when vcore switched in, don't have to wait many times for cachelines to be pulled in) The results of this benchmark are improvements in consistency of performance rather than performance itself. With this patch, the few outliers in duration go away and we get more consistent performance in each guest. benchmark 3: - same 3 guests and CPU configuration as benchmark 1 and 2. - two idle guests - 1 guest running STREAM benchmark This scenario also saw performance improvement with this patch. On Copy and Scale workloads from STREAM, I got 5-6% improvement with this patch. For Add and triad, it was around 10% (or more). benchmark 4: - same 3 guests as previous benchmarks - two guests running sysbench --memory, distinctly different cache heavy workload - one guest running STREAM benchmark. Similar improvements to benchmark 3. benchmark 5: - 1 guest, 8 VCPUs, Ubuntu 14.04 - Host configured with split core (SMT8, subcores-per-core=4) - STREAM benchmark In this benchmark, we see a 10-20% performance improvement across the board of STREAM benchmark results with this patch. Based on preliminary investigation and microbenchmarks by Prerna Saxena Signed-off-by: Stewart Smith Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/cache.h b/arch/powerpc/include/asm/cache.h index ed0afc1..34a05a1 100644 --- a/arch/powerpc/include/asm/cache.h +++ b/arch/powerpc/include/asm/cache.h @@ -3,6 +3,7 @@ #ifdef __KERNEL__ +#include /* bytes per L1 cache line */ #if defined(CONFIG_8xx) || defined(CONFIG_403GCX) @@ -39,6 +40,12 @@ struct ppc64_caches { }; extern struct ppc64_caches ppc64_caches; + +static inline void logmpp(u64 x) +{ + asm volatile(PPC_LOGMPP(R1) : : "r" (x)); +} + #endif /* __powerpc64__ && ! 
__ASSEMBLY__ */ #if defined(__ASSEMBLY__) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 5fe2b5d..11385bb 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -307,6 +307,8 @@ struct kvmppc_vcore { u32 arch_compat; ulong pcr; ulong dpdes; /* doorbell state (POWER8) */ + void *mpp_buffer; /* Micro Partition Prefetch buffer */ + bool mpp_buffer_is_valid; }; #define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 3132bb9..c636841 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -139,6 +139,7 @@ #define PPC_INST_ISEL 0x7c00001e #define PPC_INST_ISEL_MASK 0xfc00003e #define PPC_INST_LDARX 0x7c0000a8 +#define PPC_INST_LOGMPP 0x7c0007e4 #define PPC_INST_LSWI 0x7c0004aa #define PPC_INST_LSWX 0x7c00042a #define PPC_INST_LWARX 0x7c000028 @@ -275,6 +276,20 @@ #define __PPC_EH(eh) 0 #endif +/* POWER8 Micro Partition Prefetch (MPP) parameters */ +/* Address mask is common for LOGMPP instruction and MPPR SPR */ +#define PPC_MPPE_ADDRESS_MASK 0xffffffffc000 + +/* Bits 60 and 61 of MPP SPR should be set to one of the following */ +/* Aborting the fetch is indeed setting 00 in the table size bits */ +#define PPC_MPPR_FETCH_ABORT (0x0ULL << 60) +#define PPC_MPPR_FETCH_WHOLE_TABLE (0x2ULL << 60) + +/* Bits 54 and 55 of register for LOGMPP instruction should be set to: */ +#define PPC_LOGMPP_LOG_L2 (0x02ULL << 54) +#define PPC_LOGMPP_LOG_L2L3 (0x01ULL << 54) +#define PPC_LOGMPP_LOG_ABORT (0x03ULL << 54) + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_DCBAL(a, b) stringify_in_c(.long PPC_INST_DCBAL | \ __PPC_RA(a) | __PPC_RB(b)) @@ -283,6 +298,8 @@ #define PPC_LDARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LDARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) +#define PPC_LOGMPP(b) stringify_in_c(.long PPC_INST_LOGMPP | \ + __PPC_RB(b)) #define PPC_LWARX(t, a, b, eh) stringify_in_c(.long PPC_INST_LWARX | \ ___PPC_RT(t) | ___PPC_RA(a) | \ ___PPC_RB(b) | __PPC_EH(eh)) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 0ef17ad..c547b26 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -225,6 +225,7 @@ #define CTRL_TE 0x00c00000 /* thread enable */ #define CTRL_RUNLATCH 0x1 #define SPRN_DAWR 0xB4 +#define SPRN_MPPR 0xB8 /* Micro Partition Prefetch Register */ #define SPRN_RPR 0xBA /* Relative Priority Register */ #define SPRN_CIABR 0xBB #define CIABR_PRIV 0x3 diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 5042ccc..c470d55 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -69,6 +70,13 @@ static DECLARE_BITMAP(default_enabled_hcalls, MAX_HCALL_OPCODE/4 + 1); +#if defined(CONFIG_PPC_64K_PAGES) +#define MPP_BUFFER_ORDER 0 +#elif defined(CONFIG_PPC_4K_PAGES) +#define MPP_BUFFER_ORDER 3 +#endif + + static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); @@ -1320,6 +1328,13 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core) vcore->first_vcpuid = core * threads_per_subcore; vcore->kvm = kvm; + vcore->mpp_buffer_is_valid = false; + + if (cpu_has_feature(CPU_FTR_ARCH_207S)) + vcore->mpp_buffer = (void *)__get_free_pages( + GFP_KERNEL|__GFP_ZERO, + 
MPP_BUFFER_ORDER); + return vcore; } @@ -1586,6 +1601,33 @@ static int on_primary_thread(void) return 1; } +static void kvmppc_start_saving_l2_cache(struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = (phys_addr_t)virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_ABORT); + logmpp(mpp_addr | PPC_LOGMPP_LOG_L2); + + vc->mpp_buffer_is_valid = true; +} + +static void kvmppc_start_restoring_l2_cache(const struct kvmppc_vcore *vc) +{ + phys_addr_t phy_addr, mpp_addr; + + phy_addr = virt_to_phys(vc->mpp_buffer); + mpp_addr = phy_addr & PPC_MPPE_ADDRESS_MASK; + + /* We must abort any in-progress save operations to ensure + * the table is valid so that prefetch engine knows when to + * stop prefetching. */ + logmpp(mpp_addr | PPC_LOGMPP_LOG_ABORT); + mtspr(SPRN_MPPR, mpp_addr | PPC_MPPR_FETCH_WHOLE_TABLE); +} + /* * Run a set of guest threads on a physical core. * Called with vc->lock held. @@ -1663,9 +1705,16 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) srcu_idx = srcu_read_lock(&vc->kvm->srcu); + if (vc->mpp_buffer_is_valid) + kvmppc_start_restoring_l2_cache(vc); + __kvmppc_vcore_entry(); spin_lock(&vc->lock); + + if (vc->mpp_buffer) + kvmppc_start_saving_l2_cache(vc); + /* disable sending of IPIs on virtual external irqs */ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) vcpu->cpu = -1; @@ -2413,8 +2462,14 @@ static void kvmppc_free_vcores(struct kvm *kvm) { long int i; - for (i = 0; i < KVM_MAX_VCORES; ++i) + for (i = 0; i < KVM_MAX_VCORES; ++i) { + if (kvm->arch.vcores[i] && kvm->arch.vcores[i]->mpp_buffer) { + struct kvmppc_vcore *vc = kvm->arch.vcores[i]; + free_pages((unsigned long)vc->mpp_buffer, + MPP_BUFFER_ORDER); + } kfree(kvm->arch.vcores[i]); + } kvm->arch.online_vcores = 0; } -- cgit v0.10.2 From 784aa3d7fb6f729c06d5836c9d9569f58e4d05ae Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:27:35 +0200 Subject: KVM: Rename and add argument to check_extension In preparation to make the check_extension function available to VM scope we add a struct kvm * argument to the function header and rename the function accordingly. It will still be called from the /dev/kvm fd, but with a NULL argument for struct kvm *. 
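After the rename, the two kinds of call site look like this (schematically; the real hunks follow):

	/* from the /dev/kvm system ioctl: no VM context exists yet */
	r = kvm_vm_ioctl_check_extension_generic(NULL, arg);

	/* later, from a VM fd, a real struct kvm * can be passed instead */
	r = kvm_vm_ioctl_check_extension_generic(kvm, arg);

Passing NULL preserves the existing system-ioctl behaviour while leaving room for per-VM answers in the follow-up patches.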
Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..cb77f999 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -184,7 +184,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; switch (ext) { diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 6a4309b..0729ba6 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -190,7 +190,7 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index d687c6e..3ca79aa 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -885,7 +885,7 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 8e03568..d870bac 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -391,7 +391,7 @@ void kvm_arch_sync_events(struct kvm *kvm) { } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; /* FIXME!! diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2f3e14f..00268ca 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -146,7 +146,7 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5a8691b..5a62d91 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2616,7 +2616,7 @@ out: return r; } -int kvm_dev_ioctl_check_extension(long ext) +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec4e3bd..5065b95 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -602,7 +602,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); -int kvm_dev_ioctl_check_extension(long ext); +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext); int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, int *is_dirty); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4b6c01b..e28f3ca 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2571,7 +2571,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_dev_ioctl_check_extension_generic(long arg) +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) { switch (arg) { case KVM_CAP_USER_MEMORY: @@ -2595,7 +2595,7 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) default: break; } - return kvm_dev_ioctl_check_extension(arg); + return kvm_vm_ioctl_check_extension(kvm, arg); } static long kvm_dev_ioctl(struct file *filp, @@ -2614,7 +2614,7 @@ static long kvm_dev_ioctl(struct file *filp, r = kvm_dev_ioctl_create_vm(arg); break; case KVM_CHECK_EXTENSION: - r = kvm_dev_ioctl_check_extension_generic(arg); + r = kvm_vm_ioctl_check_extension_generic(NULL, arg); break; case 
KVM_GET_VCPU_MMAP_SIZE: r = -EINVAL; -- cgit v0.10.2 From 92b591a4c46b103ebd3fc0d03a084e1efd331253 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:33:08 +0200 Subject: KVM: Allow KVM_CHECK_EXTENSION on the vm fd The KVM_CHECK_EXTENSION is only available on the kvm fd today. Unfortunately on PPC some of the capabilities change depending on the way a VM was created. So instead we need a way to expose capabilities as VM ioctl, so that we can see which VM type we're using (HV or PR). To enable this, add the KVM_CHECK_EXTENSION ioctl to our vm ioctl portfolio. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 884f819..8898caf 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -148,9 +148,9 @@ of banks, as set via the KVM_X86_SETUP_MCE ioctl. 4.4 KVM_CHECK_EXTENSION -Capability: basic +Capability: basic, KVM_CAP_CHECK_EXTENSION_VM for vm ioctl Architectures: all -Type: system ioctl +Type: system ioctl, vm ioctl Parameters: extension identifier (KVM_CAP_*) Returns: 0 if unsupported; 1 (or some other positive integer) if supported @@ -160,6 +160,9 @@ receives an integer that describes the extension availability. Generally 0 means no and 1 means yes, but some extensions may report additional information in the integer return value. +Based on their initialization different VMs may have different capabilities. +It is thus encouraged to use the vm ioctl to query for capabilities (available +with KVM_CAP_CHECK_EXTENSION_VM on the vm fd) 4.5 KVM_GET_VCPU_MMAP_SIZE diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0418b74..51776ca 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -759,6 +759,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_ARM_PSCI_0_2 102 #define KVM_CAP_PPC_FIXUP_HCALL 103 #define KVM_CAP_PPC_ENABLE_HCALL 104 +#define KVM_CAP_CHECK_EXTENSION_VM 105 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e28f3ca..1b95cc9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2324,6 +2324,34 @@ static int kvm_ioctl_create_device(struct kvm *kvm, return 0; } +static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) +{ + switch (arg) { + case KVM_CAP_USER_MEMORY: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: +#ifdef CONFIG_KVM_APIC_ARCHITECTURE + case KVM_CAP_SET_BOOT_CPU_ID: +#endif + case KVM_CAP_INTERNAL_ERROR_DATA: +#ifdef CONFIG_HAVE_KVM_MSI + case KVM_CAP_SIGNAL_MSI: +#endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif + case KVM_CAP_CHECK_EXTENSION_VM: + return 1; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQ_ROUTING: + return KVM_MAX_IRQ_ROUTES; +#endif + default: + break; + } + return kvm_vm_ioctl_check_extension(kvm, arg); +} + static long kvm_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -2487,6 +2515,9 @@ static long kvm_vm_ioctl(struct file *filp, r = 0; break; } + case KVM_CHECK_EXTENSION: + r = kvm_vm_ioctl_check_extension_generic(kvm, arg); + break; default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) @@ -2571,33 +2602,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return r; } -static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) -{ - switch (arg) { - case KVM_CAP_USER_MEMORY: - case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: - case KVM_CAP_JOIN_MEMORY_REGIONS_WORKS: -#ifdef 
CONFIG_KVM_APIC_ARCHITECTURE - case KVM_CAP_SET_BOOT_CPU_ID: -#endif - case KVM_CAP_INTERNAL_ERROR_DATA: -#ifdef CONFIG_HAVE_KVM_MSI - case KVM_CAP_SIGNAL_MSI: -#endif -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQFD_RESAMPLE: -#endif - return 1; -#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING - case KVM_CAP_IRQ_ROUTING: - return KVM_MAX_IRQ_ROUTES; -#endif - default: - break; - } - return kvm_vm_ioctl_check_extension(kvm, arg); -} - static long kvm_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- cgit v0.10.2 From 7a58777a33128b28379851b94070934b17ed9176 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 14 Jul 2014 18:55:19 +0200 Subject: KVM: PPC: Book3S: Provide different CAPs based on HV or PR mode With Book3S KVM we can create both PR and HV VMs in parallel on the same machine. That gives us new challenges on the CAPs we return - both have different capabilities. When we get asked about CAPs on the kvm fd, there's nothing we can do. We can try to be smart and assume we're running HV if HV is available, PR otherwise. However with the newly added VM CHECK_EXTENSION we can now ask for capabilities directly on a VM which knows whether it's PR or HV. With this patch I can successfully expose KVM PVINFO data to user space in the PR case, fixing magic page mapping for PAPR guests. Signed-off-by: Alexander Graf Acked-by: Paolo Bonzini diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d870bac..eaa57da 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -394,11 +394,17 @@ void kvm_arch_sync_events(struct kvm *kvm) int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) { int r; - /* FIXME!! - * Should some of this be vm ioctl ? is it possible now ? - */ + /* Assume we're using HV mode when the HV module is loaded */ int hv_enabled = kvmppc_hv_ops ? 1 : 0; + if (kvm) { + /* + * Hooray - we know which VM type we're running on. Depend on + * that rather than the guess above. + */ + hv_enabled = is_kvmppc_hv_enabled(kvm); + } + switch (ext) { #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: -- cgit v0.10.2 From 63fff5c1e3695ff5c9ad8c198a3b03ca4c0c73a8 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Sun, 29 Jun 2014 16:47:30 +0530 Subject: KVM: PPC: BOOK3S: HV: Update compute_tlbie_rb to handle 16MB base page When calculating the lower bits of AVA field, use the shift count based on the base page size. Also add the missing segment size and remove stale comment. Signed-off-by: Aneesh Kumar K.V Acked-by: Paul Mackerras Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index e504f88..07cf9df 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -147,6 +147,8 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, */ /* This covers 14..54 bits of va*/ rb = (v & ~0x7fUL) << 16; /* AVA field */ + + rb |= v >> (62 - 8); /* B field */ /* * AVA in v had cleared lower 23 bits. 
We need to derive * that from pteg index @@ -177,10 +179,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, { int aval_shift; /* - * remaining 7bits of AVA/LP fields + * remaining bits of AVA/LP fields * Also contain the rr bits of LP */ - rb |= (va_low & 0x7f) << 16; + rb |= (va_low << mmu_psize_defs[b_psize].shift) & 0x7ff000; /* * Now clear not needed LP bits based on actual psize */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index c470d55..27cced9 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2064,12 +2064,6 @@ static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps, (*sps)->page_shift = def->shift; (*sps)->slb_enc = def->sllp; (*sps)->enc[0].page_shift = def->shift; - /* - * Only return base page encoding. We don't want to return - * all the supporting pte_enc, because our H_ENTER doesn't - * support MPSS yet. Once they do, we can start passing all - * support pte_enc here - */ (*sps)->enc[0].pte_enc = def->penc[linux_psize]; /* * Add 16MB MPSS support if host supports it -- cgit v0.10.2 From 7d15c06f1abfe4b893c6c2c8a306b02210a6a6db Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:52:36 +0200 Subject: KVM: PPC: Implement kvmppc_xlate for all targets We have a nice API to find the translated GPAs of a GVA including protection flags. So far we only use it on Book3S, but there's no reason the same shouldn't be used on BookE as well. Implement a kvmppc_xlate() version for BookE and clean it up to make it more readable in general. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index e381363..1a60af9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,6 +52,16 @@ enum instruction_type { INST_SC, /* system call */ }; +enum xlate_instdata { + XLATE_INST, /* translate instruction address */ + XLATE_DATA /* translate data address */ +}; + +enum xlate_readwrite { + XLATE_READ, /* check for read permissions */ + XLATE_WRITE /* check for write permissions */ +}; + extern int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu); extern void kvmppc_handler_highmem(void); @@ -94,6 +104,9 @@ extern gpa_t kvmppc_mmu_xlate(struct kvm_vcpu *vcpu, unsigned int gtlb_index, gva_t eaddr); extern void kvmppc_mmu_dtlb_miss(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_itlb_miss(struct kvm_vcpu *vcpu); +extern int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, + enum xlate_instdata xlid, enum xlate_readwrite xlrw, + struct kvmppc_pte *pte); extern struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index a3cbada..0b6c84e 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -380,9 +380,11 @@ pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, } EXPORT_SYMBOL_GPL(kvmppc_gpa_to_pfn); -static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data, - bool iswrite, struct kvmppc_pte *pte) +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) { + bool data = (xlid == XLATE_DATA); + bool iswrite = (xlrw == XLATE_WRITE); int relocated = (kvmppc_get_msr(vcpu) & (data ? 
MSR_DR : MSR_IR)); int r; @@ -434,7 +436,8 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.st++; - r = kvmppc_xlate(vcpu, *eaddr, data, true, &pte); + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); if (r < 0) return r; @@ -459,7 +462,8 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, vcpu->stat.ld++; - rc = kvmppc_xlate(vcpu, *eaddr, data, false, &pte); + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); if (rc) return rc; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 97bcde2..2f697b4 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1785,6 +1785,57 @@ void kvm_guest_protect_msr(struct kvm_vcpu *vcpu, ulong prot_bitmap, bool set) #endif } +int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, + enum xlate_readwrite xlrw, struct kvmppc_pte *pte) +{ + int gtlb_index; + gpa_t gpaddr; + +#ifdef CONFIG_KVM_E500V2 + if (!(vcpu->arch.shared->msr & MSR_PR) && + (eaddr & PAGE_MASK) == vcpu->arch.magic_page_ea) { + pte->eaddr = eaddr; + pte->raddr = (vcpu->arch.magic_page_pa & PAGE_MASK) | + (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; + } +#endif + + /* Check the guest TLB. */ + switch (xlid) { + case XLATE_INST: + gtlb_index = kvmppc_mmu_itlb_index(vcpu, eaddr); + break; + case XLATE_DATA: + gtlb_index = kvmppc_mmu_dtlb_index(vcpu, eaddr); + break; + default: + BUG(); + } + + /* Do we have a TLB entry at all? */ + if (gtlb_index < 0) + return -ENOENT; + + gpaddr = kvmppc_mmu_xlate(vcpu, gtlb_index, eaddr); + + pte->eaddr = eaddr; + pte->raddr = (gpaddr & PAGE_MASK) | (eaddr & ~PAGE_MASK); + pte->vpage = eaddr >> PAGE_SHIFT; + + /* XXX read permissions from the guest TLB */ + pte->may_read = true; + pte->may_write = true; + pte->may_execute = true; + + return 0; +} + int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { -- cgit v0.10.2 From 35c4a7330dbe1ae6f590a5645b185e35ddb3f6d9 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:58:16 +0200 Subject: KVM: PPC: Move kvmppc_ld/st to common code We have enough common infrastructure now to resolve GVA->GPA mappings at runtime. With this we can move our book3s specific helpers to load / store in guest virtual address space to common code as well. 
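For orientation, the moved helpers keep their existing contract. A minimal caller-side sketch (illustrative only, not part of this patch; the function name is made up and the error mapping is simplified):

	/*
	 * Hypothetical caller: fetch the current instruction through the
	 * now-common kvmppc_ld() helper.
	 */
	static int example_fetch_inst(struct kvm_vcpu *vcpu, u32 *inst)
	{
		ulong eaddr = kvmppc_get_pc(vcpu);
		int r = kvmppc_ld(vcpu, &eaddr, sizeof(*inst), inst, false);

		/*
		 * On EMULATE_DONE, eaddr has been rewritten to the translated
		 * guest physical address; negative values are xlate errors.
		 */
		return (r == EMULATE_DONE) ? 0 : -EFAULT;
	}
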
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index a86ca65..172fd6d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -148,8 +148,8 @@ extern void kvmppc_mmu_hpte_sysexit(void); extern int kvmppc_mmu_hv_init(void); extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); +/* XXX remove this export when load_last_inst() is generic */ extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); -extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 11385bb..66f5b59 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -111,15 +111,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; u32 dbell_exits; u32 gdbell_exits; + u32 ld; + u32 st; #ifdef CONFIG_PPC_BOOK3S u32 pf_storage; u32 pf_instruc; u32 sp_storage; u32 sp_instruc; u32 queue_intr; - u32 ld; u32 ld_slow; - u32 st; u32 st_slow; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 1a60af9..17fa277 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -80,6 +80,10 @@ extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst); +extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); +extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 0b6c84e..de8da33 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -410,87 +410,6 @@ int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, enum xlate_instdata xlid, return r; } -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - hva_t hpage; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return kvmppc_bad_hva(); -} - -int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - int r; - - vcpu->stat.st++; - - r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, - XLATE_WRITE, &pte); - if (r < 0) - return r; - - *eaddr = pte.raddr; - - if (!pte.may_write) - return -EPERM; - - if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) - return EMULATE_DO_MMIO; - - return EMULATE_DONE; -} -EXPORT_SYMBOL_GPL(kvmppc_st); - -int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, - bool data) -{ - struct kvmppc_pte pte; - hva_t hva = *eaddr; - int rc; - - vcpu->stat.ld++; - - rc = kvmppc_xlate(vcpu, *eaddr, data ? 
XLATE_DATA : XLATE_INST, - XLATE_READ, &pte); - if (rc) - return rc; - - *eaddr = pte.raddr; - - if (!pte.may_read) - return -EPERM; - - if (!data && !pte.may_execute) - return -ENOEXEC; - - hva = kvmppc_pte_to_hva(vcpu, &pte); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } - - return EMULATE_DONE; - -mmio: - return EMULATE_DO_MMIO; -} -EXPORT_SYMBOL_GPL(kvmppc_ld); - int kvmppc_load_last_inst(struct kvm_vcpu *vcpu, enum instruction_type type, u32 *inst) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index eaa57da..2c5a1c3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,6 +309,87 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); +static hva_t kvmppc_bad_hva(void) +{ + return PAGE_OFFSET; +} + +static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) +{ + hva_t hpage; + + hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); + if (kvm_is_error_hva(hpage)) + goto err; + + return hpage | (pte->raddr & ~PAGE_MASK); +err: + return kvmppc_bad_hva(); +} + +int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + int r; + + vcpu->stat.st++; + + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_WRITE, &pte); + if (r < 0) + return r; + + *eaddr = pte.raddr; + + if (!pte.may_write) + return -EPERM; + + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; + + return EMULATE_DONE; +} +EXPORT_SYMBOL_GPL(kvmppc_st); + +int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, + bool data) +{ + struct kvmppc_pte pte; + hva_t hva = *eaddr; + int rc; + + vcpu->stat.ld++; + + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, + XLATE_READ, &pte); + if (rc) + return rc; + + *eaddr = pte.raddr; + + if (!pte.may_read) + return -EPERM; + + if (!data && !pte.may_execute) + return -ENOEXEC; + + hva = kvmppc_pte_to_hva(vcpu, &pte); + if (kvm_is_error_hva(hva)) + goto mmio; + + if (copy_from_user(ptr, (void __user *)hva, size)) { + printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); + goto mmio; + } + + return EMULATE_DONE; + +mmio: + return EMULATE_DO_MMIO; +} +EXPORT_SYMBOL_GPL(kvmppc_ld); + int kvm_arch_hardware_enable(void *garbage) { return 0; -- cgit v0.10.2 From 9897e88a79e1c6a3c5bbe74eccd64c4ba32a3b19 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 13:59:43 +0200 Subject: KVM: PPC: Remove kvmppc_bad_hva() We have a proper define for invalid HVA numbers. Use those instead of the ppc specific kvmppc_bad_hva(). 
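For context, the generic error value relies on the same observation the ppc helper did - a kernel address can never be a valid userspace HVA. Roughly, from include/linux/kvm_host.h (reproduced from the generic code of that era for reference; check the tree you build against):

	#define KVM_HVA_ERR_BAD		(PAGE_OFFSET)
	#define KVM_HVA_ERR_RO_BAD	(PAGE_OFFSET + PAGE_SIZE)

	static inline bool kvm_is_error_hva(unsigned long addr)
	{
		return addr >= PAGE_OFFSET;
	}
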
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2c5a1c3..3d59730 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,11 +309,6 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -static hva_t kvmppc_bad_hva(void) -{ - return PAGE_OFFSET; -} - static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) { hva_t hpage; @@ -324,7 +319,7 @@ static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) return hpage | (pte->raddr & ~PAGE_MASK); err: - return kvmppc_bad_hva(); + return KVM_HVA_ERR_BAD; } int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, -- cgit v0.10.2 From c45c551403f0a7b152e56c53735b954faa36c54c Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 14:17:30 +0200 Subject: KVM: PPC: Use kvm_read_guest in kvmppc_ld We have a nice and handy helper to read from guest physical address space, so we should make use of it in kvmppc_ld as we already do for its counterpart in kvmppc_st. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 3d59730..be40886 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -309,19 +309,6 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); -static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte) -{ - hva_t hpage; - - hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT); - if (kvm_is_error_hva(hpage)) - goto err; - - return hpage | (pte->raddr & ~PAGE_MASK); -err: - return KVM_HVA_ERR_BAD; -} - int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { @@ -351,7 +338,6 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { struct kvmppc_pte pte; - hva_t hva = *eaddr; int rc; vcpu->stat.ld++; @@ -369,19 +355,10 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!data && !pte.may_execute) return -ENOEXEC; - hva = kvmppc_pte_to_hva(vcpu, &pte); - if (kvm_is_error_hva(hva)) - goto mmio; - - if (copy_from_user(ptr, (void __user *)hva, size)) { - printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", hva); - goto mmio; - } + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) + return EMULATE_DO_MMIO; return EMULATE_DONE; - -mmio: - return EMULATE_DO_MMIO; } EXPORT_SYMBOL_GPL(kvmppc_ld); -- cgit v0.10.2 From c12fb43c2f6d6a57a4e21afe74ff56485d699ee7 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Fri, 20 Jun 2014 14:43:36 +0200 Subject: KVM: PPC: Handle magic page in kvmppc_ld/st We use kvmppc_ld and kvmppc_st to emulate load/store instructions that may as well access the magic page. Special case it out so that we can properly access it. 
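The guard for that override reads more easily as a single predicate; here is a sketch mirroring the conditions added below (the helper name is invented for illustration):

	/*
	 * Sketch: true when a kvmppc_ld/st access should be redirected to
	 * the magic page instead of going through kvm_read/write_guest().
	 */
	static bool example_is_magic_page_access(struct kvm_vcpu *vcpu, gpa_t raddr)
	{
		ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK;

		return kvmppc_supports_magic_page(vcpu) &&	/* PR or e500v2 only */
		       mp_pa &&					/* guest has mapped it */
		       ((raddr & KVM_PAM & PAGE_MASK) == mp_pa) &&
		       !(kvmppc_get_msr(vcpu) & MSR_PR);	/* privileged guest mode */
	}
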
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 172fd6d..6166791 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -286,6 +286,13 @@ static inline bool is_kvmppc_resume_guest(int r) return (r == RESUME_GUEST || r == RESUME_GUEST_NV); } +static inline bool is_kvmppc_hv_enabled(struct kvm *kvm); +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Only PR KVM supports the magic page */ + return !is_kvmppc_hv_enabled(vcpu->kvm); +} + /* Magic register values loaded into r3 and r4 before the 'sc' assembly * instruction for the OSI hypercalls */ #define OSI_SC_MAGIC_R3 0x113724FA diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h index cbb1990..f7aa5cc 100644 --- a/arch/powerpc/include/asm/kvm_booke.h +++ b/arch/powerpc/include/asm/kvm_booke.h @@ -103,4 +103,14 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault_dear; } + +static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu) +{ + /* Magic page is only supported on e500v2 */ +#ifdef CONFIG_KVM_E500V2 + return true; +#else + return false; +#endif +} #endif /* __ASM_KVM_BOOKE_H__ */ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index be40886..544d1d3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -312,6 +312,7 @@ EXPORT_SYMBOL_GPL(kvmppc_emulate_mmio); int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; int r; @@ -327,6 +328,16 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!pte.may_write) return -EPERM; + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(magic, ptr, size); + return EMULATE_DONE; + } + if (kvm_write_guest(vcpu->kvm, pte.raddr, ptr, size)) return EMULATE_DO_MMIO; @@ -337,6 +348,7 @@ EXPORT_SYMBOL_GPL(kvmppc_st); int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data) { + ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; int rc; @@ -355,6 +367,16 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, if (!data && !pte.may_execute) return -ENOEXEC; + /* Magic page override */ + if (kvmppc_supports_magic_page(vcpu) && mp_pa && + ((pte.raddr & KVM_PAM & PAGE_MASK) == mp_pa) && + !(kvmppc_get_msr(vcpu) & MSR_PR)) { + void *magic = vcpu->arch.shared; + magic += pte.eaddr & 0xfff; + memcpy(ptr, magic, size); + return EMULATE_DONE; + } + if (kvm_read_guest(vcpu->kvm, pte.raddr, ptr, size)) return EMULATE_DO_MMIO; -- cgit v0.10.2 From d69614a295aef72f8fb22da8e3ccf1a8f19a7ffc Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 18 Jun 2014 14:53:49 +0200 Subject: KVM: PPC: Separate loadstore emulation from priv emulation Today the instruction emulator can get called via 2 separate code paths. It can either be called by MMIO emulation detection code or by privileged instruction traps. This is bad, as both code paths prepare the environment differently. For MMIO emulation we already know the virtual address we faulted on, so instructions there don't have to actually fetch that information. 
Split out the two separate use cases into separate files. Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 17fa277..2214ee6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -86,6 +86,7 @@ extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu); +extern int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu); extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb); diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 777f894..1ccd7a1 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -13,8 +13,9 @@ common-objs-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \ CFLAGS_e500_mmu.o := -I. CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. +CFLAGS_emulate_loadstore.o := -I. -common-objs-y += powerpc.o emulate.o +common-objs-y += powerpc.o emulate.o emulate_loadstore.o obj-$(CONFIG_KVM_EXIT_TIMING) += timing.o obj-$(CONFIG_KVM_BOOK3S_HANDLER) += book3s_exports.o @@ -91,6 +92,7 @@ kvm-book3s_64-module-objs += \ $(KVM)/eventfd.o \ powerpc.o \ emulate.o \ + emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ book3s_rtas.o \ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index c5c64b6..e96b50d 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -207,25 +207,12 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) return emulated; } -/* XXX to do: - * lhax - * lhaux - * lswx - * lswi - * stswx - * stswi - * lha - * lhau - * lmw - * stmw - * - */ /* XXX Should probably auto-generate instruction decoding for a particular core * from opcode tables in the future. 
*/ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) { u32 inst; - int ra, rs, rt, sprn; + int rs, rt, sprn; enum emulation_result emulated; int advance = 1; @@ -238,7 +225,6 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) pr_debug("Emulating opcode %d / %d\n", get_op(inst), get_xop(inst)); - ra = get_ra(inst); rs = get_rs(inst); rt = get_rt(inst); sprn = get_sprn(inst); @@ -270,200 +256,24 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) #endif advance = 0; break; - case OP_31_XOP_LWZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - case OP_31_XOP_LBZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_31_XOP_LBZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_STWX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - case OP_31_XOP_STBX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_31_XOP_STBUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_31_XOP_LHAX: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_31_XOP_LHZUX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; case OP_31_XOP_MFSPR: emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt); break; - case OP_31_XOP_STHX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_31_XOP_STHUX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - case OP_31_XOP_MTSPR: emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs); break; - case OP_31_XOP_DCBST: - case OP_31_XOP_DCBF: - case OP_31_XOP_DCBI: - /* Do nothing. The guest is performing dcbi because - * hardware DMA is not snooped by the dcache, but - * emulated DMA either goes through the dcache as - * normal writes, or the host kernel has handled dcache - * coherence. */ - break; - - case OP_31_XOP_LWBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); - break; - case OP_31_XOP_TLBSYNC: break; - case OP_31_XOP_STWBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 0); - break; - - case OP_31_XOP_LHBRX: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); - break; - - case OP_31_XOP_STHBRX: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 0); - break; - default: /* Attempt core-specific emulation below. */ emulated = EMULATE_FAIL; } break; - case OP_LWZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - break; - - /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ - case OP_LD: - rt = get_rt(inst); - emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); - break; - - case OP_LWZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LBZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - break; - - case OP_LBZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STW: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - break; - - /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ - case OP_STD: - rs = get_rs(inst); - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 8, 1); - break; - - case OP_STWU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 4, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STB: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - break; - - case OP_STBU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 1, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHZ: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - break; - - case OP_LHZU: - emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_LHA: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - break; - - case OP_LHAU: - emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - - case OP_STH: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - break; - - case OP_STHU: - emulated = kvmppc_handle_store(run, vcpu, - kvmppc_get_gpr(vcpu, rs), - 2, 1); - kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); - break; - default: emulated = EMULATE_FAIL; } diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c new file mode 100644 index 0000000..0de4ffa --- /dev/null +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -0,0 +1,272 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * Copyright 2011 Freescale Semiconductor, Inc. 
+ * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include "timing.h" +#include "trace.h" + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + */ +int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + u32 inst; + int ra, rs, rt; + enum emulation_result emulated; + int advance = 1; + + /* this default type might be overwritten by subcategories */ + kvmppc_set_exit_type(vcpu, EMULATED_INST_EXITS); + + emulated = kvmppc_get_last_inst(vcpu, false, &inst); + if (emulated != EMULATE_DONE) + return emulated; + + ra = get_ra(inst); + rs = get_rs(inst); + rt = get_rt(inst); + + switch (get_op(inst)) { + case 31: + switch (get_xop(inst)) { + case OP_31_XOP_LWZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case OP_31_XOP_LBZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_31_XOP_LBZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STWX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + case OP_31_XOP_STBX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_31_XOP_STBUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_LHAX: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_31_XOP_LHZUX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_STHX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_31_XOP_STHUX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_31_XOP_DCBST: + case OP_31_XOP_DCBF: + case OP_31_XOP_DCBI: + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case OP_31_XOP_LWBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case OP_31_XOP_STWBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 0); + break; + + case OP_31_XOP_LHBRX: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case OP_31_XOP_STHBRX: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 0); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case OP_LWZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. 
*/ + case OP_LD: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + + case OP_LWZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LBZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case OP_LBZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STW: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + break; + + /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ + case OP_STD: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 8, 1); + break; + + case OP_STWU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 4, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STB: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + break; + + case OP_STBU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 1, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHZ: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case OP_LHZU: + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_LHA: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + break; + + case OP_LHAU: + emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + case OP_STH: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + break; + + case OP_STHU: + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 2, 1); + kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); + break; + + default: + emulated = EMULATE_FAIL; + break; + } + + if (emulated == EMULATE_FAIL) { + advance = 0; + kvmppc_core_queue_program(vcpu, 0); + } + + trace_kvm_ppc_instr(inst, kvmppc_get_pc(vcpu), emulated); + + /* Advance past emulated instruction. */ + if (advance) + kvmppc_set_pc(vcpu, kvmppc_get_pc(vcpu) + 4); + + return emulated; +} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 544d1d3..c14ed15 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -272,7 +272,7 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) enum emulation_result er; int r; - er = kvmppc_emulate_instruction(run, vcpu); + er = kvmppc_emulate_loadstore(vcpu); switch (er) { case EMULATE_DONE: /* Future optimization: only reload non-volatiles if they were -- cgit v0.10.2 From 8de12015ff75967b16f70e5938b151390dac9b77 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 18 Jun 2014 21:56:55 +0200 Subject: KVM: PPC: Expose helper functions for data/inst faults We're going to implement guest code interpretation in KVM for some rare corner cases. This code needs to be able to inject data and instruction faults into the guest when it encounters them. Expose generic APIs to do this in a reasonably subarch agnostic fashion. 
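Typical call sites in such an interpreter would look like this (a sketch; the choice of DSISR_PROTFAULT as the fault reason is an example, not taken from this patch):

	/* Data-side fault: raise a DSI at the faulting effective address. */
	kvmppc_core_queue_data_storage(vcpu, eaddr, DSISR_PROTFAULT);

	/* Instruction-side fault: raise an ISI with the matching SRR1 bits. */
	kvmppc_core_queue_inst_storage(vcpu, SRR1_ISI_PROT);
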
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 2214ee6..cbee453 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -132,6 +132,14 @@ extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu); extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, + ulong esr_flags); +extern void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu); +extern void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, + ulong esr_flags); extern void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu); extern int kvmppc_core_check_requests(struct kvm_vcpu *vcpu); diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index de8da33..dd03f6b 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -230,6 +230,23 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu) kvmppc_book3s_dequeue_irqprio(vcpu, BOOK3S_INTERRUPT_EXTERNAL_LEVEL); } +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, ulong dar, + ulong flags) +{ + kvmppc_set_dar(vcpu, dar); + kvmppc_set_dsisr(vcpu, flags); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DATA_STORAGE); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong flags) +{ + u64 msr = kvmppc_get_msr(vcpu); + msr &= ~(SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + msr |= flags & (SRR1_ISI_NOPT | SRR1_ISI_N_OR_G | SRR1_ISI_PROT); + kvmppc_set_msr_fast(vcpu, msr); + kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_INST_STORAGE); +} + int kvmppc_book3s_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int deliver = 1; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 2f697b4..f30948a 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -185,24 +185,28 @@ static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu, set_bit(priority, &vcpu->arch.pending_exceptions); } -static void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_dtlb_miss(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DTLB_MISS); } -static void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, - ulong dear_flags, ulong esr_flags) +void kvmppc_core_queue_data_storage(struct kvm_vcpu *vcpu, + ulong dear_flags, ulong esr_flags) { vcpu->arch.queued_dear = dear_flags; vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); } -static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, - ulong esr_flags) +void kvmppc_core_queue_itlb_miss(struct kvm_vcpu *vcpu) +{ + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ITLB_MISS); +} + +void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); -- cgit v0.10.2 From ce91ddc471b77ec75e5b2a43c803efac605f37b3 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 28 Jul 2014 19:29:13 +0200 Subject: KVM: PPC: Remove DCR handling DCR handling was only needed for 440 KVM. 
Since we removed it, we can also remove handling of DCR accesses. Signed-off-by: Alexander Graf diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 8898caf..a21ff22 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2613,8 +2613,8 @@ The 'data' member contains, in its first 'len' bytes, the value as it would appear if the VCPU performed a load or store of the appropriate width directly to the byte array. -NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_DCR, - KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding +NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and + KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace @@ -2685,7 +2685,7 @@ Principles of Operation Book in the Chapter for Dynamic Address Translation __u8 is_write; } dcr; -powerpc specific. +Deprecated - was used for 440 KVM. /* KVM_EXIT_OSI */ struct { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 66f5b59..98d9dd5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -94,7 +94,6 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u32 sum_exits; u32 mmio_exits; - u32 dcr_exits; u32 signal_exits; u32 light_exits; /* Account for special types of light exits: */ @@ -126,7 +125,6 @@ struct kvm_vcpu_stat { enum kvm_exit_types { MMIO_EXITS, - DCR_EXITS, SIGNAL_EXITS, ITLB_REAL_MISS_EXITS, ITLB_VIRT_MISS_EXITS, @@ -601,8 +599,6 @@ struct kvm_vcpu_arch { u8 io_gpr; /* GPR used as IO source/target */ u8 mmio_is_bigendian; u8 mmio_sign_extend; - u8 dcr_needed; - u8 dcr_is_write; u8 osi_needed; u8 osi_enabled; u8 papr_enabled; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index cbee453..8e36c1e 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -41,7 +41,6 @@ enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with MMIO request */ - EMULATE_DO_DCR, /* kvm_run filled with DCR request */ EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_AGAIN, /* something went wrong. 
go again */ EMULATE_EXIT_USER, /* emulation requires exit to user-space */ diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index f30948a..b4c89fa 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -51,7 +51,6 @@ unsigned long kvmppc_booke_handlers; struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmio", VCPU_STAT(mmio_exits) }, - { "dcr", VCPU_STAT(dcr_exits) }, { "sig", VCPU_STAT(signal_exits) }, { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, @@ -709,10 +708,6 @@ static int emulation_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) case EMULATE_AGAIN: return RESUME_GUEST; - case EMULATE_DO_DCR: - run->exit_reason = KVM_EXIT_DCR; - return RESUME_HOST; - case EMULATE_FAIL: printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n", __func__, vcpu->arch.pc, vcpu->arch.last_inst); diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c14ed15..288b4bb 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -743,12 +743,6 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) #endif } -static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, - struct kvm_run *run) -{ - kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, run->dcr.data); -} - static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, struct kvm_run *run) { @@ -945,10 +939,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (!vcpu->mmio_is_write) kvmppc_complete_mmio_load(vcpu, run); vcpu->mmio_needed = 0; - } else if (vcpu->arch.dcr_needed) { - if (!vcpu->arch.dcr_is_write) - kvmppc_complete_dcr_load(vcpu, run); - vcpu->arch.dcr_needed = 0; } else if (vcpu->arch.osi_needed) { u64 *gprs = run->osi.gprs; int i; diff --git a/arch/powerpc/kvm/timing.c b/arch/powerpc/kvm/timing.c index 07b6110..e44d2b2 100644 --- a/arch/powerpc/kvm/timing.c +++ b/arch/powerpc/kvm/timing.c @@ -110,7 +110,6 @@ void kvmppc_update_timing_stats(struct kvm_vcpu *vcpu) static const char *kvm_exit_names[__NUMBER_OF_KVM_EXIT_TYPES] = { [MMIO_EXITS] = "MMIO", - [DCR_EXITS] = "DCR", [SIGNAL_EXITS] = "SIGNAL", [ITLB_REAL_MISS_EXITS] = "ITLBREAL", [ITLB_VIRT_MISS_EXITS] = "ITLBVIRT", diff --git a/arch/powerpc/kvm/timing.h b/arch/powerpc/kvm/timing.h index bf191e7..3123690 100644 --- a/arch/powerpc/kvm/timing.h +++ b/arch/powerpc/kvm/timing.h @@ -63,9 +63,6 @@ static inline void kvmppc_account_exit_stat(struct kvm_vcpu *vcpu, int type) case EMULATED_INST_EXITS: vcpu->stat.emulated_inst_exits++; break; - case DCR_EXITS: - vcpu->stat.dcr_exits++; - break; case DSI_EXITS: vcpu->stat.dsi_exits++; break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 51776ca..f6f24ae 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -162,7 +162,7 @@ struct kvm_pit_config { #define KVM_EXIT_TPR_ACCESS 12 #define KVM_EXIT_S390_SIEIC 13 #define KVM_EXIT_S390_RESET 14 -#define KVM_EXIT_DCR 15 +#define KVM_EXIT_DCR 15 /* deprecated */ #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 @@ -268,7 +268,7 @@ struct kvm_run { __u64 trans_exc_code; __u32 pgm_code; } s390_ucontrol; - /* KVM_EXIT_DCR */ + /* KVM_EXIT_DCR (deprecated) */ struct { __u32 dcrn; __u32 data; -- cgit v0.10.2 From 5a484c7c1efd2c45f8cc726e4d21283a5324e361 Mon Sep 17 00:00:00 2001 From: Bharat Bhushan Date: Wed, 30 Jul 2014 15:03:56 +0530 Subject: KVM: PPC: BOOKEHV: rename e500hv_spr to bookehv_spr These are not specific to e500hv but applicable to bookehv (as per a comment from Scott Wood on my patch "kvm: 
ppc: bookehv: Added wrapper macros for shadow registers") Signed-off-by: Bharat Bhushan Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 8e36c1e..fb86a22 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -539,16 +539,16 @@ static inline bool kvmppc_shared_big_endian(struct kvm_vcpu *vcpu) #endif } -#define SPRNG_WRAPPER_GET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER_GET(reg, bookehv_spr) \ static inline ulong kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ { \ - return mfspr(e500hv_spr); \ + return mfspr(bookehv_spr); \ } \ -#define SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER_SET(reg, bookehv_spr) \ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, ulong val) \ { \ - mtspr(e500hv_spr, val); \ + mtspr(bookehv_spr, val); \ } \ #define SHARED_WRAPPER_GET(reg, size) \ @@ -573,18 +573,18 @@ static inline void kvmppc_set_##reg(struct kvm_vcpu *vcpu, u##size val) \ SHARED_WRAPPER_GET(reg, size) \ SHARED_WRAPPER_SET(reg, size) \ -#define SPRNG_WRAPPER(reg, e500hv_spr) \ - SPRNG_WRAPPER_GET(reg, e500hv_spr) \ - SPRNG_WRAPPER_SET(reg, e500hv_spr) \ +#define SPRNG_WRAPPER(reg, bookehv_spr) \ + SPRNG_WRAPPER_GET(reg, bookehv_spr) \ + SPRNG_WRAPPER_SET(reg, bookehv_spr) \ #ifdef CONFIG_KVM_BOOKE_HV -#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ - SPRNG_WRAPPER(reg, e500hv_spr) \ +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ + SPRNG_WRAPPER(reg, bookehv_spr) \ #else -#define SHARED_SPRNG_WRAPPER(reg, size, e500hv_spr) \ +#define SHARED_SPRNG_WRAPPER(reg, size, bookehv_spr) \ SHARED_WRAPPER(reg, size) \ #endif -- cgit v0.10.2 From 29577fc00ba40a89fc824f030bcc80c583259346 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Wed, 30 Jul 2014 15:25:48 +0200 Subject: KVM: PPC: HV: Remove generic instruction emulation Now that we have properly split load/store instruction emulation and generic instruction emulation, we can move the generic one from kvm.ko to kvm-pr.ko on book3s_64. This reduces the attack surface and amount of code loaded on HV KVM kernels. 
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1ccd7a1..2d590de 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -48,6 +48,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) := \ kvm-pr-y := \ fpu.o \ + emulate.o \ book3s_paired_singles.o \ book3s_pr.o \ book3s_pr_papr.o \ @@ -91,7 +92,6 @@ kvm-book3s_64-module-objs += \ $(KVM)/kvm_main.o \ $(KVM)/eventfd.o \ powerpc.o \ - emulate.o \ emulate_loadstore.o \ book3s.o \ book3s_64_vio.o \ diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index e1357cd..a674f09 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -291,6 +291,26 @@ TRACE_EVENT(kvm_unmap_hva, TP_printk("unmap hva 0x%lx\n", __entry->hva) ); +TRACE_EVENT(kvm_ppc_instr, + TP_PROTO(unsigned int inst, unsigned long _pc, unsigned int emulate), + TP_ARGS(inst, _pc, emulate), + + TP_STRUCT__entry( + __field( unsigned int, inst ) + __field( unsigned long, pc ) + __field( unsigned int, emulate ) + ), + + TP_fast_assign( + __entry->inst = inst; + __entry->pc = _pc; + __entry->emulate = emulate; + ), + + TP_printk("inst %u pc 0x%lx emulate %u\n", + __entry->inst, __entry->pc, __entry->emulate) +); + #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ -- cgit v0.10.2 From 8e6afa36e754be84b468d7df9e5aa71cf4003f3b Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 31 Jul 2014 10:21:59 +0200 Subject: KVM: PPC: PR: Handle FSCR feature deselects We handle FSCR feature bits (well, really only TAR today) lazily, when the guest starts using them. So when a guest activates the bit and later uses that feature, we enable it for real in hardware. However, when the guest stops using that bit, we don't stop setting it in hardware. That means we can potentially lose a trap that the guest expects to happen because it thinks a feature is not active. This patch adds support for dropping TAR when the guest turns it off in FSCR. While at it, it also restricts FSCR access to 64bit systems - 32bit ones don't have it. 
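The guest-visible sequence this fixes, sketched from the guest side (illustrative only):

	/* Guest deselects the TAR facility in FSCR ... */
	mtspr(SPRN_FSCR, mfspr(SPRN_FSCR) & ~FSCR_TAR);

	/*
	 * ... and now expects TAR accesses to trap with a facility
	 * unavailable interrupt. Without dropping the shadow TAR in
	 * kvmppc_set_fscr(), this access would silently succeed instead.
	 */
	mtspr(SPRN_TAR, 0);
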
Signed-off-by: Alexander Graf diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 6166791..6acf0c2 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -182,6 +182,7 @@ extern long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); extern void kvmppc_update_lpcr(struct kvm *kvm, unsigned long lpcr, unsigned long mask); +extern void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr); extern void kvmppc_entry_trampoline(void); extern void kvmppc_hv_entry_trampoline(void); diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index 84fddcd..5a2bc4b 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -449,10 +449,10 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_GQR7: to_book3s(vcpu)->gqr[sprn - SPRN_GQR0] = spr_val; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: - vcpu->arch.fscr = spr_val; + kvmppc_set_fscr(vcpu, spr_val); break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: vcpu->arch.bescr = spr_val; break; @@ -593,10 +593,10 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val case SPRN_GQR7: *spr_val = to_book3s(vcpu)->gqr[sprn - SPRN_GQR0]; break; +#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_FSCR: *spr_val = vcpu->arch.fscr; break; -#ifdef CONFIG_PPC_BOOK3S_64 case SPRN_BESCR: *spr_val = vcpu->arch.bescr; break; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index e7a1fa2..faffb27 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -871,6 +871,15 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, ulong fac) return RESUME_GUEST; } + +void kvmppc_set_fscr(struct kvm_vcpu *vcpu, u64 fscr) +{ + if ((vcpu->arch.fscr & FSCR_TAR) && !(fscr & FSCR_TAR)) { + /* TAR got dropped, drop it in shadow too */ + kvmppc_giveup_fac(vcpu, FSCR_TAR_LG); + } + vcpu->arch.fscr = fscr; +} #endif int kvmppc_handle_exit_pr(struct kvm_run *run, struct kvm_vcpu *vcpu, -- cgit v0.10.2